Add scripts for converting metrics from JSON to CSV and plotting them

2025-11-08 22:09:08 -06:00 · 2025-11-08 22:09:08 -06:00 · e94e51f4a9
commit e94e51f4a9
parent d90240391c
2 changed files with 225 additions and 0 deletions
--- a/scripts/convert_metrics.py
+++ b/scripts/convert_metrics.py
@ -0,0 +1,107 @@
+# python
+"""
+convert_metrics.py
+
+Usage examples:
+  python convert_metrics.py --input-dir output_test
+  python convert_metrics.py --input-dir output_test --out combined_metrics.csv --per-file
+"""
+
+import argparse
+import json
+from pathlib import Path
+from typing import Any, Dict, List
+import pandas as pd
+
+
+def flatten(d: Dict[str, Any], parent_key: str = "", sep: str = "_") -> Dict[str, Any]:
+    """Collapse a nested mapping into a single level.
+
+    Keys of nested dicts are prefixed with their ancestors' keys joined by
+    ``sep``; ``parent_key`` seeds that prefix. Values that are not dicts
+    (lists included) are stored unchanged.
+    """
+    flat: Dict[str, Any] = {}
+    for key, value in d.items():
+        path = key if not parent_key else f"{parent_key}{sep}{key}"
+        if not isinstance(value, dict):
+            flat[path] = value
+            continue
+        # Recurse with the accumulated path as the prefix for the children.
+        for sub_path, sub_value in flatten(value, path, sep=sep).items():
+            flat[sub_path] = sub_value
+    return flat
+
+
+def is_metrics_file(path: Path) -> bool:
+    """Return True when ``path`` looks like a metrics JSON file.
+
+    Heuristic, in order:
+      1. the file name contains ``metrics`` (case-insensitive); the content is
+         not read in that case;
+      2. otherwise the file is parsed as JSON and accepted when the top-level
+         dict, or any dict inside a top-level list, has
+         ``collection_type == "metrics"``.
+
+    Files that cannot be read or parsed are reported as non-metrics (False)
+    instead of raising, so one bad file does not abort a directory scan.
+    """
+    if "metrics" in path.name.lower():
+        return True
+    try:
+        # ``utf-8-sig`` reads plain UTF-8 and also strips a leading BOM, which
+        # ``json.loads`` rejects. The locale default encoding used by a bare
+        # ``read_text()`` is not guaranteed to be UTF-8.
+        data = json.loads(path.read_text(encoding="utf-8-sig"))
+    except (OSError, ValueError, RecursionError):
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+        return False
+    candidates = data if isinstance(data, list) else [data]
+    return any(
+        isinstance(item, dict) and item.get("collection_type") == "metrics"
+        for item in candidates
+    )
+
+
+def load_metric_records(path: Path) -> List[Dict[str, Any]]:
+    """Load a JSON file and return its metric records as a list of dicts.
+
+    A top-level dict is returned as a one-element list. For a top-level list,
+    only the dict entries are returned: anything else (numbers, strings, nested
+    lists, null) cannot be flattened into a CSV row and is dropped.
+
+    Raises:
+        ValueError: the top-level JSON value is neither a dict nor a list, or
+            the file is not valid JSON (``json.JSONDecodeError`` is a subclass).
+        OSError: the file cannot be read.
+    """
+    # ``utf-8-sig`` reads plain UTF-8 and also strips a leading BOM; the locale
+    # default encoding used by a bare ``read_text()`` may not be UTF-8.
+    data = json.loads(path.read_text(encoding="utf-8-sig"))
+    if isinstance(data, dict):
+        return [data]
+    if isinstance(data, list):
+        return [record for record in data if isinstance(record, dict)]
+    raise ValueError(f"Unsupported JSON structure in {path}")
+
+
+def main(argv=None):
+    """Command-line entry point: gather metric JSON files into one combined CSV.
+
+    Scans ``--input-dir`` for ``*.json`` files (recursively with ``--recursive``),
+    keeps those accepted by ``is_metrics_file``, flattens every record and writes
+    all rows to ``--out``. With ``--per-file`` a CSV is also written next to each
+    metric file. Each row gets a ``_source_file`` column naming its origin.
+
+    Args:
+        argv: Argument list to parse instead of ``sys.argv[1:]``; ``None``
+            (the default) keeps the normal command-line behaviour.
+
+    Raises:
+        SystemExit: the input path does not exist or is not a directory.
+    """
+    parser = argparse.ArgumentParser(description="Convert metric JSON files to CSV")
+    parser.add_argument("--input-dir", "-i", type=str, default="output_test", help="Directory to search for metric files")
+    parser.add_argument("--out", "-o", type=str, default="metrics_combined.csv", help="Output CSV path for combined metrics")
+    parser.add_argument("--recursive", "-r", action="store_true", help="Search recursively")
+    parser.add_argument("--per-file", action="store_true", help="Also write one CSV per metric file (same folder)")
+    args = parser.parse_args(argv)
+
+    base = Path(args.input_dir)
+    if not base.exists():
+        raise SystemExit(f"Input directory does not exist: {base}")
+    if not base.is_dir():
+        # A non-directory cannot contain metric files; report it instead of
+        # falling through to the misleading "No metric files found." message.
+        raise SystemExit(f"Input path is not a directory: {base}")
+
+    pattern = "**/*.json" if args.recursive else "*.json"
+    json_files = sorted(base.glob(pattern))
+
+    rows: List[Dict[str, Any]] = []
+    processed = 0
+    for p in json_files:
+        if not p.is_file() or not is_metrics_file(p):
+            continue
+        try:
+            records = load_metric_records(p)
+        except Exception as e:
+            # Best effort: one unreadable file must not abort the whole run.
+            print(f"Skipping {p} (failed to parse): {e}")
+            continue
+
+        per_file_rows: List[Dict[str, Any]] = []
+        skipped = 0
+        for rec in records:
+            if not isinstance(rec, dict):
+                # flatten() needs a dict; a stray scalar or list inside the
+                # file would otherwise crash the run with AttributeError.
+                skipped += 1
+                continue
+            flat = flatten(rec)
+            # add provenance
+            flat["_source_file"] = str(p)
+            per_file_rows.append(flat)
+        if skipped:
+            print(f"Ignored {skipped} non-object record(s) in {p}")
+        rows.extend(per_file_rows)
+
+        if args.per_file and per_file_rows:
+            # Per-file CSV sits beside its JSON source: same name, .csv suffix.
+            pd.DataFrame(per_file_rows).to_csv(p.with_suffix(".csv"), index=False)
+        processed += 1
+
+    if not rows:
+        print("No metric files found.")
+        return
+
+    df = pd.DataFrame(rows)
+    df.to_csv(args.out, index=False)
+    print(f"Wrote combined CSV to {args.out} ({len(df)} rows) from {processed} metric files.")
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/plot_metrics.py
+++ b/scripts/plot_metrics.py
@ -0,0 +1,118 @@
+# python
+"""
+plot_metrics.py
+
+Usage examples:
+  python plot_metrics.py --csv metrics_combined.csv
+  python plot_metrics.py --csv metrics_combined.csv --time-col tick --out myplot.png
+  python plot_metrics.py --csv metrics_combined.csv --cols entity_counts_cells,entity_counts_food
+"""
+
+import argparse
+from pathlib import Path
+import sys
+
+import pandas as pd
+import matplotlib.pyplot as plt
+
+
+# Candidate names tried, in this order, when auto-detecting the time column
+# (only used when --time-col is not given).
+COMMON_TIME_COLS = ["tick", "time", "step", "tick_number", "t"]
+# Columns plotted when --cols is not given; each one is looked up with
+# find_column, and those not present in the CSV are left out.
+DEFAULT_PLOT_COLS = ["entity_counts_cells", "entity_counts_food"]
+
+
+def find_column(df: pd.DataFrame, candidates):
+    """Return the name of the first column of ``df`` that matches ``candidates``.
+
+    Matching is case-insensitive and done in two passes so that a precise hit
+    always wins over a fuzzy one:
+
+      1. exact match, trying the candidates in the given order;
+      2. substring match (candidate contained in the column name), again in
+         candidate order and, per candidate, in column order.
+
+    Candidates shorter than two characters (such as ``"t"``) only take part in
+    the exact pass: as substrings they would match almost any column name.
+
+    Args:
+        df: Frame whose column labels are searched.
+        candidates: Iterable of wanted column names, most preferred first.
+
+    Returns:
+        The matching column label exactly as it appears in ``df``, or ``None``
+        when nothing matches.
+    """
+    # str() keeps non-string labels (e.g. integer column names) from crashing.
+    by_lower = {str(col).lower(): col for col in df.columns}
+    wanted = [str(cand).lower() for cand in candidates]
+
+    # Pass 1: exact matches across all candidates.
+    for cand in wanted:
+        if cand in by_lower:
+            return by_lower[cand]
+
+    # Pass 2: substring matches, skipping candidates too short to be meaningful.
+    for cand in wanted:
+        if len(cand) < 2:
+            continue
+        for lowered, original in by_lower.items():
+            if cand in lowered:
+                return original
+    return None
+
+
+def main(argv=None):
+    """Command-line entry point: plot selected metric columns against time.
+
+    Reads the CSV given by ``--csv``, picks the time column (``--time-col`` or
+    auto-detected from ``COMMON_TIME_COLS``) and the columns to plot (``--cols``
+    or ``DEFAULT_PLOT_COLS``), coerces them to numbers, drops rows without a
+    numeric time value, sorts by time and saves a line plot to ``--out``.
+
+    Args:
+        argv: Argument list to parse instead of ``sys.argv[1:]``; ``None``
+            (the default) keeps the normal command-line behaviour.
+
+    Exits with status 1 (message on stderr) when the CSV or a requested column
+    is missing, or when no usable time values remain.
+    """
+    p = argparse.ArgumentParser(description="Plot entity counts over time from a metrics CSV")
+    p.add_argument("--csv", "-c", type=str, default="metrics_combined.csv", help="Path to CSV file")
+    p.add_argument("--time-col", "-t", type=str, default=None, help="Name of the time column (optional)")
+    p.add_argument("--cols", type=str, default=None, help="Comma-separated column names to plot (default: entity_counts_cells,entity_counts_food)")
+    p.add_argument("--out", "-o", type=str, default="metrics_counts_plot.png", help="Output image path")
+    args = p.parse_args(argv)
+
+    csv_path = Path(args.csv)
+    if not csv_path.exists():
+        print(f"CSV not found: {csv_path}", file=sys.stderr)
+        sys.exit(1)
+
+    df = pd.read_csv(csv_path)
+
+    # detect time column
+    if args.time_col:
+        if args.time_col not in df.columns:
+            print(f"Specified time column `{args.time_col}` not found in CSV columns.", file=sys.stderr)
+            sys.exit(1)
+        time_col = args.time_col
+    else:
+        time_col = find_column(df, COMMON_TIME_COLS)
+        if time_col is None:
+            print("Could not auto-detect a time column. Provide one with `--time-col`.", file=sys.stderr)
+            sys.exit(1)
+
+    # determine plot columns
+    if args.cols:
+        cols = [c.strip() for c in args.cols.split(",") if c.strip()]
+        missing = [c for c in cols if c not in df.columns]
+        if missing:
+            print(f"Columns not found in CSV: {missing}", file=sys.stderr)
+            sys.exit(1)
+    else:
+        cols = [found for found in (find_column(df, [want]) for want in DEFAULT_PLOT_COLS) if found]
+        if not cols:
+            print(f"Could not find default columns `{DEFAULT_PLOT_COLS}`. Provide `--cols` explicitly.", file=sys.stderr)
+            sys.exit(1)
+
+    # Drop repeated names while keeping order. Selecting a label twice (for
+    # example the time column also listed in --cols) makes `df[label]` a
+    # DataFrame, which pd.to_numeric cannot handle.
+    cols = list(dict.fromkeys(cols))
+    selected = list(dict.fromkeys([time_col, *cols]))
+
+    # prepare data
+    df = df[selected].copy()
+    for c in selected:
+        # Non-numeric cells become NaN instead of raising.
+        df[c] = pd.to_numeric(df[c], errors="coerce")
+    df = df.dropna(subset=[time_col])
+    if df.empty:
+        print("No numeric time values found after cleaning.", file=sys.stderr)
+        sys.exit(1)
+
+    df = df.sort_values(by=time_col)
+
+    # plot
+    fig = plt.figure(figsize=(10, 5))
+    try:
+        for c in cols:
+            plt.plot(df[time_col], df[c], label=c, linewidth=2)
+        plt.xlabel(time_col)
+        plt.ylabel("Count")
+        plt.title("Entity counts over time")
+        plt.grid(True, linestyle="--", alpha=0.4)
+        plt.legend()
+        plt.tight_layout()
+
+        out_path = Path(args.out)
+        plt.savefig(out_path, dpi=150)
+        print(f"Wrote plot to `{out_path}`")
+        # also show interactively if running in an environment with a display
+        try:
+            plt.show()
+        except Exception as e:
+            # The image is already saved, so a failed preview is not fatal.
+            print(f"Could not display plot interactively: {e}", file=sys.stderr)
+    finally:
+        # Release the figure so repeated calls do not accumulate open figures.
+        plt.close(fig)
+
+
+if __name__ == "__main__":
+    main()