From e94e51f4a9fc531c4002b10c0a4fbe1f2d71bab4 Mon Sep 17 00:00:00 2001
From: Sam
Date: Sat, 8 Nov 2025 22:09:08 -0600
Subject: [PATCH] Add scripts for converting and plotting metrics from JSON to CSV

---
 scripts/convert_metrics.py | 121 ++++++++++++++++++++++++++++++++
 scripts/plot_metrics.py    | 138 +++++++++++++++++++++++++++++++++++++
 2 files changed, 259 insertions(+)
 create mode 100644 scripts/convert_metrics.py
 create mode 100644 scripts/plot_metrics.py

diff --git a/scripts/convert_metrics.py b/scripts/convert_metrics.py
new file mode 100644
--- /dev/null
+++ b/scripts/convert_metrics.py
@@ -0,0 +1,121 @@
+# python
+"""
+convert_metrics.py
+
+Flatten metric JSON files into CSV rows, one row per metric record.
+
+Usage examples:
+  python convert_metrics.py --input-dir output_test
+  python convert_metrics.py --input-dir output_test --out combined_metrics.csv --per-file
+"""
+
+import argparse
+import json
+from pathlib import Path
+from typing import Any, Dict, List
+import pandas as pd
+
+
+def flatten(d: Dict[str, Any], parent_key: str = "", sep: str = "_") -> Dict[str, Any]:
+    """Flatten nested dict into single-level dict with keys joined by `sep`.
+
+    Only nested dicts are expanded; every other value is stored unchanged.
+    """
+    items: Dict[str, Any] = {}
+    for k, v in d.items():
+        new_key = f"{parent_key}{sep}{k}" if parent_key else k
+        if isinstance(v, dict):
+            items.update(flatten(v, new_key, sep=sep))
+        else:
+            items[new_key] = v
+    return items
+
+
+def is_metrics_file(path: Path) -> bool:
+    """Quick heuristic: filename contains 'metrics' or JSON has collection_type == metrics."""
+    if "metrics" in path.name.lower():
+        return True
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # Unreadable, mis-encoded or malformed JSON cannot be a metrics file.
+        # (JSONDecodeError and UnicodeDecodeError both subclass ValueError.)
+        return False
+    entries = data if isinstance(data, list) else [data]
+    return any(
+        isinstance(item, dict) and item.get("collection_type") == "metrics"
+        for item in entries
+    )
+
+
+def load_metric_records(path: Path) -> List[Dict[str, Any]]:
+    """Load JSON file and return list of metric records (handles single dict or list).
+
+    Raises ValueError if the top-level JSON value is neither a dict nor a list.
+    """
+    data = json.loads(path.read_text(encoding="utf-8"))
+    if isinstance(data, list):
+        return data
+    if isinstance(data, dict):
+        return [data]
+    raise ValueError(f"Unsupported JSON structure in {path}")
+
+
+def main():
+    """Find metric JSON files under --input-dir and write them out as CSV."""
+    parser = argparse.ArgumentParser(description="Convert metric JSON files to CSV")
+    parser.add_argument("--input-dir", "-i", type=str, default="output_test", help="Directory to search for metric files")
+    parser.add_argument("--out", "-o", type=str, default="metrics_combined.csv", help="Output CSV path for combined metrics")
+    parser.add_argument("--recursive", "-r", action="store_true", help="Search recursively")
+    parser.add_argument("--per-file", action="store_true", help="Also write one CSV per metric file (same folder)")
+    args = parser.parse_args()
+
+    base = Path(args.input_dir)
+    if not base.is_dir():
+        raise SystemExit(f"Input directory does not exist or is not a directory: {base}")
+
+    pattern = "**/*.json" if args.recursive else "*.json"
+    json_files = sorted(base.glob(pattern))
+
+    rows: List[Dict[str, Any]] = []
+    processed = 0
+    for p in json_files:
+        if not p.is_file():
+            continue
+        if not is_metrics_file(p):
+            continue
+        try:
+            records = load_metric_records(p)
+        except Exception as e:
+            print(f"Skipping {p} (failed to parse): {e}")
+            continue
+
+        per_file_rows: List[Dict[str, Any]] = []
+        for rec in records:
+            if not isinstance(rec, dict):
+                # flatten() needs a mapping; a stray scalar or list entry must not abort the run.
+                print(f"Skipping non-object record in {p}: {rec!r}")
+                continue
+            flat = flatten(rec)
+            # add provenance
+            flat["_source_file"] = str(p)
+            per_file_rows.append(flat)
+            rows.append(flat)
+
+        if args.per_file and per_file_rows:
+            df_pf = pd.DataFrame(per_file_rows)
+            out_path = p.with_suffix(".csv")
+            df_pf.to_csv(out_path, index=False)
+        processed += 1
+
+    if not rows:
+        print("No metric files found.")
+        return
+
+    df = pd.DataFrame(rows)
+    df.to_csv(args.out, index=False)
+    print(f"Wrote combined CSV to {args.out} ({len(df)} rows) from {processed} metric files.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/plot_metrics.py b/scripts/plot_metrics.py
new file mode 100644
--- /dev/null
+++ b/scripts/plot_metrics.py
@@ -0,0 +1,138 @@
+# python
+"""
+plot_metrics.py
+
+Plot selected metric columns against a time column from a metrics CSV.
+
+Usage examples:
+  python plot_metrics.py --csv metrics_combined.csv
+  python plot_metrics.py --csv metrics_combined.csv --time-col tick --out myplot.png
+  python plot_metrics.py --csv metrics_combined.csv --cols entity_counts_cells,entity_counts_food
+"""
+
+import argparse
+from pathlib import Path
+import sys
+
+import pandas as pd
+import matplotlib.pyplot as plt
+
+
+COMMON_TIME_COLS = ["tick", "time", "step", "tick_number", "t"]
+DEFAULT_PLOT_COLS = ["entity_counts_cells", "entity_counts_food"]
+# Candidates shorter than this only match exactly: a substring test for "t"
+# would accept almost any column name.
+MIN_SUBSTRING_LEN = 3
+
+
+def find_column(df: pd.DataFrame, candidates):
+    """Return the column of `df` that best matches `candidates`, or None.
+
+    Matching is case-insensitive. An exact match on any candidate wins over
+    every substring match; within each pass, earlier candidates win.
+    """
+    cols = {c.lower(): c for c in df.columns}
+    lowered = [cand.lower() for cand in candidates]
+    # pass 1: exact match
+    for cand_l in lowered:
+        if cand_l in cols:
+            return cols[cand_l]
+    # pass 2: substring match
+    for cand_l in lowered:
+        if len(cand_l) < MIN_SUBSTRING_LEN:
+            continue
+        for k, orig in cols.items():
+            if cand_l in k:
+                return orig
+    return None
+
+
+def main():
+    """Parse CLI arguments, load the CSV and write the plot image."""
+    p = argparse.ArgumentParser(description="Plot entity counts over time from a metrics CSV")
+    p.add_argument("--csv", "-c", type=str, default="metrics_combined.csv", help="Path to CSV file")
+    p.add_argument("--time-col", "-t", type=str, default=None, help="Name of the time column (optional)")
+    p.add_argument("--cols", type=str, default=None, help="Comma-separated column names to plot (default: entity_counts_cells,entity_counts_food)")
+    p.add_argument("--out", "-o", type=str, default="metrics_counts_plot.png", help="Output image path")
+    args = p.parse_args()
+
+    csv_path = Path(args.csv)
+    if not csv_path.exists():
+        print(f"CSV not found: {csv_path}", file=sys.stderr)
+        sys.exit(1)
+
+    df = pd.read_csv(csv_path)
+
+    # detect time column
+    time_col = None
+    if args.time_col:
+        if args.time_col in df.columns:
+            time_col = args.time_col
+        else:
+            print(f"Specified time column `{args.time_col}` not found in CSV columns.", file=sys.stderr)
+            sys.exit(1)
+    else:
+        time_col = find_column(df, COMMON_TIME_COLS)
+        if time_col is None:
+            print("Could not auto-detect a time column. Provide one with `--time-col`.", file=sys.stderr)
+            sys.exit(1)
+
+    # determine plot columns
+    if args.cols:
+        cols = [c.strip() for c in args.cols.split(",") if c.strip()]
+        missing = [c for c in cols if c not in df.columns]
+        if missing:
+            print(f"Columns not found in CSV: {missing}", file=sys.stderr)
+            sys.exit(1)
+    else:
+        cols = []
+        for want in DEFAULT_PLOT_COLS:
+            found = find_column(df, [want])
+            if found:
+                cols.append(found)
+        if not cols:
+            print(f"Could not find default columns `{DEFAULT_PLOT_COLS}`. Provide `--cols` explicitly.", file=sys.stderr)
+            sys.exit(1)
+
+    # Drop duplicates and the time column itself: selecting a label twice makes
+    # df[label] a DataFrame, which pd.to_numeric below rejects.
+    cols = [c for c in dict.fromkeys(cols) if c != time_col]
+    if not cols:
+        print("No columns left to plot besides the time column.", file=sys.stderr)
+        sys.exit(1)
+
+    # prepare data
+    df = df[[time_col] + cols].copy()
+    df[time_col] = pd.to_numeric(df[time_col], errors="coerce")
+    for c in cols:
+        df[c] = pd.to_numeric(df[c], errors="coerce")
+    df = df.dropna(subset=[time_col])
+    if df.empty:
+        print("No numeric time values found after cleaning.", file=sys.stderr)
+        sys.exit(1)
+
+    df = df.sort_values(by=time_col)
+
+    # plot
+    plt.figure(figsize=(10, 5))
+    for c in cols:
+        plt.plot(df[time_col], df[c], label=c, linewidth=2)
+    plt.xlabel(time_col)
+    plt.ylabel("Count")
+    plt.title("Entity counts over time")
+    plt.grid(True, linestyle="--", alpha=0.4)
+    plt.legend()
+    plt.tight_layout()
+
+    out_path = Path(args.out)
+    plt.savefig(out_path, dpi=150)
+    print(f"Wrote plot to `{out_path}`")
+    # also show interactively if running in an environment with a display
+    try:
+        plt.show()
+    except Exception:
+        pass
+
+
+if __name__ == "__main__":
+    main()