Add scripts for converting and plotting metrics from JSON to CSV
This commit is contained in:
parent
d90240391c
commit
e94e51f4a9
107
scripts/convert_metrics.py
Normal file
107
scripts/convert_metrics.py
Normal file
@ -0,0 +1,107 @@
|
||||
# python
|
||||
"""
|
||||
convert_metrics.py
|
||||
|
||||
Usage examples:
|
||||
python convert_metrics.py --input-dir output_test
|
||||
python convert_metrics.py --input-dir output_test --out combined_metrics.csv --per-file
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def flatten(d: Dict[str, Any], parent_key: str = "", sep: str = "_") -> Dict[str, Any]:
    """Collapse a nested mapping into a single-level dict.

    Keys of nested dicts are prefixed with the path of their ancestors,
    joined by ``sep`` (``{"a": {"b": 1}}`` becomes ``{"a_b": 1}``).  Only
    ``dict`` values are descended into; every other value, lists included,
    is copied through unchanged.  Key order follows the input order.

    Args:
        d: Mapping to flatten.
        parent_key: Prefix applied to every key of ``d``; empty for the
            top-level call.
        sep: Separator placed between path components.

    Returns:
        A new flat dict; ``d`` itself is not modified.
    """
    flat: Dict[str, Any] = {}
    for name, value in d.items():
        # Without a prefix the key is used as-is (no leading separator).
        path = name if not parent_key else f"{parent_key}{sep}{name}"
        if not isinstance(value, dict):
            flat[path] = value
            continue
        flat.update(flatten(value, path, sep=sep))
    return flat
|
||||
|
||||
|
||||
def is_metrics_file(path: Path) -> bool:
    """Return True when ``path`` looks like a metrics JSON file.

    A file qualifies when its name contains ``"metrics"`` (case-insensitive;
    the content is not read in that case), or when its JSON content is an
    object -- or a list holding at least one object -- whose
    ``collection_type`` equals ``"metrics"``.

    Unreadable or malformed files are reported as non-metrics instead of
    raising, so a directory scan is never aborted by one bad file.
    """
    if "metrics" in path.name.lower():
        return True
    try:
        # JSON interchange is UTF-8; do not depend on the platform locale.
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: file cannot be read.  ValueError: covers JSONDecodeError
        # and UnicodeDecodeError.  RecursionError: pathologically deep JSON.
        return False
    # Treat a single top-level value like a one-element list so both shapes
    # share the same check.
    candidates = data if isinstance(data, list) else [data]
    return any(
        isinstance(item, dict) and item.get("collection_type") == "metrics"
        for item in candidates
    )
|
||||
|
||||
|
||||
def load_metric_records(path: Path) -> List[Dict[str, Any]]:
    """Load a JSON file and return its metric records as a list of dicts.

    A top-level JSON object is returned as a one-element list.  A top-level
    array is returned as is, provided every element is an object.

    Args:
        path: JSON file to read (decoded as UTF-8).

    Returns:
        The records found in the file; an empty array yields an empty list.

    Raises:
        OSError: If the file cannot be read.
        ValueError: If the content is not valid UTF-8 JSON
            (``json.JSONDecodeError`` and ``UnicodeDecodeError`` are both
            subclasses), if the top-level value is neither an object nor an
            array, or if an array holds a non-object element.
    """
    # JSON interchange is UTF-8; do not depend on the platform locale.
    data = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(data, dict):
        return [data]
    if isinstance(data, list):
        # Callers flatten each record as a dict, so reject anything else here
        # with a clear message instead of failing later with AttributeError.
        for index, record in enumerate(data):
            if not isinstance(record, dict):
                raise ValueError(
                    f"Unsupported JSON structure in {path}: "
                    f"record {index} is {type(record).__name__}, expected an object"
                )
        return data
    raise ValueError(f"Unsupported JSON structure in {path}")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: merge metric JSON files into one CSV.

    Scans ``--input-dir`` for ``*.json`` files (recursively with
    ``--recursive``), keeps those accepted by ``is_metrics_file``, flattens
    every record and writes all rows to ``--out``.  Each row gets a
    ``_source_file`` column naming the file it came from.  With
    ``--per-file`` an additional CSV is written next to each metric file,
    using the same name with a ``.csv`` suffix.

    Files that cannot be parsed, or that contain records which are not JSON
    objects, are reported on stdout and skipped; they never abort the run.

    Raises:
        SystemExit: If the input directory does not exist.
    """
    parser = argparse.ArgumentParser(description="Convert metric JSON files to CSV")
    parser.add_argument("--input-dir", "-i", type=str, default="output_test", help="Directory to search for metric files")
    parser.add_argument("--out", "-o", type=str, default="metrics_combined.csv", help="Output CSV path for combined metrics")
    parser.add_argument("--recursive", "-r", action="store_true", help="Search recursively")
    parser.add_argument("--per-file", action="store_true", help="Also write one CSV per metric file (same folder)")
    args = parser.parse_args()

    base = Path(args.input_dir)
    if not base.exists():
        raise SystemExit(f"Input directory does not exist: {base}")

    pattern = "**/*.json" if args.recursive else "*.json"
    # Sorted so the row order of the combined CSV is reproducible.
    json_files = sorted(base.glob(pattern))

    rows: List[Dict[str, Any]] = []
    processed = 0
    for p in json_files:
        if not p.is_file():
            continue
        if not is_metrics_file(p):
            continue
        try:
            records = load_metric_records(p)
        except Exception as e:
            # Best-effort batch conversion: report the file and move on.
            print(f"Skipping {p} (failed to parse): {e}")
            continue

        # flatten() needs dict records; a JSON array may hold other values
        # (numbers, strings, nested arrays), which would otherwise raise
        # AttributeError here and abort the whole run.
        if not all(isinstance(rec, dict) for rec in records):
            print(f"Skipping {p} (failed to parse): records must be JSON objects")
            continue

        per_file_rows: List[Dict[str, Any]] = []
        for rec in records:
            flat = flatten(rec)
            # add provenance
            flat["_source_file"] = str(p)
            per_file_rows.append(flat)
        rows.extend(per_file_rows)

        if args.per_file and per_file_rows:
            df_pf = pd.DataFrame(per_file_rows)
            out_path = p.with_suffix(".csv")
            df_pf.to_csv(out_path, index=False)
        processed += 1

    if not rows:
        print("No metric files found.")
        return

    df = pd.DataFrame(rows)
    df.to_csv(args.out, index=False)
    print(f"Wrote combined CSV to {args.out} ({len(df)} rows) from {processed} metric files.")
|
||||
|
||||
|
||||
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
118
scripts/plot_metrics.py
Normal file
118
scripts/plot_metrics.py
Normal file
@ -0,0 +1,118 @@
|
||||
# python
|
||||
"""
|
||||
plot_metrics.py
|
||||
|
||||
Usage examples:
|
||||
python plot_metrics.py --csv metrics_combined.csv
|
||||
python plot_metrics.py --csv metrics_combined.csv --time-col tick --out myplot.png
|
||||
python plot_metrics.py --csv metrics_combined.csv --cols entity_counts_cells,entity_counts_food
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
# Candidate names tried, in order, when auto-detecting the time column.
# find_column() also accepts substring matches, so order matters, and the
# one-letter "t" entry matches any column whose name contains a "t".
COMMON_TIME_COLS = ["tick", "time", "step", "tick_number", "t"]
# Columns plotted when `--cols` is not given.
# NOTE(review): these look like flattened `entity_counts.*` keys from the
# metrics JSON -- confirm against the converter's output.
DEFAULT_PLOT_COLS = ["entity_counts_cells", "entity_counts_food"]
|
||||
|
||||
|
||||
def find_column(df: pd.DataFrame, candidates):
    """Return the first column of ``df`` that matches one of ``candidates``.

    Matching is case-insensitive.  Candidates are tried in the given order;
    for each one an exact name match wins, otherwise the first column whose
    name contains the candidate as a substring is used.

    Returns:
        The column name as it appears in ``df``, or ``None`` if no candidate
        matches any column.
    """
    by_lower = {name.lower(): name for name in df.columns}
    for candidate in candidates:
        needle = candidate.lower()
        # An exact hit beats any partial hit for the same candidate.
        if needle in by_lower:
            return by_lower[needle]
        partial = [original for lowered, original in by_lower.items() if needle in lowered]
        if partial:
            return partial[0]
    return None
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: plot metric columns over time from a CSV.

    Reads ``--csv``, picks the time column (``--time-col`` or auto-detected
    from ``COMMON_TIME_COLS``), picks the series to plot (``--cols`` or the
    ``DEFAULT_PLOT_COLS`` found in the file), coerces everything to numbers,
    drops rows without a numeric time value, and saves a line plot to
    ``--out``.  The plot is also shown interactively on a best-effort basis.

    Any validation failure prints a message to stderr and exits with
    status 1.
    """
    p = argparse.ArgumentParser(description="Plot entity counts over time from a metrics CSV")
    p.add_argument("--csv", "-c", type=str, default="metrics_combined.csv", help="Path to CSV file")
    p.add_argument("--time-col", "-t", type=str, default=None, help="Name of the time column (optional)")
    p.add_argument("--cols", type=str, default=None, help="Comma-separated column names to plot (default: entity_counts_cells,entity_counts_food)")
    p.add_argument("--out", "-o", type=str, default="metrics_counts_plot.png", help="Output image path")
    args = p.parse_args()

    csv_path = Path(args.csv)
    if not csv_path.exists():
        print(f"CSV not found: {csv_path}", file=sys.stderr)
        sys.exit(1)

    df = pd.read_csv(csv_path)

    # detect time column
    time_col = None
    if args.time_col:
        if args.time_col in df.columns:
            time_col = args.time_col
        else:
            print(f"Specified time column `{args.time_col}` not found in CSV columns.", file=sys.stderr)
            sys.exit(1)
    else:
        time_col = find_column(df, COMMON_TIME_COLS)
        if time_col is None:
            print("Could not auto-detect a time column. Provide one with `--time-col`.", file=sys.stderr)
            sys.exit(1)

    # determine plot columns
    if args.cols:
        cols = [c.strip() for c in args.cols.split(",") if c.strip()]
        missing = [c for c in cols if c not in df.columns]
        if missing:
            print(f"Columns not found in CSV: {missing}", file=sys.stderr)
            sys.exit(1)
    else:
        cols = []
        for want in DEFAULT_PLOT_COLS:
            found = find_column(df, [want])
            if found:
                cols.append(found)
        if not cols:
            print(f"Could not find default columns `{DEFAULT_PLOT_COLS}`. Provide `--cols` explicitly.", file=sys.stderr)
            sys.exit(1)

    # Drop repeated names while keeping order.  Selecting a label twice
    # (a repeated `--cols` entry, or a series that is also the time column)
    # would make df[name] a DataFrame and break pd.to_numeric below.
    unique_cols = []
    for c in cols:
        if c not in unique_cols:
            unique_cols.append(c)
    cols = unique_cols
    selected = [time_col] + [c for c in cols if c != time_col]

    # prepare data
    df = df[selected].copy()
    for name in selected:
        # Non-numeric cells become NaN instead of raising.
        df[name] = pd.to_numeric(df[name], errors="coerce")
    df = df.dropna(subset=[time_col])
    if df.empty:
        print("No numeric time values found after cleaning.", file=sys.stderr)
        sys.exit(1)

    df = df.sort_values(by=time_col)

    # plot
    plt.figure(figsize=(10, 5))
    for c in cols:
        plt.plot(df[time_col], df[c], label=c, linewidth=2)
    plt.xlabel(time_col)
    plt.ylabel("Count")
    plt.title("Entity counts over time")
    plt.grid(True, linestyle="--", alpha=0.4)
    plt.legend()
    plt.tight_layout()

    out_path = Path(args.out)
    plt.savefig(out_path, dpi=150)
    print(f"Wrote plot to `{out_path}`")
    # also show interactively if running in an environment with a display
    try:
        plt.show()
    except Exception:
        # Deliberate best-effort: the image is already saved, so a failing
        # or missing display backend must not turn success into an error.
        pass
|
||||
|
||||
|
||||
# Run the plotting CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
x
Reference in New Issue
Block a user