# python
"""
convert_metrics.py

Collect metric JSON files from a directory and convert them to CSV.

Usage examples:
    python convert_metrics.py --input-dir output_test
    python convert_metrics.py --input-dir output_test --out combined_metrics.csv --per-file
"""
import argparse
import json
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence

import pandas as pd

# JSON interchange files are UTF-8; "utf-8-sig" decodes plain UTF-8 and also
# strips a leading byte-order mark, which json.loads() would otherwise reject.
_JSON_ENCODING = "utf-8-sig"


def flatten(d: Dict[str, Any], parent_key: str = "", sep: str = "_") -> Dict[str, Any]:
    """Flatten a nested dict into a single-level dict.

    Keys of nested dicts are joined to their parent key with `sep`. Only
    dict values are descended into; every other value (lists included) is
    stored unchanged. If two paths produce the same joined key, the one
    visited later overwrites the earlier one.

    Args:
        d: The (possibly nested) dict to flatten.
        parent_key: Prefix for every key of `d`; empty at the top level.
        sep: Separator placed between parent and child keys.

    Returns:
        A new single-level dict.
    """
    items: Dict[str, Any] = {}
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.update(flatten(v, new_key, sep=sep))
        else:
            items[new_key] = v
    return items


def is_metrics_file(path: Path) -> bool:
    """Return True if `path` looks like a metrics file.

    Heuristic: the filename contains 'metrics' (case-insensitive), or the
    JSON content is a dict with collection_type == "metrics", or a list
    containing at least one such dict.

    Files that cannot be read or parsed are reported as not being metrics
    files rather than raising.
    """
    if "metrics" in path.name.lower():
        return True
    try:
        data = json.loads(path.read_text(encoding=_JSON_ENCODING))
    except (OSError, ValueError, RecursionError):
        # ValueError covers both invalid JSON and undecodable bytes;
        # RecursionError covers pathologically deep nesting.
        return False
    if isinstance(data, dict):
        return data.get("collection_type") == "metrics"
    if isinstance(data, list):
        return any(
            isinstance(item, dict) and item.get("collection_type") == "metrics"
            for item in data
        )
    return False


def load_metric_records(path: Path) -> List[Dict[str, Any]]:
    """Load a JSON file and return its metric records as a list.

    A top-level list is returned as-is (its items are not validated); a
    top-level dict is wrapped in a one-element list.

    Raises:
        ValueError: If the JSON is invalid or its top-level value is
            neither a list nor a dict.
        OSError: If the file cannot be read.
    """
    data = json.loads(path.read_text(encoding=_JSON_ENCODING))
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        return [data]
    raise ValueError(f"Unsupported JSON structure in {path}")


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Command-line entry point: convert metric JSON files to CSV.

    Args:
        argv: Argument list to parse. Defaults to None, in which case
            argparse reads the process's command-line arguments.

    Raises:
        SystemExit: If the input path does not exist or is not a directory
            (argparse also raises it on invalid arguments).
    """
    parser = argparse.ArgumentParser(description="Convert metric JSON files to CSV")
    parser.add_argument(
        "--input-dir", "-i", type=str, default="output_test",
        help="Directory to search for metric files",
    )
    parser.add_argument(
        "--out", "-o", type=str, default="metrics_combined.csv",
        help="Output CSV path for combined metrics",
    )
    parser.add_argument("--recursive", "-r", action="store_true", help="Search recursively")
    parser.add_argument(
        "--per-file", action="store_true",
        help="Also write one CSV per metric file (same folder)",
    )
    args = parser.parse_args(argv)

    base = Path(args.input_dir)
    if not base.exists():
        raise SystemExit(f"Input directory does not exist: {base}")
    if not base.is_dir():
        # Globbing a regular file yields nothing, which would otherwise be
        # reported misleadingly as "No metric files found."
        raise SystemExit(f"Input path is not a directory: {base}")

    pattern = "**/*.json" if args.recursive else "*.json"
    json_files = sorted(base.glob(pattern))

    rows: List[Dict[str, Any]] = []
    processed = 0

    for p in json_files:
        if not p.is_file() or not is_metrics_file(p):
            continue
        try:
            records = load_metric_records(p)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole run.
            print(f"Skipping {p} (failed to parse): {e}")
            continue

        per_file_rows: List[Dict[str, Any]] = []
        for rec in records:
            if not isinstance(rec, dict):
                # A JSON list may hold scalars or nested lists, which
                # cannot be flattened into a row.
                print(f"Skipping non-object record in {p} ({type(rec).__name__})")
                continue
            flat = flatten(rec)
            # add provenance
            flat["_source_file"] = str(p)
            per_file_rows.append(flat)
        rows.extend(per_file_rows)

        if args.per_file and per_file_rows:
            # Written next to the source file, with the suffix swapped to .csv.
            pd.DataFrame(per_file_rows).to_csv(p.with_suffix(".csv"), index=False)

        processed += 1

    if not rows:
        print("No metric files found.")
        return

    df = pd.DataFrame(rows)
    df.to_csv(args.out, index=False)
    print(f"Wrote combined CSV to {args.out} ({len(df)} rows) from {processed} metric files.")


if __name__ == "__main__":
    main()