# python
"""
|
|
convert_metrics.py
|
|
|
|
Usage examples:
|
|
python convert_metrics.py --input-dir output_test
|
|
python convert_metrics.py --input-dir output_test --out combined_metrics.csv --per-file
|
|
"""
|
|
|
|
import argparse
import json
from pathlib import Path
from typing import Any, Dict, List

import pandas as pd


def flatten(d: Dict[str, Any], parent_key: str = "", sep: str = "_") -> Dict[str, Any]:
    """Flatten nested dict into single-level dict with keys joined by `sep`.

    Nested dictionaries are recursed into; non-dict leaves are copied over
    under their compound key (e.g. {"a": {"b": 1}} -> {"a_b": 1}).
    """
    flat: Dict[str, Any] = {}
    for key, value in d.items():
        compound = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, dict):
            # Recurse, carrying the compound key as the new prefix.
            flat.update(flatten(value, compound, sep=sep))
        else:
            flat[compound] = value
    return flat
def is_metrics_file(path: Path) -> bool:
    """Quick heuristic: filename contains 'metrics' or JSON has collection_type == metrics.

    Args:
        path: Candidate JSON file.

    Returns:
        True when the file looks like a metrics file; False otherwise,
        including when the file cannot be read or parsed (best-effort filter).
    """
    # Cheap name check first: avoids reading the file at all.
    if "metrics" in path.name.lower():
        return True
    try:
        # Explicit UTF-8 so the heuristic does not depend on the platform's
        # default locale encoding.
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: unreadable file; ValueError: invalid JSON or bad encoding
        # (JSONDecodeError and UnicodeDecodeError are ValueError subclasses).
        # Either way this is simply "not a metrics file", not an error.
        return False
    if isinstance(data, dict):
        return data.get("collection_type") == "metrics"
    if isinstance(data, list):
        return any(
            isinstance(item, dict) and item.get("collection_type") == "metrics"
            for item in data
        )
    return False
def load_metric_records(path: Path) -> List[Dict[str, Any]]:
    """Load JSON file and return list of metric records (handles single dict or list)."""
    data = json.loads(path.read_text())
    if isinstance(data, list):
        # Already a list of records — pass through as-is.
        return data
    if isinstance(data, dict):
        # Single record: wrap so callers always iterate a list.
        return [data]
    raise ValueError(f"Unsupported JSON structure in {path}")
def main():
    """CLI entry point: find metric JSON files, flatten them, and write CSV.

    Writes one combined CSV (``--out``) and, with ``--per-file``, a sibling
    CSV next to each source JSON file.
    """
    parser = argparse.ArgumentParser(description="Convert metric JSON files to CSV")
    parser.add_argument("--input-dir", "-i", type=str, default="output_test", help="Directory to search for metric files")
    parser.add_argument("--out", "-o", type=str, default="metrics_combined.csv", help="Output CSV path for combined metrics")
    parser.add_argument("--recursive", "-r", action="store_true", help="Search recursively")
    parser.add_argument("--per-file", action="store_true", help="Also write one CSV per metric file (same folder)")
    args = parser.parse_args()

    base = Path(args.input_dir)
    if not base.exists():
        raise SystemExit(f"Input directory does not exist: {base}")

    # Sorted for deterministic row order across runs.
    candidates = sorted(base.glob("**/*.json" if args.recursive else "*.json"))

    combined: List[Dict[str, Any]] = []
    processed = 0
    for json_path in candidates:
        if not (json_path.is_file() and is_metrics_file(json_path)):
            continue
        try:
            records = load_metric_records(json_path)
        except Exception as e:
            print(f"Skipping {json_path} (failed to parse): {e}")
            continue

        file_rows: List[Dict[str, Any]] = []
        for record in records:
            row = flatten(record)
            # Provenance: remember which file each row came from.
            row["_source_file"] = str(json_path)
            file_rows.append(row)
        combined.extend(file_rows)

        if args.per_file and file_rows:
            # Sibling CSV next to the source JSON (same stem, .csv suffix).
            pd.DataFrame(file_rows).to_csv(json_path.with_suffix(".csv"), index=False)
        processed += 1

    if not combined:
        print("No metric files found.")
        return

    df = pd.DataFrame(combined)
    df.to_csv(args.out, index=False)
    print(f"Wrote combined CSV to {args.out} ({len(df)} rows) from {processed} metric files.")
if __name__ == "__main__":
    # Run the CLI only when executed as a script, not when imported.
    main()