# python
"""
|
|
convert_metrics.py
|
|
|
|
Usage examples:
|
|
python convert_metrics.py --input-dir output_test
|
|
python convert_metrics.py --input-dir output_test --out combined_metrics.csv --per-file
|
|
"""
|
|
|
|
import argparse
import json
from pathlib import Path
from typing import Any, Dict, List

import pandas as pd


def flatten(d: Dict[str, Any], parent_key: str = "", sep: str = "_") -> Dict[str, Any]:
    """Flatten nested dict into single-level dict with keys joined by `sep`.

    Nested dictionaries are recursed into; non-dict leaves are copied over
    under their compound key (e.g. {"a": {"b": 1}} -> {"a_b": 1}).
    """
    flat: Dict[str, Any] = {}
    for key, value in d.items():
        compound = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, dict):
            # Recurse, carrying the compound key as the new prefix.
            flat.update(flatten(value, compound, sep=sep))
        else:
            flat[compound] = value
    return flat
def is_metrics_file(path: Path) -> bool:
    """Quick heuristic: filename contains 'metrics' or JSON has collection_type == metrics.

    Args:
        path: Candidate JSON file.

    Returns:
        True when the file looks like a metrics file; False otherwise,
        including when the file cannot be read or parsed (best-effort filter).
    """
    # Cheap name check first: avoids reading the file at all.
    if "metrics" in path.name.lower():
        return True
    try:
        # Explicit UTF-8 so the heuristic does not depend on the platform's
        # default locale encoding.
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: unreadable file; ValueError: invalid JSON or bad encoding
        # (JSONDecodeError and UnicodeDecodeError are ValueError subclasses).
        # Either way this is simply "not a metrics file", not an error.
        return False
    if isinstance(data, dict):
        return data.get("collection_type") == "metrics"
    if isinstance(data, list):
        return any(
            isinstance(item, dict) and item.get("collection_type") == "metrics"
            for item in data
        )
    return False
def load_metric_records(path: Path) -> List[Dict[str, Any]]:
    """Load JSON file and return list of metric records (handles single dict or list)."""
    data = json.loads(path.read_text())
    if isinstance(data, list):
        # Already a list of records — pass through as-is.
        return data
    if isinstance(data, dict):
        # Single record: wrap so callers always iterate a list.
        return [data]
    raise ValueError(f"Unsupported JSON structure in {path}")
def main():
    """CLI entry point: find metric JSON files, flatten them, and write CSV.

    Writes one combined CSV (``--out``) and, with ``--per-file``, a sibling
    CSV next to each source JSON file.
    """
    parser = argparse.ArgumentParser(description="Convert metric JSON files to CSV")
    parser.add_argument("--input-dir", "-i", type=str, default="output_test", help="Directory to search for metric files")
    parser.add_argument("--out", "-o", type=str, default="metrics_combined.csv", help="Output CSV path for combined metrics")
    parser.add_argument("--recursive", "-r", action="store_true", help="Search recursively")
    parser.add_argument("--per-file", action="store_true", help="Also write one CSV per metric file (same folder)")
    args = parser.parse_args()

    base = Path(args.input_dir)
    if not base.exists():
        raise SystemExit(f"Input directory does not exist: {base}")

    # Sorted for deterministic row order across runs.
    candidates = sorted(base.glob("**/*.json" if args.recursive else "*.json"))

    combined: List[Dict[str, Any]] = []
    processed = 0
    for json_path in candidates:
        if not (json_path.is_file() and is_metrics_file(json_path)):
            continue
        try:
            records = load_metric_records(json_path)
        except Exception as e:
            print(f"Skipping {json_path} (failed to parse): {e}")
            continue

        file_rows: List[Dict[str, Any]] = []
        for record in records:
            row = flatten(record)
            # Provenance: remember which file each row came from.
            row["_source_file"] = str(json_path)
            file_rows.append(row)
        combined.extend(file_rows)

        if args.per_file and file_rows:
            # Sibling CSV next to the source JSON (same stem, .csv suffix).
            pd.DataFrame(file_rows).to_csv(json_path.with_suffix(".csv"), index=False)
        processed += 1

    if not combined:
        print("No metric files found.")
        return

    df = pd.DataFrame(combined)
    df.to_csv(args.out, index=False)
    print(f"Wrote combined CSV to {args.out} ({len(df)} rows) from {processed} metric files.")
if __name__ == "__main__":
    # Run the CLI only when executed as a script, not when imported.
    main()