#!/usr/bin/env python3 """ Score AWOOOI OpenClaw replacement candidate replay JSONL. Usage: python scripts/ai-agent-replay-scorecard.py \ --input /tmp/openclaw-incumbent.jsonl \ --input /tmp/langgraph-candidate.jsonl \ --output /tmp/agent-replay-report.json """ from __future__ import annotations import argparse import json import sys from pathlib import Path from typing import Any ROOT = Path(__file__).resolve().parents[1] API_SRC = ROOT / "apps" / "api" sys.path.insert(0, str(API_SRC)) from src.services.agent_replacement_evaluator import ( # noqa: E402 BASELINE_CANDIDATE_ID, MIN_INCIDENTS_FOR_CANARY, AgentReplayRecord, score_replay_records, ) def main() -> int: parser = argparse.ArgumentParser( description="Score OpenClaw replacement candidate replay records." ) parser.add_argument( "--input", required=True, action="append", help="Replay JSONL path. Repeat to merge baseline and candidate outputs.", ) parser.add_argument("--output", help="Report JSON path") parser.add_argument( "--baseline", default=BASELINE_CANDIDATE_ID, help=f"Baseline candidate id (default: {BASELINE_CANDIDATE_ID})", ) parser.add_argument( "--min-incidents", type=int, default=MIN_INCIDENTS_FOR_CANARY, help=f"Minimum incidents required for canary (default: {MIN_INCIDENTS_FOR_CANARY})", ) args = parser.parse_args() records: list[AgentReplayRecord] = [] for input_path in args.input: records.extend(_read_jsonl(Path(input_path))) report = score_replay_records( records, baseline_candidate_id=args.baseline, min_incidents_for_canary=args.min_incidents, ).to_dict() payload = json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True) if args.output: Path(args.output).write_text(payload + "\n", encoding="utf-8") else: print(payload) return 0 def _read_jsonl(path: Path) -> list[AgentReplayRecord]: records: list[AgentReplayRecord] = [] with path.open(encoding="utf-8") as handle: for line_number, line in enumerate(handle, start=1): line = line.strip() if not line or line.startswith("#"): continue try: payload: dict[str, Any] = json.loads(line) records.append(AgentReplayRecord.from_dict(payload)) except Exception as exc: raise SystemExit(f"{path}:{line_number}: invalid replay record: {exc}") from exc return records if __name__ == "__main__": raise SystemExit(main())