# awoooi/scripts/ai-agent-replay-scorecard.py

#!/usr/bin/env python3
"""
Score AWOOOI OpenClaw replacement candidate replay JSONL.

Usage:
  python scripts/ai-agent-replay-scorecard.py \
    --input /tmp/openclaw-incumbent.jsonl \
    --input /tmp/langgraph-candidate.jsonl \
    --output /tmp/agent-replay-report.json
"""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path
from typing import Any


# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# The API package directory; it is put at the front of sys.path so that the
# ``src.services...`` import below resolves when this file is run as a script.
API_SRC = ROOT.joinpath("apps", "api")
sys.path.insert(0, str(API_SRC))

from src.services.agent_replacement_evaluator import (  # noqa: E402
    BASELINE_CANDIDATE_ID,
    MIN_INCIDENTS_FOR_CANARY,
    AgentReplayRecord,
    score_replay_records,
)


def main(argv: list[str] | None = None) -> int:
    """Score replay records from one or more JSONL files and emit a JSON report.

    Every ``--input`` file is read with ``_read_jsonl`` and the records are
    merged in the order the files were given. The merged list is passed to
    ``score_replay_records`` and the result's ``to_dict()`` is serialized as
    indented, key-sorted JSON.

    Args:
        argv: Command-line arguments to parse, excluding the program name.
            ``None`` (the default) makes argparse read ``sys.argv[1:]``, so
            existing ``main()`` callers behave exactly as before.

    Returns:
        ``0`` once the report has been written or printed.

    Raises:
        SystemExit: Raised by argparse on invalid arguments or ``--help``,
            and by ``_read_jsonl`` when an input line is not a valid record.
    """
    parser = argparse.ArgumentParser(
        description="Score OpenClaw replacement candidate replay records."
    )
    parser.add_argument(
        "--input",
        required=True,
        action="append",
        help="Replay JSONL path. Repeat to merge baseline and candidate outputs.",
    )
    parser.add_argument("--output", help="Report JSON path")
    parser.add_argument(
        "--baseline",
        default=BASELINE_CANDIDATE_ID,
        help=f"Baseline candidate id (default: {BASELINE_CANDIDATE_ID})",
    )
    parser.add_argument(
        "--min-incidents",
        type=int,
        default=MIN_INCIDENTS_FOR_CANARY,
        help=f"Minimum incidents required for canary (default: {MIN_INCIDENTS_FOR_CANARY})",
    )
    args = parser.parse_args(argv)

    # action="append" makes args.input a list with one entry per --input flag.
    records: list[AgentReplayRecord] = []
    for input_path in args.input:
        records.extend(_read_jsonl(Path(input_path)))

    report = score_replay_records(
        records,
        baseline_candidate_id=args.baseline,
        min_incidents_for_canary=args.min_incidents,
    ).to_dict()
    payload = json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True)

    # With --output the report goes to that file (newline-terminated);
    # otherwise it is printed to stdout.
    if args.output:
        Path(args.output).write_text(payload + "\n", encoding="utf-8")
    else:
        print(payload)

    return 0


def _read_jsonl(path: Path) -> list[AgentReplayRecord]:
    records: list[AgentReplayRecord] = []
    with path.open(encoding="utf-8") as handle:
        for line_number, line in enumerate(handle, start=1):
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            try:
                payload: dict[str, Any] = json.loads(line)
                records.append(AgentReplayRecord.from_dict(payload))
            except Exception as exc:
                raise SystemExit(f"{path}:{line_number}: invalid replay record: {exc}") from exc
    return records


# Script entry point: main()'s return value is used as the process exit status.
if __name__ == "__main__":
    sys.exit(main())