92 lines
2.6 KiB
Python
92 lines
2.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Score AWOOOI OpenClaw replacement candidate replay JSONL.

Usage:
    python scripts/ai-agent-replay-scorecard.py \
        --input /tmp/openclaw-incumbent.jsonl \
        --input /tmp/langgraph-candidate.jsonl \
        --output /tmp/agent-replay-report.json
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
# Directory one level above the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Location of the API sources under ROOT.
API_SRC = ROOT.joinpath("apps", "api")
# Put the API sources first on the import path so the ``src.services``
# import below resolves against them.
sys.path.insert(0, str(API_SRC))
|
|
|
|
from src.services.agent_replacement_evaluator import ( # noqa: E402
|
|
BASELINE_CANDIDATE_ID,
|
|
MIN_INCIDENTS_FOR_CANARY,
|
|
AgentReplayRecord,
|
|
score_replay_records,
|
|
)
|
|
|
|
|
|
def main() -> int:
    """Run the replay scorecard command-line interface.

    Parses the command line, reads every ``--input`` JSONL file in the order
    given, scores the merged records with ``score_replay_records`` and emits
    the resulting report as indented, key-sorted JSON: into the ``--output``
    file when one is given, otherwise onto stdout.

    Returns:
        ``0`` once the report has been emitted.
    """
    cli = argparse.ArgumentParser(
        description="Score OpenClaw replacement candidate replay records."
    )
    cli.add_argument(
        "--input",
        action="append",
        required=True,
        help="Replay JSONL path. Repeat to merge baseline and candidate outputs.",
    )
    cli.add_argument("--output", help="Report JSON path")
    cli.add_argument(
        "--baseline",
        help=f"Baseline candidate id (default: {BASELINE_CANDIDATE_ID})",
        default=BASELINE_CANDIDATE_ID,
    )
    cli.add_argument(
        "--min-incidents",
        help=f"Minimum incidents required for canary (default: {MIN_INCIDENTS_FOR_CANARY})",
        default=MIN_INCIDENTS_FOR_CANARY,
        type=int,
    )
    options = cli.parse_args()

    # Flatten all input files into a single list, preserving CLI order.
    merged: list[AgentReplayRecord] = [
        record
        for input_path in options.input
        for record in _read_jsonl(Path(input_path))
    ]

    scorecard = score_replay_records(
        merged,
        baseline_candidate_id=options.baseline,
        min_incidents_for_canary=options.min_incidents,
    )
    rendered = json.dumps(
        scorecard.to_dict(),
        indent=2,
        sort_keys=True,
        ensure_ascii=False,
    )

    if not options.output:
        print(rendered)
    else:
        # The file gets an explicit trailing newline; print() adds its own.
        Path(options.output).write_text(rendered + "\n", encoding="utf-8")

    return 0
|
|
|
|
|
|
def _read_jsonl(path: Path) -> list[AgentReplayRecord]:
    """Parse one replay JSONL file into ``AgentReplayRecord`` objects.

    Blank lines and lines whose first non-whitespace character is ``#`` are
    skipped. Every remaining line must hold a single JSON object, which is
    handed to ``AgentReplayRecord.from_dict``.

    Args:
        path: Replay JSONL file to read. It is decoded as UTF-8 and a leading
            byte-order mark, if present, is ignored.

    Returns:
        The parsed records, in file order.

    Raises:
        SystemExit: If a line is not valid JSON, does not contain a JSON
            object, or is rejected by ``AgentReplayRecord.from_dict``. The
            message starts with ``<path>:<line number>:``.
        OSError: If the file cannot be opened.
    """
    records: list[AgentReplayRecord] = []
    # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM. With
    # plain "utf-8" the BOM stays attached to the first line, where strip()
    # does not remove it and json.loads() refuses it.
    with path.open(encoding="utf-8-sig") as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            try:
                payload: Any = json.loads(line)
                if not isinstance(payload, dict):
                    raise TypeError(
                        f"expected a JSON object, got {type(payload).__name__}"
                    )
                records.append(AgentReplayRecord.from_dict(payload))
            except Exception as exc:
                # Deliberately broad: the exceptions from_dict may raise are
                # not known here, and any failure should name the bad line.
                raise SystemExit(f"{path}:{line_number}: invalid replay record: {exc}") from exc
    return records
|
|
|
|
|
|
# Script entry point: the integer returned by main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
|