#!/usr/bin/env python3
"""
Apply AWOOOI fixture labels to normalized candidate replay JSONL.

This is a local evaluator step. It does not call candidate agents or execute
tools, and it ignores any candidate-supplied self-grading fields.
"""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path
from typing import Any

# The project imports below resolve against apps/api, so that directory has to
# be on sys.path before they run (hence the E402 suppressions).
ROOT = Path(__file__).resolve().parents[2]
API_SRC = ROOT / "apps" / "api"
sys.path.insert(0, str(API_SRC))

from src.services.agent_replay_label_grader import (  # noqa: E402
    grade_replay_records_with_fixtures,
)
from src.services.agent_replacement_evaluator import AgentReplayRecord  # noqa: E402


def main() -> int:
    """Run the grading CLI.

    Reads the fixture JSONL and the normalized replay JSONL, grades the replay
    records against the fixtures, writes the graded records to ``--output``,
    optionally writes the report to ``--report``, and always prints the report
    as a single JSON line on stdout.

    Returns:
        0 on success. Malformed JSONL input aborts earlier via ``SystemExit``.
    """
    parser = argparse.ArgumentParser(
        description="Grade normalized candidate replay records with fixture labels."
    )
    parser.add_argument("--fixtures", required=True, help="agent_replay_fixture_v1 JSONL")
    parser.add_argument("--input", required=True, help="normalized replay JSONL")
    parser.add_argument("--output", required=True, help="graded replay JSONL")
    parser.add_argument("--report", help="grading report JSON")
    args = parser.parse_args()

    graded_records, report = grade_replay_records_with_fixtures(
        fixtures=_read_jsonl(Path(args.fixtures)),
        replay_records=_read_replay_jsonl(Path(args.input)),
    )
    _write_replay_jsonl(Path(args.output), graded_records)

    report_payload = report.to_dict()
    if args.report:
        # The on-disk report is pretty-printed; stdout gets the compact form.
        Path(args.report).write_text(
            json.dumps(report_payload, ensure_ascii=False, indent=2, sort_keys=True) + "\n",
            encoding="utf-8",
        )
    print(json.dumps(report_payload, ensure_ascii=False, sort_keys=True))
    return 0


def _read_jsonl(path: Path) -> list[dict[str, Any]]:
    """Load a JSONL file into a list of JSON objects.

    Blank lines and lines starting with ``#`` (after stripping whitespace) are
    skipped.

    Args:
        path: UTF-8 encoded JSONL file to read.

    Returns:
        One dict per non-skipped line, in file order.

    Raises:
        SystemExit: With a ``path:line: invalid JSONL: ...`` message when a
            line is not valid JSON or does not decode to a JSON object.
    """
    records: list[dict[str, Any]] = []
    with path.open(encoding="utf-8") as handle:
        for line_number, line in enumerate(handle, start=1):
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            try:
                payload = json.loads(line)
            except (json.JSONDecodeError, RecursionError) as exc:
                raise SystemExit(f"{path}:{line_number}: invalid JSONL: {exc}") from exc
            # Valid JSON is not necessarily an object; reject scalars and arrays
            # here so the failure points at the offending line rather than
            # surfacing later in a consumer that expects a mapping.
            if not isinstance(payload, dict):
                raise SystemExit(
                    f"{path}:{line_number}: invalid JSONL: "
                    f"expected a JSON object, got {type(payload).__name__}"
                )
            records.append(payload)
    return records


def _read_replay_jsonl(path: Path) -> list[AgentReplayRecord]:
    """Load a JSONL file and build one ``AgentReplayRecord`` per line.

    Each decoded object is passed to ``AgentReplayRecord.from_dict``.
    """
    return [AgentReplayRecord.from_dict(payload) for payload in _read_jsonl(path)]


def _write_replay_jsonl(path: Path, records: list[AgentReplayRecord]) -> None:
    """Write *records* to *path* as UTF-8 JSONL, one record per line.

    Each record is serialized from its instance ``__dict__`` with sorted keys.
    """
    # NOTE(review): relies on AgentReplayRecord exposing a JSON-serializable
    # __dict__ (i.e. no __slots__) -- confirm against the class definition.
    with path.open("w", encoding="utf-8") as handle:
        for record in records:
            handle.write(json.dumps(record.__dict__, ensure_ascii=False, sort_keys=True))
            handle.write("\n")


if __name__ == "__main__":
    raise SystemExit(main())