#!/usr/bin/env python3
"""
Prepare candidate-visible Agent replay inputs from AWOOOI fixtures.

This script strips evaluation_labels before any candidate adapter sees the data.
"""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path
from typing import Any

# Third ancestor directory of this file; the API sources live under apps/api.
ROOT = Path(__file__).resolve().parents[2]
API_SRC = ROOT / "apps" / "api"
# Put the API source directory first on sys.path so the `src.services` import
# below resolves when this file is run directly as a script.
sys.path.insert(0, str(API_SRC))

from src.services.agent_replay_input import (  # noqa: E402
    assert_no_evaluation_label_leak,
    build_candidate_input_from_fixture,
)


def main() -> int:
    """Convert a fixture JSONL file into a candidate-input JSONL file.

    Command-line arguments:
        --fixtures: path to the agent_replay_fixture_v1 JSONL input.
        --output: path of the candidate input JSONL file to write.

    Each fixture is passed through ``build_candidate_input_from_fixture`` and
    the resulting dict is checked with ``assert_no_evaluation_label_leak``
    (presumably raises when a label is still present -- confirm against
    ``agent_replay_input``). A JSON summary with the two paths and the record
    count is printed to stdout.

    Returns:
        0 on success, for use as the process exit status.
    """
    parser = argparse.ArgumentParser(
        description="Strip fixture labels and prepare candidate-visible replay input JSONL."
    )
    parser.add_argument("--fixtures", required=True, help="agent_replay_fixture_v1 JSONL")
    parser.add_argument("--output", required=True, help="candidate input JSONL")
    args = parser.parse_args()

    # Convert and check every fixture before the output file is opened, so an
    # error raised in this loop never leaves a partially written output file.
    candidate_inputs = []
    for fixture in _read_jsonl(Path(args.fixtures)):
        candidate_input = build_candidate_input_from_fixture(fixture).to_dict()
        assert_no_evaluation_label_leak(candidate_input)
        candidate_inputs.append(candidate_input)

    with Path(args.output).open("w", encoding="utf-8") as handle:
        for candidate_input in candidate_inputs:
            # sort_keys keeps the serialized form of each record deterministic.
            handle.write(json.dumps(candidate_input, ensure_ascii=False, sort_keys=True))
            handle.write("\n")

    print(
        json.dumps(
            {
                "fixtures": args.fixtures,
                "output": args.output,
                "records": len(candidate_inputs),
            },
            ensure_ascii=False,
            sort_keys=True,
        )
    )
    return 0


def _read_jsonl(path: Path) -> list[dict[str, Any]]:
    """Read a JSONL file and return its records as a list of dicts.

    Blank lines and lines whose first non-whitespace character is ``#`` are
    skipped. Line numbers in error messages are 1-based and count skipped
    lines too.

    Args:
        path: JSONL file to read, decoded as UTF-8.

    Returns:
        One dict per non-skipped line, in file order.

    Raises:
        SystemExit: if a line is not valid JSON, or is valid JSON but not a
            JSON object. The message has the form ``<path>:<line>: invalid
            JSONL: <detail>``.
    """
    records: list[dict[str, Any]] = []
    with path.open(encoding="utf-8") as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as exc:
                raise SystemExit(f"{path}:{line_number}: invalid JSONL: {exc}") from exc
            # A bare list, string or number is valid JSON but not a record.
            # Reject it here, where the file and line are still known.
            if not isinstance(record, dict):
                raise SystemExit(
                    f"{path}:{line_number}: invalid JSONL: "
                    f"expected a JSON object, got {type(record).__name__}"
                )
            records.append(record)
    return records


if __name__ == "__main__":
    raise SystemExit(main())