#!/usr/bin/env python3 """ Export sanitized AWOOOI incident replay fixtures for candidate Agents. This script is read-only. It exports agent_replay_fixture_v1 JSONL, separating candidate-visible incident_context from evaluation_labels. """ from __future__ import annotations import argparse import asyncio import json import sys from datetime import timedelta from pathlib import Path from sqlalchemy import and_, func, select ROOT = Path(__file__).resolve().parents[1] API_SRC = ROOT / "apps" / "api" sys.path.insert(0, str(API_SRC)) from src.db.base import get_db_context # noqa: E402 from src.db.models import ( # noqa: E402 AgentSession, AutoRepairExecution, IncidentEvidence, IncidentRecord, ) from src.services.agent_replay_fixture import build_agent_replay_fixture # noqa: E402 from src.utils.timezone import now_taipei # noqa: E402 async def main_async() -> int: parser = argparse.ArgumentParser( description="Export sanitized incident fixtures for Agent replacement replay." ) parser.add_argument("--output", required=True, help="Output JSONL path") parser.add_argument("--limit", type=int, default=100, help="Max incidents") parser.add_argument("--days", type=int, default=30, help="Lookback days") parser.add_argument( "--run-id", default=f"agent-fixtures-{now_taipei().strftime('%Y%m%d%H%M%S')}", help="Replay fixture run id", ) args = parser.parse_args() cutoff = now_taipei() - timedelta(days=args.days) fixtures = [] try: async with get_db_context() as db: incident_ids = await _incident_ids(db, cutoff=cutoff, limit=args.limit) for incident_id in incident_ids: incident = await _incident(db, incident_id) if incident is None: continue evidence = await _latest_evidence(db, incident_id) execution = await _latest_execution(db, incident_id) turn_count = await _agent_turn_count(db, incident_id) fixtures.append( build_agent_replay_fixture( run_id=args.run_id, incident=incident, evidence=evidence, execution=execution, agent_turn_count=turn_count, ) ) except Exception as exc: print( json.dumps( { "error": "agent_replay_fixture_export_failed", "detail": str(exc), "output": args.output, "run_id": args.run_id, }, ensure_ascii=False, sort_keys=True, ) ) return 2 output = Path(args.output) with output.open("w", encoding="utf-8") as handle: for fixture in fixtures: handle.write(json.dumps(fixture.to_dict(), ensure_ascii=False, sort_keys=True)) handle.write("\n") print( json.dumps( { "output": str(output), "records": len(fixtures), "run_id": args.run_id, }, ensure_ascii=False, sort_keys=True, ) ) return 0 async def _incident_ids(db, *, cutoff, limit: int) -> list[str]: stmt = ( select(AgentSession.incident_id) .where( and_( AgentSession.agent_role == "coordinator", AgentSession.created_at >= cutoff, ) ) .distinct() .order_by(AgentSession.incident_id.desc()) .limit(limit) ) result = await db.execute(stmt) return [str(row[0]) for row in result.all()] async def _incident(db, incident_id: str): result = await db.execute( select(IncidentRecord).where(IncidentRecord.incident_id == incident_id) ) return result.scalar_one_or_none() async def _latest_evidence(db, incident_id: str): stmt = ( select(IncidentEvidence) .where(IncidentEvidence.incident_id == incident_id) .order_by(IncidentEvidence.collected_at.desc()) .limit(1) ) result = await db.execute(stmt) return result.scalar_one_or_none() async def _latest_execution(db, incident_id: str): stmt = ( select(AutoRepairExecution) .where(AutoRepairExecution.incident_id == incident_id) .order_by(AutoRepairExecution.created_at.desc()) .limit(1) ) result = await db.execute(stmt) return result.scalar_one_or_none() async def _agent_turn_count(db, incident_id: str) -> int: stmt = select(func.count()).select_from(AgentSession).where( AgentSession.incident_id == incident_id ) result = await db.execute(stmt) return int(result.scalar() or 0) def main() -> int: return asyncio.run(main_async()) if __name__ == "__main__": raise SystemExit(main())