# awoooi/scripts/export-openclaw-incumbent-replay.py

#!/usr/bin/env python3
"""
Export OpenClaw incumbent replay JSONL from existing AWOOOI audit tables.

This script only reads from the database: it queries agent_sessions,
auto_repair_executions, and incident_evidence, then writes
candidate_id=openclaw_incumbent records to a JSONL file that can be scored by
scripts/ai-agent-replay-scorecard.py.
"""

from __future__ import annotations

import argparse
import asyncio
import json
import sys
from datetime import timedelta
from pathlib import Path

from sqlalchemy import and_, func, select


# ROOT is the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# The API sources live under apps/api; putting that directory first on sys.path
# lets the `src.*` imports below resolve when the script is run directly.
API_SRC = ROOT.joinpath("apps", "api")
sys.path[:0] = [str(API_SRC)]

from src.db.base import get_db_context  # noqa: E402
from src.db.models import AgentSession, AutoRepairExecution, IncidentEvidence  # noqa: E402
from src.services.agent_replacement_evaluator import (  # noqa: E402
    build_openclaw_incumbent_record,
)
from src.utils.timezone import now_taipei  # noqa: E402


async def main_async() -> int:
    """Export incumbent replay records to a JSONL file and report the outcome.

    Parses ``--output``, ``--limit``, ``--days`` and ``--run-id`` from the
    command line, builds one record per incident that has a coordinator
    session inside the lookback window, and writes one JSON object per line to
    the output path. A single JSON summary (or error) object is printed to
    stdout.

    Returns:
        ``0`` when the file was written; ``2`` when querying the database,
        serializing the records, or writing the output file failed.
    """
    parser = argparse.ArgumentParser(
        description="Export OpenClaw incumbent replay JSONL from DB."
    )
    parser.add_argument("--output", required=True, help="Output JSONL path")
    parser.add_argument("--limit", type=int, default=100, help="Max incidents")
    parser.add_argument("--days", type=int, default=30, help="Lookback days")
    parser.add_argument(
        "--run-id",
        default=f"openclaw-incumbent-{now_taipei().strftime('%Y%m%d%H%M%S')}",
        help="Replay run id",
    )
    args = parser.parse_args()

    cutoff = now_taipei() - timedelta(days=args.days)
    records = []
    try:
        async with get_db_context() as db:
            incident_ids = await _incident_ids(db, cutoff=cutoff, limit=args.limit)
            for incident_id in incident_ids:
                coordinator = await _latest_coordinator(db, incident_id)
                if coordinator is None:
                    continue
                # Execution and evidence rows are optional; a missing row is
                # passed through to the record builder as None.
                execution = await _latest_execution(db, incident_id)
                evidence = await _latest_evidence(db, incident_id)
                turn_count = await _agent_turn_count(db, incident_id)
                records.append(
                    build_openclaw_incumbent_record(
                        run_id=args.run_id,
                        incident_id=incident_id,
                        coordinator_output=coordinator.output_json,
                        execution_success=(
                            execution.success if execution is not None else None
                        ),
                        verification_result=(
                            evidence.verification_result if evidence is not None else None
                        ),
                        # The trace counts as complete once the incident has
                        # at least three agent sessions of any role.
                        audit_trace_complete=turn_count >= 3,
                        latency_ms=float(coordinator.latency_ms or 0),
                        coordinator_degraded=bool(coordinator.degraded),
                    )
                )
    except Exception as exc:
        return _report_export_failure(exc, output=args.output, run_id=args.run_id)

    output = Path(args.output)
    try:
        # Serialize everything before opening the file: mode "w" truncates on
        # open, so a record that cannot be serialized must fail before an
        # existing export is destroyed or a partial file is left behind.
        lines = [
            json.dumps(record.__dict__, ensure_ascii=False, sort_keys=True) + "\n"
            for record in records
        ]
        with output.open("w", encoding="utf-8") as handle:
            handle.writelines(lines)
    except (OSError, TypeError, ValueError) as exc:
        # Report write-stage failures the same way as database failures so
        # callers always get one JSON object on stdout and exit code 2.
        return _report_export_failure(exc, output=args.output, run_id=args.run_id)

    print(
        json.dumps(
            {
                "output": str(output),
                "records": len(records),
                "run_id": args.run_id,
            },
            ensure_ascii=False,
            sort_keys=True,
        )
    )
    return 0


def _report_export_failure(exc: Exception, *, output: str, run_id: str) -> int:
    """Print the structured export-failure JSON object and return exit code 2."""
    print(
        json.dumps(
            {
                "error": "openclaw_incumbent_export_failed",
                "detail": str(exc),
                "output": output,
                "run_id": run_id,
            },
            ensure_ascii=False,
            sort_keys=True,
        )
    )
    return 2


async def _incident_ids(db, *, cutoff, limit: int) -> list[str]:
    """Return up to ``limit`` distinct incident ids with a recent coordinator session.

    Only sessions whose ``agent_role`` is ``"coordinator"`` and whose
    ``created_at`` is at or after ``cutoff`` are considered. Ids are ordered
    descending by ``incident_id`` and converted to ``str``.
    """
    recent_coordinator = and_(
        AgentSession.agent_role == "coordinator",
        AgentSession.created_at >= cutoff,
    )
    query = select(AgentSession.incident_id).where(recent_coordinator)
    query = query.distinct().order_by(AgentSession.incident_id.desc()).limit(limit)
    rows = (await db.execute(query)).all()
    # Each row carries a single column: the incident id.
    return [str(incident_id) for (incident_id,) in rows]


async def _latest_coordinator(db, incident_id: str):
    """Return the newest coordinator ``AgentSession`` for the incident, or ``None``."""
    belongs_to_coordinator = and_(
        AgentSession.incident_id == incident_id,
        AgentSession.agent_role == "coordinator",
    )
    newest_first = AgentSession.created_at.desc()
    query = select(AgentSession).where(belongs_to_coordinator)
    query = query.order_by(newest_first).limit(1)
    rows = await db.execute(query)
    return rows.scalar_one_or_none()


async def _latest_execution(db, incident_id: str):
    """Return the newest ``AutoRepairExecution`` for the incident, or ``None``."""
    newest_first = AutoRepairExecution.created_at.desc()
    query = select(AutoRepairExecution).where(
        AutoRepairExecution.incident_id == incident_id
    )
    query = query.order_by(newest_first).limit(1)
    rows = await db.execute(query)
    return rows.scalar_one_or_none()


async def _latest_evidence(db, incident_id: str):
    """Return the most recently collected ``IncidentEvidence`` row, or ``None``."""
    newest_first = IncidentEvidence.collected_at.desc()
    query = select(IncidentEvidence).where(
        IncidentEvidence.incident_id == incident_id
    )
    query = query.order_by(newest_first).limit(1)
    rows = await db.execute(query)
    return rows.scalar_one_or_none()


async def _agent_turn_count(db, incident_id: str) -> int:
    """Count every ``AgentSession`` row (any role) recorded for the incident."""
    query = (
        select(func.count())
        .select_from(AgentSession)
        .where(AgentSession.incident_id == incident_id)
    )
    count = (await db.execute(query)).scalar()
    # A missing or zero scalar both collapse to 0.
    if not count:
        return 0
    return int(count)


def main() -> int:
    """Run the async exporter to completion and return its exit code."""
    exit_code = asyncio.run(main_async())
    return exit_code


if __name__ == "__main__":
    # Use the exporter's return code as the process exit status.
    sys.exit(main())