88 lines
2.8 KiB
Python
88 lines
2.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Evaluate whether a candidate replay result may move to shadow/canary.
|
|
|
|
This CLI is intentionally read-only. It rejects contract probes and other
|
|
not-replacement-evidence outputs even when they satisfy the JSON contract.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
# ROOT is the third ancestor of this file (parents[2]); presumably the
# repository root -- confirm against the checkout layout.
ROOT = Path(__file__).resolve().parents[2]
API_SRC = ROOT.joinpath("apps", "api")
# Prepend API_SRC so the ``src.services`` import that follows is resolved
# from that directory ahead of anything else on the path.
sys.path[:0] = [str(API_SRC)]
|
|
|
|
from src.services.agent_replay_promotion_gate import ( # noqa: E402
|
|
evaluate_agent_replay_promotion_gate,
|
|
)
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Run the promotion-gate CLI and return the process exit status.

    Args:
        argv: Command-line arguments without the program name. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so the
            original zero-argument call keeps working; passing a list lets
            the CLI be driven programmatically.

    Returns:
        ``0`` when the gate report's ``"approved"`` entry is truthy,
        ``2`` otherwise.

    Raises:
        SystemExit: raised by argparse on ``--help`` or invalid arguments,
            and by the JSONL reader on a malformed raw-results line.
    """
    parser = argparse.ArgumentParser(
        description="Evaluate the AWOOOI Agent replay promotion gate."
    )
    parser.add_argument("--candidate-id", required=True, help="candidate_id to gate")
    parser.add_argument("--scorecard", required=True, help="scorecard report JSON")
    parser.add_argument("--contract-report", required=True, help="contract report JSON")
    parser.add_argument("--raw-results", required=True, help="candidate raw result JSONL")
    parser.add_argument(
        "--import-report",
        help="optional external-result import report JSON; required for NeMo/Nemotron",
    )
    parser.add_argument(
        "--target-stage",
        default="shadow",
        choices=("shadow", "canary"),
        help="target promotion stage",
    )
    parser.add_argument("--output", help="promotion gate report JSON")
    args = parser.parse_args(argv)

    report = evaluate_agent_replay_promotion_gate(
        candidate_id=args.candidate_id,
        scorecard_report=_read_json(Path(args.scorecard)),
        contract_report=_read_json(Path(args.contract_report)),
        raw_results=_read_jsonl(Path(args.raw_results)),
        # The import report is optional on the command line; pass None when
        # the flag was not supplied.
        import_report=_read_json(Path(args.import_report))
        if args.import_report
        else None,
        target_stage=args.target_stage,
    ).to_dict()

    # sort_keys keeps the serialized report stable across runs.
    payload = json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True)
    if args.output:
        Path(args.output).write_text(payload + "\n", encoding="utf-8")
    else:
        print(payload)

    return 0 if report["approved"] else 2
|
|
|
|
|
|
def _read_json(path: Path) -> dict[str, Any]:
    """Load and return the JSON document stored at *path*.

    The file is decoded as UTF-8. The parsed top-level value is returned
    as-is; it is not checked to be a JSON object.

    Raises:
        SystemExit: if the content is not valid UTF-8 or not valid JSON.
            The message is prefixed with the file path, matching the error
            style of ``_read_jsonl`` so the CLI prints a one-line diagnostic
            instead of a traceback.
        OSError: if the file cannot be opened (propagated unchanged).
    """
    with path.open(encoding="utf-8") as handle:
        try:
            return json.load(handle)
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        except ValueError as exc:
            raise SystemExit(f"{path}: invalid JSON: {exc}") from exc
|
|
|
|
|
|
def _read_jsonl(path: Path) -> list[dict[str, Any]]:
    """Parse the JSON Lines file at *path* into a list of decoded values.

    Each line is stripped of surrounding whitespace; blank lines and lines
    whose stripped text starts with ``#`` are skipped. Every remaining line
    is decoded with ``json.loads`` and collected in file order.

    Raises:
        SystemExit: on the first line that fails to decode, with a message
            of the form ``<path>:<line number>: invalid JSONL: <error>``.
    """
    parsed: list[dict[str, Any]] = []
    with path.open(encoding="utf-8") as stream:
        for lineno, raw in enumerate(stream, start=1):
            text = raw.strip()
            if text and not text.startswith("#"):
                try:
                    item = json.loads(text)
                except Exception as err:
                    raise SystemExit(f"{path}:{lineno}: invalid JSONL: {err}") from err
                else:
                    parsed.append(item)
    return parsed
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|