136 lines
4.9 KiB
Python
136 lines
4.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Finalize an externally executed NeMo/Nemotron replay batch.
|
|
|
|
This command is local and deterministic. It does not call NIM, NVIDIA APIs,
|
|
production tools, or LLMs. It consumes external JSONL that already exists, then
|
|
runs import -> contract -> normalize -> grade -> score -> promotion gate.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
# The script imports `src.services.*` below, so the API application directory
# (two levels above this file's directory, then apps/api) is put at the front
# of the module search path before those imports run.
ROOT = Path(__file__).resolve().parents[2]
API_SRC = ROOT.joinpath("apps", "api")
sys.path.insert(0, str(API_SRC))
|
|
|
|
from src.services.agent_nemotron_replay_finalizer import ( # noqa: E402
|
|
NemotronReplayFinalizerOutputs,
|
|
finalize_nemotron_replay,
|
|
)
|
|
from src.services.agent_replacement_evaluator import ( # noqa: E402
|
|
AgentReplayRecord,
|
|
)
|
|
|
|
|
|
def main() -> int:
    """Run the finalizer command line and return its process exit status.

    Parses the command-line options, loads the five JSONL inputs, hands them
    to ``finalize_nemotron_replay``, and records the input paths and output
    paths on the returned summary. The import report and the summary are
    always written; every other report or artifact is written only when its
    value is truthy. The summary is finally printed to stdout as one JSON
    line.

    Returns:
        ``0`` when ``summary["approved"]`` is truthy, otherwise ``2``.
    """
    cli = argparse.ArgumentParser(
        description="Finalize NeMo/Nemotron external replay results."
    )
    # Registration order is kept because it is the order shown in --help.
    required_options = (
        ("--requests", "NeMo request JSONL"),
        ("--external-results", "agent_nemotron_external_result_v1 JSONL"),
        ("--inputs", "candidate input JSONL"),
        ("--fixtures", "internal fixture JSONL"),
        ("--baseline", "OpenClaw baseline JSONL"),
        ("--output-prefix", "output path prefix"),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)
    cli.add_argument(
        "--target-stage",
        default="shadow",
        choices=("shadow", "canary"),
        help="target promotion stage",
    )
    args = cli.parse_args()

    outputs = NemotronReplayFinalizerOutputs.from_prefix(Path(args.output_prefix))

    # Inputs are loaded in the same order the finalizer receives them.
    request_rows = _read_jsonl(Path(args.requests))
    external_rows = _read_jsonl(Path(args.external_results))
    candidate_rows = _read_jsonl(Path(args.inputs))
    fixture_rows = _read_jsonl(Path(args.fixtures))
    baseline_rows = _read_replay_jsonl(Path(args.baseline))

    summary, artifacts = finalize_nemotron_replay(
        requests=request_rows,
        external_results=external_rows,
        candidate_inputs=candidate_rows,
        fixtures=fixture_rows,
        baseline_records=baseline_rows,
        target_stage=args.target_stage,
    )
    summary["inputs"] = {
        "requests": args.requests,
        "external_results": args.external_results,
        "candidate_inputs": args.inputs,
        "fixtures": args.fixtures,
        "baseline": args.baseline,
    }
    summary["outputs"] = outputs.to_dict()

    _write_json(outputs.import_report, summary["import_report"])

    # (lookup, key, attribute on `outputs`, writer), in write order.
    # Artifact keys are required (indexing raises if one is absent), while
    # summary keys are optional (`get` yields None when absent).
    conditional_writes = (
        (artifacts.__getitem__, "candidate_raw", "candidate_raw", _write_jsonl),
        (summary.get, "contract_report", "contract_report", _write_json),
        (artifacts.__getitem__, "normalized", "normalized_output", _write_replay_jsonl),
        (artifacts.__getitem__, "graded", "graded_output", _write_replay_jsonl),
        (summary.get, "grading_report", "grading_report", _write_json),
        (summary.get, "scorecard", "scorecard", _write_json),
        (summary.get, "pipeline_report", "pipeline_report", _write_json),
        (summary.get, "promotion_gate", "promotion_gate", _write_json),
    )
    for lookup, key, output_attr, writer in conditional_writes:
        payload = lookup(key)
        if payload:
            writer(getattr(outputs, output_attr), payload)

    _write_json(outputs.summary, summary)

    print(json.dumps(summary, ensure_ascii=False, sort_keys=True))
    if summary["approved"]:
        return 0
    return 2
|
|
|
|
|
|
def _read_jsonl(path: Path) -> list[dict[str, Any]]:
    """Load a JSONL file into a list of JSON objects.

    Blank lines and lines that start with ``#`` (after stripping surrounding
    whitespace) are skipped. Every remaining line must hold one JSON object.

    Args:
        path: Location of the JSONL file to read.

    Returns:
        The decoded objects, in file order.

    Raises:
        SystemExit: With a ``<path>:<line>: invalid JSONL: ...`` message when
            a line is not valid JSON or does not decode to a JSON object.
    """
    records: list[dict[str, Any]] = []
    # "utf-8-sig" reads plain UTF-8 unchanged but also drops a leading
    # byte-order mark, which would otherwise make the first line unparsable.
    with path.open(encoding="utf-8-sig") as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            try:
                record = json.loads(line)
            except (ValueError, RecursionError) as exc:
                # json.JSONDecodeError is a ValueError; RecursionError covers
                # pathologically nested documents.
                raise SystemExit(f"{path}:{line_number}: invalid JSONL: {exc}") from exc
            if not isinstance(record, dict):
                # Arrays and scalars are valid JSON but break the declared
                # list-of-objects contract, so fail here with a line number.
                raise SystemExit(
                    f"{path}:{line_number}: invalid JSONL: "
                    f"expected a JSON object, got {type(record).__name__}"
                )
            records.append(record)
    return records
|
|
|
|
|
|
def _read_replay_jsonl(path: Path) -> list[AgentReplayRecord]:
    """Load a JSONL file and build one ``AgentReplayRecord`` per JSON line.

    The file is parsed in full with ``_read_jsonl`` first; each decoded
    payload is then passed to ``AgentReplayRecord.from_dict`` in file order.
    """
    payloads = _read_jsonl(path)
    replay_records = []
    for payload in payloads:
        replay_records.append(AgentReplayRecord.from_dict(payload))
    return replay_records
|
|
|
|
|
|
def _write_jsonl(path: Path, records: list[dict[str, Any]]) -> None:
    """Write ``records`` to ``path`` as JSONL, one JSON object per line.

    Each record is serialized with sorted keys and without ASCII escaping,
    followed by a newline. An existing file is overwritten; an empty
    ``records`` list produces an empty file.

    Args:
        path: Destination file. Missing parent directories are created.
        records: JSON-serializable dictionaries to write, in order.
    """
    # The output prefix may name a directory that does not exist yet; create
    # it rather than failing after the whole pipeline has already run.
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n"
            for record in records
        )
|
|
|
|
|
|
def _write_replay_jsonl(path: Path, records: list[AgentReplayRecord]) -> None:
    """Write replay records to ``path`` as JSONL, one record per line.

    Each record is serialized from its instance ``__dict__`` with sorted keys
    and without ASCII escaping, followed by a newline. An existing file is
    overwritten.

    Args:
        path: Destination file. Missing parent directories are created.
        records: Records to write, in order.
    """
    # NOTE(review): this relies on every record exposing ``__dict__`` with
    # JSON-serializable values — confirm against AgentReplayRecord.
    # The output prefix may name a directory that does not exist yet; create
    # it rather than failing after the whole pipeline has already run.
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(record.__dict__, ensure_ascii=False, sort_keys=True) + "\n"
            for record in records
        )
|
|
|
|
|
|
def _write_json(path: Path, payload: dict[str, Any]) -> None:
    """Write ``payload`` to ``path`` as an indented JSON document.

    The document uses two-space indentation, sorted keys, no ASCII escaping,
    and ends with a single trailing newline. An existing file is overwritten.

    Args:
        path: Destination file. Missing parent directories are created.
        payload: JSON-serializable dictionary to write.
    """
    # The output prefix may name a directory that does not exist yet; create
    # it rather than failing after the whole pipeline has already run.
    path.parent.mkdir(parents=True, exist_ok=True)
    document = json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True)
    path.write_text(document + "\n", encoding="utf-8")
|
|
|
|
|
|
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit status.
    sys.exit(main())
|