#!/usr/bin/env python3 """ Run an approved NeMo/Nemotron request pack through NVIDIA NIM offline replay. This command reads a sanitized request JSONL, calls only the configured chat completion endpoint, and writes agent_nemotron_external_result_v1 JSONL. It does not execute tools, mutate production systems, or read fixture labels. """ from __future__ import annotations import argparse import asyncio import json import os import sys from pathlib import Path from typing import Any ROOT = Path(__file__).resolve().parents[2] API_SRC = ROOT / "apps" / "api" sys.path.insert(0, str(API_SRC)) from src.services.agent_nemotron_external_runner import ( # noqa: E402 DEFAULT_CONCURRENCY, DEFAULT_MAX_TOKENS, DEFAULT_NEMOTRON_MODEL, DEFAULT_NVIDIA_CHAT_COMPLETIONS_URL, DEFAULT_TIMEOUT_SECONDS, NemotronExternalRunnerConfig, run_nemotron_external_replay, ) async def main_async() -> int: parser = argparse.ArgumentParser( description="Run NeMo/Nemotron external offline replay." ) parser.add_argument("--requests", required=True, help="sanitized NeMo request JSONL") parser.add_argument("--output", required=True, help="external result JSONL") parser.add_argument("--report", required=True, help="runner report JSON") parser.add_argument("--readiness", help="readiness report JSON; must be ready=true") parser.add_argument( "--api-key-env", default="NVIDIA_API_KEY", help="environment variable holding the NVIDIA/NIM API key", ) parser.add_argument( "--base-url", default=os.getenv("NVIDIA_API_BASE_URL") or os.getenv("NIM_BASE_URL") or DEFAULT_NVIDIA_CHAT_COMPLETIONS_URL, help="chat completions endpoint", ) parser.add_argument( "--model", default=os.getenv("NEMOTRON_TOOL_MODEL") or DEFAULT_NEMOTRON_MODEL, help="NVIDIA/Nemotron model name", ) parser.add_argument( "--timeout-seconds", type=float, default=float(os.getenv("NEMOTRON_TIMEOUT_SECONDS") or DEFAULT_TIMEOUT_SECONDS), ) parser.add_argument("--max-tokens", type=int, default=DEFAULT_MAX_TOKENS) parser.add_argument("--temperature", type=float, default=0.0) parser.add_argument("--concurrency", type=int, default=DEFAULT_CONCURRENCY) parser.add_argument("--max-records", type=int, help="optional local smoke limit") args = parser.parse_args() readiness = _read_json(Path(args.readiness)) if args.readiness else None if readiness is not None and readiness.get("ready") is not True: report = { "schema_version": "agent_nemotron_external_runner_report_v1", "candidate_id": "nemo_nemotron_fabric", "requests": 0, "results": 0, "valid": False, "model": args.model, "failures": ["readiness_not_ready"], } _write_json(Path(args.report), report) return 2 api_key = os.getenv(args.api_key_env, "") requests = _read_jsonl(Path(args.requests)) if args.max_records is not None: requests = requests[: args.max_records] results, report = await run_nemotron_external_replay( requests=requests, config=NemotronExternalRunnerConfig( api_key=api_key, base_url=args.base_url, model=args.model, timeout_seconds=args.timeout_seconds, max_tokens=args.max_tokens, temperature=args.temperature, concurrency=args.concurrency, ), ) _write_jsonl(Path(args.output), results) _write_json(Path(args.report), report.to_dict()) print(json.dumps(report.to_dict(), ensure_ascii=False, sort_keys=True)) return 0 if report.valid else 2 def _read_json(path: Path) -> dict[str, Any]: try: payload = json.loads(path.read_text(encoding="utf-8")) except Exception as exc: raise SystemExit(f"{path}: invalid JSON: {exc}") from exc if not isinstance(payload, dict): raise SystemExit(f"{path}: expected JSON object") return payload def _read_jsonl(path: Path) -> list[dict[str, Any]]: records: list[dict[str, Any]] = [] with path.open(encoding="utf-8") as handle: for line_number, line in enumerate(handle, start=1): line = line.strip() if not line or line.startswith("#"): continue try: payload = json.loads(line) except Exception as exc: raise SystemExit(f"{path}:{line_number}: invalid JSONL: {exc}") from exc if not isinstance(payload, dict): raise SystemExit(f"{path}:{line_number}: expected JSON object") records.append(payload) return records def _write_jsonl(path: Path, records: list[dict[str, Any]]) -> None: with path.open("w", encoding="utf-8") as handle: for record in records: handle.write(json.dumps(record, ensure_ascii=False, sort_keys=True)) handle.write("\n") def _write_json(path: Path, payload: dict[str, Any]) -> None: path.write_text( json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True) + "\n", encoding="utf-8", ) def main() -> int: return asyncio.run(main_async()) if __name__ == "__main__": raise SystemExit(main())