155 lines
5.2 KiB
Python
155 lines
5.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Run an approved NeMo/Nemotron request pack through NVIDIA NIM offline replay.
|
|
|
|
This command reads a sanitized request JSONL, calls only the configured chat
|
|
completion endpoint, and writes agent_nemotron_external_result_v1 JSONL. It
|
|
does not execute tools, mutate production systems, or read fixture labels.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import asyncio
|
|
import json
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
# Directory two levels above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[2]
API_SRC = ROOT.joinpath("apps", "api")
# Put the API source directory first on sys.path so the `src.services...`
# import that follows can be resolved when this file is run as a script.
sys.path.insert(0, os.fspath(API_SRC))
|
|
|
|
from src.services.agent_nemotron_external_runner import ( # noqa: E402
|
|
DEFAULT_CONCURRENCY,
|
|
DEFAULT_MAX_TOKENS,
|
|
DEFAULT_NEMOTRON_MODEL,
|
|
DEFAULT_NVIDIA_CHAT_COMPLETIONS_URL,
|
|
DEFAULT_TIMEOUT_SECONDS,
|
|
NemotronExternalRunnerConfig,
|
|
run_nemotron_external_replay,
|
|
)
|
|
|
|
|
|
async def main_async() -> int:
    """Parse the command line, run the external replay, and write its outputs.

    Steps, in order:

    1. Parse CLI arguments (several defaults fall back to environment
       variables, then to the runner module's constants).
    2. If ``--readiness`` is given and its JSON does not contain
       ``"ready": true``, write a failure report to ``--report`` and stop
       without reading any requests.
    3. Read the request JSONL, optionally truncate it to ``--max-records``,
       and hand it to ``run_nemotron_external_replay``.
    4. Write the results JSONL to ``--output``, the report JSON to
       ``--report``, and print the report as one JSON line on stdout.

    Returns:
        ``0`` when the runner report is valid; ``2`` when it is not, or when
        the readiness gate refused the run.

    Raises:
        SystemExit: from argparse on invalid arguments (including a negative
            ``--max-records`` or a non-numeric timeout), and from the JSON
            helpers when an input file is malformed.
    """
    parser = argparse.ArgumentParser(
        description="Run NeMo/Nemotron external offline replay."
    )
    parser.add_argument("--requests", required=True, help="sanitized NeMo request JSONL")
    parser.add_argument("--output", required=True, help="external result JSONL")
    parser.add_argument("--report", required=True, help="runner report JSON")
    parser.add_argument("--readiness", help="readiness report JSON; must be ready=true")
    parser.add_argument(
        "--api-key-env",
        default="NVIDIA_API_KEY",
        help="environment variable holding the NVIDIA/NIM API key",
    )
    parser.add_argument(
        "--base-url",
        default=os.getenv("NVIDIA_API_BASE_URL")
        or os.getenv("NIM_BASE_URL")
        or DEFAULT_NVIDIA_CHAT_COMPLETIONS_URL,
        help="chat completions endpoint",
    )
    parser.add_argument(
        "--model",
        default=os.getenv("NEMOTRON_TOOL_MODEL") or DEFAULT_NEMOTRON_MODEL,
        help="NVIDIA/Nemotron model name",
    )
    parser.add_argument(
        "--timeout-seconds",
        type=float,
        # Keep the default as a string: argparse applies ``type=float`` to
        # string defaults only when the flag is absent, so a malformed
        # NEMOTRON_TIMEOUT_SECONDS yields a normal usage error instead of a
        # traceback, and cannot break runs that pass --timeout-seconds.
        default=os.getenv("NEMOTRON_TIMEOUT_SECONDS") or str(DEFAULT_TIMEOUT_SECONDS),
    )
    parser.add_argument("--max-tokens", type=int, default=DEFAULT_MAX_TOKENS)
    parser.add_argument("--temperature", type=float, default=0.0)
    parser.add_argument("--concurrency", type=int, default=DEFAULT_CONCURRENCY)
    parser.add_argument("--max-records", type=int, help="optional local smoke limit")
    args = parser.parse_args()

    # A negative limit would slice records off the *end* of the list rather
    # than cap the run, which is never what a "limit" flag should do.
    if args.max_records is not None and args.max_records < 0:
        parser.error("--max-records must be non-negative")

    # Readiness gate: only an explicit boolean ``true`` lets the run proceed.
    readiness = _read_json(Path(args.readiness)) if args.readiness else None
    if readiness is not None and readiness.get("ready") is not True:
        blocked_report = {
            "schema_version": "agent_nemotron_external_runner_report_v1",
            "candidate_id": "nemo_nemotron_fabric",
            "requests": 0,
            "results": 0,
            "valid": False,
            "model": args.model,
            "failures": ["readiness_not_ready"],
        }
        _write_json(Path(args.report), blocked_report)
        return 2

    # The key is looked up by variable *name* so the secret itself never
    # appears on the command line; an unset variable yields an empty string.
    api_key = os.getenv(args.api_key_env, "")
    request_records = _read_jsonl(Path(args.requests))
    if args.max_records is not None:
        request_records = request_records[: args.max_records]

    results, report = await run_nemotron_external_replay(
        requests=request_records,
        config=NemotronExternalRunnerConfig(
            api_key=api_key,
            base_url=args.base_url,
            model=args.model,
            timeout_seconds=args.timeout_seconds,
            max_tokens=args.max_tokens,
            temperature=args.temperature,
            concurrency=args.concurrency,
        ),
    )

    report_payload = report.to_dict()
    _write_jsonl(Path(args.output), results)
    _write_json(Path(args.report), report_payload)
    print(json.dumps(report_payload, ensure_ascii=False, sort_keys=True))
    return 0 if report.valid else 2
|
|
|
|
|
|
def _read_json(path: Path) -> dict[str, Any]:
    """Load *path* as UTF-8 JSON and return its top-level object.

    Args:
        path: File to read.

    Returns:
        The decoded JSON object as a dict.

    Raises:
        SystemExit: if the file cannot be read or decoded as UTF-8, if its
            content is not valid JSON, or if the top-level value is not a
            JSON object. The message is prefixed with the path.
    """
    # Read and parse separately so a missing/unreadable file is not
    # misreported as "invalid JSON".
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        raise SystemExit(f"{path}: cannot read file: {exc}") from exc

    try:
        payload = json.loads(text)
    except (ValueError, RecursionError) as exc:
        # json.JSONDecodeError is a ValueError; RecursionError covers
        # pathologically nested input so the CLI still exits cleanly.
        raise SystemExit(f"{path}: invalid JSON: {exc}") from exc

    if not isinstance(payload, dict):
        raise SystemExit(f"{path}: expected JSON object")
    return payload
|
|
|
|
|
|
def _read_jsonl(path: Path) -> list[dict[str, Any]]:
    """Read a UTF-8 JSONL file and return its records in file order.

    Blank lines and lines whose first non-whitespace character is ``#`` are
    ignored. Every remaining line must decode to a JSON object.

    Raises:
        SystemExit: with a ``path:line`` prefix when a line is not valid JSON
            or does not hold a JSON object.
    """
    parsed: list[dict[str, Any]] = []
    with path.open(encoding="utf-8") as stream:
        for lineno, raw in enumerate(stream, start=1):
            text = raw.strip()
            is_payload = bool(text) and not text.startswith("#")
            if is_payload:
                try:
                    item = json.loads(text)
                except Exception as exc:
                    raise SystemExit(f"{path}:{lineno}: invalid JSONL: {exc}") from exc
                if isinstance(item, dict):
                    parsed.append(item)
                else:
                    raise SystemExit(f"{path}:{lineno}: expected JSON object")
    return parsed
|
|
|
|
|
|
def _write_jsonl(path: Path, records: list[dict[str, Any]]) -> None:
    """Write *records* to *path* as UTF-8 JSONL, replacing any existing file.

    Each record becomes one line of JSON with sorted keys and non-ASCII
    characters left unescaped.
    """
    with path.open("w", encoding="utf-8") as sink:
        # The generator is consumed lazily, so records are serialized and
        # written one at a time, in order.
        sink.writelines(
            json.dumps(entry, ensure_ascii=False, sort_keys=True) + "\n"
            for entry in records
        )
|
|
|
|
|
|
def _write_json(path: Path, payload: dict[str, Any]) -> None:
    """Write *payload* to *path* as pretty-printed UTF-8 JSON.

    Output uses a two-space indent, sorted keys, unescaped non-ASCII
    characters, and a single trailing newline. An existing file is replaced.
    """
    rendered = json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True)
    path.write_text(rendered + "\n", encoding="utf-8")
|
|
|
|
|
|
def main() -> int:
    """Synchronous entry point: run ``main_async`` to completion via ``asyncio.run``.

    Returns:
        The integer exit code produced by ``main_async``.
    """
    exit_code = asyncio.run(main_async())
    return exit_code
|
|
|
|
|
|
# Script entry: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|