Files
awoooi/scripts/agents/nemotron-run-external-offline.py
Your Name cfb866d055
Some checks failed
Ansible Lint / lint (push) Successful in 35s
CD Pipeline / tests (push) Failing after 13s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Code Review / ai-code-review (push) Failing after 11s
feat(governance): add agent market automation surfaces
2026-06-04 21:50:55 +08:00

155 lines
5.2 KiB
Python

#!/usr/bin/env python3
"""
Run an approved NeMo/Nemotron request pack through NVIDIA NIM offline replay.
This command reads a sanitized request JSONL, calls only the configured chat
completion endpoint, and writes agent_nemotron_external_result_v1 JSONL. It
does not execute tools, mutate production systems, or read fixture labels.
"""
from __future__ import annotations
import argparse
import asyncio
import json
import os
import sys
from pathlib import Path
from typing import Any
# Make the API package importable when this script is executed directly.
# ``parents[2]`` is the directory two levels above this script's own folder
# (scripts/agents/ -> scripts/ -> repository root); ``apps/api`` under it is
# pushed to the front of ``sys.path`` so the ``src.services...`` import below
# resolves without installing the package.
ROOT = Path(__file__).resolve().parents[2]
API_SRC = ROOT / "apps" / "api"
sys.path.insert(0, str(API_SRC))
from src.services.agent_nemotron_external_runner import ( # noqa: E402
DEFAULT_CONCURRENCY,
DEFAULT_MAX_TOKENS,
DEFAULT_NEMOTRON_MODEL,
DEFAULT_NVIDIA_CHAT_COMPLETIONS_URL,
DEFAULT_TIMEOUT_SECONDS,
NemotronExternalRunnerConfig,
run_nemotron_external_replay,
)
async def main_async() -> int:
    """Parse CLI arguments, run the offline replay, and write its artifacts.

    Flow:
      1. Parse command-line options (several defaults come from environment
         variables).
      2. If ``--readiness`` is given and the file's ``ready`` field is not
         exactly ``True``, write a failure report to ``--report`` and stop
         without reading the requests or calling the endpoint.
      3. Otherwise load the request JSONL, optionally truncate it to
         ``--max-records``, await ``run_nemotron_external_replay`` and write
         the result JSONL plus the runner report; the report is also printed
         to stdout as a single JSON line.

    Returns:
        ``0`` when the runner report is valid; ``2`` when the readiness gate
        is not satisfied or the runner report is not valid.

    Raises:
        SystemExit: raised by argparse on usage errors (including a malformed
            ``NEMOTRON_TIMEOUT_SECONDS`` or a negative ``--max-records``) and
            by the JSON/JSONL readers on unusable input files.
    """
    parser = argparse.ArgumentParser(
        description="Run NeMo/Nemotron external offline replay."
    )
    parser.add_argument("--requests", required=True, help="sanitized NeMo request JSONL")
    parser.add_argument("--output", required=True, help="external result JSONL")
    parser.add_argument("--report", required=True, help="runner report JSON")
    parser.add_argument("--readiness", help="readiness report JSON; must be ready=true")
    parser.add_argument(
        "--api-key-env",
        default="NVIDIA_API_KEY",
        help="environment variable holding the NVIDIA/NIM API key",
    )
    parser.add_argument(
        "--base-url",
        default=os.getenv("NVIDIA_API_BASE_URL")
        or os.getenv("NIM_BASE_URL")
        or DEFAULT_NVIDIA_CHAT_COMPLETIONS_URL,
        help="chat completions endpoint",
    )
    parser.add_argument(
        "--model",
        default=os.getenv("NEMOTRON_TOOL_MODEL") or DEFAULT_NEMOTRON_MODEL,
        help="NVIDIA/Nemotron model name",
    )
    parser.add_argument(
        "--timeout-seconds",
        type=float,
        # Hand argparse the raw environment string: string defaults are run
        # through ``type`` at parse time, so a malformed value becomes a
        # regular usage error (and is ignored when the flag is passed
        # explicitly) instead of a ValueError while building the parser.
        default=os.getenv("NEMOTRON_TIMEOUT_SECONDS") or float(DEFAULT_TIMEOUT_SECONDS),
    )
    parser.add_argument("--max-tokens", type=int, default=DEFAULT_MAX_TOKENS)
    parser.add_argument("--temperature", type=float, default=0.0)
    parser.add_argument("--concurrency", type=int, default=DEFAULT_CONCURRENCY)
    parser.add_argument("--max-records", type=int, help="optional local smoke limit")
    args = parser.parse_args()

    # A negative limit would be interpreted by slicing as "drop records from
    # the end", which is never what a smoke limit means.
    if args.max_records is not None and args.max_records < 0:
        parser.error("--max-records must be non-negative")

    # Readiness gate: only the literal JSON value ``true`` lets the run proceed.
    readiness = _read_json(Path(args.readiness)) if args.readiness else None
    if readiness is not None and readiness.get("ready") is not True:
        blocked_report = {
            "schema_version": "agent_nemotron_external_runner_report_v1",
            "candidate_id": "nemo_nemotron_fabric",
            "requests": 0,
            "results": 0,
            "valid": False,
            "model": args.model,
            "failures": ["readiness_not_ready"],
        }
        _write_json(Path(args.report), blocked_report)
        return 2

    # An unset variable yields an empty key, which is passed to the runner as-is.
    api_key = os.getenv(args.api_key_env, "")

    request_records = _read_jsonl(Path(args.requests))
    if args.max_records is not None:
        request_records = request_records[: args.max_records]

    runner_config = NemotronExternalRunnerConfig(
        api_key=api_key,
        base_url=args.base_url,
        model=args.model,
        timeout_seconds=args.timeout_seconds,
        max_tokens=args.max_tokens,
        temperature=args.temperature,
        concurrency=args.concurrency,
    )
    results, report = await run_nemotron_external_replay(
        requests=request_records,
        config=runner_config,
    )

    report_payload = report.to_dict()
    _write_jsonl(Path(args.output), results)
    _write_json(Path(args.report), report_payload)
    print(json.dumps(report_payload, ensure_ascii=False, sort_keys=True))
    return 0 if report.valid else 2
def _read_json(path: Path) -> dict[str, Any]:
    """Load *path* as UTF-8 JSON and return it, requiring a top-level object.

    Args:
        path: file to read.

    Returns:
        The decoded JSON object.

    Raises:
        SystemExit: with a ``<path>: ...`` message when the file cannot be
            read, is not valid UTF-8/JSON, or its top-level value is not an
            object.
    """
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except OSError as exc:
        # Missing or unreadable file: say so instead of blaming the JSON.
        raise SystemExit(f"{path}: cannot read file: {exc}") from exc
    except (ValueError, RecursionError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError;
        # RecursionError can be raised for pathologically nested documents.
        raise SystemExit(f"{path}: invalid JSON: {exc}") from exc
    if not isinstance(payload, dict):
        raise SystemExit(f"{path}: expected JSON object")
    return payload
def _read_jsonl(path: Path) -> list[dict[str, Any]]:
    """Read a UTF-8 JSONL file into a list of JSON objects.

    Blank lines and lines whose first non-whitespace character is ``#`` are
    skipped. Line numbers in error messages are 1-based and count skipped
    lines too.

    Args:
        path: JSONL file to read.

    Returns:
        The decoded objects in file order.

    Raises:
        SystemExit: when the file cannot be read or decoded, or when a line
            is not valid JSON or is not a JSON object.
    """
    records: list[dict[str, Any]] = []
    try:
        with path.open(encoding="utf-8") as handle:
            for line_number, raw_line in enumerate(handle, start=1):
                text = raw_line.strip()
                if not text or text.startswith("#"):
                    continue
                try:
                    payload = json.loads(text)
                except (ValueError, RecursionError) as exc:
                    raise SystemExit(
                        f"{path}:{line_number}: invalid JSONL: {exc}"
                    ) from exc
                if not isinstance(payload, dict):
                    raise SystemExit(f"{path}:{line_number}: expected JSON object")
                records.append(payload)
    except OSError as exc:
        # Missing/unreadable file: exit with a message rather than a traceback.
        raise SystemExit(f"{path}: cannot read file: {exc}") from exc
    except UnicodeDecodeError as exc:
        # Raised while iterating the handle when the bytes are not UTF-8.
        raise SystemExit(f"{path}: invalid UTF-8: {exc}") from exc
    return records
def _write_jsonl(path: Path, records: list[dict[str, Any]]) -> None:
    """Write *records* to *path* as UTF-8 JSONL, one sorted-key object per line.

    Missing parent directories are created first so that a replay's results
    are not lost to a ``FileNotFoundError`` at the very last step. An existing
    file at *path* is overwritten.

    Args:
        path: destination file.
        records: JSON-serializable objects, written in the given order.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n"
            for record in records
        )
def _write_json(path: Path, payload: dict[str, Any]) -> None:
    """Write *payload* to *path* as pretty-printed UTF-8 JSON.

    The output uses a two-space indent, sorted keys, unescaped non-ASCII
    characters and a trailing newline. Missing parent directories are created
    first; an existing file at *path* is overwritten.

    Args:
        path: destination file.
        payload: JSON-serializable object to write.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True)
    path.write_text(rendered + "\n", encoding="utf-8")
def main() -> int:
    """Synchronous entry point: run ``main_async`` to completion.

    Returns:
        The integer exit status produced by ``main_async``.
    """
    exit_status = asyncio.run(main_async())
    return exit_status
if __name__ == "__main__":
    # Propagate main()'s integer status as the process exit code.
    sys.exit(main())