88 lines
2.8 KiB
Python
88 lines
2.8 KiB
Python
#!/usr/bin/env python3
"""
Build NeMo/Nemotron external replay request JSONL from AWOOOI candidate inputs.

This script does not call NVIDIA APIs, NIM endpoints, tools, or LLMs.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
|
|
# Directory two levels above the folder containing this script
# (presumably the repository root -- confirm against the checkout layout).
ROOT = Path(__file__).resolve().parents[2]
# Source tree of the API application underneath ROOT.
API_SRC = ROOT.joinpath("apps", "api")
# Put the API sources first on the import path so that the ``src.services``
# import that follows is resolved from there.
sys.path.insert(0, str(API_SRC))
|
|
|
|
from src.services.agent_nemotron_replay_adapter import ( # noqa: E402
|
|
build_nemotron_replay_requests,
|
|
)
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Build the Nemotron replay request pack and print a build report.

    Reads candidate inputs from the ``--inputs`` JSONL file, optionally keeps
    only the first ``--max-records`` of them, converts them with
    ``build_nemotron_replay_requests`` and writes one JSON object per line to
    ``--output``. The build report is printed to stdout as a single JSON line
    and, when ``--report`` is given, is also written to that path as indented
    JSON.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) lets argparse read them from ``sys.argv``.

    Returns:
        ``0`` once the request pack and the report have been written.

    Raises:
        SystemExit: If the command line is invalid (including a negative
            ``--max-records``) or the input file cannot be parsed as JSONL.
    """
    parser = argparse.ArgumentParser(
        description="Build NeMo/Nemotron replay request JSONL."
    )
    parser.add_argument("--inputs", required=True, help="candidate input JSONL")
    parser.add_argument("--output", required=True, help="Nemotron request JSONL")
    parser.add_argument("--report", help="optional request-pack build report JSON")
    parser.add_argument(
        "--candidate-variant-id",
        help="optional Nemotron candidate variant id, e.g. contract tuned v1",
    )
    parser.add_argument("--max-records", type=int, help="optional local smoke limit")
    args = parser.parse_args(argv)

    # A negative limit would be used as a slice bound counted from the end of
    # the list (``[:-1]`` silently drops the last record) rather than capping
    # the number of records, so reject it before doing any work.
    if args.max_records is not None and args.max_records < 0:
        parser.error("--max-records must be a non-negative integer")

    candidate_inputs = _read_jsonl(Path(args.inputs))
    if args.max_records is not None:
        candidate_inputs = candidate_inputs[: args.max_records]
    requests = build_nemotron_replay_requests(
        candidate_inputs,
        candidate_variant_id=args.candidate_variant_id,
    )

    # One request per line; sort_keys keeps the serialized key order stable.
    with Path(args.output).open("w", encoding="utf-8") as handle:
        for request in requests:
            handle.write(json.dumps(request.to_dict(), ensure_ascii=False, sort_keys=True))
            handle.write("\n")

    report = {
        "schema_version": "agent_nemotron_request_pack_build_report_v1",
        "inputs": args.inputs,
        "output": args.output,
        "records": len(requests),
        "external_calls": False,
        "request_only": True,
        "candidate_id": "nemo_nemotron_fabric",
        "candidate_variant_id": args.candidate_variant_id,
        "max_records": args.max_records,
    }
    if args.report:
        Path(args.report).write_text(
            json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True) + "\n",
            encoding="utf-8",
        )
    # The compact report always goes to stdout, whether or not --report is set.
    print(json.dumps(report, ensure_ascii=False, sort_keys=True))
    return 0
|
|
|
|
|
|
def _read_jsonl(path: Path) -> list[dict[str, Any]]:
    """Load the JSON objects stored one per line in *path*.

    Lines that are empty after stripping whitespace, or that start with ``#``
    after stripping, are skipped.

    Args:
        path: UTF-8 encoded JSONL file to read.

    Returns:
        The decoded objects, in file order.

    Raises:
        SystemExit: With a ``<path>:<line>: invalid JSONL: ...`` message when
            a line is not valid JSON or does not decode to a JSON object.
    """
    records: list[dict[str, Any]] = []
    with path.open(encoding="utf-8") as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            # Keep the try body to the decode call so only parse failures are
            # reported as invalid JSONL.
            try:
                record = json.loads(line)
            except (json.JSONDecodeError, RecursionError) as exc:
                raise SystemExit(f"{path}:{line_number}: invalid JSONL: {exc}") from exc
            # Valid JSON that is not an object (list, number, string, ...)
            # would break the declared return type; fail here with the line
            # number instead of passing it on to the caller.
            if not isinstance(record, dict):
                raise SystemExit(
                    f"{path}:{line_number}: invalid JSONL: expected a JSON object, "
                    f"got {type(record).__name__}"
                )
            records.append(record)
    return records
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|