Files
awoooi/apps/api/src/services/agent_nemotron_external_runner_readiness.py
Your Name cfb866d055
Some checks failed
Ansible Lint / lint (push) Successful in 35s
CD Pipeline / tests (push) Failing after 13s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Code Review / ai-code-review (push) Failing after 11s
feat(governance): add agent market automation surfaces
2026-06-04 21:50:55 +08:00

418 lines
15 KiB
Python

"""
NeMo/Nemotron External Runner Readiness Gate
============================================
Combines the external-runner manifest, sanitize report, and sanitized preflight
report into one pre-execution decision. This module is local and deterministic:
it does not call NIM, NVIDIA APIs, tools, production systems, or LLMs.
"""
from __future__ import annotations

from collections.abc import Mapping
from dataclasses import dataclass, field
from typing import Any

from src.services.agent_nemotron_replay_adapter import NEMOTRON_CANDIDATE_ID
# Schema identifier stamped into the dict produced by
# NemotronExternalRunnerReadinessReport.to_dict().
READINESS_SCHEMA_VERSION = "agent_nemotron_external_runner_readiness_v1"
# "schema_version" values the readiness gates require on the manifest, the
# sanitize report, and the sanitized preflight report respectively.
MANIFEST_SCHEMA_VERSION = "agent_nemotron_external_runner_manifest_v1"
SANITIZE_SCHEMA_VERSION = "agent_nemotron_request_pack_sanitize_report_v1"
PREFLIGHT_SCHEMA_VERSION = "agent_nemotron_external_runner_preflight_v1"
# The only manifest "status" value accepted by the readiness gates.
READY_MANIFEST_STATUS = "ready_for_approved_external_offline_runner_with_sanitized_pack"
# Default for the `minimum_records` parameter of
# evaluate_nemotron_external_runner_readiness().
DEFAULT_MINIMUM_RECORDS = 50
# Field names that must all appear in the manifest's
# external_runner_output["forbidden_model_output_fields"] for the output
# contract gate to pass.
# NOTE(review): judging by the name, these are grading/label fields the
# external model must not emit about itself — confirm against the schema.
_SELF_GRADING_FIELDS = {
    "evaluation_labels",
    "verification_result",
    "execution_success",
    "execution_error",
    "self_healing_score",
    "rca_correct",
    "tool_dry_run_pass",
    "repair_success",
    "false_repair",
}
@dataclass(frozen=True)
class NemotronExternalRunnerReadinessReport:
    """Immutable result of the readiness evaluation for one external-runner run.

    Scalar fields identify the run and carry the verdict; the container
    fields hold the per-gate detail and default to empty containers.
    """

    # Identity of the evaluated run, as read from the manifest.
    candidate_id: str
    run_id: str
    # Overall verdict and its string form.
    ready: bool
    decision: str
    # Record threshold the evaluation was run with.
    minimum_records: int
    # Gate name -> pass/fail, plus the failure codes of the failed gates.
    gates: dict[str, bool] = field(default_factory=dict)
    failures: list[str] = field(default_factory=list)
    # Supporting detail copied out of the inputs.
    counts: dict[str, Any] = field(default_factory=dict)
    artifacts: dict[str, Any] = field(default_factory=dict)
    safety: dict[str, Any] = field(default_factory=dict)
    # Follow-up steps for the operator.
    next_actions: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the report as a plain dict, led by its schema version.

        Container fields are shallow-copied, so replacing or adding a
        top-level entry in the result does not touch the report; nested
        values are still shared.
        """
        payload: dict[str, Any] = {"schema_version": READINESS_SCHEMA_VERSION}
        for scalar_name in (
            "candidate_id",
            "run_id",
            "ready",
            "decision",
            "minimum_records",
        ):
            payload[scalar_name] = getattr(self, scalar_name)
        payload["gates"] = dict(self.gates)
        payload["failures"] = list(self.failures)
        payload["counts"] = dict(self.counts)
        payload["artifacts"] = dict(self.artifacts)
        payload["safety"] = dict(self.safety)
        payload["next_actions"] = list(self.next_actions)
        return payload
def evaluate_nemotron_external_runner_readiness(
    *,
    manifest: dict[str, Any],
    sanitize_report: dict[str, Any],
    sanitized_preflight: dict[str, Any],
    minimum_records: int = DEFAULT_MINIMUM_RECORDS,
) -> NemotronExternalRunnerReadinessReport:
    """Evaluate whether the sanitized request pack is ready for approval.

    Every gate is evaluated (there is no early exit), so the returned report
    lists all failures at once, in gate order. The report is ``ready`` only
    when no gate failed; ``ready`` means "ready for approval", not approved.

    Args:
        manifest: External-runner manifest document.
        sanitize_report: Report produced when the request pack was sanitized.
        sanitized_preflight: Preflight report computed on the sanitized pack.
        minimum_records: Lowest request count accepted from each of the
            three documents.

    Returns:
        A ``NemotronExternalRunnerReadinessReport`` with per-gate results,
        failure codes, the counts seen in each document, artifact pointers,
        a safety summary, and the next actions for the operator.
    """
    failures: list[str] = []
    gates: dict[str, bool] = {}

    def gate(name: str, passed: bool, failure: str | None = None) -> None:
        """Record one gate; a failed gate appends its failure code."""
        gates[name] = bool(passed)
        if not passed:
            failures.append(failure or name)

    def is_zero_count(value: Any) -> bool:
        """Return True only for a genuine integer zero.

        A bare ``value == 0`` also accepts ``False`` and ``0.0``; routing the
        check through ``_coerce_int`` keeps these gates as strict about bools
        and non-integers as the count-matching gate already is.
        """
        return _coerce_int(value) == 0

    candidate_id = str(manifest.get("candidate_id") or "")
    run_id = str(manifest.get("run_id") or "")
    manifest_counts = _manifest_counts(manifest)
    sanitize_counts = _report_counts(sanitize_report)
    preflight_counts = _report_counts(sanitized_preflight)
    # Read once; several gates below inspect the same manifest nodes.
    request_pack = _manifest_request_pack(manifest)
    candidate_inputs = _manifest_candidate_inputs(manifest)
    manifest_requests = _count_value(manifest_counts, "requests")

    # --- Manifest identity and safety flags -------------------------------
    gate(
        "manifest_schema_valid",
        manifest.get("schema_version") == MANIFEST_SCHEMA_VERSION,
        "manifest_schema_mismatch",
    )
    gate(
        "candidate_is_nemotron_fabric",
        candidate_id == NEMOTRON_CANDIDATE_ID,
        "manifest_candidate_mismatch",
    )
    gate("run_id_present", bool(run_id.strip()), "manifest_run_id_missing")
    gate(
        "manifest_status_sanitized_ready",
        manifest.get("status") == READY_MANIFEST_STATUS,
        "manifest_status_not_sanitized_ready",
    )
    # The flags are compared by identity so that a missing or non-boolean
    # value fails the gate instead of being treated as truthy/falsy.
    gate(
        "external_calls_not_performed_by_codex",
        manifest.get("external_calls_performed_by_codex") is False,
        "external_calls_already_performed_by_codex",
    )
    gate(
        "external_execution_still_requires_approval",
        manifest.get("approval_required_before_external_execution") is True,
        "approval_required_flag_missing",
    )
    gate(
        "raw_artifacts_not_committed",
        manifest.get("raw_artifacts_committed") is False,
        "raw_artifacts_committed_or_unknown",
    )

    # --- Sanitize report --------------------------------------------------
    gate(
        "sanitize_report_schema_valid",
        sanitize_report.get("schema_version") == SANITIZE_SCHEMA_VERSION,
        "sanitize_report_schema_mismatch",
    )
    gate(
        "sanitize_report_valid",
        sanitize_report.get("valid") is True,
        "sanitize_report_invalid",
    )
    gate(
        "sanitize_preflight_valid",
        sanitize_report.get("preflight_valid") is True,
        "sanitize_report_preflight_invalid",
    )
    gate(
        "sanitize_failures_empty",
        not (sanitize_report.get("failures") or [])
        and not (sanitize_report.get("preflight_failures") or []),
        "sanitize_report_has_failures",
    )
    gate(
        "sanitize_sensitive_markers_removed",
        is_zero_count(sanitize_report.get("sensitive_marker_records_after")),
        "sanitize_sensitive_markers_remaining",
    )

    # --- Sanitized preflight report ---------------------------------------
    gate(
        "sanitized_preflight_schema_valid",
        sanitized_preflight.get("schema_version") == PREFLIGHT_SCHEMA_VERSION,
        "sanitized_preflight_schema_mismatch",
    )
    gate(
        "sanitized_preflight_candidate_valid",
        sanitized_preflight.get("candidate_id") == NEMOTRON_CANDIDATE_ID,
        "sanitized_preflight_candidate_mismatch",
    )
    gate(
        "sanitized_preflight_valid",
        sanitized_preflight.get("valid") is True,
        "sanitized_preflight_invalid",
    )
    gate(
        "sanitized_preflight_failures_empty",
        not sanitized_preflight.get("failures"),
        "sanitized_preflight_has_failures",
    )
    gate(
        "no_missing_extra_or_duplicate_records",
        _preflight_record_sets_clean(sanitized_preflight),
        "sanitized_preflight_record_set_not_clean",
    )

    # --- Leak / sensitivity checks across preflight and manifest ----------
    gate(
        "no_label_leaks",
        is_zero_count(sanitized_preflight.get("candidate_input_label_leak_records"))
        and is_zero_count(sanitized_preflight.get("request_context_label_leak_records"))
        and is_zero_count(request_pack.get("label_leak_records"))
        and is_zero_count(candidate_inputs.get("label_leak_records")),
        "label_leak_records_present",
    )
    gate(
        "no_sensitive_context_markers",
        sanitized_preflight.get("sensitive_marker_present_in_context") is False
        and is_zero_count(sanitized_preflight.get("sensitive_marker_records"))
        and is_zero_count(request_pack.get("sensitive_marker_records")),
        "sensitive_context_markers_present",
    )

    # --- Request pack shape -----------------------------------------------
    gate(
        "request_pack_is_request_only",
        sanitized_preflight.get("request_only_records")
        == sanitized_preflight.get("requests")
        and request_pack.get("request_only_records") == request_pack.get("records"),
        "request_pack_not_fully_request_only",
    )
    gate(
        "request_pack_not_replacement_evidence",
        sanitized_preflight.get("not_replacement_evidence_records")
        == sanitized_preflight.get("requests")
        and request_pack.get("not_replacement_evidence_records")
        == request_pack.get("records"),
        "request_pack_contains_replacement_evidence",
    )

    # --- Record counts ----------------------------------------------------
    gate(
        "counts_match_across_reports",
        _counts_match(manifest_counts, sanitize_counts, preflight_counts),
        "record_counts_mismatch",
    )
    gate(
        "minimum_records_met",
        manifest_requests >= minimum_records
        and _count_value(sanitize_counts, "requests") >= minimum_records
        and _count_value(preflight_counts, "requests") >= minimum_records,
        "minimum_records_not_met",
    )

    # --- Artifact locations and the post-run contract ---------------------
    gate(
        "manifest_uses_sanitized_tmp_artifacts",
        _uses_sanitized_tmp_artifacts(manifest),
        "manifest_not_pointing_to_sanitized_tmp_artifacts",
    )
    gate(
        "external_output_contract_declared",
        _external_output_contract_declared(
            manifest,
            expected_records=manifest_requests,
        ),
        "external_output_contract_incomplete",
    )
    gate(
        "post_external_finalizer_declared",
        bool(str(manifest.get("preferred_post_external_run_command") or "").strip()),
        "preferred_post_external_run_command_missing",
    )

    ready = not failures
    return NemotronExternalRunnerReadinessReport(
        candidate_id=candidate_id,
        run_id=run_id,
        ready=ready,
        decision="ready_for_approval" if ready else "blocked",
        minimum_records=minimum_records,
        gates=gates,
        failures=failures,
        counts={
            "manifest": manifest_counts,
            "sanitize_report": sanitize_counts,
            "sanitized_preflight": preflight_counts,
        },
        artifacts=_artifacts(manifest),
        safety=_safety(manifest, sanitized_preflight),
        next_actions=_next_actions(manifest, ready=ready),
    )
def _manifest_counts(manifest: dict[str, Any]) -> dict[str, Any]:
    """Collect the record counts the manifest declares for each artifact.

    Values are returned exactly as stored (``None`` when absent); the
    "requests" count comes from the manifest's request pack node.
    """
    fixtures_node = _manifest_fixtures(manifest)
    counts: dict[str, Any] = {}
    counts["fixtures"] = fixtures_node.get("records")
    counts["candidate_inputs"] = _manifest_candidate_inputs(manifest).get("records")
    counts["requests"] = _manifest_request_pack(manifest).get("records")
    counts["expected_action_marker_records"] = fixtures_node.get(
        "expected_action_marker_records"
    )
    return counts
def _report_counts(report: dict[str, Any]) -> dict[str, Any]:
return {
"fixtures": report.get("fixtures"),
"candidate_inputs": report.get("candidate_inputs"),
"requests": report.get("requests"),
"expected_action_marker_records": report.get("expected_action_marker_records"),
}
def _counts_match(*counts: dict[str, Any]) -> bool:
    """Return True when all given count dicts agree with each other.

    "fixtures", "candidate_inputs" and "requests" are mandatory: each must be
    a real integer in every dict and identical across dicts.
    "expected_action_marker_records" is optional: dicts where it is ``None``
    are skipped, but every value that IS present must be a real integer and
    all present values must be equal.

    Returns False when called with no dicts at all.
    """
    for key in ("fixtures", "candidate_inputs", "requests"):
        values = [_coerce_int(count.get(key)) for count in counts]
        if None in values or len(set(values)) != 1:
            return False

    marker_values: list[int] = []
    for count in counts:
        raw_marker = count.get("expected_action_marker_records")
        if raw_marker is None:
            # Optional field: a document that omits it is not a mismatch.
            continue
        marker = _coerce_int(raw_marker)
        if marker is None:
            # Present but not an integer. Without this check all such values
            # would collapse to None and different junk values ("a" vs "b")
            # would be reported as matching.
            return False
        marker_values.append(marker)
    return len(set(marker_values)) <= 1
def _count_value(counts: dict[str, Any], key: str) -> int:
    """Return ``counts[key]`` as an int, or 0 when it is absent or not an integer."""
    coerced = _coerce_int(counts.get(key))
    return coerced if coerced else 0
def _coerce_int(value: Any) -> int | None:
if isinstance(value, bool):
return None
if isinstance(value, int):
return value
return None
def _preflight_record_sets_clean(preflight: dict[str, Any]) -> bool:
fields = (
"duplicate_fixtures",
"duplicate_candidate_inputs",
"duplicate_requests",
"missing_candidate_inputs",
"missing_requests",
"unexpected_candidate_inputs",
"unexpected_requests",
)
return all(not preflight.get(field) for field in fields)
def _uses_sanitized_tmp_artifacts(manifest: dict[str, Any]) -> bool:
    """Check that fixtures, candidate inputs and the request pack point at sanitized /tmp files.

    Each node's "local_path" must start with "/tmp/" and contain the text
    "sanitized". If a node also names a "source_unsanitized_path", it must
    differ from the local path.
    """
    artifact_nodes = (
        _manifest_fixtures(manifest),
        _manifest_candidate_inputs(manifest),
        _manifest_request_pack(manifest),
    )
    for artifact in artifact_nodes:
        local = str(artifact.get("local_path") or "")
        in_sanitized_tmp = local.startswith("/tmp/") and "sanitized" in local
        if not in_sanitized_tmp:
            return False
        unsanitized = str(artifact.get("source_unsanitized_path") or "")
        # A sanitized artifact must not be the very file it was derived from.
        if unsanitized and unsanitized == local:
            return False
    return True
def _external_output_contract_declared(
manifest: dict[str, Any],
*,
expected_records: int,
) -> bool:
output = dict(manifest.get("external_runner_output") or {})
forbidden_fields = {str(field) for field in output.get("forbidden_model_output_fields") or []}
return (
str(output.get("required_path") or "").startswith("/tmp/")
and output.get("schema") == "docs/schemas/agent_nemotron_external_result_v1.schema.json"
and output.get("required_records") == expected_records
and output.get("one_result_per_request") is True
and _SELF_GRADING_FIELDS.issubset(forbidden_fields)
)
def _artifacts(manifest: dict[str, Any]) -> dict[str, Any]:
    """Gather the artifact pointers from the manifest for the readiness report.

    Returns the request pack, candidate inputs and fixtures nodes, the
    manifest's sanitize-report and sanitized-preflight entries, the path
    where external results must be written, and the preferred post-run
    command. Absent entries are reported as ``None``.

    A malformed "external_runner_output" node (anything that is not a
    mapping) is treated as absent, so building the report does not raise.
    """
    raw_output = manifest.get("external_runner_output")
    output = raw_output if isinstance(raw_output, Mapping) else {}
    return {
        "request_pack": _manifest_request_pack(manifest),
        "candidate_inputs": _manifest_candidate_inputs(manifest),
        "fixtures": _manifest_fixtures(manifest),
        "sanitize_report": manifest.get("sanitize_report"),
        "sanitized_preflight_report": manifest.get(
            "external_runner_preflight_report_sanitized"
        ),
        "external_results_required_path": output.get("required_path"),
        "preferred_post_external_run_command": manifest.get(
            "preferred_post_external_run_command"
        ),
    }
def _safety(
manifest: dict[str, Any],
preflight: dict[str, Any],
) -> dict[str, Any]:
return {
"external_calls_performed_by_codex": manifest.get(
"external_calls_performed_by_codex"
),
"approval_required_before_external_execution": manifest.get(
"approval_required_before_external_execution"
),
"raw_artifacts_committed": manifest.get("raw_artifacts_committed"),
"sensitive_marker_records": preflight.get("sensitive_marker_records"),
"candidate_input_label_leak_records": preflight.get(
"candidate_input_label_leak_records"
),
"request_context_label_leak_records": preflight.get(
"request_context_label_leak_records"
),
"request_only_records": preflight.get("request_only_records"),
"not_replacement_evidence_records": preflight.get(
"not_replacement_evidence_records"
),
}
def _next_actions(manifest: dict[str, Any], *, ready: bool) -> list[str]:
if not ready:
return [
"Fix the readiness failures.",
"Regenerate sanitized fixtures, candidate inputs, and requests if needed.",
"Rerun sanitized preflight and readiness before any external execution.",
]
return [
"Obtain explicit commander approval before external execution.",
"Run the approved offline NeMo/NIM/Nemotron runner against the sanitized request pack only.",
"Write external results to "
f"{(manifest.get('external_runner_output') or {}).get('required_path')}.",
"Run the preferred post-external finalizer command.",
]
def _manifest_request_pack(manifest: dict[str, Any]) -> dict[str, Any]:
return dict(manifest.get("request_pack") or {})
def _manifest_candidate_inputs(manifest: dict[str, Any]) -> dict[str, Any]:
return dict(manifest.get("candidate_inputs") or {})
def _manifest_fixtures(manifest: dict[str, Any]) -> dict[str, Any]:
return dict(manifest.get("fixtures") or {})