awoooi/apps/api/src/services/agent_nemotron_external_runner_readiness.py

"""
NeMo/Nemotron External Runner Readiness Gate
============================================

Combines the external-runner manifest, sanitize report, and sanitized preflight
report into one pre-execution decision. This module is local and deterministic:
it does not call NIM, NVIDIA APIs, tools, production systems, or LLMs.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any

from src.services.agent_nemotron_replay_adapter import NEMOTRON_CANDIDATE_ID

# Schema identifier written into every readiness report by ``to_dict``.
READINESS_SCHEMA_VERSION = "agent_nemotron_external_runner_readiness_v1"
# ``schema_version`` values the manifest, sanitize report, and sanitized
# preflight report must carry to pass their respective schema gates.
MANIFEST_SCHEMA_VERSION = "agent_nemotron_external_runner_manifest_v1"
SANITIZE_SCHEMA_VERSION = "agent_nemotron_request_pack_sanitize_report_v1"
PREFLIGHT_SCHEMA_VERSION = "agent_nemotron_external_runner_preflight_v1"
# The only manifest ``status`` value accepted by the readiness evaluation.
READY_MANIFEST_STATUS = "ready_for_approved_external_offline_runner_with_sanitized_pack"
# Default lower bound on the ``requests`` count reported by each document.
DEFAULT_MINIMUM_RECORDS = 50

# Field names the manifest's ``external_runner_output`` section must list under
# ``forbidden_model_output_fields`` for the output-contract gate to pass.
_SELF_GRADING_FIELDS = {
    "evaluation_labels",
    "verification_result",
    "execution_success",
    "execution_error",
    "self_healing_score",
    "rca_correct",
    "tool_dry_run_pass",
    "repair_success",
    "false_repair",
}


@dataclass(frozen=True)
class NemotronExternalRunnerReadinessReport:
    """Immutable outcome of the external-runner readiness evaluation.

    The scalar fields identify the run and carry the verdict; the container
    fields default to empty and hold the per-gate results, failure codes,
    record counts, artifact references, safety flags, and follow-up actions.
    """

    # Identification and verdict.
    candidate_id: str
    run_id: str
    ready: bool
    decision: str
    minimum_records: int
    # Supporting detail; each instance gets its own empty container by default.
    gates: dict[str, bool] = field(default_factory=dict)
    failures: list[str] = field(default_factory=list)
    counts: dict[str, Any] = field(default_factory=dict)
    artifacts: dict[str, Any] = field(default_factory=dict)
    safety: dict[str, Any] = field(default_factory=dict)
    next_actions: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the report as a plain dict tagged with its schema version.

        Container attributes are shallow-copied, so replacing top-level
        entries in the returned payload does not affect this report.
        """
        header: dict[str, Any] = {
            "schema_version": READINESS_SCHEMA_VERSION,
            "candidate_id": self.candidate_id,
            "run_id": self.run_id,
            "ready": self.ready,
            "decision": self.decision,
            "minimum_records": self.minimum_records,
        }
        details: dict[str, Any] = {
            "gates": dict(self.gates),
            "failures": list(self.failures),
            "counts": dict(self.counts),
            "artifacts": dict(self.artifacts),
            "safety": dict(self.safety),
            "next_actions": list(self.next_actions),
        }
        # Merging keeps the header keys first, followed by the detail keys.
        return {**header, **details}


def evaluate_nemotron_external_runner_readiness(
    *,
    manifest: dict[str, Any],
    sanitize_report: dict[str, Any],
    sanitized_preflight: dict[str, Any],
    minimum_records: int = DEFAULT_MINIMUM_RECORDS,
) -> NemotronExternalRunnerReadinessReport:
    """Evaluate whether the sanitized request pack is ready for approval.

    Every gate is evaluated and recorded even after an earlier one fails, so
    the returned report lists all blocking reasons at once.

    Args:
        manifest: The external-runner manifest document.
        sanitize_report: The sanitize report for the request pack.
        sanitized_preflight: The preflight report of the sanitized pack.
        minimum_records: Lowest ``requests`` count each document must report.

    Returns:
        A report whose ``ready`` flag is true only when no gate failed; its
        ``decision`` is ``"ready_for_approval"`` or ``"blocked"``.
    """
    failures: list[str] = []
    gates: dict[str, bool] = {}

    def gate(name: str, passed: bool, failure: str | None = None) -> None:
        # Record the outcome under ``name``; a failed gate also appends its
        # failure code (falling back to the gate name) to ``failures``.
        gates[name] = bool(passed)
        if not passed:
            failures.append(failure or name)

    def is_zero_count(value: Any) -> bool:
        # ``bool`` subclasses ``int``, so ``False == 0`` (and ``0.0 == 0``) is
        # true. A count gate must only pass on a genuine integer zero, which
        # matches how ``_coerce_int`` treats counts elsewhere in this module.
        return _coerce_int(value) == 0

    candidate_id = str(manifest.get("candidate_id") or "")
    run_id = str(manifest.get("run_id") or "")
    # The manifest sections are read-only here, so look each one up once.
    request_pack = _manifest_request_pack(manifest)
    candidate_inputs = _manifest_candidate_inputs(manifest)
    manifest_counts = _manifest_counts(manifest)
    sanitize_counts = _report_counts(sanitize_report)
    preflight_counts = _report_counts(sanitized_preflight)

    # --- Manifest identity and safety flags -------------------------------
    gate(
        "manifest_schema_valid",
        manifest.get("schema_version") == MANIFEST_SCHEMA_VERSION,
        "manifest_schema_mismatch",
    )
    gate(
        "candidate_is_nemotron_fabric",
        candidate_id == NEMOTRON_CANDIDATE_ID,
        "manifest_candidate_mismatch",
    )
    gate("run_id_present", bool(run_id.strip()), "manifest_run_id_missing")
    gate(
        "manifest_status_sanitized_ready",
        manifest.get("status") == READY_MANIFEST_STATUS,
        "manifest_status_not_sanitized_ready",
    )
    # The flag gates use identity checks so that a missing or non-boolean
    # value counts as a failure rather than being coerced.
    gate(
        "external_calls_not_performed_by_codex",
        manifest.get("external_calls_performed_by_codex") is False,
        "external_calls_already_performed_by_codex",
    )
    gate(
        "external_execution_still_requires_approval",
        manifest.get("approval_required_before_external_execution") is True,
        "approval_required_flag_missing",
    )
    gate(
        "raw_artifacts_not_committed",
        manifest.get("raw_artifacts_committed") is False,
        "raw_artifacts_committed_or_unknown",
    )

    # --- Sanitize report ---------------------------------------------------
    gate(
        "sanitize_report_schema_valid",
        sanitize_report.get("schema_version") == SANITIZE_SCHEMA_VERSION,
        "sanitize_report_schema_mismatch",
    )
    gate(
        "sanitize_report_valid",
        sanitize_report.get("valid") is True,
        "sanitize_report_invalid",
    )
    gate(
        "sanitize_preflight_valid",
        sanitize_report.get("preflight_valid") is True,
        "sanitize_report_preflight_invalid",
    )
    gate(
        "sanitize_failures_empty",
        not (sanitize_report.get("failures") or [])
        and not (sanitize_report.get("preflight_failures") or []),
        "sanitize_report_has_failures",
    )
    gate(
        "sanitize_sensitive_markers_removed",
        is_zero_count(sanitize_report.get("sensitive_marker_records_after")),
        "sanitize_sensitive_markers_remaining",
    )

    # --- Sanitized preflight report ----------------------------------------
    gate(
        "sanitized_preflight_schema_valid",
        sanitized_preflight.get("schema_version") == PREFLIGHT_SCHEMA_VERSION,
        "sanitized_preflight_schema_mismatch",
    )
    gate(
        "sanitized_preflight_candidate_valid",
        sanitized_preflight.get("candidate_id") == NEMOTRON_CANDIDATE_ID,
        "sanitized_preflight_candidate_mismatch",
    )
    gate(
        "sanitized_preflight_valid",
        sanitized_preflight.get("valid") is True,
        "sanitized_preflight_invalid",
    )
    gate(
        "sanitized_preflight_failures_empty",
        not sanitized_preflight.get("failures"),
        "sanitized_preflight_has_failures",
    )
    gate(
        "no_missing_extra_or_duplicate_records",
        _preflight_record_sets_clean(sanitized_preflight),
        "sanitized_preflight_record_set_not_clean",
    )

    # --- Cross-document content checks -------------------------------------
    gate(
        "no_label_leaks",
        is_zero_count(sanitized_preflight.get("candidate_input_label_leak_records"))
        and is_zero_count(
            sanitized_preflight.get("request_context_label_leak_records")
        )
        and is_zero_count(request_pack.get("label_leak_records"))
        and is_zero_count(candidate_inputs.get("label_leak_records")),
        "label_leak_records_present",
    )
    gate(
        "no_sensitive_context_markers",
        sanitized_preflight.get("sensitive_marker_present_in_context") is False
        and is_zero_count(sanitized_preflight.get("sensitive_marker_records"))
        and is_zero_count(request_pack.get("sensitive_marker_records")),
        "sensitive_context_markers_present",
    )
    gate(
        "request_pack_is_request_only",
        sanitized_preflight.get("request_only_records")
        == sanitized_preflight.get("requests")
        and request_pack.get("request_only_records") == request_pack.get("records"),
        "request_pack_not_fully_request_only",
    )
    gate(
        "request_pack_not_replacement_evidence",
        sanitized_preflight.get("not_replacement_evidence_records")
        == sanitized_preflight.get("requests")
        and request_pack.get("not_replacement_evidence_records")
        == request_pack.get("records"),
        "request_pack_contains_replacement_evidence",
    )
    gate(
        "counts_match_across_reports",
        _counts_match(manifest_counts, sanitize_counts, preflight_counts),
        "record_counts_mismatch",
    )
    gate(
        "minimum_records_met",
        _count_value(manifest_counts, "requests") >= minimum_records
        and _count_value(sanitize_counts, "requests") >= minimum_records
        and _count_value(preflight_counts, "requests") >= minimum_records,
        "minimum_records_not_met",
    )

    # --- Artifact locations and the external output contract ---------------
    gate(
        "manifest_uses_sanitized_tmp_artifacts",
        _uses_sanitized_tmp_artifacts(manifest),
        "manifest_not_pointing_to_sanitized_tmp_artifacts",
    )
    gate(
        "external_output_contract_declared",
        _external_output_contract_declared(
            manifest,
            expected_records=_count_value(manifest_counts, "requests"),
        ),
        "external_output_contract_incomplete",
    )
    gate(
        "post_external_finalizer_declared",
        bool(str(manifest.get("preferred_post_external_run_command") or "").strip()),
        "preferred_post_external_run_command_missing",
    )

    ready = not failures
    return NemotronExternalRunnerReadinessReport(
        candidate_id=candidate_id,
        run_id=run_id,
        ready=ready,
        decision="ready_for_approval" if ready else "blocked",
        minimum_records=minimum_records,
        gates=gates,
        failures=failures,
        counts={
            "manifest": manifest_counts,
            "sanitize_report": sanitize_counts,
            "sanitized_preflight": preflight_counts,
        },
        artifacts=_artifacts(manifest),
        safety=_safety(manifest, sanitized_preflight),
        next_actions=_next_actions(manifest, ready=ready),
    )


def _manifest_counts(manifest: dict[str, Any]) -> dict[str, Any]:
    """Collect the record counts the manifest declares for its artifacts.

    Values are returned exactly as stored in the manifest (``None`` when a
    section or key is absent); no coercion happens here.
    """
    fixtures_node = _manifest_fixtures(manifest)
    inputs_node = _manifest_candidate_inputs(manifest)
    requests_node = _manifest_request_pack(manifest)

    summary: dict[str, Any] = {}
    summary["fixtures"] = fixtures_node.get("records")
    summary["candidate_inputs"] = inputs_node.get("records")
    summary["requests"] = requests_node.get("records")
    # The marker count lives on the fixtures section, not on the request pack.
    summary["expected_action_marker_records"] = fixtures_node.get(
        "expected_action_marker_records"
    )
    return summary


def _report_counts(report: dict[str, Any]) -> dict[str, Any]:
    return {
        "fixtures": report.get("fixtures"),
        "candidate_inputs": report.get("candidate_inputs"),
        "requests": report.get("requests"),
        "expected_action_marker_records": report.get("expected_action_marker_records"),
    }


def _counts_match(*counts: dict[str, Any]) -> bool:
    """Return True when all count dicts agree on their record counts.

    ``fixtures``, ``candidate_inputs`` and ``requests`` must be present as
    integers in every dict and identical across them. The optional
    ``expected_action_marker_records`` is only compared among the dicts that
    provide a non-``None`` value for it.
    """
    for required_key in ("fixtures", "candidate_inputs", "requests"):
        distinct = {_coerce_int(entry.get(required_key)) for entry in counts}
        # A ``None`` member means some dict lacked a usable integer; any size
        # other than one means disagreement (or no dicts were given at all).
        if None in distinct or len(distinct) != 1:
            return False

    distinct_markers = set()
    for entry in counts:
        raw_marker = entry.get("expected_action_marker_records")
        if raw_marker is not None:
            distinct_markers.add(_coerce_int(raw_marker))
    return len(distinct_markers) < 2


def _count_value(counts: dict[str, Any], key: str) -> int:
    """Return ``counts[key]`` as an integer, or 0 when it is not a usable int."""
    coerced = _coerce_int(counts.get(key))
    if coerced:
        return coerced
    # Covers both a missing/invalid value (``None``) and a literal zero.
    return 0


def _coerce_int(value: Any) -> int | None:
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value
    return None


def _preflight_record_sets_clean(preflight: dict[str, Any]) -> bool:
    fields = (
        "duplicate_fixtures",
        "duplicate_candidate_inputs",
        "duplicate_requests",
        "missing_candidate_inputs",
        "missing_requests",
        "unexpected_candidate_inputs",
        "unexpected_requests",
    )
    return all(not preflight.get(field) for field in fields)


def _uses_sanitized_tmp_artifacts(manifest: dict[str, Any]) -> bool:
    nodes = (
        _manifest_fixtures(manifest),
        _manifest_candidate_inputs(manifest),
        _manifest_request_pack(manifest),
    )
    for node in nodes:
        path = str(node.get("local_path") or "")
        if not path.startswith("/tmp/") or "sanitized" not in path:
            return False
        source_path = str(node.get("source_unsanitized_path") or "")
        if source_path and source_path == path:
            return False
    return True


def _external_output_contract_declared(
    manifest: dict[str, Any],
    *,
    expected_records: int,
) -> bool:
    """Return True when the manifest fully declares the external output contract.

    The ``external_runner_output`` section must name a ``/tmp/`` results path,
    the external-result schema file, a ``required_records`` equal to
    ``expected_records``, ``one_result_per_request`` set to ``True``, and a
    forbidden-field list covering every self-grading field.
    """
    contract = dict(manifest.get("external_runner_output") or {})
    declared_forbidden = set()
    for forbidden_name in contract.get("forbidden_model_output_fields") or []:
        declared_forbidden.add(str(forbidden_name))

    if not str(contract.get("required_path") or "").startswith("/tmp/"):
        return False
    if contract.get("schema") != "docs/schemas/agent_nemotron_external_result_v1.schema.json":
        return False
    if contract.get("required_records") != expected_records:
        return False
    if contract.get("one_result_per_request") is not True:
        return False
    # Extra forbidden names are fine; every self-grading field must be listed.
    return _SELF_GRADING_FIELDS <= declared_forbidden


def _artifacts(manifest: dict[str, Any]) -> dict[str, Any]:
    """Gather the artifact references from the manifest for the report.

    Section dicts are copies; the remaining values are passed through as
    stored in the manifest (``None`` when absent).
    """
    runner_output = dict(manifest.get("external_runner_output") or {})
    finalizer_command = manifest.get("preferred_post_external_run_command")
    preflight_reference = manifest.get("external_runner_preflight_report_sanitized")

    summary: dict[str, Any] = {}
    summary["request_pack"] = _manifest_request_pack(manifest)
    summary["candidate_inputs"] = _manifest_candidate_inputs(manifest)
    summary["fixtures"] = _manifest_fixtures(manifest)
    summary["sanitize_report"] = manifest.get("sanitize_report")
    summary["sanitized_preflight_report"] = preflight_reference
    summary["external_results_required_path"] = runner_output.get("required_path")
    summary["preferred_post_external_run_command"] = finalizer_command
    return summary


def _safety(
    manifest: dict[str, Any],
    preflight: dict[str, Any],
) -> dict[str, Any]:
    return {
        "external_calls_performed_by_codex": manifest.get(
            "external_calls_performed_by_codex"
        ),
        "approval_required_before_external_execution": manifest.get(
            "approval_required_before_external_execution"
        ),
        "raw_artifacts_committed": manifest.get("raw_artifacts_committed"),
        "sensitive_marker_records": preflight.get("sensitive_marker_records"),
        "candidate_input_label_leak_records": preflight.get(
            "candidate_input_label_leak_records"
        ),
        "request_context_label_leak_records": preflight.get(
            "request_context_label_leak_records"
        ),
        "request_only_records": preflight.get("request_only_records"),
        "not_replacement_evidence_records": preflight.get(
            "not_replacement_evidence_records"
        ),
    }


def _next_actions(manifest: dict[str, Any], *, ready: bool) -> list[str]:
    if not ready:
        return [
            "Fix the readiness failures.",
            "Regenerate sanitized fixtures, candidate inputs, and requests if needed.",
            "Rerun sanitized preflight and readiness before any external execution.",
        ]
    return [
        "Obtain explicit commander approval before external execution.",
        "Run the approved offline NeMo/NIM/Nemotron runner against the sanitized request pack only.",
        "Write external results to "
        f"{(manifest.get('external_runner_output') or {}).get('required_path')}.",
        "Run the preferred post-external finalizer command.",
    ]


def _manifest_request_pack(manifest: dict[str, Any]) -> dict[str, Any]:
    return dict(manifest.get("request_pack") or {})


def _manifest_candidate_inputs(manifest: dict[str, Any]) -> dict[str, Any]:
    return dict(manifest.get("candidate_inputs") or {})


def _manifest_fixtures(manifest: dict[str, Any]) -> dict[str, Any]:
    return dict(manifest.get("fixtures") or {})