"""Tests for ``evaluate_nemotron_external_runner_readiness``.

Each test feeds the evaluator a manifest, a sanitize report and a sanitized
preflight report (all built from the baseline fixtures at the bottom of this
module) and inspects the dict form of the returned report.
"""

from __future__ import annotations

from src.services.agent_nemotron_external_runner_readiness import (
    evaluate_nemotron_external_runner_readiness,
)


def _readiness_report(
    *,
    manifest: dict | None = None,
    sanitize_report: dict | None = None,
    sanitized_preflight: dict | None = None,
) -> dict:
    """Run the readiness evaluation and return its ``to_dict()`` form.

    Any argument left as ``None`` is replaced by the matching baseline
    fixture (``_manifest``, ``_sanitize_report`` or ``_preflight``), so a test
    only has to supply the input it deliberately perturbs.
    """
    if manifest is None:
        manifest = _manifest()
    if sanitize_report is None:
        sanitize_report = _sanitize_report()
    if sanitized_preflight is None:
        sanitized_preflight = _preflight()

    evaluation = evaluate_nemotron_external_runner_readiness(
        manifest=manifest,
        sanitize_report=sanitize_report,
        sanitized_preflight=sanitized_preflight,
    )
    return evaluation.to_dict()


def test_readiness_accepts_sanitized_ready_pack():
    """Unmodified baseline fixtures yield a ready-for-approval report."""
    outcome = _readiness_report()

    assert outcome["ready"] is True
    assert outcome["decision"] == "ready_for_approval"
    assert outcome["gates"]["external_execution_still_requires_approval"] is True
    assert outcome["counts"]["manifest"]["requests"] == 50
    assert outcome["safety"]["raw_artifacts_committed"] is False


def test_readiness_blocks_unsanitized_or_invalid_preflight():
    """An invalid preflight that still reports sensitive markers is blocked."""
    bad_preflight = _preflight()
    # Overwrite existing keys only, so the baseline key order is untouched.
    bad_preflight.update(
        {
            "valid": False,
            "failures": ["sensitive_marker_present_in_context:4"],
            "sensitive_marker_present_in_context": True,
            "sensitive_marker_records": 4,
        }
    )

    outcome = _readiness_report(sanitized_preflight=bad_preflight)
    reported_failures = outcome["failures"]

    assert outcome["ready"] is False
    assert outcome["decision"] == "blocked"
    assert "sanitized_preflight_invalid" in reported_failures
    assert "sensitive_context_markers_present" in reported_failures


def test_readiness_blocks_count_drift_and_external_call_drift():
    """Record counts of 49 plus an already-performed external call block readiness."""
    drifted = _manifest()
    drifted["request_pack"]["records"] = 49
    drifted["external_runner_output"]["required_records"] = 49
    drifted["external_calls_performed_by_codex"] = True

    outcome = _readiness_report(manifest=drifted)

    assert outcome["ready"] is False
    assert "external_calls_already_performed_by_codex" in outcome["failures"]
    assert "record_counts_mismatch" in outcome["failures"]
    assert outcome["gates"]["counts_match_across_reports"] is False


def _manifest() -> dict:
    """Return a fresh baseline manifest dict with 50 records in every section."""
    record_total = 50

    request_pack = {
        "local_path": "/tmp/run-sanitized-nemotron-requests.jsonl",
        "source_unsanitized_path": "/tmp/run-nemotron-requests.local.jsonl",
        "records": record_total,
        "request_only_records": record_total,
        "not_replacement_evidence_records": record_total,
        "label_leak_records": 0,
        "sensitive_marker_records": 0,
    }
    candidate_inputs = {
        "local_path": "/tmp/run-sanitized-candidate-inputs.jsonl",
        "source_unsanitized_path": "/tmp/run-candidate-inputs.jsonl",
        "records": record_total,
        "label_leak_records": 0,
    }
    fixtures = {
        "local_path": "/tmp/run-sanitized-fixtures.jsonl",
        "source_unsanitized_path": "/tmp/run-fixtures.jsonl",
        "records": record_total,
        "expected_action_marker_records": 17,
        "operator_only": True,
    }
    forbidden_output_fields = [
        "evaluation_labels",
        "verification_result",
        "execution_success",
        "execution_error",
        "self_healing_score",
        "rca_correct",
        "tool_dry_run_pass",
        "repair_success",
        "false_repair",
    ]
    external_runner_output = {
        "required_path": "/tmp/run-external-results.jsonl",
        "schema": "docs/schemas/agent_nemotron_external_result_v1.schema.json",
        "required_records": record_total,
        "one_result_per_request": True,
        "forbidden_model_output_fields": forbidden_output_fields,
    }

    return {
        "schema_version": "agent_nemotron_external_runner_manifest_v1",
        "candidate_id": "nemo_nemotron_fabric",
        "run_id": "nemotron-replay-prod-20260601165413",
        "status": "ready_for_approved_external_offline_runner_with_sanitized_pack",
        "external_calls_performed_by_codex": False,
        "approval_required_before_external_execution": True,
        "raw_artifacts_committed": False,
        "sanitize_report": "docs/evaluations/sanitize.json",
        "external_runner_preflight_report_sanitized": "docs/evaluations/preflight.json",
        "request_pack": request_pack,
        "candidate_inputs": candidate_inputs,
        "fixtures": fixtures,
        "external_runner_output": external_runner_output,
        "preferred_post_external_run_command": (
            "apps/api/.venv/bin/python scripts/agents/nemotron-finalize-replay.py"
        ),
    }


def _sanitize_report() -> dict:
    """Return a fresh baseline sanitize report: valid, 4 markers before, 0 after."""
    record_total = 50
    sanitize = {
        "schema_version": "agent_nemotron_request_pack_sanitize_report_v1",
        "fixtures": record_total,
        "candidate_inputs": record_total,
        "requests": record_total,
        "valid": True,
        "changed_fixture_records": record_total,
        "sensitive_marker_records_before": 4,
        "sensitive_marker_records_after": 0,
        "marker_distribution_before": {"secret": 4},
        "marker_distribution_after": {},
        "preflight_valid": True,
        "preflight_failures": [],
        "failures": [],
    }
    return sanitize


def _preflight() -> dict:
    """Return a fresh baseline preflight report: valid, no failures, no markers."""
    record_total = 50
    preflight = {
        "schema_version": "agent_nemotron_external_runner_preflight_v1",
        "candidate_id": "nemo_nemotron_fabric",
        "fixtures": record_total,
        "candidate_inputs": record_total,
        "requests": record_total,
        "valid": True,
        "failures": [],
        # Each list below is a distinct empty list, as in a freshly built report.
        "duplicate_fixtures": [],
        "duplicate_candidate_inputs": [],
        "duplicate_requests": [],
        "missing_candidate_inputs": [],
        "missing_requests": [],
        "unexpected_candidate_inputs": [],
        "unexpected_requests": [],
        "candidate_input_label_leak_records": 0,
        "request_context_label_leak_records": 0,
        "request_only_records": record_total,
        "not_replacement_evidence_records": record_total,
        "expected_action_marker_records": 17,
        "sensitive_marker_present_in_context": False,
        "sensitive_marker_records": 0,
        "sensitive_marker_distribution": {},
    }
    return preflight