"""Tests for ``evaluate_agent_replay_promotion_gate``.

Each test feeds the gate a contract report, raw replay results, a scorecard
report and (optionally) an import report, then inspects the dictionary
produced by ``.to_dict()``.
"""

from __future__ import annotations

from src.services.agent_replay_promotion_gate import (
    evaluate_agent_replay_promotion_gate,
)

_NEMOTRON = "nemo_nemotron_fabric"
_LANGGRAPH = "langgraph_incident_kernel"
_OPENAI_SDK = "openai_agents_sdk_coordinator"


def _contract_report(candidate_id: str, count: int) -> dict:
    """Build a valid contract report whose input and result counts both equal *count*."""
    return {
        "candidate_id": candidate_id,
        "valid": True,
        "inputs": count,
        "results": count,
    }


def _real_replay_results(candidate_id: str) -> list[dict]:
    """Build a single error-free raw result tagged ``real_offline_replay``."""
    return [
        {
            "candidate_id": candidate_id,
            "error": None,
            "metadata": {"adapter_mode": "real_offline_replay"},
        }
    ]


def _passing_scorecard(candidate_id: str) -> dict:
    """Build a scorecard report with one candidate entry that passes every gate."""
    return {
        "candidates": [
            {
                "candidate_id": candidate_id,
                "incidents": 50,
                "hard_gates_pass": True,
                "eligible_for_canary": True,
                "beats_baseline": True,
                "gate_failures": [],
                "total_score": 0.9,
            }
        ]
    }


def test_promotion_gate_blocks_contract_probe_even_with_valid_contract():
    """A contract-probe result blocks promotion despite a valid contract and scorecard."""
    probe_results = [
        {
            "candidate_id": _NEMOTRON,
            "error": "external_candidate_adapter_not_configured",
            "metadata": {
                "adapter_mode": "contract_probe",
                "not_replacement_evidence": True,
            },
        }
    ]

    gate = evaluate_agent_replay_promotion_gate(
        candidate_id=_NEMOTRON,
        contract_report=_contract_report(_NEMOTRON, 50),
        raw_results=probe_results,
        scorecard_report=_passing_scorecard(_NEMOTRON),
    )
    outcome = gate.to_dict()

    assert outcome["approved"] is False
    assert outcome["decision"] == "blocked"

    failures = outcome["failures"]
    assert "not_replacement_evidence_present:1" in failures
    assert "contract_probe_result_present:1" in failures
    assert "candidate_result_errors_present:1" in failures
    # No import_report was supplied for this candidate.
    assert "nemotron_import_report_missing" in failures


def test_promotion_gate_approves_real_replay_when_all_gates_pass():
    """A clean real offline replay with a passing scorecard is approved with no failures."""
    gate = evaluate_agent_replay_promotion_gate(
        candidate_id=_LANGGRAPH,
        contract_report=_contract_report(_LANGGRAPH, 50),
        raw_results=_real_replay_results(_LANGGRAPH),
        scorecard_report=_passing_scorecard(_LANGGRAPH),
    )
    outcome = gate.to_dict()

    assert outcome["approved"] is True
    assert outcome["decision"] == "approved"
    assert outcome["failures"] == []


def test_promotion_gate_blocks_small_sample_and_missing_scorecard():
    """A candidate absent from the scorecard is not approved.

    The contract report here carries no input/result counts and the raw
    result carries no metadata; only the missing-scorecard failure is
    asserted.
    """
    gate = evaluate_agent_replay_promotion_gate(
        candidate_id=_OPENAI_SDK,
        contract_report={
            "candidate_id": _OPENAI_SDK,
            "valid": True,
        },
        raw_results=[{"candidate_id": _OPENAI_SDK}],
        scorecard_report={"candidates": []},
    )
    outcome = gate.to_dict()

    assert outcome["approved"] is False
    assert "scorecard_candidate_missing" in outcome["failures"]


def test_promotion_gate_requires_nemotron_import_report():
    """Without an import report the nemotron candidate is blocked, even on a clean replay."""
    gate = evaluate_agent_replay_promotion_gate(
        candidate_id=_NEMOTRON,
        contract_report=_contract_report(_NEMOTRON, 50),
        raw_results=_real_replay_results(_NEMOTRON),
        scorecard_report=_passing_scorecard(_NEMOTRON),
    )
    outcome = gate.to_dict()

    assert outcome["approved"] is False
    assert "nemotron_import_report_missing" in outcome["failures"]
    assert outcome["evidence"]["import_report"] == {"provided": False}


def test_promotion_gate_accepts_valid_nemotron_import_report():
    """A valid import report whose counts match the contract lets the candidate through."""
    valid_import = {
        "schema_version": "agent_nemotron_import_report_v1",
        "candidate_id": _NEMOTRON,
        "external_results": 1,
        "imported_results": 1,
        "requests": 1,
        "valid": True,
        "failures": [],
        "duplicate_results": [],
        "missing_results": [],
        "unexpected_results": [],
        "external_error_records": 0,
        "fallback_used_records": 0,
        "incomplete_trace_records": 0,
        "total_cost_usd": 0,
        "avg_latency_ms": 1000,
        "p95_latency_ms": 1000,
    }

    gate = evaluate_agent_replay_promotion_gate(
        candidate_id=_NEMOTRON,
        contract_report=_contract_report(_NEMOTRON, 1),
        raw_results=_real_replay_results(_NEMOTRON),
        import_report=valid_import,
        scorecard_report=_passing_scorecard(_NEMOTRON),
    )
    outcome = gate.to_dict()

    assert outcome["approved"] is True

    import_evidence = outcome["evidence"]["import_report"]
    assert import_evidence["provided"] is True
    assert import_evidence["valid"] is True


def test_promotion_gate_blocks_bad_import_report_counts():
    """An invalid import report with mismatched counts yields one failure per problem."""
    bad_import = {
        "schema_version": "agent_nemotron_import_report_v1",
        "candidate_id": _NEMOTRON,
        "external_results": 1,
        "imported_results": 1,
        "requests": 1,
        "valid": False,
        "failures": ["missing_external_results:run::INC-2"],
        "duplicate_results": [],
        "missing_results": ["run::INC-2"],
        "unexpected_results": [],
        "external_error_records": 1,
        "fallback_used_records": 0,
        "incomplete_trace_records": 0,
    }

    gate = evaluate_agent_replay_promotion_gate(
        candidate_id=_NEMOTRON,
        # The contract expects two inputs/results; the import report has one.
        contract_report=_contract_report(_NEMOTRON, 2),
        raw_results=_real_replay_results(_NEMOTRON),
        import_report=bad_import,
        scorecard_report=_passing_scorecard(_NEMOTRON),
    )
    outcome = gate.to_dict()

    assert outcome["approved"] is False

    failures = outcome["failures"]
    assert "import_report_invalid" in failures
    assert (
        "import_report_contract_result_count_mismatch:imported=1;contract=2"
        in failures
    )
    assert (
        "import_report_contract_input_count_mismatch:requests=1;contract=2"
        in failures
    )
    assert "import_report_missing_results_present:1" in failures
    assert "import_report_external_errors_present:1" in failures