Files
awoooi/apps/api/tests/test_agent_replay_promotion_gate.py
Your Name cfb866d055
Some checks failed
Ansible Lint / lint (push) Successful in 35s
CD Pipeline / tests (push) Failing after 13s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Code Review / ai-code-review (push) Failing after 11s
feat(governance): add agent market automation surfaces
2026-06-04 21:50:55 +08:00

243 lines
8.1 KiB
Python

from __future__ import annotations
from src.services.agent_replay_promotion_gate import (
evaluate_agent_replay_promotion_gate,
)
def test_promotion_gate_blocks_contract_probe_even_with_valid_contract():
    """A contract-probe result blocks promotion despite a valid contract.

    The contract report is marked valid and the scorecard entry passes every
    gate, but the only raw result carries an error, is tagged as a
    ``contract_probe`` and is flagged ``not_replacement_evidence``. No import
    report is passed for this candidate either.
    """
    candidate = "nemo_nemotron_fabric"
    contract = {
        "candidate_id": candidate,
        "valid": True,
        "inputs": 50,
        "results": 50,
    }
    probe_result = {
        "candidate_id": candidate,
        "error": "external_candidate_adapter_not_configured",
        "metadata": {
            "adapter_mode": "contract_probe",
            "not_replacement_evidence": True,
        },
    }
    scorecard = {
        "candidates": [
            {
                "candidate_id": candidate,
                "incidents": 50,
                "hard_gates_pass": True,
                "eligible_for_canary": True,
                "beats_baseline": True,
                "gate_failures": [],
                "total_score": 0.9,
            },
        ],
    }

    outcome = evaluate_agent_replay_promotion_gate(
        candidate_id=candidate,
        contract_report=contract,
        raw_results=[probe_result],
        scorecard_report=scorecard,
    )
    report = outcome.to_dict()

    assert report["approved"] is False
    assert report["decision"] == "blocked"

    # Every one of these reasons must be reported, not just the first hit.
    expected_failures = (
        "not_replacement_evidence_present:1",
        "contract_probe_result_present:1",
        "candidate_result_errors_present:1",
        "nemotron_import_report_missing",
    )
    for reason in expected_failures:
        assert reason in report["failures"]
def test_promotion_gate_approves_real_replay_when_all_gates_pass():
    """An error-free real offline replay with a passing scorecard is approved."""
    candidate = "langgraph_incident_kernel"
    contract = {
        "candidate_id": candidate,
        "valid": True,
        "inputs": 50,
        "results": 50,
    }
    replay_result = {
        "candidate_id": candidate,
        "error": None,
        "metadata": {"adapter_mode": "real_offline_replay"},
    }
    scorecard_entry = {
        "candidate_id": candidate,
        "incidents": 50,
        "hard_gates_pass": True,
        "eligible_for_canary": True,
        "beats_baseline": True,
        "gate_failures": [],
        "total_score": 0.9,
    }

    outcome = evaluate_agent_replay_promotion_gate(
        candidate_id=candidate,
        contract_report=contract,
        raw_results=[replay_result],
        scorecard_report={"candidates": [scorecard_entry]},
    )
    report = outcome.to_dict()

    assert report["approved"] is True
    assert report["decision"] == "approved"
    # Approval must come with an empty failure list, not merely a short one.
    assert report["failures"] == []
def test_promotion_gate_blocks_small_sample_and_missing_scorecard():
    """An empty scorecard candidate list blocks promotion.

    The contract report omits ``inputs``/``results`` and the scorecard has no
    candidates at all; the gate must refuse and report
    ``scorecard_candidate_missing``.
    """
    # NOTE(review): the test name also mentions "small sample", but no
    # sample-size failure is asserted below -- confirm whether one should be.
    candidate = "openai_agents_sdk_coordinator"
    contract = {
        "candidate_id": candidate,
        "valid": True,
    }
    empty_scorecard = {"candidates": []}

    outcome = evaluate_agent_replay_promotion_gate(
        candidate_id=candidate,
        contract_report=contract,
        raw_results=[{"candidate_id": candidate}],
        scorecard_report=empty_scorecard,
    )
    report = outcome.to_dict()

    assert report["approved"] is False
    assert "scorecard_candidate_missing" in report["failures"]
def test_promotion_gate_requires_nemotron_import_report():
    """The nemotron candidate is blocked when no import report is passed.

    Contract, raw result and scorecard are all clean; the only thing missing
    is the ``import_report`` argument, and the evidence section must record
    that it was not provided.
    """
    candidate = "nemo_nemotron_fabric"
    contract = {
        "candidate_id": candidate,
        "valid": True,
        "inputs": 50,
        "results": 50,
    }
    replay_result = {
        "candidate_id": candidate,
        "error": None,
        "metadata": {"adapter_mode": "real_offline_replay"},
    }
    scorecard_entry = {
        "candidate_id": candidate,
        "incidents": 50,
        "hard_gates_pass": True,
        "eligible_for_canary": True,
        "beats_baseline": True,
        "gate_failures": [],
        "total_score": 0.9,
    }

    outcome = evaluate_agent_replay_promotion_gate(
        candidate_id=candidate,
        contract_report=contract,
        raw_results=[replay_result],
        scorecard_report={"candidates": [scorecard_entry]},
    )
    report = outcome.to_dict()

    assert report["approved"] is False
    assert "nemotron_import_report_missing" in report["failures"]
    assert report["evidence"]["import_report"] == {"provided": False}
def test_promotion_gate_accepts_valid_nemotron_import_report():
    """The nemotron candidate is approved once a valid import report is given.

    The import report's request/result counts (1) line up with the contract
    report's ``inputs``/``results`` (1), and it lists no failures, duplicates,
    missing or unexpected results.
    """
    candidate = "nemo_nemotron_fabric"
    contract = {
        "candidate_id": candidate,
        "valid": True,
        "inputs": 1,
        "results": 1,
    }
    replay_result = {
        "candidate_id": candidate,
        "error": None,
        "metadata": {"adapter_mode": "real_offline_replay"},
    }
    nemotron_import = {
        "schema_version": "agent_nemotron_import_report_v1",
        "candidate_id": candidate,
        "external_results": 1,
        "imported_results": 1,
        "requests": 1,
        "valid": True,
        "failures": [],
        "duplicate_results": [],
        "missing_results": [],
        "unexpected_results": [],
        "external_error_records": 0,
        "fallback_used_records": 0,
        "incomplete_trace_records": 0,
        "total_cost_usd": 0,
        "avg_latency_ms": 1000,
        "p95_latency_ms": 1000,
    }
    scorecard_entry = {
        "candidate_id": candidate,
        "incidents": 50,
        "hard_gates_pass": True,
        "eligible_for_canary": True,
        "beats_baseline": True,
        "gate_failures": [],
        "total_score": 0.9,
    }

    outcome = evaluate_agent_replay_promotion_gate(
        candidate_id=candidate,
        contract_report=contract,
        raw_results=[replay_result],
        import_report=nemotron_import,
        scorecard_report={"candidates": [scorecard_entry]},
    )
    report = outcome.to_dict()

    assert report["approved"] is True
    import_evidence = report["evidence"]["import_report"]
    assert import_evidence["provided"] is True
    assert import_evidence["valid"] is True
def test_promotion_gate_blocks_bad_import_report_counts():
    """An invalid import report that disagrees with the contract is blocked.

    The contract report claims 2 inputs and 2 results while the import report
    covers only 1 request / 1 imported result, is itself marked invalid, lists
    one missing result and one external error record.
    """
    candidate = "nemo_nemotron_fabric"
    contract = {
        "candidate_id": candidate,
        "valid": True,
        "inputs": 2,
        "results": 2,
    }
    replay_result = {
        "candidate_id": candidate,
        "error": None,
        "metadata": {"adapter_mode": "real_offline_replay"},
    }
    broken_import = {
        "schema_version": "agent_nemotron_import_report_v1",
        "candidate_id": candidate,
        "external_results": 1,
        "imported_results": 1,
        "requests": 1,
        "valid": False,
        "failures": ["missing_external_results:run::INC-2"],
        "duplicate_results": [],
        "missing_results": ["run::INC-2"],
        "unexpected_results": [],
        "external_error_records": 1,
        "fallback_used_records": 0,
        "incomplete_trace_records": 0,
    }
    scorecard_entry = {
        "candidate_id": candidate,
        "incidents": 50,
        "hard_gates_pass": True,
        "eligible_for_canary": True,
        "beats_baseline": True,
        "gate_failures": [],
        "total_score": 0.9,
    }

    outcome = evaluate_agent_replay_promotion_gate(
        candidate_id=candidate,
        contract_report=contract,
        raw_results=[replay_result],
        import_report=broken_import,
        scorecard_report={"candidates": [scorecard_entry]},
    )
    report = outcome.to_dict()

    assert report["approved"] is False

    # Each distinct problem in the import report must surface as its own reason.
    expected_failures = (
        "import_report_invalid",
        "import_report_contract_result_count_mismatch:imported=1;contract=2",
        "import_report_contract_input_count_mismatch:requests=1;contract=2",
        "import_report_missing_results_present:1",
        "import_report_external_errors_present:1",
    )
    for reason in expected_failures:
        assert reason in report["failures"]