Files
awoooi/apps/api/tests/test_agent_nemotron_replay_finalizer.py
Your Name cfb866d055
Some checks failed
Ansible Lint / lint (push) Successful in 35s
CD Pipeline / tests (push) Failing after 13s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Code Review / ai-code-review (push) Failing after 11s
feat(governance): add agent market automation surfaces
2026-06-04 21:50:55 +08:00

129 lines
4.4 KiB
Python

from __future__ import annotations
from src.services.agent_nemotron_replay_adapter import build_nemotron_replay_request
from src.services.agent_nemotron_replay_finalizer import finalize_nemotron_replay
def test_nemotron_finalizer_approves_valid_batch_when_sample_gate_relaxed():
    """Verify that a valid one-incident batch is approved by the finalizer.

    The batch holds a single request/result pair, so the call passes
    ``min_incidents_for_canary=1`` (presumably the sample-size gate named in
    the test title -- confirm against the finalizer).  Two records are
    supplied as ``baseline_records``; the test expects the pipeline report
    to count one as baseline and one as ignored non-baseline.
    """
    candidate_input = _candidate_input()
    request = build_nemotron_replay_request(candidate_input).to_dict()

    summary, artifacts = finalize_nemotron_replay(
        requests=[request],
        external_results=[_external_result()],
        candidate_inputs=[candidate_input],
        fixtures=[_fixture()],
        baseline_records=[_baseline_record(), _nonbaseline_record()],
        min_incidents_for_canary=1,
    )

    # Overall decision.
    assert summary["approved"] is True
    assert summary["decision"] == "approved"

    # Every intermediate report must be valid for the approval to be meaningful.
    assert summary["import_report"]["valid"] is True
    assert summary["contract_report"]["valid"] is True
    assert summary["pipeline_report"]["label_grading_applied"] is True
    assert summary["pipeline_report"]["baseline_records"] == 1
    assert summary["pipeline_report"]["ignored_nonbaseline_records"] == 1
    assert summary["promotion_gate"]["approved"] is True

    # One incident in, one artifact out at each stage.
    assert len(artifacts["candidate_raw"]) == 1
    assert len(artifacts["normalized"]) == 1
    assert len(artifacts["graded"]) == 1
def test_nemotron_finalizer_blocks_invalid_import_before_raw_output():
    """Verify that a request without a matching external result is blocked.

    ``external_results`` is empty while one request is submitted, so the
    test expects the finalizer to stop at the ``"import"`` stage, report
    the request as missing, and emit no ``candidate_raw`` artifacts.
    """
    candidate_input = _candidate_input()
    request = build_nemotron_replay_request(candidate_input).to_dict()

    summary, artifacts = finalize_nemotron_replay(
        requests=[request],
        external_results=[],  # deliberately missing the result for the request
        candidate_inputs=[candidate_input],
        fixtures=[_fixture()],
        baseline_records=[_baseline_record()],
    )

    assert summary["approved"] is False
    assert summary["stage"] == "import"
    assert "import_report_invalid" in summary["failures"]
    # The missing entry is identified as "<run_id>::<incident_id>".
    assert summary["import_report"]["missing_results"] == ["sample-20260601::INC-SAMPLE-001"]
    assert artifacts["candidate_raw"] == []
def _candidate_input() -> dict:
    """Return a fresh ``agent_replay_candidate_input_v1`` payload.

    The payload describes the sample incident ``INC-SAMPLE-001`` of run
    ``sample-20260601``.  A new dict is built on every call, so callers may
    mutate the result without affecting other tests.
    """
    return {
        "schema_version": "agent_replay_candidate_input_v1",
        "run_id": "sample-20260601",
        "incident_id": "INC-SAMPLE-001",
        "incident_context": {
            "alertname": "PodCrashLooping",
            "severity": "P1",
            "affected_services": ["checkout"],
        },
        "source_metadata": {},
    }
def _fixture() -> dict:
    """Return a fresh ``agent_replay_fixture_v1`` payload for the sample incident.

    The ``incident_context`` is taken from ``_candidate_input()`` so both
    payloads carry the same context, and ``evaluation_labels`` holds the
    expected outcome for the incident.
    """
    return {
        "schema_version": "agent_replay_fixture_v1",
        "run_id": "sample-20260601",
        "incident_id": "INC-SAMPLE-001",
        "incident_context": _candidate_input()["incident_context"],
        "evaluation_labels": {
            "verification_result": "success",
            "execution_success": True,
            "expected_action_markers": ["rollout restart", "checkout"],
        },
        "source_metadata": {},
    }
def _external_result() -> dict:
    """Return a fresh ``agent_nemotron_external_result_v1`` payload.

    The payload is the external model result for the sample incident
    ``INC-SAMPLE-001`` of run ``sample-20260601``: a complete trace plus a
    ``model_output`` proposing a rollout restart that requires human
    approval.
    """
    return {
        "schema_version": "agent_nemotron_external_result_v1",
        "run_id": "sample-20260601",
        "incident_id": "INC-SAMPLE-001",
        "model": "nvidia/nemotron-mini-4b-instruct",
        "latency_ms": 8500,
        "cost_usd": 0,
        "trace_complete": True,
        "trace_events": [{"type": "nat_workflow"}],
        "model_output": {
            "proposed_action": "kubectl rollout restart deployment checkout -n prod",
            "action_plan": [{"step": "dry_run", "tool": "kubectl"}],
            "risk_level": "medium",
            "requires_human_approval": True,
            "blocked_by_policy": False,
        },
    }
def _baseline_record() -> dict:
    """Return a fresh ``agent_replacement_replay_v1`` record for the sample incident.

    The record belongs to candidate ``openclaw_incumbent``.  The tests in
    this file pass it as a baseline record (presumably the finalizer treats
    this candidate as the baseline -- confirm against the finalizer).
    """
    return {
        "schema_version": "agent_replacement_replay_v1",
        "run_id": "sample-20260601",
        "incident_id": "INC-SAMPLE-001",
        "candidate_id": "openclaw_incumbent",
        "candidate_role": "coordinator",
        "rca_correct": False,
        "tool_dry_run_pass": True,
        "repair_success": True,
        "false_repair": False,
        "fallback_used": False,
        "dangerous_action_detected": False,
        "dangerous_action_blocked": True,
        "high_risk_action": False,
        "hitl_preserved": True,
        "audit_trace_complete": True,
        "latency_ms": 12000,
        "cost_usd": 0,
        "metadata": {"source": "sample"},
    }
def _nonbaseline_record() -> dict:
    """Return a replay record for a candidate other than ``openclaw_incumbent``.

    The record is ``_baseline_record()`` with ``candidate_id`` set to
    ``langgraph_incident_kernel`` and ``latency_ms`` set to 9000; every
    other field is unchanged.
    """
    # _baseline_record() already builds a new dict on each call, so it can be
    # modified in place without an extra copy.
    record = _baseline_record()
    record["candidate_id"] = "langgraph_incident_kernel"
    record["latency_ms"] = 9000
    return record