# Tests for the Nemotron replay finalizer (finalize_nemotron_replay).
from __future__ import annotations
|
|
|
|
from src.services.agent_nemotron_replay_adapter import build_nemotron_replay_request
|
|
from src.services.agent_nemotron_replay_finalizer import finalize_nemotron_replay
|
|
|
|
|
|
def test_nemotron_finalizer_approves_valid_batch_when_sample_gate_relaxed():
    """A one-incident batch is approved when ``min_incidents_for_canary`` is 1.

    Feeds the finalizer one replay request, its matching external result,
    the candidate input, the fixture and two baseline-style records, then
    checks the summary reports and the sizes of the returned artifacts.
    """
    candidate_input = _candidate_input()
    request = build_nemotron_replay_request(candidate_input).to_dict()

    summary, artifacts = finalize_nemotron_replay(
        requests=[request],
        external_results=[_external_result()],
        candidate_inputs=[candidate_input],
        fixtures=[_fixture()],
        # Two records that differ in candidate_id and latency_ms; the
        # assertions below expect one to be counted and one to be ignored.
        baseline_records=[_baseline_record(), _nonbaseline_record()],
        # Relax the sample-size gate so a single incident is enough.
        min_incidents_for_canary=1,
    )

    # Overall decision.
    assert summary["approved"] is True
    assert summary["decision"] == "approved"

    # Per-stage reports.
    assert summary["import_report"]["valid"] is True
    assert summary["contract_report"]["valid"] is True
    assert summary["pipeline_report"]["label_grading_applied"] is True
    assert summary["pipeline_report"]["baseline_records"] == 1
    assert summary["pipeline_report"]["ignored_nonbaseline_records"] == 1
    assert summary["promotion_gate"]["approved"] is True

    # One artifact per stage for the single incident.
    assert len(artifacts["candidate_raw"]) == 1
    assert len(artifacts["normalized"]) == 1
    assert len(artifacts["graded"]) == 1
|
|
|
|
|
|
def test_nemotron_finalizer_blocks_invalid_import_before_raw_output():
    """A request with no external result is rejected at the import stage.

    ``external_results`` is empty, so the test expects the summary to be
    unapproved, to name the ``import`` stage, to list the request as a
    missing result, and to return no ``candidate_raw`` artifacts.
    """
    candidate_input = _candidate_input()
    request = build_nemotron_replay_request(candidate_input).to_dict()

    summary, artifacts = finalize_nemotron_replay(
        requests=[request],
        external_results=[],  # deliberately missing the result for the request
        candidate_inputs=[candidate_input],
        fixtures=[_fixture()],
        baseline_records=[_baseline_record()],
    )

    assert summary["approved"] is False
    assert summary["stage"] == "import"
    assert "import_report_invalid" in summary["failures"]
    # The expected key joins the sample run_id and incident_id with "::".
    assert summary["import_report"]["missing_results"] == ["sample-20260601::INC-SAMPLE-001"]
    assert artifacts["candidate_raw"] == []
|
|
|
|
|
|
def _candidate_input() -> dict:
|
|
return {
|
|
"schema_version": "agent_replay_candidate_input_v1",
|
|
"run_id": "sample-20260601",
|
|
"incident_id": "INC-SAMPLE-001",
|
|
"incident_context": {
|
|
"alertname": "PodCrashLooping",
|
|
"severity": "P1",
|
|
"affected_services": ["checkout"],
|
|
},
|
|
"source_metadata": {},
|
|
}
|
|
|
|
|
|
def _fixture() -> dict:
    """Return a fresh sample replay fixture for the same run and incident.

    The payload uses schema ``agent_replay_fixture_v1``. Its
    ``incident_context`` is taken from ``_candidate_input()`` so the two
    payloads always agree, and ``evaluation_labels`` carries the labels the
    fixture supplies for the incident.
    """
    return {
        "schema_version": "agent_replay_fixture_v1",
        "run_id": "sample-20260601",
        "incident_id": "INC-SAMPLE-001",
        # Reuse the candidate input's context rather than duplicating it.
        "incident_context": _candidate_input()["incident_context"],
        "evaluation_labels": {
            "verification_result": "success",
            "execution_success": True,
            "expected_action_markers": ["rollout restart", "checkout"],
        },
        "source_metadata": {},
    }
|
|
|
|
|
|
def _external_result() -> dict:
|
|
return {
|
|
"schema_version": "agent_nemotron_external_result_v1",
|
|
"run_id": "sample-20260601",
|
|
"incident_id": "INC-SAMPLE-001",
|
|
"model": "nvidia/nemotron-mini-4b-instruct",
|
|
"latency_ms": 8500,
|
|
"cost_usd": 0,
|
|
"trace_complete": True,
|
|
"trace_events": [{"type": "nat_workflow"}],
|
|
"model_output": {
|
|
"proposed_action": "kubectl rollout restart deployment checkout -n prod",
|
|
"action_plan": [{"step": "dry_run", "tool": "kubectl"}],
|
|
"risk_level": "medium",
|
|
"requires_human_approval": True,
|
|
"blocked_by_policy": False,
|
|
},
|
|
}
|
|
|
|
|
|
def _baseline_record() -> dict:
|
|
return {
|
|
"schema_version": "agent_replacement_replay_v1",
|
|
"run_id": "sample-20260601",
|
|
"incident_id": "INC-SAMPLE-001",
|
|
"candidate_id": "openclaw_incumbent",
|
|
"candidate_role": "coordinator",
|
|
"rca_correct": False,
|
|
"tool_dry_run_pass": True,
|
|
"repair_success": True,
|
|
"false_repair": False,
|
|
"fallback_used": False,
|
|
"dangerous_action_detected": False,
|
|
"dangerous_action_blocked": True,
|
|
"high_risk_action": False,
|
|
"hitl_preserved": True,
|
|
"audit_trace_complete": True,
|
|
"latency_ms": 12000,
|
|
"cost_usd": 0,
|
|
"metadata": {"source": "sample"},
|
|
}
|
|
|
|
|
|
def _nonbaseline_record() -> dict:
    """Return a sample record for candidate ``langgraph_incident_kernel``.

    Identical to ``_baseline_record()`` except for ``candidate_id`` and
    ``latency_ms`` (9000).
    """
    # _baseline_record() builds a new dict on every call, so it can be
    # modified in place without an extra copy.
    record = _baseline_record()
    record["candidate_id"] = "langgraph_incident_kernel"
    record["latency_ms"] = 9000
    return record
|