# Source viewer header: 75 lines, 2.5 KiB, Python
from __future__ import annotations
|
|
|
|
from src.services.agent_replay_contract import validate_candidate_replay_contract
|
|
|
|
|
|
def _input(incident_id: str, run_id: str = "run") -> dict[str, object]:
    """Build a minimal candidate-input payload for a single incident.

    Args:
        incident_id: Value stored under the ``"incident_id"`` key.
        run_id: Value stored under the ``"run_id"`` key (default ``"run"``).

    Returns:
        A new dict tagged with the ``agent_replay_candidate_input_v1`` schema
        version, a fixed ``PodCrashLooping`` incident context, and empty
        source metadata.
    """
    return {
        "schema_version": "agent_replay_candidate_input_v1",
        "run_id": run_id,
        "incident_id": incident_id,
        "incident_context": {"alertname": "PodCrashLooping"},
        "source_metadata": {},
    }
|
|
|
|
|
|
def _result(
    incident_id: str,
    candidate_id: str = "nemo_nemotron_fabric",
    run_id: str = "run",
    **overrides: object,
) -> dict[str, object]:
    """Build a candidate replay-result payload for a single incident.

    Args:
        incident_id: Value stored under the ``"incident_id"`` key.
        candidate_id: Value stored under the ``"candidate_id"`` key.
        run_id: Value stored under the ``"run_id"`` key (default ``"run"``).
        **overrides: Extra keyword arguments merged into the payload last, so
            they replace same-named default fields or add new keys
            (for example ``metadata=...``).

    Returns:
        A new dict tagged with the ``agent_candidate_replay_result_v1`` schema
        version, with ``overrides`` applied on top of the defaults.
    """
    payload: dict[str, object] = {
        "schema_version": "agent_candidate_replay_result_v1",
        "run_id": run_id,
        "incident_id": incident_id,
        "candidate_id": candidate_id,
        "candidate_role": "agent_fabric",
        "proposed_action": "collect logs",
        "risk_level": "low",
        "requires_human_approval": False,
        "trace_complete": True,
        "trace_events": [{"type": "model_call"}],
        "latency_ms": 10,
        "cost_usd": 0,
    }
    # Applied after the defaults so callers can replace any default field.
    payload.update(overrides)
    return payload
|
|
|
|
|
|
def test_contract_accepts_one_to_one_candidate_results() -> None:
    """Two inputs with one matching result each produce a valid report.

    The report dict is expected to have no failures and to count two inputs
    and two results.
    """
    report = validate_candidate_replay_contract(
        candidate_inputs=[_input("INC-1"), _input("INC-2")],
        candidate_results=[_result("INC-1"), _result("INC-2")],
        expected_candidate_id="nemo_nemotron_fabric",
    ).to_dict()

    assert report["valid"] is True
    assert report["failures"] == []
    assert report["inputs"] == 2
    assert report["results"] == 2
|
|
|
|
|
|
def test_contract_rejects_missing_extra_and_run_id_mismatch() -> None:
    """A missing result, an extra result, and a run_id mismatch are all reported.

    The report must be invalid and list one failure string per problem.
    """
    # INC-1 has no result, INC-3 has no input, and INC-2's result carries
    # run_id "actual" while its input carries run_id "expected".
    report = validate_candidate_replay_contract(
        candidate_inputs=[_input("INC-1"), _input("INC-2", run_id="expected")],
        candidate_results=[_result("INC-2", run_id="actual"), _result("INC-3")],
        expected_candidate_id="nemo_nemotron_fabric",
    ).to_dict()

    assert report["valid"] is False
    assert "missing_results:INC-1" in report["failures"]
    assert "unexpected_results:INC-3" in report["failures"]
    assert "run_id_mismatch:INC-2:expected=expected;actual=actual" in report["failures"]
|
|
|
|
|
|
def test_contract_rejects_label_leak_in_candidate_result_metadata() -> None:
    """A result whose metadata carries ``evaluation_labels`` is rejected.

    The report must be invalid and contain at least one failure whose text
    starts with ``label_leak:``.
    """
    report = validate_candidate_replay_contract(
        candidate_inputs=[_input("INC-1")],
        candidate_results=[
            _result(
                "INC-1",
                metadata={"evaluation_labels": {"verification_result": "success"}},
            )
        ],
        expected_candidate_id="nemo_nemotron_fabric",
    ).to_dict()

    assert report["valid"] is False
    assert any(failure.startswith("label_leak:") for failure in report["failures"])
|