# Listing metadata: 119 lines, 4.1 KiB, Python.
from __future__ import annotations
|
|
|
|
from src.services.agent_nemotron_replay_adapter import build_nemotron_replay_request
|
|
from src.services.agent_nemotron_replay_preflight import (
|
|
evaluate_nemotron_external_runner_preflight,
|
|
)
|
|
|
|
|
|
def test_nemotron_preflight_accepts_aligned_request_pack():
    """Preflight reports a valid pack when fixture, input and request line up.

    The request is derived directly from the candidate input through
    ``build_nemotron_replay_request``, so the three records passed to the
    preflight describe the same run/incident.
    """
    replay_fixture = _fixture()
    replay_input = _candidate_input()
    replay_request = build_nemotron_replay_request(replay_input).to_dict()

    outcome = evaluate_nemotron_external_runner_preflight(
        fixtures=[replay_fixture],
        candidate_inputs=[replay_input],
        requests=[replay_request],
    )
    summary = outcome.to_dict()

    assert summary["valid"] is True

    # Counter name -> value the report must carry for this single-record pack.
    expected_counters = {
        "fixtures": 1,
        "candidate_inputs": 1,
        "requests": 1,
        "candidate_input_label_leak_records": 0,
        "request_context_label_leak_records": 0,
        "request_only_records": 1,
        "not_replacement_evidence_records": 1,
        "expected_action_marker_records": 1,
        "sensitive_marker_records": 0,
    }
    for counter_name, expected_value in expected_counters.items():
        assert summary[counter_name] == expected_value, counter_name
|
|
|
|
|
|
def test_nemotron_preflight_blocks_missing_request_and_label_leak():
    """Preflight rejects a pack with no request and a label in the input context.

    ``verification_result`` is one of the keys the fixture keeps under
    ``evaluation_labels``; here it is copied into the candidate input's
    ``incident_context`` and the request list is left empty.
    """
    replay_fixture = _fixture()
    leaky_input = _candidate_input()
    leaky_input["incident_context"]["verification_result"] = "success"

    outcome = evaluate_nemotron_external_runner_preflight(
        fixtures=[replay_fixture],
        candidate_inputs=[leaky_input],
        requests=[],
    )
    summary = outcome.to_dict()

    assert summary["valid"] is False
    assert summary["missing_requests"] == ["run::INC-1"]
    assert summary["candidate_input_label_leak_records"] == 1

    # At least one failure entry must be a candidate-input label-leak code;
    # stop at the first match.
    leak_reported = False
    for failure_code in summary["failures"]:
        if failure_code.startswith("candidate_input_label_leak"):
            leak_reported = True
            break
    assert leak_reported
|
|
|
|
|
|
def test_nemotron_preflight_blocks_request_metadata_and_context_drift():
    """Preflight rejects a request that was altered after it was built.

    Two edits are applied to the generated request: its
    ``affected_services`` is changed to ``["payments"]`` and its
    ``not_replacement_evidence`` metadata flag is set to ``False``.
    """
    replay_fixture = _fixture()
    replay_input = _candidate_input()

    drifted_request = build_nemotron_replay_request(replay_input).to_dict()
    drifted_request["incident_context"]["affected_services"] = ["payments"]
    drifted_request["metadata"]["not_replacement_evidence"] = False

    outcome = evaluate_nemotron_external_runner_preflight(
        fixtures=[replay_fixture],
        candidate_inputs=[replay_input],
        requests=[drifted_request],
    )
    summary = outcome.to_dict()

    assert summary["valid"] is False
    assert summary["not_replacement_evidence_records"] == 0

    # Both alterations must surface as their own failure code.
    required_failure_codes = (
        "request_missing_not_replacement_evidence:line_1",
        "input_request_context_mismatch:run::INC-1",
    )
    for failure_code in required_failure_codes:
        assert failure_code in summary["failures"], failure_code
|
|
|
|
|
|
def test_nemotron_preflight_blocks_sensitive_marker_context():
    """Preflight rejects a pack whose context carries a secrets-path summary.

    The same ``incident_context`` object is used by the candidate input and
    the fixture, and the request is built from that input, so the only
    irregularity in the pack is the ``evidence_summary`` value.
    """
    replay_fixture = _fixture()
    replay_input = _candidate_input()

    sensitive_summary = "/srv/app/.secrets/admin.htpasswd=***REDACTED***"
    shared_context = replay_input["incident_context"]
    shared_context["evidence_summary"] = sensitive_summary
    # Point the fixture at the very same context dict the input uses.
    replay_fixture["incident_context"] = shared_context

    replay_request = build_nemotron_replay_request(replay_input).to_dict()

    outcome = evaluate_nemotron_external_runner_preflight(
        fixtures=[replay_fixture],
        candidate_inputs=[replay_input],
        requests=[replay_request],
    )
    summary = outcome.to_dict()

    assert summary["valid"] is False
    assert summary["sensitive_marker_present_in_context"] is True
    assert summary["sensitive_marker_records"] == 1
    assert "sensitive_marker_present_in_context:1" in summary["failures"]
|
|
|
|
|
|
def _candidate_input() -> dict:
    """Return a new candidate-input record for run ``run``, incident ``INC-1``.

    Every call builds fresh dict and list objects, so a caller may mutate the
    result (including the nested ``incident_context``) without affecting
    records returned by other calls.
    """
    incident_context = {
        "alertname": "PodCrashLooping",
        "severity": "P1",
        "affected_services": ["checkout"],
    }
    source_metadata = {"source": "test"}

    record = {
        "schema_version": "agent_replay_candidate_input_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "incident_context": incident_context,
        "source_metadata": source_metadata,
    }
    return record
|
|
|
|
|
|
def _fixture() -> dict:
    """Return a new replay-fixture record for run ``run``, incident ``INC-1``.

    The ``incident_context`` is taken from a fresh ``_candidate_input()``
    call, so it starts out equal to the candidate input's context. The
    fixture additionally carries ``evaluation_labels``.
    """
    incident_context = _candidate_input()["incident_context"]
    evaluation_labels = {
        "verification_result": "success",
        "execution_success": True,
        "expected_action_markers": ["rollout restart", "checkout"],
    }

    record = {
        "schema_version": "agent_replay_fixture_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "incident_context": incident_context,
        "evaluation_labels": evaluation_labels,
        "source_metadata": {"source": "test"},
    }
    return record
|