50 lines
1.5 KiB
Python
50 lines
1.5 KiB
Python
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from src.services.agent_replay_input import (
|
|
assert_no_evaluation_label_leak,
|
|
build_candidate_input_from_fixture,
|
|
)
|
|
|
|
|
|
def test_candidate_input_strips_evaluation_labels():
    """Check that a candidate input built from a fixture carries no answer-key data.

    The fixture below includes an ``evaluation_labels`` section and an
    ``internal_answer`` entry under ``source_metadata``. The serialized
    candidate input is expected to omit both while keeping the remaining
    ``source_metadata`` entries unchanged.
    """
    fixture = {
        "schema_version": "agent_replay_fixture_v1",
        "run_id": "fixtures",
        "incident_id": "INC-001",
        "incident_context": {
            "alertname": "PodCrashLooping",
            "severity": "P1",
        },
        "evaluation_labels": {
            "verification_result": "success",
            "execution_success": True,
        },
        "source_metadata": {
            "created_at": "2026-06-01T12:00:00+08:00",
            "agent_turn_count": 4,
            "internal_answer": "must-not-leak",
        },
    }
    # Same as the fixture's source_metadata, minus the "internal_answer" entry.
    expected_metadata = {
        "created_at": "2026-06-01T12:00:00+08:00",
        "agent_turn_count": 4,
    }

    payload = build_candidate_input_from_fixture(fixture).to_dict()

    assert payload["schema_version"] == "agent_replay_candidate_input_v1"
    assert "evaluation_labels" not in payload
    assert "verification_result" not in payload["incident_context"]
    assert payload["source_metadata"] == expected_metadata
    # The leak detector must accept the sanitized payload without raising.
    assert_no_evaluation_label_leak(payload)
|
|
|
|
|
|
def test_candidate_input_leak_detector_rejects_answer_key_fields():
    """Check that the leak detector raises on a nested answer-key field.

    The payload places ``verification_result`` two levels below
    ``incident_context``; the detector is expected to raise ``ValueError``
    with a message matching "evaluation label".
    """
    leaky_payload = {
        "incident_context": {
            "nested": {
                "verification_result": "success",
            },
        },
    }

    with pytest.raises(ValueError, match="evaluation label"):
        assert_no_evaluation_label_leak(leaky_payload)
|