# awoooi/apps/api/tests/test_agent_replay_input.py

from __future__ import annotations

import pytest

from src.services.agent_replay_input import (
    assert_no_evaluation_label_leak,
    build_candidate_input_from_fixture,
)


def test_candidate_input_strips_evaluation_labels():
    """Check that answer-key data in a fixture is absent from the candidate input.

    The fixture carries an ``evaluation_labels`` section and an
    ``internal_answer`` entry inside ``source_metadata``. The test asserts
    that the dict produced from it has the candidate-input schema version,
    has no ``evaluation_labels`` key, keeps only ``created_at`` and
    ``agent_turn_count`` in ``source_metadata``, and passes the leak detector.
    """
    fixture = {
        "schema_version": "agent_replay_fixture_v1",
        "run_id": "fixtures",
        "incident_id": "INC-001",
        "incident_context": {
            "alertname": "PodCrashLooping",
            "severity": "P1",
        },
        "evaluation_labels": {
            "verification_result": "success",
            "execution_success": True,
        },
        "source_metadata": {
            "created_at": "2026-06-01T12:00:00+08:00",
            "agent_turn_count": 4,
            "internal_answer": "must-not-leak",
        },
    }
    # Same as the fixture's source_metadata minus the "internal_answer" entry.
    expected_source_metadata = {
        "created_at": "2026-06-01T12:00:00+08:00",
        "agent_turn_count": 4,
    }

    built = build_candidate_input_from_fixture(fixture)
    payload = built.to_dict()

    assert payload["schema_version"] == "agent_replay_candidate_input_v1"
    assert "evaluation_labels" not in payload
    assert "verification_result" not in payload["incident_context"]
    assert payload["source_metadata"] == expected_source_metadata
    # Final guard: the detector itself must accept the produced payload.
    assert_no_evaluation_label_leak(payload)


def test_candidate_input_leak_detector_rejects_answer_key_fields():
    """Check that the leak detector raises on a nested answer-key field.

    A ``verification_result`` key is placed two levels below
    ``incident_context``; the detector is expected to raise ``ValueError``
    with a message matching "evaluation label".
    """
    leaky_payload = {
        "incident_context": {
            "nested": {
                "verification_result": "success",
            }
        }
    }

    with pytest.raises(ValueError, match="evaluation label"):
        assert_no_evaluation_label_leak(leaky_payload)