from __future__ import annotations

from src.services.agent_replay_label_grader import grade_replay_records_with_fixtures


def test_label_grader_applies_awoooi_labels_when_action_matches():
    """Fixture labels replace the candidate's own flags when markers match.

    The replay record reports every grading flag as ``False`` while its
    proposed action contains both expected markers; the graded record is
    expected to carry ``True`` flags and to note that self-grading was ignored.
    """
    fixture = {
        "incident_id": "INC-1",
        "evaluation_labels": {
            "verification_result": "success",
            "execution_success": True,
            "expected_action_markers": ["rollout restart", "checkout"],
        },
    }
    replay_record = {
        "run_id": "run",
        "incident_id": "INC-1",
        "candidate_id": "nemo_nemotron_fabric",
        "rca_correct": False,
        "tool_dry_run_pass": False,
        "repair_success": False,
        "audit_trace_complete": True,
        "latency_ms": 8000,
        "cost_usd": 0,
        "metadata": {
            "proposed_action": "kubectl rollout restart deployment checkout -n prod",
            "action_plan": [],
        },
    }

    graded, summary = grade_replay_records_with_fixtures(
        fixtures=[fixture],
        replay_records=[replay_record],
    )

    summary_dict = summary.to_dict()
    assert summary_dict["action_match_true"] == 1

    first = graded[0]
    assert first.rca_correct is True
    assert first.tool_dry_run_pass is True
    assert first.repair_success is True
    assert first.metadata["candidate_self_grading_ignored"] is True


def test_label_grader_clears_candidate_self_grading_without_markers():
    """Self-reported flags are reset to ``None`` when the fixture has no markers.

    The fixture omits ``expected_action_markers``, so the incident is expected
    to be listed under ``missing_expected_markers`` and the record annotated
    with the matching reason.
    """
    fixture = {
        "incident_id": "INC-1",
        "evaluation_labels": {
            "verification_result": "success",
            "execution_success": True,
        },
    }
    replay_record = {
        "run_id": "run",
        "incident_id": "INC-1",
        "candidate_id": "openai_agents_sdk_coordinator",
        "rca_correct": True,
        "tool_dry_run_pass": True,
        "repair_success": True,
        "audit_trace_complete": True,
        "latency_ms": 1,
        "cost_usd": 0,
    }

    graded, summary = grade_replay_records_with_fixtures(
        fixtures=[fixture],
        replay_records=[replay_record],
    )

    summary_dict = summary.to_dict()
    assert summary_dict["missing_expected_markers"] == ["INC-1"]

    first = graded[0]
    assert first.rca_correct is None
    assert first.tool_dry_run_pass is None
    assert first.repair_success is None
    assert first.metadata["label_grader_reason"] == "missing_expected_action_markers"


def test_label_grader_marks_false_repair_when_historical_action_degraded():
    """A matching action whose historical verification was "degraded" is a false repair.

    The proposed action contains both expected markers, but the fixture's
    ``verification_result`` is ``"degraded"``; the graded record is expected
    to report ``repair_success`` as ``False`` and ``false_repair`` as ``True``.
    """
    fixture = {
        "incident_id": "INC-1",
        "evaluation_labels": {
            "verification_result": "degraded",
            "execution_success": True,
            "expected_action_markers": ["restart", "checkout"],
        },
    }
    replay_record = {
        "run_id": "run",
        "incident_id": "INC-1",
        "candidate_id": "langgraph_incident_kernel",
        "audit_trace_complete": True,
        "latency_ms": 1,
        "cost_usd": 0,
        "metadata": {
            "proposed_action": "restart checkout",
        },
    }

    # The report is not inspected by this test.
    graded, _report = grade_replay_records_with_fixtures(
        fixtures=[fixture],
        replay_records=[replay_record],
    )

    first = graded[0]
    assert first.repair_success is False
    assert first.false_repair is True