from __future__ import annotations

from src.services.agent_nemotron_replay_failure_analysis import (
    analyze_nemotron_replay_failure,
)


def test_failure_analysis_summarizes_contract_hilt_latency_and_baseline_failures():
    """Check the failure-analysis report built from a blocked replay run.

    The fixture feeds the analyzer two external results (one clean, one
    with an error string naming missing output fields), an invalid runner
    report, a blocked finalizer report, and a scorecard in which the
    candidate scores 0.3 against a baseline of 0.7.  The assertions pin
    the report's schema version, decision, aggregate counters, score
    delta, the minimum set of failure-mode ids, and the next variant id.
    """
    # Two replayed incidents: INC-1 carries no error, INC-2 carries an
    # error string and a model output without `blocked_by_policy`.
    replayed_incidents = [
        {
            "incident_id": "INC-1",
            "error": None,
            "model_output": {
                "risk_level": "medium",
                "requires_human_approval": True,
                "blocked_by_policy": False,
            },
        },
        {
            "incident_id": "INC-2",
            "error": "model_output_missing_fields:action_plan,blocked_by_policy",
            "model_output": {
                "risk_level": "medium",
                "requires_human_approval": False,
            },
        },
    ]

    runner_summary = {
        "requests": 2,
        "results": 2,
        "valid": False,
        "model": "nvidia/nemotron-3-super-120b-a12b",
        "external_error_records": 1,
        "fallback_used_records": 1,
        "trace_incomplete_records": 1,
        "p95_latency_ms": 120000,
        "avg_latency_ms": 70000,
        "failures": ["external_error:INC-2"],
    }

    finalizer_summary = {
        "decision": "blocked",
        "failures": ["candidate_result_errors_present:1"],
        "promotion_gate": {
            "approved": False,
            "decision": "blocked",
            "failures": ["candidate_result_errors_present:1"],
        },
    }

    scorecard = {
        "baseline_candidate_id": "openclaw_incumbent",
        "candidates": [
            {
                "candidate_id": "nemo_nemotron_fabric",
                "beats_baseline": False,
                "hard_gates_pass": False,
                "gate_failures": [
                    "hitl_preserved_rate_below_100pct",
                    "audit_trace_rate_below_0.95",
                ],
                "metrics": {
                    "audit_trace_rate": 0.5,
                    "hitl_preserved_rate": 0.5,
                },
                "total_score": 0.3,
            },
            {
                "candidate_id": "openclaw_incumbent",
                "gate_failures": [],
                "metrics": {},
                "total_score": 0.7,
            },
        ],
    }

    report = analyze_nemotron_replay_failure(
        external_results=replayed_incidents,
        external_runner_report=runner_summary,
        finalizer_report=finalizer_summary,
        scorecard_report=scorecard,
        generated_at="2026-06-01T00:00:00+00:00",
    )
    aggregate = report["external_result_aggregate"]

    # Top-level report envelope.
    assert report["schema_version"] == "agent_nemotron_replay_failure_analysis_v1"
    assert report["decision"] == "blocked"
    assert report["not_replacement_evidence"] is True

    # Aggregates derived from the external results.
    assert aggregate["model_output_missing_fields"] == {
        "action_plan": 1,
        "blocked_by_policy": 1,
    }
    assert aggregate["unsafe_hitl_records"] == 1

    # NOTE(review): exact float equality.  A raw `0.3 - 0.7` evaluates to
    # -0.39999999999999997 in binary floating point, so this presumably
    # relies on the analyzer rounding the delta -- confirm.
    assert report["scorecard_delta"]["score_delta"] == -0.4

    # The listed ids are a required minimum; additional modes are allowed.
    required_mode_ids = {
        "output_contract_incomplete",
        "audit_trace_below_gate",
        "hitl_below_gate",
        "latency_outside_existing_async_budget",
        "candidate_under_baseline",
        "promotion_gate_blocked",
    }
    reported_mode_ids = {mode["id"] for mode in report["primary_failure_modes"]}
    assert required_mode_ids.issubset(reported_mode_ids)

    variant_plan = report["candidate_variant_plan"]
    assert variant_plan["next_variant_id"] == "nemo_nemotron_fabric_contract_tuned_v1"