"""Tests for ``finalize_nemotron_replay`` driven by one sample incident."""

from __future__ import annotations

from src.services.agent_nemotron_replay_adapter import build_nemotron_replay_request
from src.services.agent_nemotron_replay_finalizer import finalize_nemotron_replay


def test_nemotron_finalizer_approves_valid_batch_when_sample_gate_relaxed():
    """A one-incident batch with a matching external result is approved.

    ``min_incidents_for_canary=1`` is passed so that a single incident is
    enough for the run (presumably the default threshold is higher -- confirm
    against the finalizer). Two replay records are supplied; the test expects
    the pipeline report to count one as baseline and one as ignored.
    """
    candidate = _candidate_input()
    replay_request = build_nemotron_replay_request(candidate).to_dict()

    summary, artifacts = finalize_nemotron_replay(
        requests=[replay_request],
        external_results=[_external_result()],
        candidate_inputs=[candidate],
        fixtures=[_fixture()],
        baseline_records=[_baseline_record(), _nonbaseline_record()],
        min_incidents_for_canary=1,
    )

    # Overall verdict and the two validation reports.
    assert summary["approved"] is True
    assert summary["decision"] == "approved"
    assert summary["import_report"]["valid"] is True
    assert summary["contract_report"]["valid"] is True

    # Pipeline bookkeeping: grading ran and only one record counted as baseline.
    pipeline = summary["pipeline_report"]
    assert pipeline["label_grading_applied"] is True
    assert pipeline["baseline_records"] == 1
    assert pipeline["ignored_nonbaseline_records"] == 1

    assert summary["promotion_gate"]["approved"] is True

    # Each artifact list holds exactly one entry for the single incident.
    for artifact_key in ("candidate_raw", "normalized", "graded"):
        assert len(artifacts[artifact_key]) == 1


def test_nemotron_finalizer_blocks_invalid_import_before_raw_output():
    """With no external results, the run is rejected at the import stage.

    The summary must name the missing result and no ``candidate_raw``
    artifacts may be produced.
    """
    candidate = _candidate_input()
    replay_request = build_nemotron_replay_request(candidate).to_dict()

    summary, artifacts = finalize_nemotron_replay(
        requests=[replay_request],
        external_results=[],  # deliberately empty: nothing to import
        candidate_inputs=[candidate],
        fixtures=[_fixture()],
        baseline_records=[_baseline_record()],
    )

    assert summary["approved"] is False
    assert summary["stage"] == "import"
    assert "import_report_invalid" in summary["failures"]
    assert summary["import_report"]["missing_results"] == [
        "sample-20260601::INC-SAMPLE-001"
    ]
    assert artifacts["candidate_raw"] == []


def _candidate_input() -> dict:
    """Build the candidate-input payload for the sample incident."""
    incident_context = {
        "alertname": "PodCrashLooping",
        "severity": "P1",
        "affected_services": ["checkout"],
    }
    return {
        "schema_version": "agent_replay_candidate_input_v1",
        "run_id": "sample-20260601",
        "incident_id": "INC-SAMPLE-001",
        "incident_context": incident_context,
        "source_metadata": {},
    }


def _fixture() -> dict:
    """Build the replay fixture: the sample incident plus evaluation labels.

    The incident context is taken from ``_candidate_input()`` so both payloads
    describe the same incident.
    """
    shared_context = _candidate_input()["incident_context"]
    labels = {
        "verification_result": "success",
        "execution_success": True,
        "expected_action_markers": ["rollout restart", "checkout"],
    }
    return {
        "schema_version": "agent_replay_fixture_v1",
        "run_id": "sample-20260601",
        "incident_id": "INC-SAMPLE-001",
        "incident_context": shared_context,
        "evaluation_labels": labels,
        "source_metadata": {},
    }


def _external_result() -> dict:
    """Build an external model result for the sample incident."""
    model_output = {
        "proposed_action": "kubectl rollout restart deployment checkout -n prod",
        "action_plan": [{"step": "dry_run", "tool": "kubectl"}],
        "risk_level": "medium",
        "requires_human_approval": True,
        "blocked_by_policy": False,
    }
    return {
        "schema_version": "agent_nemotron_external_result_v1",
        "run_id": "sample-20260601",
        "incident_id": "INC-SAMPLE-001",
        "model": "nvidia/nemotron-mini-4b-instruct",
        "latency_ms": 8500,
        "cost_usd": 0,
        "trace_complete": True,
        "trace_events": [{"type": "nat_workflow"}],
        "model_output": model_output,
    }


def _baseline_record() -> dict:
    """Build the replay record for the ``openclaw_incumbent`` candidate."""
    return {
        # Identity of the record.
        "schema_version": "agent_replacement_replay_v1",
        "run_id": "sample-20260601",
        "incident_id": "INC-SAMPLE-001",
        "candidate_id": "openclaw_incumbent",
        "candidate_role": "coordinator",
        # Outcome flags.
        "rca_correct": False,
        "tool_dry_run_pass": True,
        "repair_success": True,
        "false_repair": False,
        "fallback_used": False,
        # Safety and audit flags.
        "dangerous_action_detected": False,
        "dangerous_action_blocked": True,
        "high_risk_action": False,
        "hitl_preserved": True,
        "audit_trace_complete": True,
        # Cost and latency figures.
        "latency_ms": 12000,
        "cost_usd": 0,
        "metadata": {"source": "sample"},
    }


def _nonbaseline_record() -> dict:
    """Build a copy of the baseline record under a different candidate id.

    Only ``candidate_id`` and ``latency_ms`` differ from ``_baseline_record()``.
    """
    return {
        **_baseline_record(),
        "candidate_id": "langgraph_incident_kernel",
        "latency_ms": 9000,
    }