"""Tests for ``run_nemotron_external_replay`` driven by an in-memory stub client."""

from __future__ import annotations

import pytest

from src.services.agent_nemotron_external_runner import (
    NemotronExternalRunnerConfig,
    run_nemotron_external_replay,
)
from src.services.agent_nemotron_replay_adapter import (
    NEMOTRON_CONTRACT_TUNED_VARIANT_ID,
)


def _completion(content: str, usage: dict | None = None) -> dict:
    """Build a chat-completion style payload with one choice.

    The ``usage`` key is only present when *usage* is supplied, so payloads
    without token accounting keep exactly one top-level key.
    """
    body: dict = {"choices": [{"message": {"content": content}}]}
    if usage is not None:
        body["usage"] = usage
    return body


@pytest.mark.asyncio
async def test_external_runner_writes_valid_result_from_json_response():
    """A complete JSON answer produces one valid result record carrying usage."""
    model_answer = (
        '{"proposed_action":"rollout restart checkout",'
        '"action_plan":["inspect deployment","restart"],'
        '"risk_level":"medium",'
        '"requires_human_approval":true,'
        '"blocked_by_policy":false}'
    )
    token_usage = {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}
    stub = _FakeClient(_completion(model_answer, token_usage))

    results, report = await run_nemotron_external_replay(
        requests=[_request()],
        config=NemotronExternalRunnerConfig(api_key="test-key"),
        client=stub,
    )

    assert report.valid is True
    assert report.results == 1

    record = results[0]
    assert record["schema_version"] == "agent_nemotron_external_result_v1"
    assert record["model_output"]["risk_level"] == "medium"
    assert record["model_output"]["requires_human_approval"] is True
    assert record["error"] is None
    assert record["trace_events"][0]["usage"]["total_tokens"] == 30
    assert record["retry_used"] is False


@pytest.mark.asyncio
async def test_external_runner_fails_closed_on_invalid_model_output():
    """Non-JSON model text yields an invalid report and a blocked fallback record."""
    stub = _FakeClient(_completion("not json"))

    results, report = await run_nemotron_external_replay(
        requests=[_request()],
        config=NemotronExternalRunnerConfig(api_key="test-key"),
        client=stub,
    )

    assert report.valid is False
    assert report.external_error_records == 1

    record = results[0]
    assert record["fallback_used"] is True
    assert record["trace_complete"] is False
    assert record["model_output"]["blocked_by_policy"] is True
    assert record["model_output"]["requires_human_approval"] is True


@pytest.mark.asyncio
async def test_contract_tuned_runner_retries_missing_fields_once():
    """A contract-tuned request with an incomplete first answer is retried once."""
    request = _request()
    request["metadata"].update(
        {
            "candidate_variant_id": NEMOTRON_CONTRACT_TUNED_VARIANT_ID,
            "prompt_profile": "contract_tuned_v1",
        }
    )
    request["response_contract"] = {
        "required": [
            "proposed_action",
            "action_plan",
            "risk_level",
            "requires_human_approval",
            "blocked_by_policy",
        ],
    }

    # First scripted answer omits most required fields; the second is complete.
    incomplete_answer = _completion(
        '{"proposed_action":"restart checkout"}',
        {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
    )
    complete_answer = _completion(
        (
            '{"proposed_action":"collect diagnostics",'
            '"action_plan":["inspect logs"],'
            '"risk_level":"medium",'
            '"requires_human_approval":true,'
            '"blocked_by_policy":false}'
        ),
        {"prompt_tokens": 20, "completion_tokens": 30, "total_tokens": 50},
    )
    client = _FakeClient([incomplete_answer, complete_answer])

    results, report = await run_nemotron_external_replay(
        requests=[request],
        config=NemotronExternalRunnerConfig(api_key="test-key"),
        client=client,
    )

    assert report.valid is True
    assert report.retry_used_records == 1
    assert report.candidate_variant_id == NEMOTRON_CONTRACT_TUNED_VARIANT_ID
    assert client.calls == 2

    first_prompt = client.payloads[0]["json"]["messages"][1]["content"]
    assert "EXACT JSON CONTRACT" in first_prompt
    retry_prompt = client.payloads[1]["json"]["messages"][1]["content"]
    assert "Previous model output was invalid" in retry_prompt

    record = results[0]
    assert record["candidate_variant_id"] == NEMOTRON_CONTRACT_TUNED_VARIANT_ID
    assert record["retry_used"] is True
    assert record["first_error"].startswith("model_output_missing_fields:")
    assert record["error"] is None


@pytest.mark.asyncio
async def test_external_runner_blocks_missing_key_before_network_call():
    """An empty API key fails the run without the client ever being called."""
    client = _FakeClient({})

    results, report = await run_nemotron_external_replay(
        requests=[_request()],
        config=NemotronExternalRunnerConfig(api_key=""),
        client=client,
    )

    assert results == []
    assert report.valid is False
    assert "api_key_missing" in report.failures
    assert client.calls == 0


@pytest.mark.asyncio
async def test_external_runner_rejects_self_grading_request_leak():
    """A request carrying evaluation labels in its context is rejected."""
    request = _request()
    request["incident_context"]["evaluation_labels"] = {"repair_success": True}
    stub = _FakeClient({})

    results, report = await run_nemotron_external_replay(
        requests=[request],
        config=NemotronExternalRunnerConfig(api_key="test-key"),
        client=stub,
    )

    assert results == []
    assert report.valid is False
    assert any("request_self_grading_leak" in entry for entry in report.failures)


class _FakeResponse:
    """Canned response object that hands back a fixed payload."""

    def __init__(self, payload: dict):
        self.payload = payload

    def raise_for_status(self) -> None:
        """Do nothing; this stub never signals a failed status."""
        return None

    def json(self) -> dict:
        """Return the payload given at construction."""
        return self.payload


class _FakeClient:
    """Stub client that records every ``post`` call and replays canned payloads.

    A single dict is returned for every call; a list is consumed positionally,
    one entry per call.
    """

    def __init__(self, payload: dict | list[dict]):
        self.payload = payload
        self.payloads: list[dict] = []
        self.calls = 0

    async def post(self, *_args, **kwargs) -> _FakeResponse:
        """Record the keyword arguments and return the response for this call."""
        position = self.calls
        self.calls = position + 1
        self.payloads.append(kwargs)
        scripted = self.payload
        body = scripted[position] if isinstance(scripted, list) else scripted
        return _FakeResponse(body)


def _request() -> dict:
    """Return a fresh replay-request dict; callers may mutate it freely."""
    incident_context = {"alertname": "PodCrashLooping"}
    source_metadata = {"source": "test"}
    metadata = {
        "request_only": True,
        "not_replacement_evidence": True,
    }
    return {
        "schema_version": "agent_nemotron_replay_request_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "candidate_id": "nemo_nemotron_fabric",
        "system_prompt": "Return JSON.",
        "user_prompt": "Incident context",
        "incident_context": incident_context,
        "source_metadata": source_metadata,
        "metadata": metadata,
    }