Files
awoooi/apps/api/tests/test_agent_nemotron_external_runner.py
Your Name cfb866d055
Some checks failed
Ansible Lint / lint (push) Successful in 35s
CD Pipeline / tests (push) Failing after 13s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Code Review / ai-code-review (push) Failing after 11s
feat(governance): add agent market automation surfaces
2026-06-04 21:50:55 +08:00

194 lines
6.5 KiB
Python

from __future__ import annotations
import pytest
from src.services.agent_nemotron_external_runner import (
NemotronExternalRunnerConfig,
run_nemotron_external_replay,
)
from src.services.agent_nemotron_replay_adapter import (
NEMOTRON_CONTRACT_TUNED_VARIANT_ID,
)
@pytest.mark.asyncio
async def test_external_runner_writes_valid_result_from_json_response():
    """A completion carrying a full JSON contract yields a valid report.

    The fake client answers with one chat-completion style payload whose
    message content is a JSON object with all five contract fields, plus a
    ``usage`` block. The test checks the report counters and the shape of
    the single result record that comes back.
    """
    completion_text = (
        '{"proposed_action":"rollout restart checkout",'
        '"action_plan":["inspect deployment","restart"],'
        '"risk_level":"medium",'
        '"requires_human_approval":true,'
        '"blocked_by_policy":false}'
    )
    token_usage = {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}
    api_payload = {
        "choices": [{"message": {"content": completion_text}}],
        "usage": token_usage,
    }

    results, report = await run_nemotron_external_replay(
        requests=[_request()],
        config=NemotronExternalRunnerConfig(api_key="test-key"),
        client=_FakeClient(api_payload),
    )

    assert report.valid is True
    assert report.results == 1

    record = results[0]
    assert record["schema_version"] == "agent_nemotron_external_result_v1"
    assert record["model_output"]["risk_level"] == "medium"
    assert record["model_output"]["requires_human_approval"] is True
    assert record["error"] is None
    # Usage reported by the API is expected on the first trace event.
    assert record["trace_events"][0]["usage"]["total_tokens"] == 30
    assert record["retry_used"] is False
@pytest.mark.asyncio
async def test_external_runner_fails_closed_on_invalid_model_output():
    """Non-JSON model content produces an invalid report and a safe fallback.

    The fake client returns the literal text ``not json`` as the message
    content. The record is expected to be flagged as a fallback with an
    incomplete trace, and its model output must be policy-blocked and
    require human approval.
    """
    unparseable_completion = {"choices": [{"message": {"content": "not json"}}]}

    results, report = await run_nemotron_external_replay(
        requests=[_request()],
        config=NemotronExternalRunnerConfig(api_key="test-key"),
        client=_FakeClient(unparseable_completion),
    )

    assert report.valid is False
    assert report.external_error_records == 1

    record = results[0]
    assert record["fallback_used"] is True
    assert record["trace_complete"] is False

    fallback_output = record["model_output"]
    assert fallback_output["blocked_by_policy"] is True
    assert fallback_output["requires_human_approval"] is True
@pytest.mark.asyncio
async def test_contract_tuned_runner_retries_missing_fields_once():
    """A contract-tuned request with an incomplete first answer is retried once.

    The request is tagged with the contract-tuned variant id and prompt
    profile and carries a ``response_contract`` listing five required fields.
    The fake client is scripted with two completions: the first contains only
    ``proposed_action``, the second contains every required field. The test
    checks that exactly two POSTs were made, that the prompts carry the
    expected marker phrases, and that the record keeps the first error while
    ending with no error.
    """
    request = _request()
    request["metadata"].update(
        candidate_variant_id=NEMOTRON_CONTRACT_TUNED_VARIANT_ID,
        prompt_profile="contract_tuned_v1",
    )
    request["response_contract"] = {
        "required": [
            "proposed_action",
            "action_plan",
            "risk_level",
            "requires_human_approval",
            "blocked_by_policy",
        ],
    }

    # First scripted answer: only one of the five required fields is present.
    incomplete_completion = {
        "choices": [
            {"message": {"content": '{"proposed_action":"restart checkout"}'}},
        ],
        "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
    }
    # Second scripted answer: every required field is present.
    complete_text = (
        '{"proposed_action":"collect diagnostics",'
        '"action_plan":["inspect logs"],'
        '"risk_level":"medium",'
        '"requires_human_approval":true,'
        '"blocked_by_policy":false}'
    )
    complete_completion = {
        "choices": [{"message": {"content": complete_text}}],
        "usage": {"prompt_tokens": 20, "completion_tokens": 30, "total_tokens": 50},
    }
    client = _FakeClient([incomplete_completion, complete_completion])

    results, report = await run_nemotron_external_replay(
        requests=[request],
        config=NemotronExternalRunnerConfig(api_key="test-key"),
        client=client,
    )

    assert report.valid is True
    assert report.retry_used_records == 1
    assert report.candidate_variant_id == NEMOTRON_CONTRACT_TUNED_VARIANT_ID
    assert client.calls == 2

    # The second chat message of each POST body carries the prompt text.
    first_prompt = client.payloads[0]["json"]["messages"][1]["content"]
    assert "EXACT JSON CONTRACT" in first_prompt
    retry_prompt = client.payloads[1]["json"]["messages"][1]["content"]
    assert "Previous model output was invalid" in retry_prompt

    record = results[0]
    assert record["candidate_variant_id"] == NEMOTRON_CONTRACT_TUNED_VARIANT_ID
    assert record["retry_used"] is True
    assert record["first_error"].startswith("model_output_missing_fields:")
    assert record["error"] is None
@pytest.mark.asyncio
async def test_external_runner_blocks_missing_key_before_network_call():
    """An empty API key aborts the run without touching the client.

    The fake client's call counter must stay at zero, no results are
    returned, and the report lists ``api_key_missing`` among its failures.
    """
    client = _FakeClient({})
    keyless_config = NemotronExternalRunnerConfig(api_key="")

    results, report = await run_nemotron_external_replay(
        requests=[_request()],
        config=keyless_config,
        client=client,
    )

    assert results == []
    assert report.valid is False
    assert "api_key_missing" in report.failures
    assert client.calls == 0
@pytest.mark.asyncio
async def test_external_runner_rejects_self_grading_request_leak():
    """A request whose incident context carries evaluation labels is rejected.

    ``evaluation_labels`` is injected into ``incident_context``. The run must
    return no results, and at least one report failure must mention
    ``request_self_grading_leak``.
    """
    leaky_request = _request()
    leaky_request["incident_context"]["evaluation_labels"] = {"repair_success": True}

    results, report = await run_nemotron_external_replay(
        requests=[leaky_request],
        config=NemotronExternalRunnerConfig(api_key="test-key"),
        client=_FakeClient({}),
    )

    assert results == []
    assert report.valid is False
    assert any("request_self_grading_leak" in reason for reason in report.failures)
class _FakeResponse:
    """Response stub that always succeeds and hands back a canned payload."""

    def __init__(self, payload: dict):
        # Stored as-is (no copy); ``json()`` returns this same object.
        self.payload = payload

    def raise_for_status(self) -> None:
        """Do nothing: the stub never represents an error status."""
        return None

    def json(self) -> dict:
        """Return the payload given at construction time."""
        body = self.payload
        return body
class _FakeClient:
    """Async client stand-in that replays scripted JSON payloads.

    ``payload`` is either a single dict, returned for every ``post`` call,
    or a list of dicts consumed in order, one per call.

    Attributes:
        payload: The scripted response(s) supplied at construction.
        payloads: Keyword arguments captured from each ``post`` call, in
            call order, so tests can inspect what was sent.
        calls: Number of ``post`` calls made so far.
    """

    def __init__(self, payload: dict | list[dict]):
        self.payload = payload
        self.payloads: list[dict] = []
        self.calls = 0

    async def post(self, *_args, **kwargs) -> _FakeResponse:
        """Record the call and return the next scripted response.

        Positional arguments are ignored; keyword arguments are appended to
        ``payloads``.

        Raises:
            IndexError: If ``payload`` is a list and more calls are made
                than responses were scripted. The call is still counted and
                its keyword arguments still recorded before raising.
        """
        self.calls += 1
        self.payloads.append(kwargs)

        if not isinstance(self.payload, list):
            return _FakeResponse(self.payload)

        position = self.calls - 1
        if position >= len(self.payload):
            # Same exception type as plain list indexing would raise, but
            # with a message that points at the test script rather than at
            # an anonymous list access inside the fake.
            raise IndexError(
                f"_FakeClient.post call #{self.calls} has no scripted response "
                f"(only {len(self.payload)} provided)"
            )
        return _FakeResponse(self.payload[position])
def _request() -> dict:
    """Build a fresh baseline replay request for the tests to send or mutate.

    Every call returns new dict objects (including the nested ones), so a
    test may modify ``metadata`` or ``incident_context`` without affecting
    other tests.
    """
    incident_context = {"alertname": "PodCrashLooping"}
    source_metadata = {"source": "test"}
    metadata = {
        "request_only": True,
        "not_replacement_evidence": True,
    }
    return {
        "schema_version": "agent_nemotron_replay_request_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "candidate_id": "nemo_nemotron_fabric",
        "system_prompt": "Return JSON.",
        "user_prompt": "Incident context",
        "incident_context": incident_context,
        "source_metadata": source_metadata,
        "metadata": metadata,
    }