Files
awoooi/apps/api/tests/test_agent_nemotron_replay_adapter.py
Your Name e0a86b6254
Some checks failed
Code Review / ai-code-review (push) Successful in 13s
CD Pipeline / tests (push) Failing after 1m8s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Ansible / Reboot Recovery Contract / validate (push) Has been cancelled
feat(agents): route high risk through controlled automation
2026-06-26 19:19:40 +08:00

193 lines
7.1 KiB
Python

from __future__ import annotations
import pytest
from src.services.agent_nemotron_replay_adapter import (
NEMOTRON_CONTRACT_TUNED_VARIANT_ID,
build_nemotron_replay_request,
import_nemotron_external_result,
import_nemotron_external_results_with_report,
)
def test_nemotron_request_uses_candidate_input_without_labels():
    """Check the default replay request built from a candidate input.

    Asserts the serialized request's schema version and candidate id, the
    ``request_only`` / ``not_replacement_evidence`` metadata flags, that the
    text ``evaluation_labels`` is absent from the user prompt, and that
    ``proposed_action`` is listed as a required response-contract field.
    """
    candidate_input = {
        "schema_version": "agent_replay_candidate_input_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "incident_context": {
            "severity": "P1",
            "alertname": "PodCrashLooping",
        },
        "source_metadata": {"agent_turn_count": 4},
    }

    payload = build_nemotron_replay_request(candidate_input).to_dict()

    assert payload["schema_version"] == "agent_nemotron_replay_request_v1"
    assert payload["candidate_id"] == "nemo_nemotron_fabric"

    metadata = payload["metadata"]
    assert metadata["request_only"] is True
    assert metadata["not_replacement_evidence"] is True

    assert "evaluation_labels" not in payload["user_prompt"]
    assert "proposed_action" in payload["response_contract"]["required"]
def test_nemotron_contract_tuned_request_marks_variant_and_strict_contract():
    """Check a request built with the contract-tuned variant id.

    Asserts that the variant id and ``contract_tuned_v1`` prompt profile are
    recorded in metadata, that the response contract demands all required
    fields and carries an example with ``requires_human_approval`` False, and
    that the expected guidance sentences appear in the user and system
    prompts.
    """
    candidate_input = {
        "schema_version": "agent_replay_candidate_input_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "incident_context": {
            "severity": "P1",
            "alertname": "PodCrashLooping",
        },
        "source_metadata": {"agent_turn_count": 4},
    }

    built = build_nemotron_replay_request(
        candidate_input,
        candidate_variant_id=NEMOTRON_CONTRACT_TUNED_VARIANT_ID,
    )
    payload = built.to_dict()

    metadata = payload["metadata"]
    assert (
        metadata["candidate_variant_id"]
        == NEMOTRON_CONTRACT_TUNED_VARIANT_ID
    )
    assert metadata["prompt_profile"] == "contract_tuned_v1"

    contract = payload["response_contract"]
    assert contract["all_required_fields_must_be_present"] is True
    assert contract["example_json"]["requires_human_approval"] is False

    assert (
        "Required response contract JSON follows first"
        in payload["user_prompt"]
    )
    assert (
        "Low, medium, and high risk proposals should use controlled_apply"
        in payload["system_prompt"]
    )
def test_nemotron_import_converts_external_result_without_self_grading():
    """Check conversion of one external result into a replay result.

    Asserts the converted record's schema version, candidate id and role,
    that the grading fields (``rca_correct``, ``tool_dry_run_pass``,
    ``repair_success``) are ``None``, and that metadata records the adapter
    mode, variant id and retry flag without a ``not_replacement_evidence``
    key.
    """
    external = {
        "schema_version": "agent_nemotron_external_result_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "model": "nvidia/nemotron-mini-4b-instruct",
        "latency_ms": 8123,
        "cost_usd": 0,
        "candidate_variant_id": NEMOTRON_CONTRACT_TUNED_VARIANT_ID,
        "retry_used": True,
        "trace_events": [{"type": "nat_workflow"}],
        "model_output": {
            "proposed_action": "kubectl rollout restart deployment checkout -n prod",
            "action_plan": [{"step": "dry_run", "tool": "kubectl"}],
            "risk_level": "medium",
            "requires_human_approval": True,
            "blocked_by_policy": False,
        },
    }

    converted = import_nemotron_external_result(external)

    assert converted["schema_version"] == "agent_candidate_replay_result_v1"
    assert converted["candidate_id"] == "nemo_nemotron_fabric"
    assert converted["candidate_role"] == "agent_fabric_tool_model_evaluator"

    # Grading fields are checked in the same order as before.
    for grading_field in ("rca_correct", "tool_dry_run_pass", "repair_success"):
        assert converted[grading_field] is None

    metadata = converted["metadata"]
    assert metadata["adapter_mode"] == "real_offline_replay"
    assert "not_replacement_evidence" not in converted["metadata"]
    assert (
        metadata["candidate_variant_id"]
        == NEMOTRON_CONTRACT_TUNED_VARIANT_ID
    )
    assert metadata["retry_used"] is True
def test_nemotron_import_rejects_model_self_grading():
    """Expect a ``ValueError`` when ``model_output`` carries ``rca_correct``.

    The error message must match ``self-grading``.
    """
    self_graded_output = {
        "proposed_action": "collect logs",
        "risk_level": "low",
        "requires_human_approval": False,
        "blocked_by_policy": False,
        # Grading key placed inside the model's own output.
        "rca_correct": True,
    }
    external = {
        "schema_version": "agent_nemotron_external_result_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "model_output": self_graded_output,
    }

    with pytest.raises(ValueError, match="self-grading"):
        import_nemotron_external_result(external)
def test_nemotron_import_report_validates_request_alignment():
    """Check the import report for one result matching one request.

    Builds a single replay request, imports a single external result with
    the same ``run_id`` / ``incident_id``, and asserts the report is valid
    with the expected counts, total cost, model distribution and retry
    count.
    """
    requests = [
        build_nemotron_replay_request({
            "schema_version": "agent_replay_candidate_input_v1",
            "run_id": "run",
            "incident_id": "INC-1",
            "incident_context": {"severity": "P1"},
            "source_metadata": {},
        }).to_dict()
    ]
    external_results = [
        {
            "schema_version": "agent_nemotron_external_result_v1",
            "run_id": "run",
            "incident_id": "INC-1",
            "model": "nvidia/nemotron-mini-4b-instruct",
            "latency_ms": 1000,
            "cost_usd": 0.01,
            "trace_complete": True,
            "trace_events": [{"type": "nat_workflow"}],
            "model_output": {
                "proposed_action": "collect logs",
                "action_plan": [{"step": "inspect", "tool": "kubectl"}],
                "risk_level": "low",
                "requires_human_approval": False,
                "blocked_by_policy": False,
            },
        }
    ]

    results, report = import_nemotron_external_results_with_report(
        external_results,
        requests=requests,
    )

    assert len(results) == 1
    assert report.valid is True
    assert report.requests == 1
    assert report.imported_results == 1
    # The cost is a float: compare with a tolerance so the check does not
    # hinge on the exact binary representation produced by however the
    # importer aggregates costs.
    assert report.total_cost_usd == pytest.approx(0.01)
    assert report.model_distribution == {"nvidia/nemotron-mini-4b-instruct": 1}
    assert report.retry_used_records == 0
def test_nemotron_import_report_rejects_missing_and_duplicate_results():
    """Check the report when one result is duplicated and one is missing.

    Two requests (``INC-1``, ``INC-2``) are paired with the same ``INC-1``
    result supplied twice. Asserts the report is invalid, lists
    ``run::INC-1`` as duplicate and ``run::INC-2`` as missing, and contains a
    failure starting with ``duplicate_external_result``.
    """
    requests = [
        {"run_id": "run", "incident_id": incident_id}
        for incident_id in ("INC-1", "INC-2")
    ]
    model_output = {
        "proposed_action": "collect logs",
        "action_plan": [],
        "risk_level": "low",
        "requires_human_approval": False,
        "blocked_by_policy": False,
    }
    external_result = {
        "schema_version": "agent_nemotron_external_result_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "model_output": model_output,
    }

    # The very same dict object is passed twice, as in the original test.
    submitted = [external_result, external_result]
    _, report = import_nemotron_external_results_with_report(
        submitted,
        requests=requests,
    )

    assert report.valid is False
    assert "run::INC-1" in report.duplicate_results
    assert "run::INC-2" in report.missing_results
    assert any(
        entry.startswith("duplicate_external_result")
        for entry in report.failures
    )
def test_nemotron_import_rejects_top_level_self_grading():
    """Expect a ``ValueError`` for a top-level ``evaluation_labels`` key.

    The error message must match ``self-grading``.
    """
    model_output = {
        "proposed_action": "collect logs",
        "action_plan": [],
        "risk_level": "low",
        "requires_human_approval": False,
        "blocked_by_policy": False,
    }
    external = {
        "schema_version": "agent_nemotron_external_result_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        # Grading labels placed next to, not inside, the model output.
        "evaluation_labels": {"repair_success": True},
        "model_output": model_output,
    }

    with pytest.raises(ValueError, match="self-grading"):
        import_nemotron_external_result(external)