Some checks failed
Code Review / ai-code-review (push) Successful in 13s
CD Pipeline / tests (push) Failing after 1m8s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Ansible / Reboot Recovery Contract / validate (push) Has been cancelled
193 lines
7.1 KiB
Python
193 lines
7.1 KiB
Python
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from src.services.agent_nemotron_replay_adapter import (
|
|
NEMOTRON_CONTRACT_TUNED_VARIANT_ID,
|
|
build_nemotron_replay_request,
|
|
import_nemotron_external_result,
|
|
import_nemotron_external_results_with_report,
|
|
)
|
|
|
|
|
|
def test_nemotron_request_uses_candidate_input_without_labels():
    """Check the default replay request built from a candidate input.

    The serialized request must carry the expected schema version and
    candidate id, be flagged as request-only / not replacement evidence,
    keep the text ``evaluation_labels`` out of the user prompt, and list
    ``proposed_action`` among the required response-contract fields.
    """
    candidate_input = {
        "schema_version": "agent_replay_candidate_input_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "incident_context": {
            "severity": "P1",
            "alertname": "PodCrashLooping",
        },
        "source_metadata": {"agent_turn_count": 4},
    }

    replay_request = build_nemotron_replay_request(candidate_input)
    payload = replay_request.to_dict()

    # Identity of the serialized request.
    assert payload["schema_version"] == "agent_nemotron_replay_request_v1"
    assert payload["candidate_id"] == "nemo_nemotron_fabric"

    # Metadata flags asserted on the request.
    metadata = payload["metadata"]
    assert metadata["request_only"] is True
    assert metadata["not_replacement_evidence"] is True

    # Prompt and response-contract content.
    assert "evaluation_labels" not in payload["user_prompt"]
    required_fields = payload["response_contract"]["required"]
    assert "proposed_action" in required_fields
|
|
|
|
|
|
def test_nemotron_contract_tuned_request_marks_variant_and_strict_contract():
    """Check a request built with the contract-tuned candidate variant.

    The serialized request must record the variant id and the
    ``contract_tuned_v1`` prompt profile in its metadata, mark every
    response-contract field as required, ship an example JSON whose
    ``requires_human_approval`` is ``False``, and contain the expected
    guidance sentences in the user and system prompts.
    """
    candidate_input = {
        "schema_version": "agent_replay_candidate_input_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "incident_context": {
            "severity": "P1",
            "alertname": "PodCrashLooping",
        },
        "source_metadata": {"agent_turn_count": 4},
    }

    replay_request = build_nemotron_replay_request(
        candidate_input,
        candidate_variant_id=NEMOTRON_CONTRACT_TUNED_VARIANT_ID,
    )
    payload = replay_request.to_dict()

    # Variant bookkeeping in the metadata.
    metadata = payload["metadata"]
    assert metadata["candidate_variant_id"] == NEMOTRON_CONTRACT_TUNED_VARIANT_ID
    assert metadata["prompt_profile"] == "contract_tuned_v1"

    # Response-contract expectations for the tuned variant.
    contract = payload["response_contract"]
    assert contract["all_required_fields_must_be_present"] is True
    assert contract["example_json"]["requires_human_approval"] is False

    # Prompt text expected for the tuned variant.
    assert "Required response contract JSON follows first" in payload["user_prompt"]
    assert (
        "Low, medium, and high risk proposals should use controlled_apply"
        in payload["system_prompt"]
    )
|
|
|
|
|
|
def test_nemotron_import_converts_external_result_without_self_grading():
    """Check conversion of one external result into a candidate replay result.

    The imported record must use the candidate replay result schema, carry
    the expected candidate id and role, leave ``rca_correct``,
    ``tool_dry_run_pass`` and ``repair_success`` as ``None``, and keep the
    variant id and retry flag in its metadata without a
    ``not_replacement_evidence`` key.
    """
    model_output = {
        "proposed_action": "kubectl rollout restart deployment checkout -n prod",
        "action_plan": [{"step": "dry_run", "tool": "kubectl"}],
        "risk_level": "medium",
        "requires_human_approval": True,
        "blocked_by_policy": False,
    }
    external_result = {
        "schema_version": "agent_nemotron_external_result_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "model": "nvidia/nemotron-mini-4b-instruct",
        "latency_ms": 8123,
        "cost_usd": 0,
        "candidate_variant_id": NEMOTRON_CONTRACT_TUNED_VARIANT_ID,
        "retry_used": True,
        "trace_events": [{"type": "nat_workflow"}],
        "model_output": model_output,
    }

    imported = import_nemotron_external_result(external_result)

    # Identity of the converted record.
    assert imported["schema_version"] == "agent_candidate_replay_result_v1"
    assert imported["candidate_id"] == "nemo_nemotron_fabric"
    assert imported["candidate_role"] == "agent_fabric_tool_model_evaluator"

    # Grading fields must stay unset after import.
    assert imported["rca_correct"] is None
    assert imported["tool_dry_run_pass"] is None
    assert imported["repair_success"] is None

    # Metadata carried over from the external result.
    metadata = imported["metadata"]
    assert metadata["adapter_mode"] == "real_offline_replay"
    assert "not_replacement_evidence" not in metadata
    assert metadata["candidate_variant_id"] == NEMOTRON_CONTRACT_TUNED_VARIANT_ID
    assert metadata["retry_used"] is True
|
|
|
|
|
|
def test_nemotron_import_rejects_model_self_grading():
    """Importing must raise when ``model_output`` carries ``rca_correct``.

    The expected failure is a ``ValueError`` whose message matches
    ``self-grading``.
    """
    self_graded_output = {
        "proposed_action": "collect logs",
        "risk_level": "low",
        "requires_human_approval": False,
        "blocked_by_policy": False,
        # Grading field placed inside the model output: the case under test.
        "rca_correct": True,
    }
    external_result = {
        "schema_version": "agent_nemotron_external_result_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "model_output": self_graded_output,
    }

    with pytest.raises(ValueError, match="self-grading"):
        import_nemotron_external_result(external_result)
|
|
|
|
|
|
def test_nemotron_import_report_validates_request_alignment():
    """Check the import report for one result that matches one request.

    A single request is built for run ``run`` / incident ``INC-1`` and a
    single external result with the same ids is imported against it. The
    report must be valid and count one request, one imported result, the
    result's cost, one record for its model, and no retried records.
    """
    candidate_input = {
        "schema_version": "agent_replay_candidate_input_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "incident_context": {"severity": "P1"},
        "source_metadata": {},
    }
    requests = [build_nemotron_replay_request(candidate_input).to_dict()]

    external_result = {
        "schema_version": "agent_nemotron_external_result_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "model": "nvidia/nemotron-mini-4b-instruct",
        "latency_ms": 1000,
        "cost_usd": 0.01,
        "trace_complete": True,
        "trace_events": [{"type": "nat_workflow"}],
        "model_output": {
            "proposed_action": "collect logs",
            "action_plan": [{"step": "inspect", "tool": "kubectl"}],
            "risk_level": "low",
            "requires_human_approval": False,
            "blocked_by_policy": False,
        },
    }

    results, report = import_nemotron_external_results_with_report(
        [external_result],
        requests=requests,
    )

    assert len(results) == 1
    assert report.valid is True
    assert report.requests == 1
    assert report.imported_results == 1
    # The total is an aggregated float; compare with a tolerance so the test
    # does not depend on how the importer accumulates or rounds costs.
    assert report.total_cost_usd == pytest.approx(0.01)
    assert report.model_distribution == {"nvidia/nemotron-mini-4b-instruct": 1}
    assert report.retry_used_records == 0
|
|
|
|
|
|
def test_nemotron_import_report_rejects_missing_and_duplicate_results():
    """Check the report when one result is duplicated and another is absent.

    Two requests (``INC-1`` and ``INC-2``) are supplied, but the only
    external result, for ``INC-1``, is passed twice. The report must be
    invalid, list ``run::INC-1`` as duplicate and ``run::INC-2`` as
    missing, and contain a failure starting with
    ``duplicate_external_result``.
    """
    requests = [
        {"run_id": "run", "incident_id": incident_id}
        for incident_id in ("INC-1", "INC-2")
    ]
    external_result = {
        "schema_version": "agent_nemotron_external_result_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        "model_output": {
            "proposed_action": "collect logs",
            "action_plan": [],
            "risk_level": "low",
            "requires_human_approval": False,
            "blocked_by_policy": False,
        },
    }

    # The same dict object is deliberately submitted twice.
    submitted = [external_result, external_result]
    _, report = import_nemotron_external_results_with_report(
        submitted,
        requests=requests,
    )

    assert report.valid is False
    assert "run::INC-1" in report.duplicate_results
    assert "run::INC-2" in report.missing_results

    has_duplicate_failure = any(
        entry.startswith("duplicate_external_result")
        for entry in report.failures
    )
    assert has_duplicate_failure
|
|
|
|
|
|
def test_nemotron_import_rejects_top_level_self_grading():
    """Importing must raise when the result has top-level ``evaluation_labels``.

    The expected failure is a ``ValueError`` whose message matches
    ``self-grading``.
    """
    model_output = {
        "proposed_action": "collect logs",
        "action_plan": [],
        "risk_level": "low",
        "requires_human_approval": False,
        "blocked_by_policy": False,
    }
    external_result = {
        "schema_version": "agent_nemotron_external_result_v1",
        "run_id": "run",
        "incident_id": "INC-1",
        # Labels placed beside, not inside, the model output: the case under test.
        "evaluation_labels": {"repair_success": True},
        "model_output": model_output,
    }

    with pytest.raises(ValueError, match="self-grading"):
        import_nemotron_external_result(external_result)
|