Files
awoooi/apps/api/tests/test_agent_nemotron_replay_failure_analysis.py
Your Name cfb866d055
Some checks failed
Ansible Lint / lint (push) Successful in 35s
CD Pipeline / tests (push) Failing after 13s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Code Review / ai-code-review (push) Failing after 11s
feat(governance): add agent market automation surfaces
2026-06-04 21:50:55 +08:00

100 lines
3.4 KiB
Python

from __future__ import annotations

import math

from src.services.agent_nemotron_replay_failure_analysis import (
    analyze_nemotron_replay_failure,
)
def test_failure_analysis_summarizes_contract_hilt_latency_and_baseline_failures():
    """Check the failure report produced for a blocked two-incident replay.

    The fixture pairs one complete model output (INC-1) with one whose error
    string names two missing fields (INC-2), and adds runner, finalizer and
    scorecard reports in which the candidate scores below the baseline and
    the promotion gate is not approved.  The assertions pin the report's
    schema version, decision, aggregate counters, score delta, the set of
    failure-mode ids and the proposed next variant id.
    """
    # INC-1: no error, all three output fields present.
    complete_result = {
        "incident_id": "INC-1",
        "error": None,
        "model_output": {
            "risk_level": "medium",
            "requires_human_approval": True,
            "blocked_by_policy": False,
        },
    }
    # INC-2: error names the missing fields; human approval is not required.
    incomplete_result = {
        "incident_id": "INC-2",
        "error": "model_output_missing_fields:action_plan,blocked_by_policy",
        "model_output": {
            "risk_level": "medium",
            "requires_human_approval": False,
        },
    }
    runner_report = {
        "requests": 2,
        "results": 2,
        "valid": False,
        "model": "nvidia/nemotron-3-super-120b-a12b",
        "external_error_records": 1,
        "fallback_used_records": 1,
        "trace_incomplete_records": 1,
        "p95_latency_ms": 120000,
        "avg_latency_ms": 70000,
        "failures": ["external_error:INC-2"],
    }
    # The two "failures" lists are deliberately separate list objects, as in
    # the original fixture, so nothing is aliased between the two levels.
    finalizer = {
        "decision": "blocked",
        "failures": ["candidate_result_errors_present:1"],
        "promotion_gate": {
            "approved": False,
            "decision": "blocked",
            "failures": ["candidate_result_errors_present:1"],
        },
    }
    scorecard = {
        "baseline_candidate_id": "openclaw_incumbent",
        "candidates": [
            {
                "candidate_id": "nemo_nemotron_fabric",
                "beats_baseline": False,
                "hard_gates_pass": False,
                "gate_failures": [
                    "hitl_preserved_rate_below_100pct",
                    "audit_trace_rate_below_0.95",
                ],
                "metrics": {
                    "audit_trace_rate": 0.5,
                    "hitl_preserved_rate": 0.5,
                },
                "total_score": 0.3,
            },
            {
                "candidate_id": "openclaw_incumbent",
                "gate_failures": [],
                "metrics": {},
                "total_score": 0.7,
            },
        ],
    }

    report = analyze_nemotron_replay_failure(
        external_results=[complete_result, incomplete_result],
        external_runner_report=runner_report,
        finalizer_report=finalizer,
        scorecard_report=scorecard,
        generated_at="2026-06-01T00:00:00+00:00",
    )

    assert report["schema_version"] == "agent_nemotron_replay_failure_analysis_v1"
    assert report["decision"] == "blocked"
    assert report["not_replacement_evidence"] is True

    aggregate = report["external_result_aggregate"]
    assert aggregate["model_output_missing_fields"] == {
        "action_plan": 1,
        "blocked_by_policy": 1,
    }
    assert aggregate["unsafe_hitl_records"] == 1

    # The fixture scores are 0.3 and 0.7; a raw float subtraction yields
    # -0.39999999999999997, not -0.4, so compare with a tolerance instead of
    # depending on the analyzer rounding its result.
    assert math.isclose(
        report["scorecard_delta"]["score_delta"], -0.4, abs_tol=1e-9
    )

    # Superset check: the report may list further modes beyond these.
    expected_mode_ids = {
        "output_contract_incomplete",
        "audit_trace_below_gate",
        "hitl_below_gate",
        "latency_outside_existing_async_budget",
        "candidate_under_baseline",
        "promotion_gate_blocked",
    }
    reported_mode_ids = {mode["id"] for mode in report["primary_failure_modes"]}
    assert reported_mode_ids >= expected_mode_ids

    assert (
        report["candidate_variant_plan"]["next_variant_id"]
        == "nemo_nemotron_fabric_contract_tuned_v1"
    )