53 lines
1.8 KiB
Python
53 lines
1.8 KiB
Python
from __future__ import annotations
|
|
|
|
from src.services.agent_nemotron_smoke_gate import (
|
|
evaluate_nemotron_contract_tuned_smoke_gate,
|
|
)
|
|
|
|
|
|
def test_smoke_gate_blocks_latency_even_when_runner_is_valid():
    """Check that the smoke gate blocks a valid runner report on latency alone.

    The runner report is flagged valid, with zero external-error, fallback
    and trace-incomplete records, so the only failure the gate is expected
    to list is ``latency_budget_exceeded``.
    """
    slow_runner_report = {
        "valid": True,
        "candidate_variant_id": "nemo_nemotron_fabric_contract_tuned_v1",
        "requests": 5,
        "results": 5,
        "external_error_records": 0,
        "fallback_used_records": 0,
        "trace_incomplete_records": 0,
        "retry_used_records": 1,
        "avg_latency_ms": 200000,
        "p95_latency_ms": 374591.0851,
        "model": "nvidia/nemotron-3-super-120b-a12b",
    }

    gate_result = evaluate_nemotron_contract_tuned_smoke_gate(
        runner_report=slow_runner_report,
    )
    report = gate_result.to_dict()

    # Overall verdict: the full replay must not be approved.
    assert report["approved_for_full_replay"] is False
    assert report["decision"] == "blocked"

    # Per-gate view: the runner gate passes, the latency gate does not.
    gates = report["gates"]
    assert gates["runner_valid"] is True
    assert gates["latency_budget_met"] is False

    # Latency is the sole listed failure; the retry count is carried into the summary.
    assert report["failures"] == ["latency_budget_exceeded"]
    assert report["runner_summary"]["retry_used_records"] == 1
|
|
|
|
|
|
def test_smoke_gate_approves_clean_fast_smoke():
    """Check that the smoke gate approves a clean runner report for full replay.

    The runner report is flagged valid, with zero external-error, fallback,
    trace-incomplete and retry records, and much lower latency figures than
    the blocked scenario in this module.
    """
    clean_runner_report = {
        "valid": True,
        "candidate_variant_id": "nemo_nemotron_fabric_contract_tuned_v1",
        "requests": 5,
        "results": 5,
        "external_error_records": 0,
        "fallback_used_records": 0,
        "trace_incomplete_records": 0,
        "retry_used_records": 0,
        "avg_latency_ms": 20000,
        "p95_latency_ms": 44000,
        "model": "nvidia/nemotron-3-super-120b-a12b",
    }

    gate_result = evaluate_nemotron_contract_tuned_smoke_gate(
        runner_report=clean_runner_report,
    )
    report = gate_result.to_dict()

    # Overall verdict: the full replay is approved.
    assert report["approved_for_full_replay"] is True
    assert report["decision"] == "approved_for_full_replay"

    # The latency gate is reported as met.
    gates = report["gates"]
    assert gates["latency_budget_met"] is True
|