import asyncio
import inspect
from datetime import datetime
from decimal import Decimal
from types import SimpleNamespace
from uuid import UUID
import pytest
from fastapi import HTTPException
import src.services.platform_operator_service as platform_operator_service
from src.api.v1.platform.operator_runs import (
AiRouteStatusResponse,
ListApprovalsResponse,
ListCallbackRepliesResponse,
ListCicdEventsResponse,
ListRunsResponse,
)
from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult
from src.services.ollama_health_monitor import HealthReport, HealthStatus
from src.services.platform_operator_service import (
_RUN_CONTEXT_QUERY_CHUNK_SIZE,
_ai_route_health_map,
_ai_route_lane_state,
_ai_route_policy_order,
_ai_route_repair_evidence_item,
_build_awooop_status_chain,
_callback_reply_audit_summary_from_row,
_callback_reply_event_item,
_callback_reply_summary_matches_status,
_cicd_duration_seconds,
_cicd_event_item_from_row,
_collect_run_incident_ids,
_is_source_correlation_applied_link,
_iter_run_context_batches,
_legacy_mcp_timeline_status,
_legacy_mcp_timeline_summary,
_list_filter_context_limit,
_outbound_timeline_status,
_outbound_timeline_summary,
_outbound_timeline_title,
_recent_channel_event_item,
_recent_event_source_summary,
_remediation_summary_matches_incident_id,
_remediation_summary_matches_status,
_remediation_timeline_summary,
_run_callback_reply_summary,
_run_remediation_list_summary,
_score_source_correlation_event,
_source_event_correlation_context,
_timeline_sort_key,
_validate_ai_route_workload,
_validate_callback_reply_action_filter,
_validate_callback_reply_status_filter,
_validate_cicd_stage_filter,
_validate_cicd_status_filter,
)
def test_outbound_timeline_title_labels_runbook_review() -> None:
    """A Telegram approval request with a runbook-review preview gets the runbook title."""
    preview = "📄 RUNBOOK REVIEW|待審核\nIncident:INC-1"
    expected = "TELEGRAM:Runbook 待人工審核"
    assert _outbound_timeline_title("telegram", "approval_request", preview) == expected
def test_outbound_timeline_title_labels_governance_alert() -> None:
    """A final Telegram message with a governance-alert preview gets the governance title."""
    preview = "⚠️ *AI 治理警報|知識庫劣化*"
    expected = "TELEGRAM:AI 治理警報"
    assert _outbound_timeline_title("telegram", "final", preview) == expected
def test_outbound_timeline_title_labels_cicd_status() -> None:
    """A final Telegram message with a CI/CD preview gets the CI/CD status title."""
    preview = "✅ [AWOOOI CI/CD] | code-review\n📦 Code Review 完成・LOW"
    expected = "TELEGRAM:CI/CD 狀態通知"
    assert _outbound_timeline_title("telegram", "final", preview) == expected
def test_recent_event_source_summary_projects_telegram_callback_safely() -> None:
    """Check the summary of a Telegram callback row keeps safe fields only.

    The envelope carries SHA-256 digests next to the readable callback fields;
    the test asserts the projected callback summary and that no hashed or raw
    callback/user identifiers appear in the stringified result.
    """
    incident_id = "INC-20260602-5734BE"
    provider_event_id = "telegram_callback:503475699"
    callback_query = {
        "callback_query_id_sha256": "q" * 64,
        "callback_data_sha256": "d" * 64,
        "callback_action": "approve",
        "callback_ref": incident_id,
        "incident_id": incident_id,
        "approval_id": None,
        "message_id": "30972",
        "user_id_sha256": "u" * 64,
        "username_present": True,
    }
    envelope = {
        "schema_version": "inbound_source_envelope_v1",
        "provider": "telegram",
        "stage": "received",
        "provider_event_id": provider_event_id,
        "redaction_version": "audit_sink_v1",
        "extra": {"telegram_callback_query": callback_query},
    }
    row = SimpleNamespace(
        event_id=UUID("11111111-1111-4111-8111-111111111111"),
        project_id="awoooi",
        channel_type="telegram",
        provider_event_id=provider_event_id,
        channel_chat_id=None,
        run_id=None,
        content_type="callback_query",
        content_preview=(
            "Telegram callback_query received; action=approve; "
            "incident_id=INC-20260602-5734BE"
        ),
        is_duplicate=False,
        received_at=datetime(2026, 6, 4, 7, 16, 33),
        source_envelope=envelope,
    )

    summary = _recent_event_source_summary(row)

    expected_top_level = {
        "schema_version": "awooop_recent_event_source_summary_v1",
        "provider": "telegram",
        "stage": "received",
        "source_ref_count": 0,
    }
    for field, expected in expected_top_level.items():
        assert summary[field] == expected
    assert summary["telegram_callback_query"] == {
        "action": "approve",
        "callback_ref": incident_id,
        "incident_id": incident_id,
        "approval_id": None,
        "message_id": "30972",
        "username_present": True,
    }

    # None of the hashed / raw identifiers may leak into the summary.
    dumped = str(summary)
    for forbidden in ("sha256", "callback_data", "user_id"):
        assert forbidden not in dumped
def test_recent_channel_event_item_includes_content_type_and_source_summary() -> None:
    """The channel event item exposes run id, content type and a source summary.

    Uses an Alertmanager-style envelope with one alert id and one incident id
    and expects a source reference count of two.
    """
    run_id = UUID("22222222-2222-4222-8222-222222222222")
    envelope = {
        "provider": "alertmanager",
        "stage": "received",
        "source_refs": {
            "alert_ids": ["alert-1"],
            "incident_ids": ["INC-20260603-9B2535"],
        },
        "log_correlation": {
            "alertname": "DockerContainerUnhealthy",
            "severity": "warning",
            "namespace": "default",
            "target_resource": "bitan-pharmacy-bitan-1",
            "fingerprint": "be6a1821f6336fa44b5ec33855b9f23d",
        },
    }
    row = SimpleNamespace(
        event_id=UUID("33333333-3333-4333-8333-333333333333"),
        project_id="awoooi",
        channel_type="internal",
        provider_event_id="alertmanager:received:alert-1",
        channel_chat_id=None,
        run_id=run_id,
        content_type="text",
        content_preview="Alertmanager inbound received",
        is_duplicate=True,
        received_at=datetime(2026, 6, 4, 13, 15, 2),
        source_envelope=envelope,
    )

    item = _recent_channel_event_item(row)

    assert item["run_id"] == run_id
    assert item["content_type"] == "text"
    source_summary = item["source_summary"]
    assert source_summary["provider"] == "alertmanager"
    assert source_summary["stage"] == "received"
    assert source_summary["source_ref_count"] == 2
    assert source_summary["log_correlation"]["alertname"] == "DockerContainerUnhealthy"
def test_cicd_event_item_preserves_rollout_risk_summary() -> None:
    """A pending rollout-risk alert row keeps its labels and annotations in the item.

    Also checks the produced item is accepted by ``ListCicdEventsResponse``.
    """
    labels = {
        "alertname": "CI_rollout_risk_pending",
        "stage": "rollout-risk",
        "status": "pending",
        "severity": "warning",
        "commit": "8e68dc1e3595a2667831143f76794512bcb302be",
        "triggered_by": "wooo",
        "duration_seconds": "0",
    }
    annotations = {
        "summary": "AWOOOI 部署風險已恢復",
        "description": "public_health_argocd_wait_http=curl_error_28",
        "workflow_url": "http://192.168.0.110:3001/wooo/awoooi/actions/runs/2827",
    }
    row = {
        "id": "1da1af11-fd3e-4073-ac85-fd304dbd2dc3",
        "action_detail": "收到告警: CI_rollout_risk_pending",
        "created_at": datetime(2026, 5, 21, 11, 46, 33),
        "context": {
            "source": "alertmanager",
            "alert_id": "alert-20260521194633",
            "labels": labels,
            "annotations": annotations,
        },
    }

    item = _cicd_event_item_from_row(row, project_id="awoooi")

    assert item["stage"] == "rollout-risk"
    assert item["status"] == "pending"
    assert item["needs_attention"] is True
    assert item["summary"] == "AWOOOI 部署風險已恢復"
    assert "curl_error_28" in item["description"]
    assert item["commit_sha"].startswith("8e68dc1e")

    response = ListCicdEventsResponse(items=[item], total=1, limit=1)
    assert response.items[0].stage == "rollout-risk"
def test_cicd_event_filter_validation_and_duration_safety() -> None:
    """Filters are lower-cased, bad filters raise, and bad durations become zero."""
    assert _validate_cicd_stage_filter("Rollout-Risk") == "rollout-risk"
    assert _validate_cicd_status_filter("PENDING") == "pending"

    # Negative and non-numeric durations both fall back to 0.
    for raw_duration in ("-3", "bad"):
        assert _cicd_duration_seconds(raw_duration) == 0

    rejected = (
        (_validate_cicd_stage_filter, "rollout risk;drop"),
        (_validate_cicd_status_filter, "ignored"),
    )
    for validator, bad_value in rejected:
        with pytest.raises(HTTPException):
            validator(bad_value)
def test_outbound_timeline_title_labels_auto_repair_handoff() -> None:
    """An error message with a handoff-required preview gets the auto-repair-failed title."""
    preview = "🤖❌ HANDOFF REQUIRED|AI 自動修復失敗,已轉人工"
    expected = "TELEGRAM:AI 自動修復失敗,已轉人工"
    assert _outbound_timeline_title("telegram", "error", preview) == expected
def test_outbound_timeline_title_falls_back_to_human_label() -> None:
    """An interim message with an unrecognised preview gets the generic interim label."""
    preview = "正在調用 MCP 工具"
    expected = "TELEGRAM:漸進式狀態回饋"
    assert _outbound_timeline_title("telegram", "interim", preview) == expected
def test_outbound_timeline_title_labels_callback_reply_fallback() -> None:
    """A history callback reply sent via fallback drives title, status and summary."""
    preview = "事件歷史統計 INC-20260513-79ED5E"
    callback_reply = {
        "status": "callback_reply_fallback_sent",
        "action": "history",
        "incident_id": "INC-20260513-79ED5E",
        "parse_mode": "plain_text",
    }

    title = _outbound_timeline_title("telegram", "final", preview, callback_reply)
    status = _outbound_timeline_status("sent", callback_reply)
    summary = _outbound_timeline_summary(
        content_preview=preview,
        send_error=None,
        callback_reply=callback_reply,
    )

    assert title == "TELEGRAM:歷史回覆 fallback 已送出"
    assert status == "callback_reply_fallback_sent"
    expected_fragments = (
        "callback=history",
        "incident=INC-20260513-79ED5E",
        "parse_mode=plain_text",
    )
    for fragment in expected_fragments:
        assert fragment in summary
def test_outbound_timeline_title_labels_callback_reply_failure() -> None:
    """A failed detail callback reply drives title, status and an error summary."""
    callback_reply = {
        "status": "callback_reply_failed",
        "action": "detail",
        "incident_id": "INC-20260513-79ED5E",
        "error": "HTTP error: 400",
    }

    title = _outbound_timeline_title("telegram", "error", None, callback_reply)
    assert title == "TELEGRAM:詳情回覆送出失敗"

    status = _outbound_timeline_status("failed", callback_reply)
    assert status == "callback_reply_failed"

    summary = _outbound_timeline_summary(
        content_preview=None,
        send_error="HTTP error: 400",
        callback_reply=callback_reply,
    )
    assert "error=HTTP error: 400" in summary
def test_collect_run_incident_ids_reads_source_refs_and_legacy_text() -> None:
    """Incident ids come from envelope source refs and from preview text, de-duplicated.

    The inbound envelope repeats one id, the outbound envelope adds a second,
    and the outbound preview text mentions a third; the expected result lists
    each id once in that order.
    """
    run = SimpleNamespace(trigger_ref="not-an-incident", error_detail=None)
    inbound_event = SimpleNamespace(
        source_envelope={
            "source_refs": {
                "incident_ids": ["INC-20260514-F85F21", "INC-20260514-F85F21"],
            }
        },
        content_preview="Alertmanager inbound converged",
        content_redacted=None,
    )
    outbound_message = SimpleNamespace(
        source_envelope={
            "source_refs": {
                "incident_ids": ["INC-20260518-CB0001"],
            },
        },
        content_preview="詳情:INC-20260513-79ED5E",
        send_error=None,
    )

    collected = _collect_run_incident_ids(
        run=run,
        inbound_events=[inbound_event],
        outbound_messages=[outbound_message],
    )

    expected = [
        "INC-20260514-F85F21",
        "INC-20260518-CB0001",
        "INC-20260513-79ED5E",
    ]
    assert collected == expected
def test_run_callback_reply_summary_marks_latest_fallback() -> None:
    """With a sent reply followed by a fallback reply, the fallback is reported as latest.

    Neither message carries snapshot sections, so capture is expected to be
    ``not_captured`` for both.
    """
    incident_id = "INC-20260513-79ED5E"
    first_at = datetime(2026, 5, 18, 6, 1, 0)
    second_at = datetime(2026, 5, 18, 6, 2, 0)
    detail_reply = SimpleNamespace(
        source_envelope={
            "callback_reply": {
                "status": "callback_reply_sent",
                "action": "detail",
                "incident_id": incident_id,
            }
        },
        sent_at=first_at,
        queued_at=first_at,
        provider_message_id="100",
    )
    history_fallback = SimpleNamespace(
        source_envelope={
            "callback_reply": {
                "status": "callback_reply_fallback_sent",
                "action": "history",
                "incident_id": incident_id,
            }
        },
        sent_at=second_at,
        queued_at=second_at,
        provider_message_id="101",
    )

    summary = _run_callback_reply_summary([detail_reply, history_fallback])

    expected_fields = {
        "status": "fallback_sent",
        "total": 2,
        "sent": 1,
        "fallback_sent": 1,
        "latest_action": "history",
        "latest_incident_id": incident_id,
        "latest_provider_message_id": "101",
        "capture_status": "not_captured",
        "capture_not_captured": 2,
        "latest_capture_missing": [
            "awooop_status_chain",
            "km_stale_completion_summary",
        ],
    }
    for field, expected in expected_fields.items():
        assert summary[field] == expected
    assert summary["needs_human"] is False
def test_run_callback_reply_summary_marks_failed_as_human_attention() -> None:
    """A single failed callback reply yields a failed summary that needs a human."""
    failed_reply = SimpleNamespace(
        source_envelope={
            "callback_reply": {
                "status": "callback_reply_failed",
                "action": "detail",
                "incident_id": "INC-20260513-79ED5E",
            }
        },
        sent_at=None,
        queued_at=datetime(2026, 5, 18, 6, 3, 0),
        provider_message_id="telegram_callback_reply:failed",
    )

    summary = _run_callback_reply_summary([failed_reply])

    assert summary["status"] == "failed"
    assert summary["failed"] == 1
    assert summary["needs_human"] is True
    assert summary["capture_status"] == "not_captured"
def test_run_callback_reply_summary_counts_capture_statuses() -> None:
    """One fully captured reply plus one missing the KM snapshot gives a partial summary."""
    incident_id = "INC-20260513-79ED5E"
    status_chain_snapshot = {
        "schema_version": "awooop_status_chain_callback_reply_snapshot_v1",
    }
    km_snapshot = {
        "schema_version": "km_stale_owner_review_callback_reply_snapshot_v1",
    }
    fully_captured_at = datetime(2026, 5, 18, 6, 3, 0)
    partially_captured_at = datetime(2026, 5, 18, 6, 4, 0)

    fully_captured = SimpleNamespace(
        source_envelope={
            "callback_reply": {
                "status": "callback_reply_sent",
                "action": "detail",
                "incident_id": incident_id,
            },
            "awooop_status_chain": status_chain_snapshot,
            "km_stale_completion_summary": km_snapshot,
        },
        sent_at=fully_captured_at,
        queued_at=fully_captured_at,
        provider_message_id="102",
    )
    # The later message has no km_stale_completion_summary section.
    partially_captured = SimpleNamespace(
        source_envelope={
            "callback_reply": {
                "status": "callback_reply_sent",
                "action": "history",
                "incident_id": incident_id,
            },
            "awooop_status_chain": dict(status_chain_snapshot),
        },
        sent_at=partially_captured_at,
        queued_at=partially_captured_at,
        provider_message_id="103",
    )

    summary = _run_callback_reply_summary([fully_captured, partially_captured])

    expected_fields = {
        "capture_status": "partial",
        "capture_captured": 1,
        "capture_partial": 1,
        "capture_not_captured": 0,
        "latest_capture_status": "partial",
        "latest_capture_missing": ["km_stale_completion_summary"],
    }
    for field, expected in expected_fields.items():
        assert summary[field] == expected
def test_run_callback_reply_summary_marks_no_callback() -> None:
    """A message with an empty envelope is not counted as a callback reply."""
    queued_and_sent = datetime(2026, 5, 18, 6, 1, 0)
    plain_message = SimpleNamespace(
        source_envelope={},
        sent_at=queued_and_sent,
        queued_at=queued_and_sent,
        provider_message_id="100",
    )

    summary = _run_callback_reply_summary([plain_message])

    assert summary["status"] == "no_callback"
    assert summary["total"] == 0
    assert summary["capture_status"] == "no_callback"
def test_list_runs_response_preserves_callback_reply_summary() -> None:
    """The callback reply summary survives validation and JSON dumping of ``ListRunsResponse``."""
    callback_reply_summary = {
        "schema_version": "awooop_run_callback_reply_summary_v1",
        "status": "failed",
        "total": 1,
        "sent": 0,
        "fallback_sent": 0,
        "rescue_sent": 0,
        "failed": 1,
        "needs_human": True,
        "latest_status": "callback_reply_failed",
        "latest_action": "detail",
        "latest_incident_id": "INC-20260513-79ED5E",
        "latest_at": "2026-05-18T07:31:37",
        "latest_provider_message_id": "telegram_callback_reply:failed",
        "capture_status": "not_captured",
        "capture_captured": 0,
        "capture_partial": 0,
        "capture_not_captured": 1,
        "latest_capture_status": "not_captured",
        "latest_capture_missing": [
            "awooop_status_chain",
            "km_stale_completion_summary",
        ],
        "latest_capture_next_action": (
            "press_telegram_detail_or_history_after_rollout"
        ),
    }
    run_payload = {
        "run_id": UUID("5c0306e0-591a-5445-9a33-80f499426b38"),
        "project_id": "awoooi",
        "agent_id": "legacy-telegram-gateway",
        "state": "completed",
        "is_shadow": True,
        "cost_usd": Decimal("0.0000"),
        "step_count": 0,
        "created_at": datetime(2026, 5, 18, 7, 31, 37),
        "timeout_at": None,
        "remediation_summary": None,
        "callback_reply_summary": callback_reply_summary,
    }

    response = ListRunsResponse.model_validate({
        "runs": [run_payload],
        "total": 1,
        "page": 1,
        "per_page": 1,
    })

    dumped_summary = response.model_dump(mode="json")["runs"][0][
        "callback_reply_summary"
    ]
    assert dumped_summary["status"] == "failed"
    assert dumped_summary["needs_human"] is True
    assert dumped_summary["capture_status"] == "not_captured"
def test_callback_reply_event_item_surfaces_run_link_and_human_flag() -> None:
    """A failed history reply row yields status, run link and captured evidence.

    Both persisted snapshots are present on the row, so the evidence capture
    status is expected to be ``captured`` with nothing missing.
    """
    queued_at = datetime(2026, 5, 18, 7, 31, 37)
    persisted_status_chain = {
        "schema_version": "awooop_status_chain_callback_reply_snapshot_v1",
        "repair_state": "blocked_manual_required",
        "needs_human": True,
        "next_step": "manual_investigation",
    }
    persisted_km_summary = {
        "schema_version": "km_stale_owner_review_callback_reply_snapshot_v1",
        "status": "no_related_owner_review",
        "ready_count": 4,
        "triage": {
            "flow_stage": "callback_observed_owner_review_link_missing",
            "ai_lead_agent": "Hermes",
        },
    }
    row = {
        "message_id": UUID("56cdb6ad-46a4-48f5-9d3b-b1ac9c0b2e92"),
        "run_id": UUID("5c0306e0-591a-5445-9a33-80f499426b38"),
        "project_id": "awoooi",
        "channel_type": "telegram",
        "message_type": "error",
        "send_status": "failed",
        "send_error": "HTTP error: 400",
        "provider_message_id": "telegram_callback_reply:failed",
        "queued_at": queued_at,
        "sent_at": None,
        "triggered_by_state": "callback_reply",
        "content_preview": "無法取得歷史統計",
        "run_state": "completed",
        "agent_id": "legacy-telegram-gateway",
        "run_created_at": datetime(2026, 5, 18, 7, 30, 0),
        "callback_reply": {
            "status": "callback_reply_failed",
            "action": "history",
            "incident_id": "INC-20260513-79ED5E",
            "error": "HTTP error: 400",
        },
        "persisted_awooop_status_chain": persisted_status_chain,
        "persisted_km_stale_completion_summary": persisted_km_summary,
    }

    item = _callback_reply_event_item(row)

    assert item["status"] == "failed"
    assert item["needs_human"] is True
    assert item["action"] == "history"
    assert item["incident_id"] == "INC-20260513-79ED5E"
    # sent_at is None on the row; the expected event time equals queued_at.
    assert item["event_at"] == queued_at
    assert item["run_detail_href"] == (
        "/awooop/runs/5c0306e0-591a-5445-9a33-80f499426b38?project_id=awoooi"
    )

    km_snapshot = item["persisted_km_stale_completion_summary"]
    assert km_snapshot["ready_count"] == 4
    assert km_snapshot["triage"]["ai_lead_agent"] == "Hermes"
    assert item["persisted_awooop_status_chain"]["repair_state"] == (
        "blocked_manual_required"
    )

    capture = item["evidence_capture_status"]
    assert capture["status"] == "captured"
    assert capture["captured"] == [
        "awooop_status_chain",
        "km_stale_completion_summary",
    ]
    assert capture["missing"] == []
    assert capture["next_action"] == "none"
def test_callback_reply_event_item_marks_legacy_snapshot_missing() -> None:
    """A sent reply row without persisted snapshots is reported as not captured."""
    row = {
        "message_id": UUID("56cdb6ad-46a4-48f5-9d3b-b1ac9c0b2e92"),
        "run_id": UUID("5c0306e0-591a-5445-9a33-80f499426b38"),
        "project_id": "awoooi",
        "channel_type": "telegram",
        "message_type": "final",
        "send_status": "sent",
        "send_error": None,
        "provider_message_id": "123",
        "queued_at": datetime(2026, 5, 18, 7, 31, 37),
        "sent_at": datetime(2026, 5, 18, 7, 31, 38),
        "triggered_by_state": "callback_reply",
        "content_preview": "事件詳情",
        "run_state": "completed",
        "agent_id": "legacy-telegram-gateway",
        "run_created_at": datetime(2026, 5, 18, 7, 30, 0),
        "callback_reply": {
            "status": "callback_reply_sent",
            "action": "detail",
            "incident_id": "INC-20260513-79ED5E",
        },
        "persisted_awooop_status_chain": None,
        "persisted_km_stale_completion_summary": None,
    }

    capture_status = _callback_reply_event_item(row)["evidence_capture_status"]

    expected_fields = {
        "schema_version": "callback_evidence_capture_status_v1",
        "status": "not_captured",
        "reason": "legacy_callback_before_snapshot_rollout",
        "missing": [
            "awooop_status_chain",
            "km_stale_completion_summary",
        ],
        "captured": [],
        "next_action": "press_telegram_detail_or_history_after_rollout",
    }
    for field, expected in expected_fields.items():
        assert capture_status[field] == expected
def test_list_callback_replies_response_preserves_callback_evidence() -> None:
    """Round-trip a full callback-reply listing through ``ListCallbackRepliesResponse``.

    The payload carries one reply item (live and persisted status chain / KM
    summaries plus an evidence capture status) and an audit summary. After
    ``model_dump(mode="json")`` the test checks that the item evidence and the
    summary counters are preserved, that datetimes are dumped as ISO strings,
    and that top-prefix rows gain a ``first_sent_at`` of ``None``.
    """
    incident_id = "INC-20260513-79ED5E"
    callback_at = datetime(2026, 5, 18, 7, 31, 37)

    live_status_chain = {
        "schema_version": "awooop_status_chain_v1",
        "repair_state": "read_only_dry_run",
        "needs_human": True,
    }
    persisted_status_chain = {
        "schema_version": "awooop_status_chain_callback_reply_snapshot_v1",
        "source_schema_version": "awooop_status_chain_v1",
        "source": "telegram_callback_reply_snapshot",
        "source_id": incident_id,
        "incident_ids": [incident_id],
        "current_stage": "approval_required",
        "stage_status": "waiting",
        "verdict": "approval_required",
        "repair_state": "read_only_dry_run",
        "verification": "missing",
        "needs_human": True,
        "next_step": "approve_or_escalate_from_awooop",
        "evidence": {
            "auto_repair_records": 0,
            "operation_records": 0,
            "mcp_gateway_total": 1,
            "knowledge_entries": 0,
        },
        "writes": {
            "incident": False,
            "auto_repair": False,
        },
    }
    live_km_summary = {
        "schema_version": (
            "km_stale_owner_review_completion_callback_summary_v1"
        ),
        "project_id": "awoooi",
        "incident_id": incident_id,
        "status": "matched_owner_review",
        "ready_count": 3,
        "blocked_count": 1,
        "completed_count": 2,
        "failed_count": 0,
        "batch_writes_allowed": False,
        "manual_review_required": True,
        "related_total": 1,
        "work_item": None,
        "related_items": [
            {
                "entry_id": "km-1",
                "readiness": "ready",
                "next_action": "preview_stale_km_review_completion",
            }
        ],
    }
    persisted_km_summary = {
        "schema_version": (
            "km_stale_owner_review_callback_reply_snapshot_v1"
        ),
        "source_schema_version": (
            "km_stale_owner_review_completion_callback_summary_v1"
        ),
        "project_id": "awoooi",
        "incident_id": incident_id,
        "status": "matched_owner_review",
        "ready_count": 3,
        "blocked_count": 1,
        "completed_count": 2,
        "failed_count": 0,
        "batch_writes_allowed": False,
        "manual_review_required": True,
        "related_total": 1,
        "triage": {
            "flow_stage": "callback_observed_owner_review_link_missing",
            "ai_lead_agent": "Hermes",
            "automation_state": "manual_owner_review_required",
        },
    }
    evidence_capture_status = {
        "schema_version": "callback_evidence_capture_status_v1",
        "status": "captured",
        "reason": "ok",
        "action": "detail",
        "captured": [
            "awooop_status_chain",
            "km_stale_completion_summary",
        ],
        "missing": [],
        "snapshot_rollout": "t167_t169",
        "next_action": "none",
        "event_at": callback_at,
    }
    reply_item = {
        "message_id": UUID("56cdb6ad-46a4-48f5-9d3b-b1ac9c0b2e92"),
        "run_id": UUID("5c0306e0-591a-5445-9a33-80f499426b38"),
        "project_id": "awoooi",
        "status": "fallback_sent",
        "needs_human": False,
        "action": "detail",
        "incident_id": incident_id,
        "event_at": callback_at,
        "channel_type": "telegram",
        "message_type": "final",
        "send_status": "sent",
        "send_error": None,
        "provider_message_id": "123",
        "triggered_by_state": "callback_reply",
        "content_preview": "事件詳情",
        "run_state": "completed",
        "agent_id": "legacy-telegram-gateway",
        "run_created_at": datetime(2026, 5, 18, 7, 30, 0),
        "callback_reply": {
            "status": "callback_reply_fallback_sent",
            "action": "detail",
            "incident_id": incident_id,
        },
        "awooop_status_chain": live_status_chain,
        "persisted_awooop_status_chain": persisted_status_chain,
        "km_stale_completion_summary": live_km_summary,
        "persisted_km_stale_completion_summary": persisted_km_summary,
        "evidence_capture_status": evidence_capture_status,
        "run_detail_href": (
            "/awooop/runs/5c0306e0-591a-5445-9a33-80f499426b38"
            "?project_id=awoooi"
        ),
    }

    missing_incident_ref_prefixes = [
        {
            "prefix": "silence",
            "total": 3,
            "recent_24h_total": 0,
            "last_sent_at": datetime(2026, 5, 18, 7, 40, 0),
        },
        {
            "prefix": "drift_view",
            "total": 1,
            "recent_24h_total": 1,
            "last_sent_at": datetime(2026, 5, 18, 8, 15, 0),
        },
    ]
    missing_trace_ref_prefixes = [
        {
            "prefix": "silence",
            "total": 2,
            "recent_24h_total": 0,
            "last_sent_at": datetime(2026, 5, 18, 7, 40, 0),
        },
    ]
    audit_summary = {
        "schema_version": "telegram_callback_reply_audit_summary_v1",
        "project_id": "awoooi",
        "outbound_total": 120,
        "outbound_source_envelope_total": 118,
        "outbound_source_refs_total": 100,
        "outbound_trace_ref_total": 92,
        "outbound_incident_ref_total": 80,
        "outbound_reply_markup_total": 30,
        "outbound_reply_markup_missing_incident_ref_total": 4,
        "outbound_reply_markup_missing_incident_ref_recent_1h_total": 1,
        "outbound_reply_markup_missing_incident_ref_recent_24h_total": 2,
        "outbound_reply_markup_missing_incident_ref_latest_sent_at": (
            datetime(2026, 5, 18, 8, 15, 0)
        ),
        "outbound_reply_markup_missing_trace_ref_total": 2,
        "outbound_reply_markup_missing_trace_ref_recent_1h_total": 0,
        "outbound_reply_markup_missing_trace_ref_recent_24h_total": 1,
        "outbound_reply_markup_missing_trace_ref_latest_sent_at": (
            datetime(2026, 5, 18, 7, 40, 0)
        ),
        "outbound_reply_markup_trace_ref_gap_status": "recent_backlog",
        "outbound_reply_markup_trace_ref_gap_next_action": "watch_24h_decay",
        "outbound_reply_markup_trace_ref_after_gap_total": 3,
        "outbound_reply_markup_trace_ref_after_gap_first_sent_at": (
            datetime(2026, 5, 18, 8, 20, 0)
        ),
        "outbound_reply_markup_trace_ref_after_gap_latest_sent_at": (
            datetime(2026, 5, 18, 9, 0, 0)
        ),
        "outbound_reply_markup_trace_ref_gap_recovery_status": (
            "recovered_after_gap"
        ),
        "outbound_reply_markup_missing_incident_ref_top_prefixes": (
            missing_incident_ref_prefixes
        ),
        "outbound_reply_markup_missing_trace_ref_top_prefixes": (
            missing_trace_ref_prefixes
        ),
        "outbound_failed_total": 1,
        "callback_total": 3,
        "callback_sent_total": 1,
        "callback_fallback_total": 1,
        "callback_rescue_total": 0,
        "callback_failed_total": 1,
        "callback_detail_total": 2,
        "callback_history_total": 1,
        "callback_snapshot_captured_total": 1,
        "callback_snapshot_partial_total": 1,
        "callback_snapshot_missing_total": 1,
        "callback_incident_total": 2,
        "snapshot_status": "partial",
        "next_action": "review_legacy_callback_snapshot_gap",
        "latest_outbound_at": datetime(2026, 5, 18, 7, 40, 0),
        "latest_callback_at": callback_at,
    }

    response = ListCallbackRepliesResponse.model_validate({
        "items": [reply_item],
        "total": 1,
        "page": 1,
        "per_page": 20,
        "summary": audit_summary,
    })
    dumped = response.model_dump(mode="json")

    # Per-item evidence must survive the round trip.
    dumped_item = dumped["items"][0]
    assert dumped_item["status"] == "fallback_sent"
    assert dumped_item["callback_reply"]["action"] == "detail"
    assert dumped_item["awooop_status_chain"]["repair_state"] == "read_only_dry_run"
    assert dumped_item["persisted_awooop_status_chain"]["next_step"] == (
        "approve_or_escalate_from_awooop"
    )
    assert dumped_item["km_stale_completion_summary"]["ready_count"] == 3
    assert dumped_item["km_stale_completion_summary"]["related_total"] == 1
    persisted_triage = dumped_item["persisted_km_stale_completion_summary"]["triage"]
    assert persisted_triage["ai_lead_agent"] == "Hermes"
    assert dumped_item["evidence_capture_status"]["status"] == "captured"
    assert dumped_item["run_detail_href"].endswith("project_id=awoooi")

    # Audit summary counters, statuses and ISO-formatted timestamps.
    dumped_summary = dumped["summary"]
    expected_summary_fields = {
        "outbound_total": 120,
        "outbound_trace_ref_total": 92,
        "outbound_reply_markup_total": 30,
        "outbound_reply_markup_missing_incident_ref_total": 4,
        "outbound_reply_markup_missing_incident_ref_recent_1h_total": 1,
        "outbound_reply_markup_missing_incident_ref_latest_sent_at": (
            "2026-05-18T08:15:00"
        ),
        "outbound_reply_markup_missing_trace_ref_total": 2,
        "outbound_reply_markup_missing_trace_ref_recent_24h_total": 1,
        "outbound_reply_markup_missing_trace_ref_latest_sent_at": (
            "2026-05-18T07:40:00"
        ),
        "outbound_reply_markup_trace_ref_gap_status": "recent_backlog",
        "outbound_reply_markup_trace_ref_gap_next_action": "watch_24h_decay",
        "outbound_reply_markup_trace_ref_after_gap_total": 3,
        "outbound_reply_markup_trace_ref_after_gap_first_sent_at": (
            "2026-05-18T08:20:00"
        ),
        "outbound_reply_markup_trace_ref_after_gap_latest_sent_at": (
            "2026-05-18T09:00:00"
        ),
        "outbound_reply_markup_trace_ref_gap_recovery_status": (
            "recovered_after_gap"
        ),
    }
    for field, expected in expected_summary_fields.items():
        assert dumped_summary[field] == expected

    # The input rows omit first_sent_at; the dumped rows carry it as None.
    top_incident_prefix = dumped_summary[
        "outbound_reply_markup_missing_incident_ref_top_prefixes"
    ][0]
    assert top_incident_prefix == {
        "prefix": "silence",
        "total": 3,
        "recent_24h_total": 0,
        "first_sent_at": None,
        "last_sent_at": "2026-05-18T07:40:00",
    }
    top_trace_prefix = dumped_summary[
        "outbound_reply_markup_missing_trace_ref_top_prefixes"
    ][0]
    assert top_trace_prefix == {
        "prefix": "silence",
        "total": 2,
        "recent_24h_total": 0,
        "first_sent_at": None,
        "last_sent_at": "2026-05-18T07:40:00",
    }

    assert dumped_summary["callback_snapshot_missing_total"] == 1
    assert dumped_summary["snapshot_status"] == "partial"
def test_list_callback_replies_keeps_audit_summary_separate_from_km_summary() -> None:
    """Source-level guard: the audit summary and KM summary use distinct variables.

    Inspects the text of ``list_callback_replies`` rather than calling it.
    """
    source = inspect.getsource(platform_operator_service.list_callback_replies)
    required_fragments = (
        "audit_summary = await _fetch_callback_reply_audit_summary",
        '"summary": audit_summary',
        "km_summary = km_completion_summary_cache.get",
        'item["km_stale_completion_summary"] = km_summary',
    )
    for fragment in required_fragments:
        assert fragment in source
def test_list_callback_replies_does_not_shadow_summary_cache_key() -> None:
    """Source-level guard: each cache key has its own name and none is ``summary_cache_key``.

    Inspects the text of ``list_callback_replies`` rather than calling it.
    """
    source = inspect.getsource(platform_operator_service.list_callback_replies)
    required_fragments = (
        "callback_summary_cache_key = {",
        "status_chain_cache_key = (item_project_id, incident_id)",
        "km_summary_cache_key = (item_project_id, incident_id)",
        (
            'store_operator_summary_async(\n "callback_replies",\n'
            " callback_summary_cache_key"
        ),
    )
    for fragment in required_fragments:
        assert fragment in source

    # No line may assign to the bare, ambiguous name.
    shadowing_lines = [
        line
        for line in source.splitlines()
        if line.strip().startswith("summary_cache_key =")
    ]
    assert not shadowing_lines
def test_callback_reply_audit_summary_marks_missing_snapshots() -> None:
    """An audit row whose callbacks all lack snapshots is summarised as not captured.

    The row reports recent missing trace refs and no after-gap recoveries; the
    test expects an active gap without a recovery signal. The row has no
    inbound callback counters, and the summary is expected to report zero
    inbound callbacks with a ``reply_only_gap`` mirror status.
    """
    latest_gap_at = datetime(2026, 5, 25, 8, 42, 22)
    missing_incident_ref_prefixes = [
        {
            "prefix": "silence",
            "total": 8,
            "recent_24h_total": 0,
            "last_sent_at": datetime(2026, 5, 18, 7, 40, 0),
        },
        {
            "prefix": "drift_view",
            "total": 4,
            "recent_24h_total": 2,
            "last_sent_at": datetime(2026, 5, 25, 8, 42, 22),
        },
    ]
    missing_trace_ref_prefixes = [
        {
            "prefix": "unknown",
            "total": 5,
            "recent_24h_total": 1,
            "last_sent_at": datetime(2026, 5, 25, 8, 42, 22),
        },
    ]
    row = {
        "outbound_total": 5256,
        "outbound_source_envelope_total": 5256,
        "outbound_source_refs_total": 5000,
        "outbound_trace_ref_total": 4300,
        "outbound_incident_ref_total": 3200,
        "outbound_reply_markup_total": 100,
        "outbound_reply_markup_missing_incident_ref_total": 12,
        "outbound_reply_markup_missing_incident_ref_recent_1h_total": 2,
        "outbound_reply_markup_missing_incident_ref_recent_24h_total": 3,
        "outbound_reply_markup_missing_incident_ref_latest_sent_at": (
            datetime(2026, 5, 25, 8, 42, 22)
        ),
        "outbound_reply_markup_missing_trace_ref_total": 5,
        "outbound_reply_markup_missing_trace_ref_recent_1h_total": 1,
        "outbound_reply_markup_missing_trace_ref_recent_24h_total": 2,
        "outbound_reply_markup_missing_trace_ref_latest_sent_at": (
            datetime(2026, 5, 25, 8, 42, 22)
        ),
        "outbound_reply_markup_trace_ref_after_gap_total": 0,
        "outbound_reply_markup_trace_ref_after_gap_first_sent_at": None,
        "outbound_reply_markup_trace_ref_after_gap_latest_sent_at": None,
        "outbound_reply_markup_missing_incident_ref_top_prefixes": (
            missing_incident_ref_prefixes
        ),
        "outbound_reply_markup_missing_trace_ref_top_prefixes": (
            missing_trace_ref_prefixes
        ),
        "outbound_failed_total": 0,
        "callback_total": 2,
        "callback_sent_total": 2,
        "callback_fallback_total": 0,
        "callback_rescue_total": 0,
        "callback_failed_total": 0,
        "callback_detail_total": 0,
        "callback_history_total": 2,
        "callback_snapshot_captured_total": 0,
        "callback_snapshot_partial_total": 0,
        "callback_snapshot_missing_total": 2,
        "callback_incident_total": 1,
        "latest_outbound_at": datetime(2026, 5, 25, 8, 42, 22),
        "latest_callback_at": datetime(2026, 5, 24, 14, 38, 4),
    }

    summary = _callback_reply_audit_summary_from_row(row, project_id="awoooi")

    expected_fields = {
        "schema_version": "telegram_callback_reply_audit_summary_v1",
        "outbound_total": 5256,
        "outbound_trace_ref_total": 4300,
        "outbound_reply_markup_missing_trace_ref_total": 5,
        "outbound_reply_markup_missing_trace_ref_recent_1h_total": 1,
        "outbound_reply_markup_missing_trace_ref_recent_24h_total": 2,
        "outbound_reply_markup_missing_trace_ref_latest_sent_at": latest_gap_at,
        "outbound_reply_markup_trace_ref_gap_status": "active_gap",
        "outbound_reply_markup_trace_ref_gap_next_action": (
            "inspect_recent_outbound_source_refs"
        ),
        "outbound_reply_markup_trace_ref_after_gap_total": 0,
        "outbound_reply_markup_trace_ref_gap_recovery_status": (
            "no_recovery_signal"
        ),
        "outbound_reply_markup_missing_incident_ref_recent_24h_total": 3,
        "callback_total": 2,
        "callback_snapshot_missing_total": 2,
        "inbound_callback_total": 0,
        "inbound_callback_mirror_status": "reply_only_gap",
        "inbound_callback_next_action": (
            "press_any_telegram_callback_after_rollout"
        ),
        "snapshot_status": "not_captured",
        "next_action": "press_telegram_detail_or_history_after_rollout",
    }
    for field, expected in expected_fields.items():
        assert summary[field] == expected

    trace_prefixes = summary["outbound_reply_markup_missing_trace_ref_top_prefixes"]
    assert trace_prefixes[0]["prefix"] == "unknown"

    incident_prefixes = summary[
        "outbound_reply_markup_missing_incident_ref_top_prefixes"
    ]
    assert incident_prefixes[0]["recent_24h_total"] == 0
    assert incident_prefixes[1]["last_sent_at"] == latest_gap_at
def test_callback_reply_audit_summary_marks_mixed_legacy_snapshots_partial() -> None:
    """Audit summary for a row that mixes captured and missing snapshots.

    The row carries one captured and two missing callback snapshots together
    with a missing-trace-ref backlog and eight trace-ref sends after the gap.
    The assertions pin the status fields the summary reports for that input.
    """
    incident_ref_gap_latest = datetime(2026, 5, 25, 10, 59, 49)
    trace_ref_gap_latest = datetime(2026, 5, 25, 12, 13, 1)
    after_gap_first = datetime(2026, 5, 25, 12, 20, 0)

    incident_ref_prefixes = [
        {
            "prefix": "silence",
            "total": 275,
            "recent_24h_total": 0,
            "last_sent_at": incident_ref_gap_latest,
        },
        {
            "prefix": "drift_view",
            "total": 144,
            "recent_24h_total": 0,
            "last_sent_at": datetime(2026, 5, 18, 18, 14, 27),
        },
    ]
    trace_ref_prefixes = [
        {
            "prefix": "silence",
            "total": 120,
            "recent_24h_total": 23,
            "last_sent_at": trace_ref_gap_latest,
        },
        {
            "prefix": "ai_advisory_handled",
            "total": 34,
            "recent_24h_total": 23,
            "last_sent_at": datetime(2026, 5, 25, 12, 7, 17),
        },
    ]
    audit_row = {
        "outbound_total": 5221,
        "outbound_source_envelope_total": 4905,
        "outbound_source_refs_total": 4676,
        "outbound_trace_ref_total": 4230,
        "outbound_incident_ref_total": 920,
        "outbound_reply_markup_total": 1322,
        "outbound_reply_markup_missing_incident_ref_total": 684,
        "outbound_reply_markup_missing_incident_ref_recent_1h_total": 0,
        "outbound_reply_markup_missing_incident_ref_recent_24h_total": 0,
        "outbound_reply_markup_missing_incident_ref_latest_sent_at": (
            incident_ref_gap_latest
        ),
        "outbound_reply_markup_missing_trace_ref_total": 154,
        "outbound_reply_markup_missing_trace_ref_recent_1h_total": 0,
        "outbound_reply_markup_missing_trace_ref_recent_24h_total": 23,
        "outbound_reply_markup_missing_trace_ref_latest_sent_at": (
            trace_ref_gap_latest
        ),
        "outbound_reply_markup_trace_ref_after_gap_total": 8,
        "outbound_reply_markup_trace_ref_after_gap_first_sent_at": after_gap_first,
        "outbound_reply_markup_trace_ref_after_gap_latest_sent_at": (
            datetime(2026, 5, 25, 13, 26, 8)
        ),
        "outbound_reply_markup_missing_incident_ref_top_prefixes": (
            incident_ref_prefixes
        ),
        "outbound_reply_markup_missing_trace_ref_top_prefixes": trace_ref_prefixes,
        "outbound_failed_total": 0,
        "callback_total": 3,
        "callback_sent_total": 3,
        "callback_fallback_total": 0,
        "callback_rescue_total": 0,
        "callback_failed_total": 0,
        "callback_detail_total": 0,
        "callback_history_total": 3,
        "callback_snapshot_captured_total": 1,
        "callback_snapshot_partial_total": 0,
        "callback_snapshot_missing_total": 2,
        "callback_incident_total": 1,
        "latest_outbound_at": datetime(2026, 5, 25, 9, 15, 14),
        "latest_callback_at": datetime(2026, 5, 25, 9, 30, 0),
    }

    summary = _callback_reply_audit_summary_from_row(audit_row, project_id="awoooi")

    # Flat fields: counters copied through plus the derived status values.
    expected_fields = {
        "callback_snapshot_captured_total": 1,
        "outbound_trace_ref_total": 4230,
        "outbound_reply_markup_missing_trace_ref_total": 154,
        "outbound_reply_markup_missing_trace_ref_recent_1h_total": 0,
        "outbound_reply_markup_missing_trace_ref_recent_24h_total": 23,
        "outbound_reply_markup_missing_trace_ref_latest_sent_at": (
            datetime(2026, 5, 25, 12, 13, 1)
        ),
        "outbound_reply_markup_trace_ref_gap_status": "recent_backlog",
        "outbound_reply_markup_trace_ref_gap_next_action": "watch_24h_decay",
        "outbound_reply_markup_trace_ref_after_gap_total": 8,
        "outbound_reply_markup_trace_ref_after_gap_first_sent_at": (
            datetime(2026, 5, 25, 12, 20, 0)
        ),
        "outbound_reply_markup_trace_ref_gap_recovery_status": (
            "recovered_after_gap"
        ),
        "callback_snapshot_missing_total": 2,
        "snapshot_status": "partial",
        "next_action": "review_legacy_callback_snapshot_gap",
    }
    for field, expected in expected_fields.items():
        assert summary[field] == expected, field

    # Nested prefix breakdowns keep their leading entries.
    top_trace_prefix = summary[
        "outbound_reply_markup_missing_trace_ref_top_prefixes"
    ][0]
    assert top_trace_prefix["recent_24h_total"] == 23
    top_incident_prefix = summary[
        "outbound_reply_markup_missing_incident_ref_top_prefixes"
    ][0]
    assert top_incident_prefix["last_sent_at"] == datetime(2026, 5, 25, 10, 59, 49)
@pytest.mark.asyncio
async def test_km_stale_completion_summary_matches_callback_incident(
    monkeypatch,
) -> None:
    """The per-incident summary keeps only the queue item for that incident.

    The completion-queue query is replaced with a stub returning two items,
    only one of which is related to the incident under test.
    """

    async def stub_completion_queue(**kwargs):
        # The service is expected to ask for the full queue of this project.
        assert kwargs["project_id"] == "awoooi"
        assert kwargs["status_bucket"] == "all"
        assert kwargs["limit"] == 100
        queue_items = [
            SimpleNamespace(
                entry_id="km-1",
                title="Bitan pharmacy status drift",
                dispatch_id="dispatch-1",
                governance_event_id="event-1",
                readiness="ready",
                workflow_stage="waiting_owner_review",
                next_action="preview_stale_km_review_completion",
                priority_tier="P0",
                recommended_completion_outcome="refresh_with_evidence",
                can_preview=True,
                related_incident_id="INC-20260513-79ED5E",
            ),
            SimpleNamespace(
                entry_id="km-2",
                title="Other stale KM",
                related_incident_id="INC-20260513-OTHER",
                readiness="blocked",
            ),
        ]
        return SimpleNamespace(
            project_id="awoooi",
            total=2,
            returned=2,
            pending_count=2,
            ready_count=1,
            blocked_count=1,
            completed_count=0,
            failed_count=0,
            writes_on_read=False,
            manual_review_required=True,
            batch_writes_allowed=False,
            items=queue_items,
        )

    monkeypatch.setattr(
        platform_operator_service,
        "query_km_stale_owner_review_completion_queue",
        stub_completion_queue,
    )

    fetch_summary = (
        platform_operator_service._fetch_km_stale_completion_summary_for_incident
    )
    summary = await fetch_summary(
        project_id="awoooi",
        incident_id="INC-20260513-79ED5E",
        queue_cache={},
    )

    assert summary["schema_version"] == (
        "km_stale_owner_review_completion_callback_summary_v1"
    )
    assert summary["status"] == "matched_owner_review"
    assert summary["ready_count"] == 1
    assert summary["blocked_count"] == 1
    assert summary["batch_writes_allowed"] is False

    first_related = summary["related_items"][0]
    assert first_related["entry_id"] == "km-1"
    assert first_related["can_preview"] is True
    assert summary["work_item"] is None
def test_km_stale_completion_summary_generates_owner_review_work_item() -> None:
    """A queue with no items yields an owner-review work item for the incident.

    The full work-item payload, including its triage section and both hrefs,
    is compared against a literal expectation.
    """
    empty_queue = SimpleNamespace(
        total=10,
        returned=10,
        pending_count=10,
        ready_count=10,
        blocked_count=0,
        completed_count=1,
        failed_count=0,
        writes_on_read=False,
        manual_review_required=True,
        batch_writes_allowed=False,
        items=[],
    )

    summary = platform_operator_service._build_km_stale_completion_summary(
        queue=empty_queue,
        project_id="awoooi",
        incident_id="INC-20260524-16109D",
    )

    expected_triage = {
        "schema_version": "km_stale_callback_owner_review_triage_v1",
        "flow_stage": "callback_observed_owner_review_link_missing",
        "ai_lead_agent": "Hermes",
        "supporting_agents": ["OpenClaw", "ElephantAlpha"],
        "automation_state": "manual_owner_review_required",
        "safe_to_auto_repair": False,
        "blocking_reason": "no_matching_completion_item",
        "matching_strategy": "related_incident_id_exact_match",
        "already_done": [
            "callback_reply_persisted",
            "completion_queue_checked",
            "generated_read_only_work_item",
        ],
        "next_actions": [
            "review_runs_callback_evidence",
            "queue_matching_km_stale_candidate",
            "complete_owner_review_after_owner_approval",
        ],
    }
    expected_target_href = (
        "/awooop/runs?project_id=awoooi"
        "&incident_id=INC-20260524-16109D"
        "&callback_reply_status=sent"
    )
    expected_work_item_href = (
        "/awooop/work-items?project_id=awoooi"
        "&work_item_id=km-callback-owner-review%3Aawoooi%3AINC-20260524-16109D"
        "&incident_id=INC-20260524-16109D"
    )
    expected_work_item = {
        "schema_version": "km_stale_callback_owner_review_work_item_v1",
        "work_item_id": "km-callback-owner-review:awoooi:INC-20260524-16109D",
        "kind": "km_stale_callback_owner_review",
        "status": "open",
        "project_id": "awoooi",
        "incident_id": "INC-20260524-16109D",
        "reason": "no_matching_completion_item",
        "title": "Telegram callback incident has no matching KM owner-review item",
        "next_step": "review_or_queue_km_owner_review",
        "target_surface": "awooop_runs_callback_evidence",
        "target_href": expected_target_href,
        "work_item_href": expected_work_item_href,
        "triage": expected_triage,
        "writes_on_read": False,
        "manual_review_required": True,
        "batch_writes_allowed": False,
    }

    assert summary["status"] == "no_related_owner_review"
    assert summary["work_item"] == expected_work_item
def test_list_approvals_response_preserves_status_chain() -> None:
    """Remediation summary and status chain survive validate + JSON dump."""
    approval_item = {
        "run_id": UUID("5c0306e0-591a-5445-9a33-80f499426b38"),
        "project_id": "awoooi",
        "agent_id": "hermes-approval-router",
        "created_at": datetime(2026, 5, 18, 7, 30, 0),
        "timeout_at": datetime(2026, 5, 18, 7, 45, 0),
        "remediation_summary": {
            "status": "read_only_dry_run",
            "incident_ids": ["INC-20260513-79ED5E"],
        },
        "awooop_status_chain": {
            "schema_version": "awooop_status_chain_v1",
            "source_id": "INC-20260513-79ED5E",
            "repair_state": "read_only_dry_run",
            "needs_human": True,
            "next_step": "approve_or_escalate_from_awooop",
        },
    }
    payload = {"items": [approval_item], "total": 1}

    response = ListApprovalsResponse.model_validate(payload)
    dumped_item = response.model_dump(mode="json")["items"][0]

    assert dumped_item["remediation_summary"]["status"] == "read_only_dry_run"
    status_chain = dumped_item["awooop_status_chain"]
    assert status_chain["source_id"] == "INC-20260513-79ED5E"
    assert status_chain["needs_human"] is True
def test_callback_reply_action_filter_normalizes_safe_actions() -> None:
    """Safe action values are trimmed/lower-cased; an empty value becomes None."""
    normalize = _validate_callback_reply_action_filter

    padded_mixed_case = normalize(" History ")
    assert padded_mixed_case == "history"

    already_clean = normalize("incident:detail-2")
    assert already_clean == "incident:detail-2"

    assert normalize("") is None
def test_callback_reply_action_filter_rejects_unsafe_values() -> None:
    """An action containing ``;`` is rejected with an HTTPException."""
    unsafe_action = "detail;drop"
    with pytest.raises(HTTPException):
        _validate_callback_reply_action_filter(unsafe_action)
def test_remediation_timeline_summary_surfaces_route_and_write_flags() -> None:
    """The summary text mentions the incident, the route, and both write flags."""
    remediation_record = {
        "incident_id": "INC-20260514-F85F21",
        "mode": "replay",
        "verification_result_preview": "degraded",
        "agent_id": "auto_repair_executor",
        "tool_name": "ssh_diagnose",
        "required_scope": "read",
        "writes_incident_state": False,
        "writes_auto_repair_result": False,
    }

    summary = _remediation_timeline_summary(remediation_record)

    expected_fragments = (
        "incident=INC-20260514-F85F21",
        "route=auto_repair_executor/ssh_diagnose/read",
        "writes_incident=False",
        "writes_auto_repair=False",
    )
    for fragment in expected_fragments:
        assert fragment in summary, fragment
def test_awooop_status_chain_marks_verified_repair() -> None:
    """A verified auto-repair produces a completed chain that needs no human.

    The truth chain is assembled section by section (status, quality, MCP,
    execution, channel) so each assertion group below can be read against the
    fixture section that feeds it.
    """
    incident_id = "INC-20260513-79ED5E"
    playbook_path = "infra/ansible/playbooks/188-ai-web.yml"

    truth_status = {
        "current_stage": "execution_succeeded",
        "stage_status": "success",
        "needs_human": False,
        "blockers": [],
    }
    automation_quality = {
        "verdict": "auto_repaired_verified",
        "facts": {
            "auto_repair_execution_records": 1,
            "automation_operation_records": 1,
            "verification_result": "healthy",
            "mcp_gateway_total": 2,
            "knowledge_entries": 1,
        },
        "blockers": [],
    }
    mcp_section = {
        "awooop_gateway": {
            "total": 2,
            "success": 1,
            "failed": 1,
            "blocked": 0,
            "first_class_total": 2,
            "legacy_bridge_total": 0,
            "policy_enforced_total": 2,
            "stage": "provider_failed_after_gateway",
            "stage_status": "failed",
            "by_tool": [
                {
                    "tool_name": "prometheus.query",
                    "total": 2,
                    "success": 1,
                    "failed": 1,
                    "blocked": 0,
                }
            ],
        },
        "legacy": {
            "total": 1,
            "success": 1,
            "failed": 0,
            "by_tool": [
                {
                    "tool_name": "ssh_host",
                    "success": 1,
                    "failed": 0,
                }
            ],
        },
    }
    execution_section = {
        "automation_operation_log": [
            {
                "operation_type": "playbook_executed",
                "status": "success",
                "actor": "auto_repair_executor",
                "input_action": "restart_service",
                "input_executor": "ansible",
                "input_playbook_id": "pb-host-restart",
                "input_playbook_path": playbook_path,
            }
        ],
        "ansible": {
            "considered": True,
            "records": [
                {
                    "operation_type": "ansible_check_mode_executed",
                    "status": "success",
                    "playbook_path": playbook_path,
                    "check_mode": "true",
                }
            ],
            "candidate_catalog": {
                "candidates": [
                    {
                        "catalog_id": "ansible:188-ai-web",
                        "playbook_path": playbook_path,
                        "risk_level": "medium",
                        "match_score": 3,
                    }
                ]
            },
            "not_used_reason": None,
        },
    }
    channel_section = {
        "inbound_events": [
            {
                "channel_type": "alertmanager",
                "provider_event_id": "alert-1",
                "content_type": "application/json",
                "is_duplicate": False,
                "received_at": "2026-05-20T00:00:00Z",
                "source_envelope": {
                    "source_refs": {
                        "alert_ids": ["alert-1"],
                        "sentry_issue_ids": ["SENTRY-1"],
                        "signoz_alerts": ["signoz:abc"],
                        "fingerprints": ["fp-1"],
                    }
                },
            }
        ],
        "outbound_messages": [
            {
                "channel_type": "telegram",
                "message_type": "incident_detail",
                "send_status": "sent",
                "sent_at": "2026-05-20T00:01:00Z",
            }
        ],
    }
    remediation_history = {
        "total": 1,
        "items": [
            {
                "incident_id": incident_id,
                "agent_id": "auto_repair_executor",
                "tool_name": "rollout_restart",
                "required_scope": "write",
                "verification_result_preview": "healthy",
                "writes_incident_state": True,
                "writes_auto_repair_result": True,
            }
        ],
    }

    chain = _build_awooop_status_chain(
        incident_ids=[incident_id],
        source_id=incident_id,
        truth_chain={
            "truth_status": truth_status,
            "automation_quality": automation_quality,
            "mcp": mcp_section,
            "execution": execution_section,
            "channel": channel_section,
        },
        remediation_history=remediation_history,
    )

    # Headline state and operator outcome.
    assert chain["repair_state"] == "auto_repaired_verified"
    assert chain["verification"] == "healthy"
    assert chain["needs_human"] is False
    assert chain["next_step"] == "monitor_for_regression"
    outcome = chain["operator_outcome"]
    assert outcome["state"] == "completed_verified"
    assert outcome["needs_human"] is False
    assert chain["evidence"]["latest_route"] == (
        "auto_repair_executor/rollout_restart/write"
    )

    # MCP gateway / legacy roll-up.
    mcp = chain["mcp"]
    gateway = mcp["gateway"]
    assert gateway["success"] == 1
    assert gateway["failed"] == 1
    assert gateway["policy_enforced_total"] == 2
    assert mcp["legacy"]["total"] == 1
    assert mcp["top_tools"][0]["tool_name"] == "prometheus.query"

    # Execution and Ansible roll-up.
    execution = chain["execution"]
    assert execution["operation_total"] == 1
    assert execution["latest_executor"] == "ansible"
    assert execution["playbook_ids"] == ["pb-host-restart"]
    ansible = execution["ansible"]
    assert ansible["considered"] is True
    assert ansible["candidate_count"] == 1
    assert ansible["check_mode_total"] == 1
    assert ansible["apply_total"] == 0

    # Source references collected from the channel events.
    source_refs = chain["source_refs"]
    assert source_refs["inbound_total"] == 1
    assert source_refs["outbound_total"] == 1
    assert source_refs["refs"]["sentry_issue_ids"] == ["SENTRY-1"]
    assert source_refs["refs"]["signoz_alerts"] == ["signoz:abc"]
def test_awooop_status_chain_surfaces_controlled_ansible_apply_proof() -> None:
    """An approved Ansible apply plus a check-mode run show up in the chain.

    The Ansible records contain one ``apply`` execution carrying an approval
    source and one ``check_mode`` execution; the chain must count each once
    and expose the apply as the latest execution.
    """
    incident_id = "INC-20260531-D6A3C4"
    catalog_id = "ansible:188-momo-backup-user"
    playbook_path = "infra/ansible/playbooks/188-momo-backup-user.yml"

    apply_record = {
        "operation_type": "ansible_apply_executed",
        "status": "success",
        "actor": "platform_operator",
        "catalog_id": catalog_id,
        "playbook_path": playbook_path,
        "execution_mode": "apply",
        "check_mode": False,
        "apply_executed": True,
        "approval_source": "user_chat_approved_continue",
        "returncode": 0,
    }
    check_mode_record = {
        "operation_type": "ansible_check_mode_executed",
        "status": "success",
        "actor": "ansible_check_mode_worker",
        "catalog_id": catalog_id,
        "playbook_path": playbook_path,
        "execution_mode": "check_mode",
        "check_mode": True,
        "apply_executed": False,
        "returncode": 0,
    }
    truth_chain = {
        "truth_status": {
            "current_stage": "execution_succeeded",
            "stage_status": "success",
            "needs_human": False,
            "blockers": [],
        },
        "automation_quality": {
            "verdict": "execution_succeeded",
            "facts": {
                "auto_repair_execution_records": 0,
                "automation_operation_records": 2,
                "effective_execution_records": 1,
                "verification_result": "healthy",
                "mcp_gateway_total": 2,
                "knowledge_entries": 1,
            },
            "blockers": [],
        },
        "execution": {
            "automation_operation_log": [
                {
                    "operation_type": "ansible_apply_executed",
                    "status": "success",
                    "actor": "platform_operator",
                    "input_executor": "ansible",
                    "input_catalog_id": catalog_id,
                    "input_playbook_path": playbook_path,
                }
            ],
            "ansible": {
                "considered": True,
                "records": [apply_record, check_mode_record],
                "candidate_catalog": {"candidates": []},
            },
        },
    }

    chain = _build_awooop_status_chain(
        incident_ids=[incident_id],
        source_id=incident_id,
        truth_chain=truth_chain,
        remediation_history={"total": 0},
    )

    ansible_view = chain["execution"]["ansible"]
    assert ansible_view["check_mode_total"] == 1
    assert ansible_view["apply_total"] == 1
    assert ansible_view["applied"] is True
    assert ansible_view["controlled_apply"] is True
    assert ansible_view["latest_catalog_id"] == "ansible:188-momo-backup-user"
    assert ansible_view["latest_execution_mode"] == "apply"
    assert ansible_view["latest_returncode"] == 0
    assert ansible_view["approval_source"] == "user_chat_approved_continue"
def test_awooop_status_chain_includes_source_provider_correlation() -> None:
    """Candidate-only correlation data is exposed under ``source_refs``."""
    incident_id = "INC-20260520-4D1124"
    provider_totals = {
        "sentry": {
            "direct_ref_total": 0,
            "candidate_total": 1,
            "applied_link_total": 0,
        },
        "signoz": {
            "direct_ref_total": 0,
            "candidate_total": 1,
            "applied_link_total": 0,
        },
    }
    sentry_candidate = {
        "provider": "sentry",
        "provider_event_id": "sentry:issue:1",
        "score": 65,
        "match_type": "candidate",
        "reasons": ["alertname_overlap", "target_overlap"],
    }
    source_correlation = {
        "schema_version": "source_provider_correlation_v1",
        "status": "candidate_found",
        "verification_status": "candidate_only",
        "direct_ref_total": 0,
        "candidate_total": 2,
        "applied_link_total": 0,
        "provider_event_total": 2,
        "providers": provider_totals,
        "top_candidates": [sentry_candidate],
    }

    chain = _build_awooop_status_chain(
        incident_ids=[incident_id],
        source_id=incident_id,
        source_correlation=source_correlation,
    )

    exposed = chain["source_refs"]["correlation"]
    assert exposed["status"] == "candidate_found"
    assert exposed["candidate_total"] == 2
    assert exposed["verification_status"] == "candidate_only"
    assert exposed["providers"]["sentry"]["candidate_total"] == 1
    assert exposed["top_candidates"][0]["provider_event_id"] == "sentry:issue:1"
def test_awooop_status_chain_preserves_applied_source_link_verification() -> None:
    """An applied, verified source link keeps its counts and timestamp."""
    incident_id = "INC-20260520-4D1124"
    applied_at = "2026-05-21T02:03:04"
    linked_candidate = {
        "provider": "sentry",
        "provider_event_id": "sentry:source_correlation_linked:issue-1",
        "stage": "source_correlation_linked",
        "score": 100,
        "match_type": "direct",
        "link_state": "applied",
        "verification_status": "applied_link_verified",
        "reasons": ["direct_incident_ref"],
    }
    source_correlation = {
        "schema_version": "source_provider_correlation_v1",
        "status": "linked",
        "verification_status": "applied_link_verified",
        "direct_ref_total": 1,
        "candidate_total": 0,
        "applied_link_total": 1,
        "latest_applied_link_at": applied_at,
        "provider_event_total": 1,
        "providers": {
            "sentry": {
                "direct_ref_total": 1,
                "candidate_total": 0,
                "applied_link_total": 1,
                "latest_applied_link_at": applied_at,
            },
        },
        "top_candidates": [linked_candidate],
    }

    chain = _build_awooop_status_chain(
        incident_ids=[incident_id],
        source_id=incident_id,
        source_correlation=source_correlation,
    )

    exposed = chain["source_refs"]["correlation"]
    assert exposed["status"] == "linked"
    assert exposed["verification_status"] == "applied_link_verified"
    assert exposed["applied_link_total"] == 1
    assert exposed["latest_applied_link_at"] == "2026-05-21T02:03:04"
    assert exposed["providers"]["sentry"]["applied_link_total"] == 1
    assert exposed["top_candidates"][0]["link_state"] == "applied"
def test_source_correlation_scoring_distinguishes_direct_and_candidate() -> None:
    """Score three events against one incident: direct, candidate, unrelated.

    * direct: shares the incident id but nothing else;
    * candidate: shares alertname, severity, namespace and target, no id;
    * unrelated: shares only severity and namespace.
    """
    incident = {
        "incident_ids": ["INC-20260520-4D1124"],
        "alertnames": ["highcpuusage"],
        "severities": ["p3"],
        "fingerprints": ["fp-abc"],
        "namespaces": ["awoooi-prod"],
        "targets": ["api"],
    }

    shares_incident_id = {
        "incident_ids": ["INC-20260520-4D1124"],
        "alertnames": ["other"],
        "severities": [],
        "fingerprints": [],
        "namespaces": [],
        "targets": [],
    }
    direct = _score_source_correlation_event(incident, shares_incident_id)

    shares_alert_context = {
        "incident_ids": [],
        "alertnames": ["highcpuusage"],
        "severities": ["p3"],
        "fingerprints": [],
        "namespaces": ["awoooi-prod"],
        "targets": ["api"],
    }
    candidate = _score_source_correlation_event(incident, shares_alert_context)

    shares_only_environment = {
        "incident_ids": [],
        "alertnames": ["configdrift"],
        "severities": ["p3"],
        "fingerprints": [],
        "namespaces": ["awoooi-prod"],
        "targets": [],
    }
    unrelated = _score_source_correlation_event(incident, shares_only_environment)

    assert direct["is_direct"] is True
    assert direct["is_candidate"] is True

    assert candidate["is_direct"] is False
    assert candidate["is_candidate"] is True
    assert "alertname_overlap" in candidate["reasons"]

    assert unrelated["is_candidate"] is False
def test_source_correlation_applied_link_requires_stage_and_direct_match() -> None:
    """An applied link needs both the linked stage and a direct match.

    Three variants of the same Sentry event are checked: the original (linked
    stage with a direct incident ref), one with incident id and fingerprint
    removed, and one whose stage is changed to ``upstream_canary``.
    """
    incident = {
        "incident_ids": ["INC-20260520-4D1124"],
        "alertnames": ["sentry issue"],
        "severities": ["error"],
        "fingerprints": ["fp-sentry-1"],
        "namespaces": ["awoooi-prod"],
        "targets": ["web"],
    }
    raw_event = {
        "provider": "sentry",
        "provider_event_id": "sentry:source_correlation_linked:issue-1",
        "received_at": datetime(2026, 5, 21, 2, 3, 4),
        "source_envelope": {
            "provider": "sentry",
            "stage": "source_correlation_linked",
            "source_refs": {
                "incident_ids": ["INC-20260520-4D1124"],
                "fingerprints": ["fp-sentry-1"],
            },
            "log_correlation": {
                "alertname": "Sentry Issue",
                "severity": "error",
                "namespace": "awoooi-prod",
                "target_resource": "web",
            },
        },
    }

    # Variant 1: linked stage + direct incident reference.
    linked_context = _source_event_correlation_context(raw_event)
    linked_score = _score_source_correlation_event(incident, linked_context)
    assert linked_score["is_direct"] is True
    assert _is_source_correlation_applied_link(linked_context, linked_score) is True

    # Variant 2: same event with the direct references stripped.
    stripped_context = {
        **linked_context,
        "incident_ids": [],
        "fingerprints": [],
    }
    stripped_score = _score_source_correlation_event(incident, stripped_context)

    # Variant 3: same event reported from a non-link stage.
    canary_context = {
        **linked_context,
        "stage": "upstream_canary",
    }

    assert stripped_score["is_direct"] is False
    assert stripped_score["is_candidate"] is True
    stripped_is_applied = _is_source_correlation_applied_link(
        stripped_context,
        stripped_score,
    )
    assert stripped_is_applied is False
    canary_is_applied = _is_source_correlation_applied_link(
        canary_context,
        linked_score,
    )
    assert canary_is_applied is False
def test_awooop_status_chain_marks_read_only_manual_gate() -> None:
    """A pending approval with read-only evidence is a read-only dry run."""
    incident_id = "INC-20260513-79ED5E"
    truth_chain = {
        "truth_status": {
            "current_stage": "approval_required",
            "stage_status": "waiting",
            "needs_human": True,
            "blockers": ["pending_human_approval"],
        },
        "automation_quality": {
            "verdict": "approval_required",
            "facts": {
                "auto_repair_execution_records": 0,
                "automation_operation_records": 0,
                "verification_result": "missing",
                "mcp_gateway_total": 1,
                "knowledge_entries": 0,
            },
            "blockers": [],
        },
    }
    read_only_diagnosis = {
        "incident_id": incident_id,
        "agent_id": "investigator",
        "tool_name": "ssh_diagnose",
        "required_scope": "read",
        "verification_result_preview": "degraded",
        "writes_incident_state": False,
        "writes_auto_repair_result": False,
    }

    chain = _build_awooop_status_chain(
        incident_ids=[incident_id],
        source_id=incident_id,
        truth_chain=truth_chain,
        remediation_history={"total": 2, "items": [read_only_diagnosis]},
    )

    assert chain["repair_state"] == "read_only_dry_run"
    assert chain["needs_human"] is True
    assert chain["next_step"] == "approve_or_escalate_from_awooop"
    assert chain["blockers"] == ["pending_human_approval"]
def test_awooop_status_chain_does_not_treat_audit_ops_as_repair() -> None:
    """An operation record with zero effective executions is not a repair.

    The quality verdict claims an auto-repair, but the facts report no
    auto-repair and no effective execution records, so the chain must fall
    back to a diagnostic-only state that requires a human.
    """
    incident_id = "INC-20260530-88D960"
    quality_facts = {
        "auto_repair_execution_records": 0,
        "automation_operation_records": 1,
        "effective_execution_records": 0,
        "verification_result": "degraded",
        "mcp_gateway_total": 22,
        "knowledge_entries": 4,
    }
    truth_chain = {
        "truth_status": {
            "current_stage": "execution_succeeded",
            "stage_status": "success",
            "needs_human": False,
            "blockers": [],
        },
        "automation_quality": {
            "verdict": "auto_repaired_verification_degraded",
            "facts": quality_facts,
            "blockers": ["verification_recorded"],
        },
    }

    chain = _build_awooop_status_chain(
        incident_ids=[incident_id],
        source_id=incident_id,
        truth_chain=truth_chain,
        remediation_history={"total": 0},
    )

    assert chain["repair_state"] == "diagnostic_or_audit_recorded"
    assert chain["next_step"] == "manual_review_or_collect_repair_evidence"
    assert chain["needs_human"] is True

    outcome = chain["operator_outcome"]
    assert outcome["state"] == "diagnostic_only_manual_review"
    assert outcome["notification"]["mode"] == "action_required"

    evidence = chain["evidence"]
    assert evidence["operation_records"] == 1
    assert evidence["auto_repair_records"] == 0
def test_awooop_status_chain_surfaces_rejected_approval_outcome() -> None:
    """A rejected approval is reported as a closed, result-only outcome."""
    incident_id = "INC-REJECTED"
    truth_chain = {
        "truth_status": {
            "current_stage": "approval_rejected",
            "stage_status": "closed",
            "needs_human": False,
            "blockers": [],
        },
        "automation_quality": {
            "verdict": "approval_rejected_no_execution",
            "facts": {
                "auto_repair_execution_records": 0,
                "automation_operation_records": 0,
                "effective_execution_records": 0,
                "verification_result": "missing",
                "mcp_gateway_total": 1,
                "knowledge_entries": 0,
            },
            "blockers": [],
        },
    }

    chain = _build_awooop_status_chain(
        incident_ids=[incident_id],
        source_id=incident_id,
        truth_chain=truth_chain,
        remediation_history={"total": 0},
    )

    assert chain["repair_state"] == "approval_rejected_no_execution"
    assert chain["needs_human"] is False
    outcome = chain["operator_outcome"]
    assert outcome["state"] == "approval_rejected_no_execution"
    assert outcome["notification"]["mode"] == "result_only"
def test_awooop_status_chain_surfaces_expired_approval_outcome() -> None:
    """An expired approval is reported as action-required manual review."""
    incident_id = "INC-EXPIRED"
    truth_chain = {
        "truth_status": {
            "current_stage": "approval_expired",
            "stage_status": "expired",
            "needs_human": True,
            "blockers": ["approval_expired_without_operator_decision"],
        },
        "automation_quality": {
            "verdict": "approval_expired_manual_review",
            "facts": {
                "auto_repair_execution_records": 0,
                "automation_operation_records": 0,
                "effective_execution_records": 0,
                "verification_result": "missing",
                "mcp_gateway_total": 1,
                "knowledge_entries": 0,
            },
            "blockers": [],
        },
    }

    chain = _build_awooop_status_chain(
        incident_ids=[incident_id],
        source_id=incident_id,
        truth_chain=truth_chain,
        remediation_history={"total": 0},
    )

    assert chain["repair_state"] == "approval_expired_manual_review"
    assert chain["needs_human"] is True
    outcome = chain["operator_outcome"]
    assert outcome["state"] == "approval_expired_manual_review"
    assert outcome["notification"]["mode"] == "action_required"
def test_legacy_mcp_timeline_summary_surfaces_tool_context() -> None:
    """A successful legacy MCP record yields ``success`` and a detailed summary."""
    mcp_record = {
        "incident_id": "INC-20260514-F85F21",
        "agent_role": "pre_decision_investigator",
        "flywheel_node": "investigator",
        "duration_ms": 127,
        "success": True,
        "error_message": None,
    }

    status = _legacy_mcp_timeline_status(mcp_record)
    assert status == "success"

    summary = _legacy_mcp_timeline_summary(mcp_record)
    for fragment in (
        "incident=INC-20260514-F85F21",
        "agent=pre_decision_investigator",
        "node=investigator",
        "duration_ms=127",
    ):
        assert fragment in summary, fragment
def test_legacy_mcp_timeline_status_marks_failed_and_unknown() -> None:
    """``success=False`` maps to ``failed``; ``success=None`` maps to ``warning``."""
    failed_status = _legacy_mcp_timeline_status({"success": False})
    assert failed_status == "failed"

    unknown_status = _legacy_mcp_timeline_status({"success": None})
    assert unknown_status == "warning"
def test_run_remediation_list_summary_marks_read_only_dry_run() -> None:
    """A read-scope replay on a run awaiting approval is a read-only dry run."""
    waiting_run = SimpleNamespace(state="waiting_approval")
    replay_item = {
        "created_at": "2026-05-14T23:04:00+00:00",
        "incident_id": "INC-20260514-F85F21",
        "mode": "replay",
        "verification_result_preview": "degraded",
        "agent_id": "auto_repair_executor",
        "tool_name": "ssh_diagnose",
        "required_scope": "read",
        "writes_incident_state": False,
        "writes_auto_repair_result": False,
    }

    summary = _run_remediation_list_summary(
        run=waiting_run,
        incident_ids=["INC-20260514-F85F21"],
        items=[replay_item],
    )

    assert summary["status"] == "read_only_dry_run"
    for flag in ("has_dry_run", "is_read_only", "human_gate_open"):
        assert summary[flag] is True, flag
    assert summary["latest_route"] == "auto_repair_executor/ssh_diagnose/read"
def test_run_remediation_list_summary_marks_mcp_observed_without_dry_run() -> None:
    """With no remediation items, legacy MCP records alone give ``mcp_observed``."""
    incident_id = "INC-20260518-792684"
    completed_run = SimpleNamespace(state="completed")
    ssh_success = {
        "created_at": "2026-05-18T04:31:30+00:00",
        "incident_id": incident_id,
        "agent_role": "pre_decision_investigator",
        "mcp_server": "ssh_host",
        "tool_name": "ssh_diagnose",
        "success": True,
    }
    signoz_failure = {
        "created_at": "2026-05-18T04:31:29+00:00",
        "incident_id": incident_id,
        "agent_role": "pre_decision_investigator",
        "mcp_server": "signoz",
        "tool_name": "query_logs",
        "success": False,
    }

    summary = _run_remediation_list_summary(
        run=completed_run,
        incident_ids=[incident_id],
        items=[],
        legacy_mcp_records=[ssh_success, signoz_failure],
    )

    expected_fields = {
        "status": "mcp_observed",
        "source": "mcp_audit_log",
        "total": 0,
        "evidence_total": 2,
        "mcp_observation_total": 2,
        "mcp_observation_success": 1,
        "mcp_observation_failed": 1,
        "latest_route": "pre_decision_investigator/ssh_host.ssh_diagnose/read",
    }
    for field, expected in expected_fields.items():
        assert summary[field] == expected, field

    assert summary["has_dry_run"] is False
    assert summary["has_mcp_investigation"] is True
def test_run_remediation_list_summary_flags_write_observed() -> None:
    """An item that writes incident state turns the summary into ``write_observed``."""
    completed_run = SimpleNamespace(state="completed")
    state_write_item = {
        "created_at": "2026-05-14T23:05:00+00:00",
        "incident_id": "INC-20260514-F85F21",
        "agent_id": "auto_repair_executor",
        "tool_name": "state_update",
        "required_scope": "write",
        "writes_incident_state": True,
        "writes_auto_repair_result": False,
    }

    summary = _run_remediation_list_summary(
        run=completed_run,
        incident_ids=["INC-20260514-F85F21"],
        items=[state_write_item],
    )

    assert summary["status"] == "write_observed"
    assert summary["is_read_only"] is False
    assert summary["writes_incident_state"] is True
def test_remediation_summary_matches_status_filter() -> None:
    """Status filter matches on equal status; a missing summary is ``no_evidence``."""
    # (summary, requested status, whether the filter should accept it)
    cases = [
        ({"status": "mcp_observed"}, "mcp_observed", True),
        ({"status": "read_only_dry_run"}, "read_only_dry_run", True),
        ({"status": "write_observed"}, "read_only_dry_run", False),
        (None, "no_evidence", True),
    ]
    for summary, requested_status, should_match in cases:
        outcome = _remediation_summary_matches_status(summary, requested_status)
        assert bool(outcome) is should_match, (summary, requested_status)
def test_callback_reply_summary_matches_status_filter() -> None:
    """Exercise exact-status, ``observed`` and ``no_callback`` filter values.

    ``observed`` accepts any present summary shown here (sent, fallback_sent,
    failed) and rejects a missing one; ``no_callback`` accepts a missing one.
    """
    # (summary, requested status, whether the filter should accept it)
    cases = [
        ({"status": "failed"}, "failed", True),
        ({"status": "fallback_sent"}, "fallback_sent", True),
        ({"status": "sent"}, "failed", False),
        ({"status": "sent"}, "observed", True),
        ({"status": "fallback_sent"}, "observed", True),
        ({"status": "failed"}, "observed", True),
        (None, "observed", False),
        (None, "no_callback", True),
    ]
    for summary, requested_status, should_match in cases:
        outcome = _callback_reply_summary_matches_status(summary, requested_status)
        assert bool(outcome) is should_match, (summary, requested_status)
def test_callback_reply_status_filter_rejects_unknown_value() -> None:
    """A known status passes; an unknown one raises HTTP 422 naming the field."""
    # Must not raise for a supported value.
    _validate_callback_reply_status_filter("failed")

    with pytest.raises(HTTPException) as raised:
        _validate_callback_reply_status_filter("telegram_error")

    # Inspect the captured exception only after the ``with`` block has exited.
    error = raised.value
    assert error.status_code == 422
    assert "callback_reply_status" in str(error.detail)
def test_list_callback_replies_observed_filter_keeps_delivered_statuses() -> None:
    """``list_callback_replies`` source must not contain these status fragments."""
    handler_source = inspect.getsource(platform_operator_service.list_callback_replies)
    forbidden_fragments = (
        "callback_reply_status == \"observed\"",
        "callback_reply_sent",
        "callback_reply_fallback_sent",
        "callback_reply_rescue_sent",
        "callback_reply_failed",
    )
    for fragment in forbidden_fragments:
        assert fragment not in handler_source, fragment
def test_remediation_summary_matches_incident_id_filter() -> None:
    """Incident filter matches a listed id, rejects another, and accepts no filter."""
    summary = {"incident_ids": ["INC-20260514-F85F21"]}

    listed = _remediation_summary_matches_incident_id(summary, "INC-20260514-F85F21")
    assert listed

    # A fresh dict is used for the second call, as each case is independent.
    not_listed = _remediation_summary_matches_incident_id(
        {"incident_ids": ["INC-20260514-F85F21"]},
        "INC-20260513-79ED5E",
    )
    assert not not_listed

    assert _remediation_summary_matches_incident_id(None, None)
def test_list_filter_context_limit_scales_with_candidate_rows() -> None:
    """Pin the context limit returned for three candidate-row counts."""
    expected_limit_by_rows = {
        2: 500,
        4176: 16704,
        10000: 20000,
    }
    for candidate_rows, expected_limit in expected_limit_by_rows.items():
        assert _list_filter_context_limit(candidate_rows) == expected_limit
def test_run_context_batches_stay_under_asyncpg_parameter_ceiling() -> None:
    """Runs are split into chunk-sized batches whose parameter count stays bounded.

    Two full chunks plus seven runs are fed in; every batch must hold at most
    one chunk of run ids, and the combined run-id / trigger-ref /
    trigger-event-id count must stay below 32,767.
    """
    chunk = _RUN_CONTEXT_QUERY_CHUNK_SIZE
    run_total = (chunk * 2) + 7
    runs = []
    for index in range(run_total):
        runs.append(
            SimpleNamespace(
                run_id=UUID(int=index + 1),
                trigger_ref=str(UUID(int=index + 10_000)),
            )
        )

    batches = _iter_run_context_batches(runs)

    batch_sizes = [len(batch["run_ids"]) for batch in batches]
    assert batch_sizes == [chunk, chunk, 7]

    # The first run's trigger ref maps back to the first run id.
    first_trigger_ref = str(UUID(int=10_000))
    assert batches[0]["trigger_ref_to_run"][first_trigger_ref] == UUID(int=1)

    for batch in batches:
        run_id_count = len(batch["run_ids"])
        worst_case_inbound_params = (
            run_id_count
            + len(batch["trigger_refs"])
            + len(batch["trigger_event_ids"])
        )
        assert run_id_count <= chunk
        assert worst_case_inbound_params <= chunk * 3
        assert worst_case_inbound_params < 32_767
def test_timeline_sort_key_normalizes_datetime_and_iso_string() -> None:
    """Datetime, ISO-string and missing ``ts`` values all become sortable strings."""
    fallback = datetime(2026, 5, 14, 10, 0, 0)

    key_from_datetime = _timeline_sort_key(
        {"ts": datetime(2026, 5, 14, 10, 0, 1)},
        fallback,
    )
    key_from_iso_string = _timeline_sort_key(
        {"ts": "2026-05-14T10:00:02+00:00"},
        fallback,
    )
    # A missing timestamp falls back to the supplied default.
    key_from_missing = _timeline_sort_key({"ts": None}, fallback)

    keys = [key_from_datetime, key_from_iso_string, key_from_missing]
    assert keys == [
        "2026-05-14T10:00:01",
        "2026-05-14T10:00:02+00:00",
        "2026-05-14T10:00:00",
    ]

    chronological = sorted(keys)
    assert chronological == [
        "2026-05-14T10:00:00",
        "2026-05-14T10:00:01",
        "2026-05-14T10:00:02+00:00",
    ]
def test_ai_route_policy_order_exposes_global_ollama_then_gemini() -> None:
    """``deep_rca`` policy lists three Ollama providers, then Gemini as fallback."""
    policy = _ai_route_policy_order("deep_rca")

    provider_names = [entry["provider_name"] for entry in policy]
    assert provider_names == [
        "ollama_gcp_a",
        "ollama_gcp_b",
        "ollama_local",
        "gemini",
    ]

    last_entry = policy[-1]
    assert last_entry["role"] == "final_fallback"
    assert last_entry["runtime"] == "cloud"
def test_ai_route_health_map_marks_standby_as_not_checked() -> None:
    """Lanes without a health report are reported as not checked standbys."""
    primary_endpoint = OllamaEndpoint(
        url="http://gcp-a:11434",
        provider_name="ollama_gcp_a",
        model="qwen3:14b",
    )
    standby_endpoints = [
        OllamaEndpoint(
            url="http://gcp-b:11434",
            provider_name="ollama_gcp_b",
            model="qwen3:14b",
        ),
        OllamaEndpoint(
            url="http://local-111:11434",
            provider_name="ollama_local",
            model="qwen3:14b",
        ),
    ]
    primary_report = HealthReport(
        status=HealthStatus.HEALTHY,
        host="http://gcp-a:11434",
        latency_ms=123.4,
        reason="ok",
    )
    # Only the primary lane carries a report; both standbys are left as None.
    route = OllamaRoutingResult(
        primary=primary_endpoint,
        fallback_chain=standby_endpoints,
        routing_reason="primary healthy",
        health_gcp_a=primary_report,
        health_gcp_b=None,
        health_local=None,
    )

    health = _ai_route_health_map(route)

    primary_entry = health["ollama_gcp_a"]
    assert primary_entry["status"] == "healthy"
    assert primary_entry["checked"] is True
    assert health["ollama_gcp_b"]["status"] == "not_checked"
    assert health["ollama_local"]["reason"] == "standby_not_checked_primary_healthy"
def test_ai_route_status_response_preserves_route_fields() -> None:
    """Validate a full route-status payload and check key fields survive a JSON dump."""
    standby_lane = {
        "priority": 2,
        "provider_name": "ollama_gcp_b",
        "url": "http://gcp-b:11434",
        "model": "qwen3:14b",
        "runtime": "ollama",
    }
    primary_health = {
        "status": "healthy",
        "host": "http://gcp-a:11434",
        "latency_ms": 123.4,
        "reason": "ok",
        "checked_at": 0,
        "from_cache": False,
        "checked": True,
    }
    active_lane = {
        "provider_name": "ollama_gcp_a",
        "health_status": "healthy",
        "action_required": False,
    }
    operator_action = {
        "human_required": False,
        "action": "monitor",
        "reason": "primary_lane_active",
    }
    repair_evidence = {
        "provider": "ai_route_repair",
        "stage": "repair_diagnosis",
        "target_resource": "ollama_gcp_a",
        "access_blockers": ["gcloud_compute_instances_get_missing"],
    }
    payload = {
        "schema_version": "awooop_ai_route_status_v1",
        "workload_type": "deep_rca",
        "policy_order": _ai_route_policy_order("deep_rca"),
        "selected_provider": "ollama_gcp_a",
        "selected_url": "http://gcp-a:11434",
        "selected_model": "qwen3:14b",
        "fallback_chain": [standby_lane],
        "route_reason": "primary healthy",
        "route_source": "ollama_failover_manager",
        "route_error": None,
        "health": {"ollama_gcp_a": primary_health},
        "lane_mode": "primary",
        "active_lane": active_lane,
        "skipped_lanes": [],
        "operator_action": operator_action,
        "repair_evidence": repair_evidence,
        "checked_at": datetime(2026, 5, 19, 12, 0, 0),
    }

    response = AiRouteStatusResponse.model_validate(payload)
    dumped = response.model_dump(mode="json")

    final_policy_entry = dumped["policy_order"][-1]
    assert final_policy_entry["provider_name"] == "gemini"
    assert dumped["selected_provider"] == "ollama_gcp_a"
    assert dumped["lane_mode"] == "primary"
    assert dumped["repair_evidence"]["target_resource"] == "ollama_gcp_a"
def test_ai_route_repair_evidence_item_summarizes_operator_safe_fields() -> None:
    """Project a repair-diagnosis event row and check the operator-facing summary.

    Expected values are written as independent literals (not shared with the
    input fixture) so the assertions cannot pass through object aliasing.
    """
    provider_event_id = (
        "ai_route_repair:repair_diagnosis:"
        "gcp-a-primary-lane-down-20260525T060415Z"
    )
    log_correlation = {
        "alertname": "GcpAPrimaryLaneDown",
        "severity": "warning",
        "target_resource": "ollama_gcp_a",
        "fingerprint": "ai-route-gcp-a-primary-down",
    }
    source_refs = {
        "alert_ids": ["gcp-a-primary-lane-down"],
        "signoz_alerts": ["signoz:gcp-a"],
        "sentry_issue_ids": [],
        "fingerprints": ["ai-route-gcp-a-primary-down"],
        "run_ids": ["ca67ebcc-a24f-53e7-9505-2db15d855ecc"],
    }
    diagnosis_payload = {
        "schema_version": "ai_route_repair_diagnosis_v1",
        "observed_state": {
            "target_resource": "ollama_gcp_a",
            "lane_mode": "degraded_failover",
        },
        "live_probe": {
            "gcp_a_direct_22": "connection_refused",
            "gcp_a_direct_11434": "connection_refused",
            "gcp_b_direct_11434": "http_200",
        },
        "access_blockers": [
            "gcloud_compute_instances_get_missing",
            "gcp_a_ssh_refused",
            "gcp_a_ollama_11434_refused",
        ],
        "side_effects": {
            "incident_created": False,
            "telegram_sent": False,
            "approval_created": False,
            "runtime_route_changed": False,
        },
    }
    source_envelope = {
        "provider": "ai_route_repair",
        "stage": "repair_diagnosis",
        "log_correlation": log_correlation,
        "source_refs": source_refs,
        "extra": {"payload": diagnosis_payload},
    }
    row = {
        "event_id": UUID("dff309f0-f159-4537-8f58-47714ce94dca"),
        "run_id": UUID("ca67ebcc-a24f-53e7-9505-2db15d855ecc"),
        "provider_event_id": provider_event_id,
        "provider_ts": datetime(2026, 5, 25, 6, 4, 15),
        "received_at": datetime(2026, 5, 25, 6, 5, 3),
        "source_envelope": source_envelope,
    }

    item = _ai_route_repair_evidence_item(row)

    # Identity fields copied out of the envelope / row.
    assert item["provider"] == "ai_route_repair"
    assert item["stage"] == "repair_diagnosis"
    assert item["target_resource"] == "ollama_gcp_a"
    assert item["run_id"] == "ca67ebcc-a24f-53e7-9505-2db15d855ecc"
    assert item["source_ref_count"] == 4

    # Diagnosis details surfaced from the nested payload.
    assert item["access_blockers"] == [
        "gcloud_compute_instances_get_missing",
        "gcp_a_ssh_refused",
        "gcp_a_ollama_11434_refused",
    ]
    assert item["live_probe"]["gcp_a_direct_11434"] == "connection_refused"
    assert item["side_effects"] == {
        "incident_created": False,
        "telegram_sent": False,
        "approval_created": False,
        "runtime_route_changed": False,
    }

    expected_work_item = {
        "schema_version": "awooop_ai_route_repair_work_item_v1",
        "work_item_id": "ai-route-repair:ollama_gcp_a",
        "status": "open",
        "kind": "ai_route_primary_lane_repair",
        "next_step": "restore_primary_ollama_lane_access",
        "reason": "primary_lane_unavailable",
        "needs_human": True,
        "owner": "cloud_sre_operator",
        "target_resource": "ollama_gcp_a",
        "target_href": "/awooop/runs",
        "decision_effect": "none",
        "safety_level": "read_only_work_item_projection",
        "writes_incident_state": False,
        "writes_auto_repair_result": False,
        "writes_runtime_route": False,
    }
    assert item["work_item"] == expected_work_item

    recommendation = item["playbook_recommendation"]
    assert recommendation["playbook_id"] == "ai_route_primary_lane_recovery"
    assert recommendation["safe_to_auto_execute"] is False
    step_names = [entry["step"] for entry in recommendation["steps"]]
    assert step_names == [
        "verify_cloud_control_plane_access",
        "restore_gcp_a_os_access",
        "restore_ollama_service_on_gcp_a",
        "verify_ai_route_status_returns_primary",
    ]

    owner_action = item["owner_action"]
    assert owner_action["lead_agent"] == "Hermes"
    assert owner_action["safe_to_auto_repair"] is False
def test_ai_route_lane_state_marks_degraded_failover() -> None:
    """An offline primary with gcp_b selected yields a degraded_failover lane state."""
    lane_health = {
        "ollama_gcp_a": {
            "status": "offline",
            "reason": "recent_endpoint_failure_cooldown:25s",
        },
        "ollama_gcp_b": {"status": "healthy", "reason": ""},
        "ollama_local": {"status": "healthy", "reason": ""},
    }

    state = _ai_route_lane_state(
        policy_order=_ai_route_policy_order("deep_rca"),
        selected_provider="ollama_gcp_b",
        health=lane_health,
    )

    assert state["lane_mode"] == "degraded_failover"
    assert state["active_lane"]["provider_name"] == "ollama_gcp_b"

    # Exactly one lane -- the offline primary -- is reported as skipped.
    skipped_lanes = state["skipped_lanes"]
    assert len(skipped_lanes) == 1
    skipped_primary = skipped_lanes[0]
    assert skipped_primary["provider_name"] == "ollama_gcp_a"
    assert skipped_primary["role"] == "primary"
    assert skipped_primary["health_status"] == "offline"
    assert skipped_primary["reason"] == "recent_endpoint_failure_cooldown:25s"
    assert skipped_primary["action_required"] is True

    expected_operator_action = {
        "human_required": True,
        "action": "repair_skipped_primary_lane",
        "reason": "fallback_lane_active",
    }
    assert state["operator_action"] == expected_operator_action
def test_ai_route_lane_state_marks_cloud_fallback() -> None:
    """With every Ollama lane offline and Gemini selected, the mode is cloud_fallback."""
    ollama_lanes = ("ollama_gcp_a", "ollama_gcp_b", "ollama_local")
    lane_health = {
        lane_name: {"status": "offline", "reason": "timeout"}
        for lane_name in ollama_lanes
    }

    state = _ai_route_lane_state(
        policy_order=_ai_route_policy_order("deep_rca"),
        selected_provider="gemini",
        health=lane_health,
    )

    assert state["lane_mode"] == "cloud_fallback"
    assert state["active_lane"]["provider_name"] == "gemini"
    skipped_names = [lane["provider_name"] for lane in state["skipped_lanes"]]
    assert skipped_names == [
        "ollama_gcp_a",
        "ollama_gcp_b",
        "ollama_local",
    ]
    assert state["operator_action"]["action"] == "restore_ollama_lanes"
@pytest.mark.asyncio
async def test_ai_route_status_times_out_before_slow_provider_checks(monkeypatch) -> None:
    """A slow provider selection times out and the lightweight connectivity result is used.

    The stub manager sleeps for 0.05s while the select timeout is patched to
    0.001s; the connectivity stub reports gcp_a offline and every other lane
    healthy.
    """

    class SlowFailoverManager:
        async def select_provider(self, task_type: str = "general") -> None:
            await asyncio.sleep(0.05)

    def slow_manager_factory():
        return SlowFailoverManager()

    async def fake_connectivity(endpoint):
        primary_lane = endpoint.provider_name == "ollama_gcp_a"
        if not primary_lane:
            return HealthReport(
                status=HealthStatus.HEALTHY,
                host=endpoint.url,
                latency_ms=12.3,
                reason="status_only_connectivity_ok",
            )
        return HealthReport(
            status=HealthStatus.OFFLINE,
            host=endpoint.url,
            reason="timeout",
        )

    async def no_repair_evidence(**_kwargs):
        return None

    # Applied in this order so a missing attribute fails on the same patch as before.
    service_patches = (
        ("_AI_ROUTE_STATUS_SELECT_TIMEOUT_SECONDS", 0.001),
        ("get_ollama_failover_manager", slow_manager_factory),
        ("_ai_route_probe_connectivity", fake_connectivity),
        ("_latest_ai_route_repair_evidence", no_repair_evidence),
    )
    for attribute_name, replacement in service_patches:
        monkeypatch.setattr(platform_operator_service, attribute_name, replacement)

    response = await platform_operator_service.get_ai_route_status("deep_rca")

    expected_reason = (
        "route_check_timeout; lightweight connectivity selected ollama_gcp_b"
    )
    assert response["route_reason"] == expected_reason
    assert response["route_error"] is None
    assert response["route_source"] == "lightweight_connectivity_fallback"
    assert response["selected_provider"] == "ollama_gcp_b"

    lane_health = response["health"]
    assert lane_health["ollama_gcp_a"]["status"] == "offline"
    assert lane_health["ollama_gcp_b"]["status"] == "healthy"

    fallback_names = [entry["provider_name"] for entry in response["fallback_chain"]]
    assert fallback_names == [
        "ollama_local",
        "gemini",
    ]
    policy_names = [entry["provider_name"] for entry in response["policy_order"]]
    assert policy_names == [
        "ollama_gcp_a",
        "ollama_gcp_b",
        "ollama_local",
        "gemini",
    ]
@pytest.mark.asyncio
async def test_ai_route_status_lightweight_fallback_keeps_gemini_policy_only(
    monkeypatch,
) -> None:
    """When selection times out and every probed lane is offline, Gemini is selected.

    The stub manager sleeps past the patched 0.001s select timeout and the
    connectivity stub reports OFFLINE for whichever endpoint it is given.
    """

    class SlowFailoverManager:
        async def select_provider(self, task_type: str = "general") -> None:
            await asyncio.sleep(0.05)

    def slow_manager_factory():
        return SlowFailoverManager()

    async def fake_offline_connectivity(endpoint):
        offline_report = HealthReport(
            status=HealthStatus.OFFLINE,
            host=endpoint.url,
            reason="offline",
        )
        return offline_report

    async def no_repair_evidence(**_kwargs):
        return None

    # Applied in this order so a missing attribute fails on the same patch as before.
    service_patches = (
        ("_AI_ROUTE_STATUS_SELECT_TIMEOUT_SECONDS", 0.001),
        ("get_ollama_failover_manager", slow_manager_factory),
        ("_ai_route_probe_connectivity", fake_offline_connectivity),
        ("_latest_ai_route_repair_evidence", no_repair_evidence),
    )
    for attribute_name, replacement in service_patches:
        monkeypatch.setattr(platform_operator_service, attribute_name, replacement)

    response = await platform_operator_service.get_ai_route_status("deep_rca")

    assert response["selected_provider"] == "gemini"
    assert response["selected_model"] is None
    assert response["route_source"] == "lightweight_connectivity_fallback"
    assert response["route_error"] is None
    assert "final fallback policy is Gemini" in response["route_reason"]
    assert all(
        lane_report["status"] == "offline"
        for lane_report in response["health"].values()
    )
def test_ai_route_workload_validation_rejects_unknown_value() -> None:
    """A padded known workload is returned trimmed; an unknown one raises HTTPException."""
    normalized = _validate_ai_route_workload(" hermes ")
    assert normalized == "hermes"

    with pytest.raises(HTTPException) as rejected:
        _validate_ai_route_workload("charge_money")

    # Checked after the context manager exits so the assertion actually runs.
    detail_text = str(rejected.value.detail)
    assert "Unsupported workload_type" in detail_text