feat(awooop): surface ai provider route status
All checks were successful
Code Review / ai-code-review (push) Successful in 14s
CD Pipeline / tests (push) Successful in 6m2s
CD Pipeline / build-and-deploy (push) Successful in 4m21s
CD Pipeline / post-deploy-checks (push) Successful in 1m21s

This commit is contained in:
Your Name
2026-05-19 13:45:04 +08:00
parent 3477c7569a
commit 56a8085dcf
6 changed files with 594 additions and 1 deletions

View File

@@ -25,6 +25,9 @@ from src.core.awooop_operator_auth import (
from src.services.platform_operator_service import (
decide_approval as decide_approval_svc,
)
from src.services.platform_operator_service import (
get_ai_route_status as get_ai_route_status_svc,
)
from src.services.platform_operator_service import (
get_awooop_status_chain as get_awooop_status_chain_svc,
)
@@ -99,6 +102,21 @@ class ListCallbackRepliesResponse(BaseModel):
per_page: int
class AiRouteStatusResponse(BaseModel):
    # Response model declared for the read-only "/ai-route-status" endpoint.
    # NOTE: documented with comments rather than a docstring so the generated
    # schema description is left untouched.

    # Version tag describing the layout of this payload.
    schema_version: str
    # Workload the routing status was computed for.
    workload_type: str
    # Configured provider order, one dict per provider.
    policy_order: list[dict[str, Any]]
    # Selected provider, its URL and its model; each defaults to None.
    selected_provider: str | None = None
    selected_url: str | None = None
    selected_model: str | None = None
    # Fallback endpoints, one dict per endpoint.
    fallback_chain: list[dict[str, Any]]
    # Reason for the routing decision and the component that produced it.
    route_reason: str
    route_source: str
    # Error text for a failed route check; defaults to None.
    route_error: str | None = None
    # Health entries keyed by provider name.
    health: dict[str, dict[str, Any]]
    # Timestamp attached to the status payload.
    checked_at: datetime
class ApprovalItem(BaseModel):
run_id: UUID
project_id: str
@@ -198,6 +216,24 @@ async def list_callback_replies(
)
@router.get(
    "/ai-route-status",
    response_model=AiRouteStatusResponse,
    summary="查詢 AI Provider 路由狀態",
    description=(
        "回傳目前 Ollama/Gemini 路由策略、即時 primary、fallback chain 與健康狀態;"
        "只讀,不觸發推理或自動修復。"
    ),
)
async def get_ai_route_status(
    workload_type: str | None = Query(
        "deep_rca",
        description="工作負載類型,例如 deep_rca/hermes/interactive/embedding/rag/code_review/image_analysis",
    ),
) -> dict[str, Any]:
    # Thin HTTP adapter: hand the query parameter to the service layer and
    # return its payload unchanged; the response model shapes the output.
    route_status = await get_ai_route_status_svc(workload_type=workload_type)
    return route_status
@router.get(
"/runs/{run_id}/detail",
summary="查詢 Run 詳細時間線",

View File

@@ -13,7 +13,7 @@ import uuid
from collections import defaultdict
from collections.abc import Mapping
from datetime import UTC, datetime
from typing import Any
from typing import Any, get_args
from uuid import UUID
import structlog
@@ -38,6 +38,17 @@ from src.services.awooop_truth_chain_service import (
_summarize_mcp,
fetch_truth_chain,
)
from src.services.ollama_endpoint_resolver import (
OllamaEndpointSelection,
OllamaWorkloadType,
resolve_ollama_order,
)
from src.services.ollama_failover_manager import (
OllamaEndpoint,
OllamaRoutingResult,
get_ollama_failover_manager,
)
from src.services.ollama_health_monitor import HealthReport
from src.services.run_state_machine import transition
logger = structlog.get_logger(__name__)
@@ -74,6 +85,8 @@ _CALLBACK_REPLY_RAW_STATUS_BY_FILTER = {
"failed": "callback_reply_failed",
}
_CALLBACK_REPLY_ACTION_RE = re.compile(r"^[a-z0-9_:-]{1,64}$", re.IGNORECASE)
# Schema version tag placed in every AI route status payload.
_AI_ROUTE_STATUS_SCHEMA_VERSION = "awooop_ai_route_status_v1"
# Accepted workload names, taken from the type arguments of OllamaWorkloadType.
_AI_ROUTE_WORKLOADS = set(get_args(OllamaWorkloadType))
# =============================================================================
# Tenants
@@ -283,6 +296,7 @@ async def list_callback_replies(
"limit": per_page,
"offset": (page - 1) * per_page,
}
if project_id:
where_clauses.append("m.project_id = :project_id")
params["project_id"] = project_id
@@ -395,6 +409,150 @@ async def list_callback_replies(
}
async def get_ai_route_status(
    workload_type: str | None = None,
) -> dict[str, Any]:
    """Return the read-only AI/Ollama provider routing status for the Operator Console.

    Args:
        workload_type: Workload name to report on; normalized and validated
            by ``_validate_ai_route_workload``.

    Returns:
        A dict with the schema version, workload, configured policy order,
        selected provider/URL/model, runtime fallback chain, route reason,
        route source, route error, per-provider health and check time. When
        provider selection raises, the selection fields stay empty,
        ``route_reason`` is ``"route_check_failed"`` and ``route_error``
        carries the exception text.

    Raises:
        HTTPException: Propagated from workload validation when
            ``workload_type`` is not supported.
    """
    workload = _validate_ai_route_workload(workload_type)

    # Start from the "route check failed" shape so both outcomes share one
    # payload definition; the success path overwrites the route fields below.
    payload: dict[str, Any] = {
        "schema_version": _AI_ROUTE_STATUS_SCHEMA_VERSION,
        "workload_type": workload,
        "policy_order": _ai_route_policy_order(workload),
        "selected_provider": None,
        "selected_url": None,
        "selected_model": None,
        "fallback_chain": [],
        "route_reason": "route_check_failed",
        "route_source": "ollama_failover_manager",
        "route_error": None,
        "health": {},
        "checked_at": _utc_now_naive(),
    }

    try:
        route = await get_ollama_failover_manager().select_provider(task_type=workload)
    except Exception as exc:
        # Best-effort status endpoint: the failure is reported in the payload
        # instead of failing the request, but it must still be visible in the
        # logs rather than being swallowed silently.
        logger.warning(
            "ai_route_status_check_failed",
            workload_type=workload,
            error=str(exc),
            exc_info=True,
        )
        payload["route_error"] = str(exc)
        return payload

    # Updating existing keys keeps the original key order of the payload.
    payload.update(
        selected_provider=route.primary.provider_name,
        selected_url=route.primary.url or None,
        selected_model=route.primary.model,
        fallback_chain=[
            # The selected provider is priority 1, so fallbacks start at 2.
            _ai_route_runtime_endpoint_item(endpoint, priority=priority)
            for priority, endpoint in enumerate(route.fallback_chain, start=2)
        ],
        route_reason=route.routing_reason,
        health=_ai_route_health_map(route),
    )
    return payload
def _validate_ai_route_workload(workload_type: str | None) -> OllamaWorkloadType:
    """Normalize the workload filter and reject names outside the allowed set.

    A missing or blank value falls back to ``"deep_rca"``; surrounding
    whitespace is stripped. Unknown names raise an ``HTTPException`` whose
    detail lists the allowed workloads.
    """
    candidate = str(workload_type or "deep_rca").strip()
    if not candidate:
        candidate = "deep_rca"

    if candidate in _AI_ROUTE_WORKLOADS:
        return candidate  # type: ignore[return-value]

    supported = ", ".join(sorted(_AI_ROUTE_WORKLOADS))
    raise HTTPException(
        status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
        detail=f"Unsupported workload_type: {candidate}. Allowed: {supported}",
    )
def _ai_route_policy_order(workload: OllamaWorkloadType) -> list[dict[str, Any]]:
    """Return the resolver's Ollama endpoints in order, followed by a final Gemini entry."""
    ordered: list[dict[str, Any]] = []
    for position, endpoint in enumerate(resolve_ollama_order(workload), start=1):
        ordered.append(_ai_route_policy_endpoint_item(endpoint, priority=position))

    # Gemini always takes the slot right after the last Ollama endpoint.
    gemini_entry = {
        "priority": len(ordered) + 1,
        "provider_name": "gemini",
        "url": None,
        "workload_type": workload,
        "reason": "final_cloud_fallback_after_all_ollama_endpoints",
        "role": "final_fallback",
        "runtime": "cloud",
    }
    ordered.append(gemini_entry)
    return ordered
def _ai_route_policy_endpoint_item(
    endpoint: OllamaEndpointSelection,
    *,
    priority: int,
) -> dict[str, Any]:
    """Describe one configured Ollama endpoint as a policy-order entry.

    The role is derived from the provider name; names without a dedicated
    role are labelled ``"ollama"``.
    """
    roles_by_provider = {
        "ollama_gcp_a": "primary",
        "ollama_gcp_b": "secondary",
        "ollama_local": "local_fallback",
    }
    provider = endpoint.provider_name
    return dict(
        priority=priority,
        provider_name=provider,
        url=endpoint.url,
        workload_type=endpoint.workload_type,
        reason=endpoint.reason,
        role=roles_by_provider.get(provider, "ollama"),
        runtime="ollama",
    )
def _ai_route_runtime_endpoint_item(
    endpoint: OllamaEndpoint,
    *,
    priority: int,
) -> dict[str, Any]:
    """Describe one runtime fallback endpoint for the status payload.

    An empty URL is reported as ``None``; the runtime is ``"ollama"`` when the
    provider name starts with ``"ollama"`` and ``"cloud"`` otherwise.
    """
    provider = endpoint.provider_name
    if provider.startswith("ollama"):
        runtime = "ollama"
    else:
        runtime = "cloud"
    return dict(
        priority=priority,
        provider_name=provider,
        url=endpoint.url or None,
        model=endpoint.model,
        runtime=runtime,
    )
def _ai_route_health_map(route: OllamaRoutingResult) -> dict[str, dict[str, Any]]:
    """Build the provider-keyed health section of the status payload.

    The GCP-A report is always converted; GCP-B and local reports are
    converted when present and otherwise replaced by a "not checked" entry.
    """
    gcp_b_report = route.health_gcp_b
    local_report = route.health_local
    return {
        "ollama_gcp_a": _ai_route_health_item(route.health_gcp_a),
        "ollama_gcp_b": (
            _ai_route_health_item(gcp_b_report)
            if gcp_b_report
            else _ai_route_not_checked_health_item()
        ),
        "ollama_local": (
            _ai_route_health_item(local_report)
            if local_report
            else _ai_route_not_checked_health_item()
        ),
    }
def _ai_route_health_item(report: HealthReport) -> dict[str, Any]:
    """Return the report's dict form with ``checked`` set to ``True``."""
    item = report.to_dict()
    # Flag set on the dict returned by ``to_dict`` itself, as before.
    item.update(checked=True)
    return item
def _ai_route_not_checked_health_item() -> dict[str, Any]:
    """Return the placeholder health entry used when no report is available."""
    return dict(
        status="not_checked",
        host="",
        latency_ms=None,
        reason="standby_not_checked_primary_healthy",
        checked_at=None,
        from_cache=False,
        checked=False,
    )
def _timeline_item(
*,
ts: Any,

View File

@@ -7,11 +7,16 @@ import pytest
from fastapi import HTTPException
from src.api.v1.platform.operator_runs import (
AiRouteStatusResponse,
ListApprovalsResponse,
ListCallbackRepliesResponse,
ListRunsResponse,
)
from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult
from src.services.ollama_health_monitor import HealthReport, HealthStatus
from src.services.platform_operator_service import (
_ai_route_health_map,
_ai_route_policy_order,
_build_awooop_status_chain,
_callback_reply_event_item,
_callback_reply_summary_matches_status,
@@ -28,6 +33,7 @@ from src.services.platform_operator_service import (
_run_callback_reply_summary,
_run_remediation_list_summary,
_timeline_sort_key,
_validate_ai_route_workload,
_validate_callback_reply_action_filter,
_validate_callback_reply_status_filter,
)
@@ -725,3 +731,101 @@ def test_timeline_sort_key_normalizes_datetime_and_iso_string() -> None:
"2026-05-14T10:00:01",
"2026-05-14T10:00:02+00:00",
]
def test_ai_route_policy_order_exposes_global_ollama_then_gemini() -> None:
    """Policy order lists the three Ollama providers and ends with Gemini."""
    policy = _ai_route_policy_order("deep_rca")

    expected_providers = [
        "ollama_gcp_a",
        "ollama_gcp_b",
        "ollama_local",
        "gemini",
    ]
    assert [item["provider_name"] for item in policy] == expected_providers

    final_entry = policy[-1]
    assert final_entry["role"] == "final_fallback"
    assert final_entry["runtime"] == "cloud"
def test_ai_route_health_map_marks_standby_as_not_checked() -> None:
    """Providers without a health report are surfaced as "not checked"."""
    primary = OllamaEndpoint(
        url="http://gcp-a:11434",
        provider_name="ollama_gcp_a",
        model="qwen3:14b",
    )
    standby_gcp_b = OllamaEndpoint(
        url="http://gcp-b:11434",
        provider_name="ollama_gcp_b",
        model="qwen3:14b",
    )
    standby_local = OllamaEndpoint(
        url="http://local-111:11434",
        provider_name="ollama_local",
        model="qwen3:14b",
    )
    primary_health = HealthReport(
        status=HealthStatus.HEALTHY,
        host="http://gcp-a:11434",
        latency_ms=123.4,
        reason="ok",
    )
    # Only the primary carries a report; both standbys are left unset.
    route = OllamaRoutingResult(
        primary=primary,
        fallback_chain=[standby_gcp_b, standby_local],
        routing_reason="primary healthy",
        health_gcp_a=primary_health,
        health_gcp_b=None,
        health_local=None,
    )

    health = _ai_route_health_map(route)

    gcp_a = health["ollama_gcp_a"]
    assert gcp_a["status"] == "healthy"
    assert gcp_a["checked"] is True
    assert health["ollama_gcp_b"]["status"] == "not_checked"
    assert health["ollama_local"]["reason"] == "standby_not_checked_primary_healthy"
def test_ai_route_status_response_preserves_route_fields() -> None:
    """The response model round-trips the policy order and selected provider."""
    gcp_b_fallback = {
        "priority": 2,
        "provider_name": "ollama_gcp_b",
        "url": "http://gcp-b:11434",
        "model": "qwen3:14b",
        "runtime": "ollama",
    }
    gcp_a_health = {
        "status": "healthy",
        "host": "http://gcp-a:11434",
        "latency_ms": 123.4,
        "reason": "ok",
        "checked_at": 0,
        "from_cache": False,
        "checked": True,
    }
    payload = {
        "schema_version": "awooop_ai_route_status_v1",
        "workload_type": "deep_rca",
        "policy_order": _ai_route_policy_order("deep_rca"),
        "selected_provider": "ollama_gcp_a",
        "selected_url": "http://gcp-a:11434",
        "selected_model": "qwen3:14b",
        "fallback_chain": [gcp_b_fallback],
        "route_reason": "primary healthy",
        "route_source": "ollama_failover_manager",
        "route_error": None,
        "health": {"ollama_gcp_a": gcp_a_health},
        "checked_at": datetime(2026, 5, 19, 12, 0, 0),
    }

    dumped = AiRouteStatusResponse.model_validate(payload).model_dump(mode="json")

    assert dumped["policy_order"][-1]["provider_name"] == "gemini"
    assert dumped["selected_provider"] == "ollama_gcp_a"
def test_ai_route_workload_validation_rejects_unknown_value() -> None:
    """Workload validation normalizes known names and rejects unknown ones with 422."""
    # Surrounding whitespace is stripped before the lookup.
    assert _validate_ai_route_workload(" hermes ") == "hermes"
    # Missing or blank input falls back to the default workload.
    assert _validate_ai_route_workload(None) == "deep_rca"
    assert _validate_ai_route_workload("   ") == "deep_rca"

    with pytest.raises(HTTPException) as exc_info:
        _validate_ai_route_workload("charge_money")

    # Pin the status code as well as the message so a regression to a
    # different error class is caught.
    assert exc_info.value.status_code == 422
    assert "Unsupported workload_type" in str(exc_info.value.detail)

View File

@@ -2210,6 +2210,45 @@
"openRun": "Open Run"
}
},
"aiRouteStatus": {
"title": "AI Provider Routing",
"subtitle": "Current policy and health checks across GCP-A, GCP-B, 111, and Gemini handoff order",
"selected": "Primary: {provider}",
"selectedEmpty": "Primary: --",
"empty": "AI provider route status is not available yet.",
"error": "AI provider route failed to load: {error}",
"badges": {
"active": "Active",
"standby": "Standby"
},
"fields": {
"workload": "Workload",
"primary": "Current Primary",
"reason": "Route Reason",
"checkedAt": "Checked at {time}",
"model": "Model: {model}",
"modelEmpty": "Model: --",
"routeError": "Route check failed: {error}",
"health": "Health: {status}",
"latency": "Latency: {latency}",
"noUrl": "No HTTP URL"
},
"health": {
"healthy": "Healthy",
"slow": "Slow",
"degraded": "Degraded",
"offline": "Offline",
"not_checked": "Standby not checked",
"unknown": "Unknown"
},
"roles": {
"primary": "First priority",
"secondary": "Second priority",
"local_fallback": "111 local fallback",
"final_fallback": "Gemini final fallback",
"ollama": "Ollama node"
}
},
"incidentEvidence": {
"title": "Incident Evidence",
"subtitle": "Telegram, Run, Approval, and Work Item share the same remediation evidence",

View File

@@ -2211,6 +2211,45 @@
"openRun": "開啟 Run"
}
},
"aiRouteStatus": {
"title": "AI Provider 路由",
"subtitle": "目前策略與健康檢查,顯示 GCP-A、GCP-B、111、Gemini 的接手順序",
"selected": "Primary:{provider}",
"selectedEmpty": "Primary:--",
"empty": "尚未取得 AI provider route 狀態。",
"error": "AI provider route 載入失敗:{error}",
"badges": {
"active": "使用中",
"standby": "備援"
},
"fields": {
"workload": "Workload",
"primary": "目前 Primary",
"reason": "路由原因",
"checkedAt": "檢查時間 {time}",
"model": "Model:{model}",
"modelEmpty": "Model:--",
"routeError": "路由檢查失敗:{error}",
"health": "健康:{status}",
"latency": "延遲:{latency}",
"noUrl": "無 HTTP URL"
},
"health": {
"healthy": "健康",
"slow": "偏慢",
"degraded": "降級",
"offline": "離線",
"not_checked": "待命未檢查",
"unknown": "未知"
},
"roles": {
"primary": "第一順位",
"secondary": "第二順位",
"local_fallback": "111 本機備援",
"final_fallback": "Gemini 最終備援",
"ollama": "Ollama 節點"
}
},
"incidentEvidence": {
"title": "Incident Evidence",
"subtitle": "Telegram、Run、Approval 與 Work Item 共用同一組補救證據",

View File

@@ -305,6 +305,42 @@ interface CallbackRepliesResponse {
per_page: number;
}
/**
 * One provider entry as it appears in `policy_order` and `fallback_chain`
 * of the AI route status response.
 */
interface AiRoutePolicyItem {
  /** Position of the provider in the order. */
  priority: number;
  /** Provider identifier; also used to look up the matching health entry. */
  provider_name: string;
  url?: string | null;
  workload_type?: string | null;
  reason?: string | null;
  /** Role key; mapped to a translated label by `aiRouteRoleLabelKey`. */
  role?: string | null;
  /** Runtime kind; the panel styles `"cloud"` entries differently. */
  runtime?: string | null;
  model?: string | null;
}
/** Health entry for one provider, stored under its name in `health`. */
interface AiRouteHealthItem {
  /** Status key; mapped to a translated label by `aiRouteHealthLabelKey`. */
  status?: string | null;
  host?: string | null;
  /** Latency in milliseconds; rendered only when it is a number. */
  latency_ms?: number | null;
  reason?: string | null;
  checked_at?: number | null;
  from_cache?: boolean;
  checked?: boolean;
}
/** Payload fetched from `/api/v1/platform/ai-route-status`. */
interface AiRouteStatusResponse {
  schema_version: string;
  workload_type: string;
  /** Configured provider order; rendered as one card per entry. */
  policy_order: AiRoutePolicyItem[];
  selected_provider?: string | null;
  selected_url?: string | null;
  selected_model?: string | null;
  fallback_chain: AiRoutePolicyItem[];
  route_reason: string;
  route_source: string;
  /** When set, shown in place of `route_reason`. */
  route_error?: string | null;
  /** Health entries keyed by provider name. */
  health: Record<string, AiRouteHealthItem>;
  /** Timestamp string; parsed with `Date` for display. */
  checked_at: string;
}
// =============================================================================
// 常數
// =============================================================================
@@ -1384,6 +1420,161 @@ function CallbackReplyEvidencePanel({
);
}
/**
 * Map a health status to its translation key under `aiRouteStatus`.
 * Statuses without a dedicated label resolve to `health.unknown`.
 */
function aiRouteHealthLabelKey(status?: string | null) {
  switch (status) {
    case "healthy":
    case "slow":
    case "degraded":
    case "offline":
    case "not_checked":
      return `health.${status}`;
    default:
      return "health.unknown";
  }
}
/**
 * Map a policy role to its translation key under `aiRouteStatus`.
 * Roles without a dedicated label resolve to `roles.ollama`.
 */
function aiRouteRoleLabelKey(role?: string | null) {
  switch (role) {
    case "primary":
    case "secondary":
    case "local_fallback":
    case "final_fallback":
      return `roles.${role}`;
    default:
      return "roles.ollama";
  }
}
/**
 * Format the API `checked_at` timestamp as a short time for the panel.
 * Returns "--" when the value is missing or cannot be parsed.
 */
function formatAiRouteCheckedAt(value?: string | null): string {
  if (!value) {
    return "--";
  }
  // `Date` parses an ISO date-time WITHOUT an offset as local time.
  // NOTE(review): the backend stamps `checked_at` via `_utc_now_naive()`,
  // which presumably yields UTC without an offset — confirm. Under that
  // assumption an offset-less value is read as UTC so the displayed time is
  // not shifted by the viewer's UTC offset. Values that already carry `Z` or
  // an explicit offset are parsed unchanged.
  const hasTimePart = /T\d{2}:\d{2}/.test(value);
  const hasZone = /(?:Z|[+-]\d{2}:?\d{2})$/i.test(value);
  const parsed = new Date(hasTimePart && !hasZone ? `${value}Z` : value);
  if (Number.isNaN(parsed.getTime())) {
    // Avoid rendering the literal "Invalid Date".
    return "--";
  }
  return parsed.toLocaleTimeString("zh-TW", {
    hour: "2-digit",
    minute: "2-digit",
  });
}

/**
 * Read-only panel showing the AI provider routing status: the workload, the
 * selected provider and model, the route reason (or route error), and one card
 * per policy entry with its health and latency.
 *
 * Renders the error text when `error` is set, an empty-state message when
 * `status` is null, and the full panel otherwise.
 */
function AiRouteStatusPanel({
  status,
  error,
}: {
  status: AiRouteStatusResponse | null;
  error: string | null;
}) {
  const t = useTranslations("awooop.aiRouteStatus");
  const policy = status?.policy_order ?? [];
  const selectedProvider = status?.selected_provider ?? null;
  const selectedModel = status?.selected_model ?? null;
  const checkedAt = formatAiRouteCheckedAt(status?.checked_at);

  return (
    <section className="border border-[#e0ddd4] bg-white">
      <div className="flex flex-wrap items-center justify-between gap-3 border-b border-[#e0ddd4] bg-[#faf9f3] px-4 py-3">
        <div className="flex items-center gap-2">
          <Cpu className="h-4 w-4 text-[#1f5b9b]" aria-hidden="true" />
          <div>
            <h3 className="text-sm font-semibold text-[#141413]">{t("title")}</h3>
            <p className="text-xs text-[#77736a]">{t("subtitle")}</p>
          </div>
        </div>
        <span className="border border-[#9bb6d9] bg-[#eef5ff] px-2 py-0.5 text-xs font-semibold text-[#1f5b9b]">
          {selectedProvider
            ? t("selected", { provider: selectedProvider })
            : t("selectedEmpty")}
        </span>
      </div>
      {error ? (
        <div className="px-4 py-4 text-sm text-[#9f2f25]">
          {t("error", { error })}
        </div>
      ) : !status ? (
        <div className="px-4 py-4 text-sm text-[#5f5b52]">
          {t("empty")}
        </div>
      ) : (
        <>
          <div className="grid gap-px bg-[#e0ddd4] md:grid-cols-3">
            <div className="bg-white px-4 py-3">
              <p className="text-xs font-semibold text-[#77736a]">{t("fields.workload")}</p>
              <p className="mt-2 font-mono text-sm font-semibold text-[#141413]">
                {status.workload_type || "--"}
              </p>
              <p className="mt-2 text-xs leading-5 text-[#5f5b52]">
                {t("fields.checkedAt", { time: checkedAt })}
              </p>
            </div>
            <div className="bg-white px-4 py-3">
              <p className="text-xs font-semibold text-[#77736a]">{t("fields.primary")}</p>
              <p className="mt-2 truncate font-mono text-sm font-semibold text-[#141413]">
                {selectedProvider ?? "--"}
              </p>
              <p className="mt-2 text-xs leading-5 text-[#5f5b52]">
                {selectedModel
                  ? t("fields.model", { model: selectedModel })
                  : t("fields.modelEmpty")}
              </p>
            </div>
            <div className="bg-white px-4 py-3">
              <p className="text-xs font-semibold text-[#77736a]">{t("fields.reason")}</p>
              <p className="mt-2 line-clamp-2 text-xs leading-5 text-[#5f5b52]">
                {/* A route error takes precedence over the route reason. */}
                {status.route_error
                  ? t("fields.routeError", { error: status.route_error })
                  : status.route_reason || "--"}
              </p>
              <p className="mt-2 font-mono text-xs text-[#77736a]">
                {status.route_source}
              </p>
            </div>
          </div>
          <div className="grid gap-px bg-[#eee9dd] md:grid-cols-2 xl:grid-cols-4">
            {policy.map((item) => {
              // Health entries are keyed by provider name; a provider with no
              // entry falls back to the "unknown" label and "--" latency.
              const health = status.health?.[item.provider_name];
              const healthKey = aiRouteHealthLabelKey(health?.status);
              const roleKey = aiRouteRoleLabelKey(item.role);
              const isSelected = selectedProvider === item.provider_name;
              const latency = typeof health?.latency_ms === "number"
                ? `${health.latency_ms.toFixed(1)}ms`
                : "--";
              return (
                <article key={`${item.priority}-${item.provider_name}`} className="bg-white px-4 py-3">
                  <div className="flex items-start justify-between gap-3">
                    <div className="min-w-0">
                      <p className="truncate font-mono text-xs font-semibold text-[#141413]">
                        {item.provider_name}
                      </p>
                      <p className="mt-1 text-xs text-[#77736a]">
                        {t(roleKey as never)}
                      </p>
                    </div>
                    <span
                      className={cn(
                        "shrink-0 border px-2 py-0.5 text-xs font-semibold",
                        isSelected
                          ? "border-[#9bc7a4] bg-[#f0faf2] text-[#17602a]"
                          : item.runtime === "cloud"
                            ? "border-[#d9b36f] bg-[#fff7e8] text-[#8a5a08]"
                            : "border-[#d8d3c7] bg-[#faf9f3] text-[#5f5b52]"
                      )}
                    >
                      {isSelected ? t("badges.active") : t("badges.standby")}
                    </span>
                  </div>
                  <div className="mt-3 space-y-1 text-xs leading-5 text-[#5f5b52]">
                    <p>{t("fields.health", { status: t(healthKey as never) })}</p>
                    <p>{t("fields.latency", { latency })}</p>
                    <p className="truncate font-mono text-[#77736a]">
                      {item.url || t("fields.noUrl")}
                    </p>
                  </div>
                </article>
              );
            })}
          </div>
        </>
      )}
    </section>
  );
}
// =============================================================================
// Main Component
// =============================================================================
@@ -1400,6 +1591,8 @@ export default function RunsPage() {
const [callbackEvents, setCallbackEvents] = useState<CallbackReplyEvent[]>([]);
const [callbackEventsTotal, setCallbackEventsTotal] = useState(0);
const [callbackEventsError, setCallbackEventsError] = useState<string | null>(null);
const [aiRouteStatus, setAiRouteStatus] = useState<AiRouteStatusResponse | null>(null);
const [aiRouteStatusError, setAiRouteStatusError] = useState<string | null>(null);
const [tenants, setTenants] = useState<Tenant[]>([]);
const [total, setTotal] = useState(0);
const [loading, setLoading] = useState(true);
@@ -1532,6 +1725,25 @@ export default function RunsPage() {
setCallbackEventsError(`HTTP ${callbackRes.status}`);
}
try {
const routeStatusRes = await fetch(
`${API_BASE}/api/v1/platform/ai-route-status?workload_type=deep_rca`
);
if (routeStatusRes.ok) {
const routeStatusData: AiRouteStatusResponse = await routeStatusRes.json();
setAiRouteStatus(routeStatusData);
setAiRouteStatusError(null);
} else {
setAiRouteStatus(null);
setAiRouteStatusError(`HTTP ${routeStatusRes.status}`);
}
} catch (routeStatusError) {
setAiRouteStatus(null);
setAiRouteStatusError(
routeStatusError instanceof Error ? routeStatusError.message : "route status failed"
);
}
setLastRefresh(new Date());
} catch (err) {
setError(err instanceof Error ? err.message : "載入失敗");
@@ -1721,6 +1933,11 @@ export default function RunsPage() {
})}
</section>
<AiRouteStatusPanel
status={aiRouteStatus}
error={aiRouteStatusError}
/>
<SourceDossierCoveragePanel
coverage={dossierCoverage}
error={dossierCoverageError}