feat(awooop): surface degraded ai route lanes
This commit is contained in:
@@ -147,6 +147,10 @@ class AiRouteStatusResponse(BaseModel):
|
||||
route_source: str
|
||||
route_error: str | None = None
|
||||
health: dict[str, dict[str, Any]]
|
||||
lane_mode: str | None = None
|
||||
active_lane: dict[str, Any] | None = None
|
||||
skipped_lanes: list[dict[str, Any]] = Field(default_factory=list)
|
||||
operator_action: dict[str, Any] | None = None
|
||||
checked_at: datetime
|
||||
|
||||
|
||||
|
||||
@@ -619,7 +619,8 @@ async def get_ai_route_status(
|
||||
route_error=str(exc),
|
||||
)
|
||||
|
||||
return {
|
||||
health = _ai_route_health_map(route)
|
||||
response = {
|
||||
"schema_version": _AI_ROUTE_STATUS_SCHEMA_VERSION,
|
||||
"workload_type": workload,
|
||||
"policy_order": policy_order,
|
||||
@@ -633,9 +634,15 @@ async def get_ai_route_status(
|
||||
"route_reason": route.routing_reason,
|
||||
"route_source": "ollama_failover_manager",
|
||||
"route_error": None,
|
||||
"health": _ai_route_health_map(route),
|
||||
"health": health,
|
||||
"checked_at": checked_at,
|
||||
}
|
||||
response.update(_ai_route_lane_state(
|
||||
policy_order=policy_order,
|
||||
selected_provider=route.primary.provider_name,
|
||||
health=health,
|
||||
))
|
||||
return response
|
||||
|
||||
|
||||
def _validate_ai_route_workload(workload_type: str | None) -> OllamaWorkloadType:
|
||||
@@ -712,7 +719,7 @@ async def _ai_route_lightweight_status_from_policy(
|
||||
)
|
||||
|
||||
if selected_index is None:
|
||||
return {
|
||||
response = {
|
||||
"schema_version": _AI_ROUTE_STATUS_SCHEMA_VERSION,
|
||||
"workload_type": workload,
|
||||
"policy_order": policy_order,
|
||||
@@ -729,6 +736,12 @@ async def _ai_route_lightweight_status_from_policy(
|
||||
"health": health_by_provider,
|
||||
"checked_at": checked_at,
|
||||
}
|
||||
response.update(_ai_route_lane_state(
|
||||
policy_order=policy_order,
|
||||
selected_provider="gemini",
|
||||
health=health_by_provider,
|
||||
))
|
||||
return response
|
||||
|
||||
selected = endpoints[selected_index]
|
||||
model = get_settings().OLLAMA_HEALTH_CHECK_MODEL
|
||||
@@ -748,7 +761,7 @@ async def _ai_route_lightweight_status_from_policy(
|
||||
"runtime": "cloud",
|
||||
})
|
||||
|
||||
return {
|
||||
response = {
|
||||
"schema_version": _AI_ROUTE_STATUS_SCHEMA_VERSION,
|
||||
"workload_type": workload,
|
||||
"policy_order": policy_order,
|
||||
@@ -765,6 +778,12 @@ async def _ai_route_lightweight_status_from_policy(
|
||||
"health": health_by_provider,
|
||||
"checked_at": checked_at,
|
||||
}
|
||||
response.update(_ai_route_lane_state(
|
||||
policy_order=policy_order,
|
||||
selected_provider=selected.provider_name,
|
||||
health=health_by_provider,
|
||||
))
|
||||
return response
|
||||
|
||||
|
||||
async def _ai_route_probe_connectivity(
|
||||
@@ -832,7 +851,7 @@ def _ai_route_unavailable_status(
|
||||
route_error: str,
|
||||
route_source: str,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
response = {
|
||||
"schema_version": _AI_ROUTE_STATUS_SCHEMA_VERSION,
|
||||
"workload_type": workload,
|
||||
"policy_order": policy_order,
|
||||
@@ -846,6 +865,101 @@ def _ai_route_unavailable_status(
|
||||
"health": {},
|
||||
"checked_at": checked_at,
|
||||
}
|
||||
response.update(_ai_route_lane_state(
|
||||
policy_order=policy_order,
|
||||
selected_provider=None,
|
||||
health={},
|
||||
))
|
||||
return response
|
||||
|
||||
|
||||
def _ai_route_lane_state(
    *,
    policy_order: list[dict[str, Any]],
    selected_provider: str | None,
    health: dict[str, dict[str, Any]],
) -> dict[str, Any]:
    """Expose failover lane state separately from policy labels.

    Args:
        policy_order: Policy entries in priority order. Each entry is read
            through ``.get`` for ``provider_name`` and ``runtime`` (and the
            keys consumed by ``_ai_route_lane_item``).
        selected_provider: ``provider_name`` of the lane currently serving
            traffic, or ``None``/empty when nothing was selected.
        health: Per-provider health entries keyed by provider name.

    Returns:
        A dict with four keys:

        * ``lane_mode`` -- ``"unavailable"`` when no policy entry matches the
          selected provider, ``"cloud_fallback"`` when the active entry has
          ``runtime == "cloud"``, ``"degraded_failover"`` when at least one
          ollama lane ahead of the active one was skipped, else ``"primary"``.
        * ``active_lane`` -- lane item for the selected entry, or ``None``.
        * ``skipped_lanes`` -- lane items for every ``runtime == "ollama"``
          entry that precedes the active one in ``policy_order``.
        * ``operator_action`` -- ``human_required`` / ``action`` / ``reason``
          hint matching the lane mode.
    """
    # Only search when a provider was actually selected. Without this guard a
    # policy entry lacking "provider_name" compares equal to None and would be
    # reported as the active lane of a route that is flagged "unavailable".
    selected_index: int | None = None
    if selected_provider:
        selected_index = next(
            (
                index
                for index, item in enumerate(policy_order)
                if item.get("provider_name") == selected_provider
            ),
            None,
        )

    active_item: dict[str, Any] | None
    if selected_index is None:
        active_item = None
        skipped_items: list[dict[str, Any]] = []
    else:
        active_item = policy_order[selected_index]
        # Everything ranked ahead of the active lane was passed over.
        skipped_items = policy_order[:selected_index]

    # Only ollama lanes count as "skipped"; other runtimes ahead of the active
    # lane are not reported.
    skipped_lanes = [
        _ai_route_lane_item(item, health.get(str(item.get("provider_name"))))
        for item in skipped_items
        if item.get("runtime") == "ollama"
    ]

    if active_item is None:
        lane_mode = "unavailable"
        operator_action = {
            "human_required": True,
            "action": "inspect_ai_router",
            "reason": "no_active_provider",
        }
    elif active_item.get("runtime") == "cloud":
        lane_mode = "cloud_fallback"
        operator_action = {
            "human_required": True,
            "action": "restore_ollama_lanes",
            "reason": "all_ollama_lanes_unavailable",
        }
    elif skipped_lanes:
        lane_mode = "degraded_failover"
        operator_action = {
            "human_required": True,
            "action": "repair_skipped_primary_lane",
            "reason": "fallback_lane_active",
        }
    else:
        lane_mode = "primary"
        operator_action = {
            "human_required": False,
            "action": "monitor",
            "reason": "primary_lane_active",
        }

    active_lane = (
        _ai_route_lane_item(
            active_item,
            health.get(str(active_item.get("provider_name"))),
        )
        if active_item is not None
        else None
    )

    return {
        "lane_mode": lane_mode,
        "active_lane": active_lane,
        "skipped_lanes": skipped_lanes,
        "operator_action": operator_action,
    }
|
||||
|
||||
|
||||
def _ai_route_lane_item(
|
||||
item: dict[str, Any],
|
||||
health_item: dict[str, Any] | None,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"priority": item.get("priority"),
|
||||
"provider_name": item.get("provider_name"),
|
||||
"role": item.get("role"),
|
||||
"runtime": item.get("runtime"),
|
||||
"url": item.get("url"),
|
||||
"health_status": (health_item or {}).get("status", "not_checked"),
|
||||
"reason": (health_item or {}).get("reason") or item.get("reason"),
|
||||
"action_required": (health_item or {}).get("status") not in {
|
||||
"healthy",
|
||||
"not_checked",
|
||||
None,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _ai_route_policy_endpoint_item(
|
||||
|
||||
@@ -19,6 +19,7 @@ from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingRe
|
||||
from src.services.ollama_health_monitor import HealthReport, HealthStatus
|
||||
from src.services.platform_operator_service import (
|
||||
_ai_route_health_map,
|
||||
_ai_route_lane_state,
|
||||
_ai_route_policy_order,
|
||||
_build_awooop_status_chain,
|
||||
_callback_reply_event_item,
|
||||
@@ -1549,12 +1550,87 @@ def test_ai_route_status_response_preserves_route_fields() -> None:
|
||||
"checked": True,
|
||||
},
|
||||
},
|
||||
"lane_mode": "primary",
|
||||
"active_lane": {
|
||||
"provider_name": "ollama_gcp_a",
|
||||
"health_status": "healthy",
|
||||
"action_required": False,
|
||||
},
|
||||
"skipped_lanes": [],
|
||||
"operator_action": {
|
||||
"human_required": False,
|
||||
"action": "monitor",
|
||||
"reason": "primary_lane_active",
|
||||
},
|
||||
"checked_at": datetime(2026, 5, 19, 12, 0, 0),
|
||||
})
|
||||
|
||||
dumped = response.model_dump(mode="json")
|
||||
assert dumped["policy_order"][-1]["provider_name"] == "gemini"
|
||||
assert dumped["selected_provider"] == "ollama_gcp_a"
|
||||
assert dumped["lane_mode"] == "primary"
|
||||
|
||||
|
||||
def test_ai_route_lane_state_marks_degraded_failover() -> None:
    """Selecting ``ollama_gcp_b`` while ``ollama_gcp_a`` is offline is degraded failover."""
    policy = _ai_route_policy_order("deep_rca")
    health = {
        "ollama_gcp_a": {
            "status": "offline",
            "reason": "recent_endpoint_failure_cooldown:25s",
        },
        "ollama_gcp_b": {
            "status": "healthy",
            "reason": "",
        },
        "ollama_local": {
            "status": "healthy",
            "reason": "",
        },
    }

    state = _ai_route_lane_state(
        policy_order=policy,
        selected_provider="ollama_gcp_b",
        health=health,
    )

    assert state["lane_mode"] == "degraded_failover"
    assert state["active_lane"]["provider_name"] == "ollama_gcp_b"

    # Exactly one lane is skipped, and it carries its health detail with it.
    assert len(state["skipped_lanes"]) == 1
    skipped = state["skipped_lanes"][0]
    assert skipped["provider_name"] == "ollama_gcp_a"
    assert skipped["role"] == "primary"
    assert skipped["health_status"] == "offline"
    assert skipped["reason"] == "recent_endpoint_failure_cooldown:25s"
    assert skipped["action_required"] is True

    assert state["operator_action"] == {
        "human_required": True,
        "action": "repair_skipped_primary_lane",
        "reason": "fallback_lane_active",
    }
|
||||
|
||||
|
||||
def test_ai_route_lane_state_marks_cloud_fallback() -> None:
    """Selecting ``gemini`` with every ollama lane offline is cloud fallback."""
    policy = _ai_route_policy_order("deep_rca")
    health = {
        "ollama_gcp_a": {"status": "offline", "reason": "timeout"},
        "ollama_gcp_b": {"status": "offline", "reason": "timeout"},
        "ollama_local": {"status": "offline", "reason": "timeout"},
    }

    state = _ai_route_lane_state(
        policy_order=policy,
        selected_provider="gemini",
        health=health,
    )

    assert state["lane_mode"] == "cloud_fallback"
    assert state["active_lane"]["provider_name"] == "gemini"

    # Skipped lanes keep the policy order of the ollama entries.
    skipped_names = [lane["provider_name"] for lane in state["skipped_lanes"]]
    assert skipped_names == [
        "ollama_gcp_a",
        "ollama_gcp_b",
        "ollama_local",
    ]
    assert state["operator_action"]["action"] == "restore_ollama_lanes"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@@ -260,6 +260,7 @@
|
||||
"humanGapClear": "Quality summary has no top gap",
|
||||
"modelRoute": "Model route",
|
||||
"routeDetail": "{model}; current {selected}; {primary}={primaryStatus}; fallback {fallback}",
|
||||
"routeLaneDetail": "{mode}; skipped {skipped}",
|
||||
"routeReasonSeparator": "; ",
|
||||
"routeReason": "Reason: {reason}",
|
||||
"routeErrorDetail": "Route check failed: {error}",
|
||||
@@ -272,6 +273,13 @@
|
||||
"not_checked": "standby",
|
||||
"unknown": "unknown"
|
||||
},
|
||||
"routeLaneMode": {
|
||||
"primary": "Primary normal",
|
||||
"degraded_failover": "Degraded handoff",
|
||||
"cloud_fallback": "Cloud fallback",
|
||||
"unavailable": "Route unavailable",
|
||||
"unknown": "Unknown state"
|
||||
},
|
||||
"topGap": "Largest current gap: {gate}, {count} items."
|
||||
}
|
||||
},
|
||||
@@ -2865,17 +2873,19 @@
|
||||
"aiRouteStatus": {
|
||||
"title": "AI Provider Routing",
|
||||
"subtitle": "Current policy and health checks across GCP-A, GCP-B, 111, and Gemini handoff order",
|
||||
"selected": "Primary: {provider}",
|
||||
"selectedEmpty": "Primary: --",
|
||||
"selected": "Active: {provider}",
|
||||
"selectedEmpty": "Active: --",
|
||||
"empty": "AI provider route status is not available yet.",
|
||||
"error": "AI provider route failed to load: {error}",
|
||||
"badges": {
|
||||
"active": "Active",
|
||||
"skipped": "Skipped",
|
||||
"standby": "Standby"
|
||||
},
|
||||
"fields": {
|
||||
"workload": "Workload",
|
||||
"primary": "Current Primary",
|
||||
"laneMode": "Lane state",
|
||||
"primary": "Current handoff",
|
||||
"reason": "Route Reason",
|
||||
"checkedAt": "Checked at {time}",
|
||||
"model": "Model: {model}",
|
||||
@@ -2899,6 +2909,22 @@
|
||||
"local_fallback": "111 local fallback",
|
||||
"final_fallback": "Gemini final fallback",
|
||||
"ollama": "Ollama node"
|
||||
},
|
||||
"laneModes": {
|
||||
"primary": "Primary normal",
|
||||
"degraded_failover": "Degraded handoff",
|
||||
"cloud_fallback": "Cloud final fallback",
|
||||
"unavailable": "Route unavailable",
|
||||
"unknown": "Unknown state"
|
||||
},
|
||||
"operatorActions": {
|
||||
"monitor": "Monitor only",
|
||||
"repair_skipped_primary_lane": "Repair the skipped primary lane",
|
||||
"restore_ollama_lanes": "Restore Ollama lanes before relying on cloud only",
|
||||
"inspect_ai_router": "Inspect AI Router / provider status",
|
||||
"unknown": "Confirm next action"
|
||||
},
|
||||
"degradedSummary": "Current handoff is {active}; skipped {skipped}; next action: {action}"
|
||||
}
|
||||
},
|
||||
"incidentEvidence": {
|
||||
@@ -3221,4 +3247,3 @@
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -261,6 +261,7 @@
|
||||
"humanGapClear": "品質摘要未列出主要缺口",
|
||||
"modelRoute": "模型路由",
|
||||
"routeDetail": "{model};目前 {selected};{primary}={primaryStatus};備援 {fallback}",
|
||||
"routeLaneDetail": "{mode};已跳過 {skipped}",
|
||||
"routeReasonSeparator": ";",
|
||||
"routeReason": "原因:{reason}",
|
||||
"routeErrorDetail": "路由檢查失敗:{error}",
|
||||
@@ -273,6 +274,13 @@
|
||||
"not_checked": "待命",
|
||||
"unknown": "未知"
|
||||
},
|
||||
"routeLaneMode": {
|
||||
"primary": "Primary 正常",
|
||||
"degraded_failover": "降級接手",
|
||||
"cloud_fallback": "雲端備援",
|
||||
"unavailable": "路由不可用",
|
||||
"unknown": "狀態未知"
|
||||
},
|
||||
"topGap": "目前最大缺口:{gate},共 {count} 筆。"
|
||||
}
|
||||
},
|
||||
@@ -2866,17 +2874,19 @@
|
||||
"aiRouteStatus": {
|
||||
"title": "AI Provider 路由",
|
||||
"subtitle": "目前策略與健康檢查,顯示 GCP-A、GCP-B、111、Gemini 的接手順序",
|
||||
"selected": "Primary:{provider}",
|
||||
"selectedEmpty": "Primary:--",
|
||||
"selected": "使用中:{provider}",
|
||||
"selectedEmpty": "使用中:--",
|
||||
"empty": "尚未取得 AI provider route 狀態。",
|
||||
"error": "AI provider route 載入失敗:{error}",
|
||||
"badges": {
|
||||
"active": "使用中",
|
||||
"skipped": "已跳過",
|
||||
"standby": "備援"
|
||||
},
|
||||
"fields": {
|
||||
"workload": "Workload",
|
||||
"primary": "目前 Primary",
|
||||
"laneMode": "Lane 狀態",
|
||||
"primary": "目前接手",
|
||||
"reason": "路由原因",
|
||||
"checkedAt": "檢查時間 {time}",
|
||||
"model": "Model:{model}",
|
||||
@@ -2900,6 +2910,22 @@
|
||||
"local_fallback": "111 本機備援",
|
||||
"final_fallback": "Gemini 最終備援",
|
||||
"ollama": "Ollama 節點"
|
||||
},
|
||||
"laneModes": {
|
||||
"primary": "Primary 正常",
|
||||
"degraded_failover": "降級接手中",
|
||||
"cloud_fallback": "雲端最終備援",
|
||||
"unavailable": "路由不可用",
|
||||
"unknown": "狀態未知"
|
||||
},
|
||||
"operatorActions": {
|
||||
"monitor": "持續監控即可",
|
||||
"repair_skipped_primary_lane": "需修復被跳過的 Primary lane",
|
||||
"restore_ollama_lanes": "需恢復 Ollama lanes,避免只剩雲端",
|
||||
"inspect_ai_router": "需檢查 AI Router / provider 狀態",
|
||||
"unknown": "待確認下一步"
|
||||
},
|
||||
"degradedSummary": "目前由 {active} 接手;已跳過 {skipped};下一步:{action}"
|
||||
}
|
||||
},
|
||||
"incidentEvidence": {
|
||||
@@ -3222,4 +3248,3 @@
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -428,6 +428,23 @@ interface AiRouteHealthItem {
|
||||
checked?: boolean;
|
||||
}
|
||||
|
||||
/**
 * One lane entry as carried by the `active_lane` / `skipped_lanes` fields of
 * the AI route status response. Every field is optional because the backend
 * may omit it or send `null`.
 */
interface AiRouteLaneItem {
  priority?: number | null;
  provider_name?: string | null;
  role?: string | null;
  runtime?: string | null;
  url?: string | null;
  health_status?: string | null;
  reason?: string | null;
  action_required?: boolean;
}
|
||||
|
||||
/**
 * Operator hint attached to the AI route status response
 * (`operator_action`). `action` is the value fed to
 * `aiRouteOperatorActionLabelKey` to pick a translated label.
 */
interface AiRouteOperatorAction {
  human_required?: boolean;
  action?: string | null;
  reason?: string | null;
}
|
||||
|
||||
interface AiRouteStatusResponse {
|
||||
schema_version: string;
|
||||
workload_type: string;
|
||||
@@ -440,6 +457,10 @@ interface AiRouteStatusResponse {
|
||||
route_source: string;
|
||||
route_error?: string | null;
|
||||
health: Record<string, AiRouteHealthItem>;
|
||||
lane_mode?: string | null;
|
||||
active_lane?: AiRouteLaneItem | null;
|
||||
skipped_lanes?: AiRouteLaneItem[];
|
||||
operator_action?: AiRouteOperatorAction | null;
|
||||
checked_at: string;
|
||||
}
|
||||
|
||||
@@ -2005,6 +2026,30 @@ function aiRouteRoleLabelKey(role?: string | null) {
|
||||
return "roles.ollama";
|
||||
}
|
||||
|
||||
/**
 * Map a `lane_mode` value to its i18n key under `laneModes`.
 *
 * Only the four known modes produce a dedicated key; anything else
 * (including `null` and `undefined`) resolves to `laneModes.unknown`, so the
 * caller never asks the translator for a key that does not exist.
 */
function aiRouteLaneModeLabelKey(mode?: string | null) {
  if (
    mode === "primary" ||
    mode === "degraded_failover" ||
    mode === "cloud_fallback" ||
    mode === "unavailable"
  ) {
    return `laneModes.${mode}`;
  }
  return "laneModes.unknown";
}
|
||||
|
||||
/**
 * Map an `operator_action.action` value to its i18n key under
 * `operatorActions`.
 *
 * Only the four known actions produce a dedicated key; anything else
 * (including `null` and `undefined`) resolves to `operatorActions.unknown`.
 */
function aiRouteOperatorActionLabelKey(action?: string | null) {
  if (
    action === "monitor" ||
    action === "repair_skipped_primary_lane" ||
    action === "restore_ollama_lanes" ||
    action === "inspect_ai_router"
  ) {
    return `operatorActions.${action}`;
  }
  return "operatorActions.unknown";
}
|
||||
|
||||
function AiRouteStatusPanel({
|
||||
status,
|
||||
error,
|
||||
@@ -2016,6 +2061,15 @@ function AiRouteStatusPanel({
|
||||
const policy = status?.policy_order ?? [];
|
||||
const selectedProvider = status?.selected_provider ?? null;
|
||||
const selectedModel = status?.selected_model ?? null;
|
||||
const laneMode = status?.lane_mode ?? null;
|
||||
const laneModeKey = aiRouteLaneModeLabelKey(laneMode);
|
||||
const operatorActionKey = aiRouteOperatorActionLabelKey(status?.operator_action?.action);
|
||||
const skippedLanes = status?.skipped_lanes ?? [];
|
||||
const skippedProviderSet = new Set(
|
||||
skippedLanes
|
||||
.map((lane) => lane.provider_name)
|
||||
.filter((provider): provider is string => Boolean(provider))
|
||||
);
|
||||
const checkedAt = status?.checked_at
|
||||
? new Date(status.checked_at).toLocaleTimeString("zh-TW", {
|
||||
hour: "2-digit",
|
||||
@@ -2033,7 +2087,14 @@ function AiRouteStatusPanel({
|
||||
<p className="text-xs text-[#77736a]">{t("subtitle")}</p>
|
||||
</div>
|
||||
</div>
|
||||
<span className="border border-[#9bb6d9] bg-[#eef5ff] px-2 py-0.5 text-xs font-semibold text-[#1f5b9b]">
|
||||
<span
|
||||
className={cn(
|
||||
"border px-2 py-0.5 text-xs font-semibold",
|
||||
laneMode === "primary"
|
||||
? "border-[#9bc7a4] bg-[#f0faf2] text-[#17602a]"
|
||||
: "border-[#d9b36f] bg-[#fff7e8] text-[#8a5a08]"
|
||||
)}
|
||||
>
|
||||
{selectedProvider
|
||||
? t("selected", { provider: selectedProvider })
|
||||
: t("selectedEmpty")}
|
||||
@@ -2050,7 +2111,28 @@ function AiRouteStatusPanel({
|
||||
</div>
|
||||
) : (
|
||||
<>
|
||||
<div className="grid gap-px bg-[#e0ddd4] md:grid-cols-3">
|
||||
{laneMode && laneMode !== "primary" && (
|
||||
<div className="flex items-start gap-3 border-b border-[#e0ddd4] bg-[#fff7e8] px-4 py-3 text-sm text-[#6d4707]">
|
||||
<TriangleAlert className="mt-0.5 h-4 w-4 shrink-0" aria-hidden="true" />
|
||||
<div className="min-w-0">
|
||||
<p className="font-semibold text-[#141413]">
|
||||
{t(laneModeKey as never)}
|
||||
</p>
|
||||
<p className="mt-1 leading-5">
|
||||
{t("degradedSummary", {
|
||||
active: selectedProvider ?? "--",
|
||||
skipped: skippedLanes
|
||||
.map((lane) => lane.provider_name)
|
||||
.filter(Boolean)
|
||||
.join(" -> ") || "--",
|
||||
action: t(operatorActionKey as never),
|
||||
})}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="grid gap-px bg-[#e0ddd4] md:grid-cols-4">
|
||||
<div className="bg-white px-4 py-3">
|
||||
<p className="text-xs font-semibold text-[#77736a]">{t("fields.workload")}</p>
|
||||
<p className="mt-2 font-mono text-sm font-semibold text-[#141413]">
|
||||
@@ -2060,6 +2142,15 @@ function AiRouteStatusPanel({
|
||||
{t("fields.checkedAt", { time: checkedAt })}
|
||||
</p>
|
||||
</div>
|
||||
<div className="bg-white px-4 py-3">
|
||||
<p className="text-xs font-semibold text-[#77736a]">{t("fields.laneMode")}</p>
|
||||
<p className="mt-2 text-sm font-semibold text-[#141413]">
|
||||
{t(laneModeKey as never)}
|
||||
</p>
|
||||
<p className="mt-2 text-xs leading-5 text-[#5f5b52]">
|
||||
{t(operatorActionKey as never)}
|
||||
</p>
|
||||
</div>
|
||||
<div className="bg-white px-4 py-3">
|
||||
<p className="text-xs font-semibold text-[#77736a]">{t("fields.primary")}</p>
|
||||
<p className="mt-2 truncate font-mono text-sm font-semibold text-[#141413]">
|
||||
@@ -2090,6 +2181,7 @@ function AiRouteStatusPanel({
|
||||
const healthKey = aiRouteHealthLabelKey(health?.status);
|
||||
const roleKey = aiRouteRoleLabelKey(item.role);
|
||||
const isSelected = selectedProvider === item.provider_name;
|
||||
const isSkipped = skippedProviderSet.has(item.provider_name);
|
||||
const latency = typeof health?.latency_ms === "number"
|
||||
? `${health.latency_ms.toFixed(1)}ms`
|
||||
: "--";
|
||||
@@ -2110,12 +2202,18 @@ function AiRouteStatusPanel({
|
||||
"shrink-0 border px-2 py-0.5 text-xs font-semibold",
|
||||
isSelected
|
||||
? "border-[#9bc7a4] bg-[#f0faf2] text-[#17602a]"
|
||||
: isSkipped
|
||||
? "border-[#d9b36f] bg-[#fff7e8] text-[#8a5a08]"
|
||||
: item.runtime === "cloud"
|
||||
? "border-[#d9b36f] bg-[#fff7e8] text-[#8a5a08]"
|
||||
: "border-[#d8d3c7] bg-[#faf9f3] text-[#5f5b52]"
|
||||
)}
|
||||
>
|
||||
{isSelected ? t("badges.active") : t("badges.standby")}
|
||||
{isSelected
|
||||
? t("badges.active")
|
||||
: isSkipped
|
||||
? t("badges.skipped")
|
||||
: t("badges.standby")}
|
||||
</span>
|
||||
</div>
|
||||
<div className="mt-3 space-y-1 text-xs leading-5 text-[#5f5b52]">
|
||||
@@ -2124,6 +2222,9 @@ function AiRouteStatusPanel({
|
||||
<p className="truncate font-mono text-[#77736a]">
|
||||
{item.url || t("fields.noUrl")}
|
||||
</p>
|
||||
<p className="line-clamp-2 text-[#77736a]">
|
||||
{health?.reason || item.reason || "--"}
|
||||
</p>
|
||||
</div>
|
||||
</article>
|
||||
);
|
||||
|
||||
@@ -120,6 +120,12 @@ interface AiRouteStatusResponse {
|
||||
route_reason?: string | null
|
||||
route_error?: string | null
|
||||
health?: Record<string, AiRouteHealthItem>
|
||||
lane_mode?: string | null
|
||||
skipped_lanes?: Array<{ provider_name?: string | null }>
|
||||
operator_action?: {
|
||||
action?: string | null
|
||||
human_required?: boolean
|
||||
} | null
|
||||
}
|
||||
|
||||
interface EvidenceSnapshot {
|
||||
@@ -185,6 +191,18 @@ function routeHealthLabelKey(status?: string | null) {
|
||||
return 'routeHealth.unknown'
|
||||
}
|
||||
|
||||
/**
 * Map a `lane_mode` value to its i18n key under `routeLaneMode`.
 *
 * Only the four known modes produce a dedicated key; anything else
 * (including `null` and `undefined`) resolves to `routeLaneMode.unknown`.
 */
function routeLaneModeLabelKey(mode?: string | null) {
  if (
    mode === 'primary' ||
    mode === 'degraded_failover' ||
    mode === 'cloud_fallback' ||
    mode === 'unavailable'
  ) {
    return `routeLaneMode.${mode}`
  }
  return 'routeLaneMode.unknown'
}
|
||||
|
||||
function providerDisplayName(provider?: string | null) {
|
||||
switch (provider) {
|
||||
case 'ollama_gcp_a':
|
||||
@@ -353,6 +371,10 @@ export function AutomationEvidenceCard() {
|
||||
const primaryProvider = route?.policy_order?.[0]?.provider_name ?? null
|
||||
const primaryStatus = primaryProvider ? route?.health?.[primaryProvider]?.status : null
|
||||
const selectedProvider = providerDisplayName(route?.selected_provider)
|
||||
const laneMode = route?.lane_mode ?? null
|
||||
const skippedLanes = route?.skipped_lanes
|
||||
?.map((lane) => providerDisplayName(lane.provider_name))
|
||||
.join(' -> ')
|
||||
const fallback = route?.fallback_chain
|
||||
?.map((item) => item.provider_name)
|
||||
.map(providerDisplayName)
|
||||
@@ -366,8 +388,14 @@ export function AutomationEvidenceCard() {
|
||||
primaryStatus: t(routeHealthLabelKey(primaryStatus) as never),
|
||||
fallback: fallback || t('routeNoFallback'),
|
||||
})
|
||||
const laneDetail = laneMode && laneMode !== 'primary'
|
||||
? t('routeLaneDetail', {
|
||||
mode: t(routeLaneModeLabelKey(laneMode) as never),
|
||||
skipped: skippedLanes || '--',
|
||||
})
|
||||
: null
|
||||
const routeDetail = route?.route_reason && !route.route_error
|
||||
? `${routeSummary}${t('routeReasonSeparator')}${t('routeReason', { reason: route.route_reason })}`
|
||||
? `${routeSummary}${laneDetail ? t('routeReasonSeparator') + laneDetail : ''}${t('routeReasonSeparator')}${t('routeReason', { reason: route.route_reason })}`
|
||||
: routeSummary
|
||||
|
||||
return {
|
||||
|
||||
@@ -1,3 +1,68 @@
|
||||
## 2026-05-25|T175 AI Provider lane 降級狀態前後端顯示
|
||||
|
||||
**背景**:
|
||||
|
||||
- T174 已把 production manifest 恢復成 `GCP-A -> GCP-B -> 111 -> Gemini`,但前端/API 仍主要用 `selected_provider` 表示結果。
|
||||
- 當 GCP-A 真實紅燈且 GCP-B 接手時,Operator 需要同時看到:
|
||||
- policy 仍以 GCP-A 為第一順位;
|
||||
- 目前使用中 lane 是 GCP-B;
|
||||
- GCP-A 是被跳過的 degraded lane;
|
||||
- 下一步是修復被跳過的 primary lane,而不是把 manifest 改名或靜默跳過。
|
||||
|
||||
**本次修補**:
|
||||
|
||||
- `/api/v1/platform/ai-route-status` 新增 lane 狀態欄位:
|
||||
- `lane_mode`:`primary` / `degraded_failover` / `cloud_fallback` / `unavailable`
|
||||
- `active_lane`
|
||||
- `skipped_lanes`
|
||||
- `operator_action`
|
||||
- AwoooP Runs 頁的 AI Provider Routing panel 會把「使用中」與「已跳過」分開顯示,並在 degraded / cloud fallback 時列出下一步。
|
||||
- 首頁 Automation Evidence card 會把非 primary 狀態補進模型路由摘要,避免只看到目前 provider 而看不到 failover 階段。
|
||||
- i18n 同步補齊 zh-TW / en 文案,維持前端零硬編碼。
|
||||
|
||||
**本地驗證**:
|
||||
|
||||
```text
|
||||
pytest:
|
||||
test_ai_route_status_response_preserves_route_fields
|
||||
test_ai_route_lane_state_marks_degraded_failover
|
||||
test_ai_route_lane_state_marks_cloud_fallback
|
||||
test_ai_route_status_times_out_before_slow_provider_checks
|
||||
test_ai_route_status_lightweight_fallback_keeps_gemini_policy_only
|
||||
-> 5 passed
|
||||
|
||||
ruff / py_compile:
|
||||
platform_operator_service.py
|
||||
operator_runs.py
|
||||
test_awooop_operator_timeline_labels.py
|
||||
-> passed
|
||||
|
||||
frontend:
|
||||
pnpm --dir apps/web exec tsc --noEmit
|
||||
-> passed
|
||||
|
||||
json / diff:
|
||||
en.json / zh-TW.json JSON.parse
|
||||
git diff --check
|
||||
-> passed
|
||||
```
|
||||
|
||||
**目前整體進度**:
|
||||
|
||||
- AwoooP 告警可觀測鏈:約 99.2%。
|
||||
- 低風險自動修復閉環:約 95.5%。
|
||||
- 前端 AI 自動化管理介面同步:約 96.9%。
|
||||
- Telegram 詳情 / 歷史可解釋性:約 95.5%。
|
||||
- Callback evidence / DB replayability:約 95.6%。
|
||||
- MCP / 自建 MCP 可見性:約 88%。
|
||||
- Sentry / SigNoz source correlation visibility:約 88%。
|
||||
- Ansible / PlayBook decision visibility:約 84.8%。
|
||||
- KM owner-review / completion governance:約 84%。
|
||||
- AI Provider lane 健康與可見性:約 88%(GCP-A 仍待 repair;本輪補足 degraded lane 顯示)。
|
||||
- 完整 AI 自動化管理產品化:約 94.5%。
|
||||
|
||||
---
|
||||
|
||||
## 2026-05-25|T174 Ollama manifest policy-order guard
|
||||
|
||||
**背景**:
|
||||
|
||||
Reference in New Issue
Block a user