diff --git a/apps/api/src/api/v1/platform/operator_runs.py b/apps/api/src/api/v1/platform/operator_runs.py index 85a03fbf..e305f2ab 100644 --- a/apps/api/src/api/v1/platform/operator_runs.py +++ b/apps/api/src/api/v1/platform/operator_runs.py @@ -25,6 +25,9 @@ from src.core.awooop_operator_auth import ( from src.services.platform_operator_service import ( decide_approval as decide_approval_svc, ) +from src.services.platform_operator_service import ( + get_ai_route_status as get_ai_route_status_svc, +) from src.services.platform_operator_service import ( get_awooop_status_chain as get_awooop_status_chain_svc, ) @@ -99,6 +102,21 @@ class ListCallbackRepliesResponse(BaseModel): per_page: int +class AiRouteStatusResponse(BaseModel): + schema_version: str + workload_type: str + policy_order: list[dict[str, Any]] + selected_provider: str | None = None + selected_url: str | None = None + selected_model: str | None = None + fallback_chain: list[dict[str, Any]] + route_reason: str + route_source: str + route_error: str | None = None + health: dict[str, dict[str, Any]] + checked_at: datetime + + class ApprovalItem(BaseModel): run_id: UUID project_id: str @@ -198,6 +216,24 @@ async def list_callback_replies( ) +@router.get( + "/ai-route-status", + response_model=AiRouteStatusResponse, + summary="查詢 AI Provider 路由狀態", + description=( + "回傳目前 Ollama/Gemini 路由策略、即時 primary、fallback chain 與健康狀態;" + "只讀,不觸發推理或自動修復。" + ), +) +async def get_ai_route_status( + workload_type: str | None = Query( + "deep_rca", + description="工作負載類型,例如 deep_rca/hermes/interactive/embedding/rag/code_review/image_analysis", + ), +) -> dict[str, Any]: + return await get_ai_route_status_svc(workload_type=workload_type) + + @router.get( "/runs/{run_id}/detail", summary="查詢 Run 詳細時間線", diff --git a/apps/api/src/services/platform_operator_service.py b/apps/api/src/services/platform_operator_service.py index 2bb02ad7..602319c4 100644 --- a/apps/api/src/services/platform_operator_service.py +++ b/apps/api/src/services/platform_operator_service.py @@ -13,7 +13,7 @@ import uuid from collections import defaultdict from collections.abc import Mapping from datetime import UTC, datetime -from typing import Any +from typing import Any, get_args from uuid import UUID import structlog @@ -38,6 +38,17 @@ from src.services.awooop_truth_chain_service import ( _summarize_mcp, fetch_truth_chain, ) +from src.services.ollama_endpoint_resolver import ( + OllamaEndpointSelection, + OllamaWorkloadType, + resolve_ollama_order, +) +from src.services.ollama_failover_manager import ( + OllamaEndpoint, + OllamaRoutingResult, + get_ollama_failover_manager, +) +from src.services.ollama_health_monitor import HealthReport from src.services.run_state_machine import transition logger = structlog.get_logger(__name__) @@ -74,6 +85,8 @@ _CALLBACK_REPLY_RAW_STATUS_BY_FILTER = { "failed": "callback_reply_failed", } _CALLBACK_REPLY_ACTION_RE = re.compile(r"^[a-z0-9_:-]{1,64}$", re.IGNORECASE) +_AI_ROUTE_STATUS_SCHEMA_VERSION = "awooop_ai_route_status_v1" +_AI_ROUTE_WORKLOADS = set(get_args(OllamaWorkloadType)) # ============================================================================= # Tenants @@ -283,6 +296,7 @@ async def list_callback_replies( "limit": per_page, "offset": (page - 1) * per_page, } + if project_id: where_clauses.append("m.project_id = :project_id") params["project_id"] = project_id @@ -395,6 +409,150 @@ async def list_callback_replies( } +async def get_ai_route_status( + workload_type: str | None = None, +) -> dict[str, Any]: + """回傳目前 AI/Ollama provider routing 的只讀狀態,供 Operator Console 顯示。""" + workload = _validate_ai_route_workload(workload_type) + policy_order = _ai_route_policy_order(workload) + checked_at = _utc_now_naive() + + try: + route = await get_ollama_failover_manager().select_provider(task_type=workload) + except Exception as exc: + return { + "schema_version": _AI_ROUTE_STATUS_SCHEMA_VERSION, + "workload_type": workload, + "policy_order": policy_order, + "selected_provider": None, + "selected_url": None, + "selected_model": None, + "fallback_chain": [], + "route_reason": "route_check_failed", + "route_source": "ollama_failover_manager", + "route_error": str(exc), + "health": {}, + "checked_at": checked_at, + } + + return { + "schema_version": _AI_ROUTE_STATUS_SCHEMA_VERSION, + "workload_type": workload, + "policy_order": policy_order, + "selected_provider": route.primary.provider_name, + "selected_url": route.primary.url or None, + "selected_model": route.primary.model, + "fallback_chain": [ + _ai_route_runtime_endpoint_item(endpoint, priority=index + 2) + for index, endpoint in enumerate(route.fallback_chain) + ], + "route_reason": route.routing_reason, + "route_source": "ollama_failover_manager", + "route_error": None, + "health": _ai_route_health_map(route), + "checked_at": checked_at, + } + + +def _validate_ai_route_workload(workload_type: str | None) -> OllamaWorkloadType: + """Normalize and validate workload filter for the public route status endpoint.""" + workload = str(workload_type or "deep_rca").strip() or "deep_rca" + if workload not in _AI_ROUTE_WORKLOADS: + allowed = ", ".join(sorted(_AI_ROUTE_WORKLOADS)) + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_CONTENT, + detail=f"Unsupported workload_type: {workload}. Allowed: {allowed}", + ) + return workload # type: ignore[return-value] + + +def _ai_route_policy_order(workload: OllamaWorkloadType) -> list[dict[str, Any]]: + """Expose configured policy order: GCP-A -> GCP-B -> 111 -> Gemini.""" + items = [ + _ai_route_policy_endpoint_item(endpoint, priority=index + 1) + for index, endpoint in enumerate(resolve_ollama_order(workload)) + ] + items.append({ + "priority": len(items) + 1, + "provider_name": "gemini", + "url": None, + "workload_type": workload, + "reason": "final_cloud_fallback_after_all_ollama_endpoints", + "role": "final_fallback", + "runtime": "cloud", + }) + return items + + +def _ai_route_policy_endpoint_item( + endpoint: OllamaEndpointSelection, + *, + priority: int, +) -> dict[str, Any]: + role = { + "ollama_gcp_a": "primary", + "ollama_gcp_b": "secondary", + "ollama_local": "local_fallback", + }.get(endpoint.provider_name, "ollama") + return { + "priority": priority, + "provider_name": endpoint.provider_name, + "url": endpoint.url, + "workload_type": endpoint.workload_type, + "reason": endpoint.reason, + "role": role, + "runtime": "ollama", + } + + +def _ai_route_runtime_endpoint_item( + endpoint: OllamaEndpoint, + *, + priority: int, +) -> dict[str, Any]: + return { + "priority": priority, + "provider_name": endpoint.provider_name, + "url": endpoint.url or None, + "model": endpoint.model, + "runtime": "ollama" if endpoint.provider_name.startswith("ollama") else "cloud", + } + + +def _ai_route_health_map(route: OllamaRoutingResult) -> dict[str, dict[str, Any]]: + """Convert failover health reports into provider keyed status for the UI.""" + health: dict[str, dict[str, Any]] = { + "ollama_gcp_a": _ai_route_health_item(route.health_gcp_a), + } + if route.health_gcp_b: + health["ollama_gcp_b"] = _ai_route_health_item(route.health_gcp_b) + else: + health["ollama_gcp_b"] = _ai_route_not_checked_health_item() + if route.health_local: + health["ollama_local"] = _ai_route_health_item(route.health_local) + else: + health["ollama_local"] = _ai_route_not_checked_health_item() + return health + + +def _ai_route_health_item(report: HealthReport) -> dict[str, Any]: + payload = report.to_dict() + payload["checked"] = True + return payload + + +def _ai_route_not_checked_health_item() -> dict[str, Any]: + return { + "status": "not_checked", + "host": "", + "latency_ms": None, + "reason": "standby_not_checked_primary_healthy", + "checked_at": None, + "from_cache": False, + "checked": False, + } + + def _timeline_item( *, ts: Any, diff --git a/apps/api/tests/test_awooop_operator_timeline_labels.py b/apps/api/tests/test_awooop_operator_timeline_labels.py index ca5b3ca2..7612bf6f 100644 --- a/apps/api/tests/test_awooop_operator_timeline_labels.py +++ b/apps/api/tests/test_awooop_operator_timeline_labels.py @@ -7,11 +7,16 @@ import pytest from fastapi import HTTPException from src.api.v1.platform.operator_runs import ( + AiRouteStatusResponse, ListApprovalsResponse, ListCallbackRepliesResponse, ListRunsResponse, ) +from src.services.ollama_failover_manager import OllamaEndpoint, OllamaRoutingResult +from src.services.ollama_health_monitor import HealthReport, HealthStatus from src.services.platform_operator_service import ( + _ai_route_health_map, + _ai_route_policy_order, _build_awooop_status_chain, _callback_reply_event_item, _callback_reply_summary_matches_status, @@ -28,6 +33,7 @@ from src.services.platform_operator_service import ( _run_callback_reply_summary, _run_remediation_list_summary, _timeline_sort_key, + _validate_ai_route_workload, _validate_callback_reply_action_filter, _validate_callback_reply_status_filter, ) @@ -725,3 +731,101 @@ def test_timeline_sort_key_normalizes_datetime_and_iso_string() -> None: "2026-05-14T10:00:01", "2026-05-14T10:00:02+00:00", ] + + +def test_ai_route_policy_order_exposes_global_ollama_then_gemini() -> None: + policy = _ai_route_policy_order("deep_rca") + + assert [item["provider_name"] for item in policy] == [ + "ollama_gcp_a", + "ollama_gcp_b", + "ollama_local", + "gemini", + ] + assert policy[-1]["role"] == "final_fallback" + assert policy[-1]["runtime"] == "cloud" + + +def test_ai_route_health_map_marks_standby_as_not_checked() -> None: + route = OllamaRoutingResult( + primary=OllamaEndpoint( + url="http://gcp-a:11434", + provider_name="ollama_gcp_a", + model="qwen3:14b", + ), + fallback_chain=[ + OllamaEndpoint( + url="http://gcp-b:11434", + provider_name="ollama_gcp_b", + model="qwen3:14b", + ), + OllamaEndpoint( + url="http://local-111:11434", + provider_name="ollama_local", + model="qwen3:14b", + ), + ], + routing_reason="primary healthy", + health_gcp_a=HealthReport( + status=HealthStatus.HEALTHY, + host="http://gcp-a:11434", + latency_ms=123.4, + reason="ok", + ), + health_gcp_b=None, + health_local=None, + ) + + health = _ai_route_health_map(route) + + assert health["ollama_gcp_a"]["status"] == "healthy" + assert health["ollama_gcp_a"]["checked"] is True + assert health["ollama_gcp_b"]["status"] == "not_checked" + assert health["ollama_local"]["reason"] == "standby_not_checked_primary_healthy" + + +def test_ai_route_status_response_preserves_route_fields() -> None: + response = AiRouteStatusResponse.model_validate({ + "schema_version": "awooop_ai_route_status_v1", + "workload_type": "deep_rca", + "policy_order": _ai_route_policy_order("deep_rca"), + "selected_provider": "ollama_gcp_a", + "selected_url": "http://gcp-a:11434", + "selected_model": "qwen3:14b", + "fallback_chain": [ + { + "priority": 2, + "provider_name": "ollama_gcp_b", + "url": "http://gcp-b:11434", + "model": "qwen3:14b", + "runtime": "ollama", + } + ], + "route_reason": "primary healthy", + "route_source": "ollama_failover_manager", + "route_error": None, + "health": { + "ollama_gcp_a": { + "status": "healthy", + "host": "http://gcp-a:11434", + "latency_ms": 123.4, + "reason": "ok", + "checked_at": 0, + "from_cache": False, + "checked": True, + }, + }, + "checked_at": datetime(2026, 5, 19, 12, 0, 0), + }) + + dumped = response.model_dump(mode="json") + assert dumped["policy_order"][-1]["provider_name"] == "gemini" + assert dumped["selected_provider"] == "ollama_gcp_a" + + +def test_ai_route_workload_validation_rejects_unknown_value() -> None: + assert _validate_ai_route_workload(" hermes ") == "hermes" + with pytest.raises(HTTPException) as exc_info: + _validate_ai_route_workload("charge_money") + + assert "Unsupported workload_type" in str(exc_info.value.detail) diff --git a/apps/web/messages/en.json b/apps/web/messages/en.json index 7770f524..fb3feebc 100644 --- a/apps/web/messages/en.json +++ b/apps/web/messages/en.json @@ -2210,6 +2210,45 @@ "openRun": "Open Run" } }, + "aiRouteStatus": { + "title": "AI Provider Routing", + "subtitle": "Current policy and health checks across GCP-A, GCP-B, 111, and Gemini handoff order", + "selected": "Primary: {provider}", + "selectedEmpty": "Primary: --", + "empty": "AI provider route status is not available yet.", + "error": "AI provider route failed to load: {error}", + "badges": { + "active": "Active", + "standby": "Standby" + }, + "fields": { + "workload": "Workload", + "primary": "Current Primary", + "reason": "Route Reason", + "checkedAt": "Checked at {time}", + "model": "Model: {model}", + "modelEmpty": "Model: --", + "routeError": "Route check failed: {error}", + "health": "Health: {status}", + "latency": "Latency: {latency}", + "noUrl": "No HTTP URL" + }, + "health": { + "healthy": "Healthy", + "slow": "Slow", + "degraded": "Degraded", + "offline": "Offline", + "not_checked": "Standby not checked", + "unknown": "Unknown" + }, + "roles": { + "primary": "First priority", + "secondary": "Second priority", + "local_fallback": "111 local fallback", + "final_fallback": "Gemini final fallback", + "ollama": "Ollama node" + } + }, "incidentEvidence": { "title": "Incident Evidence", "subtitle": "Telegram, Run, Approval, and Work Item share the same remediation evidence", diff --git a/apps/web/messages/zh-TW.json b/apps/web/messages/zh-TW.json index 83e7d3d3..57e92c65 100644 --- a/apps/web/messages/zh-TW.json +++ b/apps/web/messages/zh-TW.json @@ -2211,6 +2211,45 @@ "openRun": "開啟 Run" } }, + "aiRouteStatus": { + "title": "AI Provider 路由", + "subtitle": "目前策略與健康檢查,顯示 GCP-A、GCP-B、111、Gemini 的接手順序", + "selected": "Primary:{provider}", + "selectedEmpty": "Primary:--", + "empty": "尚未取得 AI provider route 狀態。", + "error": "AI provider route 載入失敗:{error}", + "badges": { + "active": "使用中", + "standby": "備援" + }, + "fields": { + "workload": "Workload", + "primary": "目前 Primary", + "reason": "路由原因", + "checkedAt": "檢查時間 {time}", + "model": "Model:{model}", + "modelEmpty": "Model:--", + "routeError": "路由檢查失敗:{error}", + "health": "健康:{status}", + "latency": "延遲:{latency}", + "noUrl": "無 HTTP URL" + }, + "health": { + "healthy": "健康", + "slow": "偏慢", + "degraded": "降級", + "offline": "離線", + "not_checked": "待命未檢查", + "unknown": "未知" + }, + "roles": { + "primary": "第一順位", + "secondary": "第二順位", + "local_fallback": "111 本機備援", + "final_fallback": "Gemini 最終備援", + "ollama": "Ollama 節點" + } + }, "incidentEvidence": { "title": "Incident Evidence", "subtitle": "Telegram、Run、Approval 與 Work Item 共用同一組補救證據", diff --git a/apps/web/src/app/[locale]/awooop/runs/page.tsx b/apps/web/src/app/[locale]/awooop/runs/page.tsx index de60edc8..14ae6f6a 100644 --- a/apps/web/src/app/[locale]/awooop/runs/page.tsx +++ b/apps/web/src/app/[locale]/awooop/runs/page.tsx @@ -305,6 +305,42 @@ interface CallbackRepliesResponse { per_page: number; } +interface AiRoutePolicyItem { + priority: number; + provider_name: string; + url?: string | null; + workload_type?: string | null; + reason?: string | null; + role?: string | null; + runtime?: string | null; + model?: string | null; +} + +interface AiRouteHealthItem { + status?: string | null; + host?: string | null; + latency_ms?: number | null; + reason?: string | null; + checked_at?: number | null; + from_cache?: boolean; + checked?: boolean; +} + +interface AiRouteStatusResponse { + schema_version: string; + workload_type: string; + policy_order: AiRoutePolicyItem[]; + selected_provider?: string | null; + selected_url?: string | null; + selected_model?: string | null; + fallback_chain: AiRoutePolicyItem[]; + route_reason: string; + route_source: string; + route_error?: string | null; + health: Record; + checked_at: string; +} + // ============================================================================= // 常數 // ============================================================================= @@ -1384,6 +1420,161 @@ function CallbackReplyEvidencePanel({ ); } +function aiRouteHealthLabelKey(status?: string | null) { + if ( + status === "healthy" || + status === "slow" || + status === "degraded" || + status === "offline" || + status === "not_checked" + ) { + return `health.${status}`; + } + return "health.unknown"; +} + +function aiRouteRoleLabelKey(role?: string | null) { + if ( + role === "primary" || + role === "secondary" || + role === "local_fallback" || + role === "final_fallback" + ) { + return `roles.${role}`; + } + return "roles.ollama"; +} + +function AiRouteStatusPanel({ + status, + error, +}: { + status: AiRouteStatusResponse | null; + error: string | null; +}) { + const t = useTranslations("awooop.aiRouteStatus"); + const policy = status?.policy_order ?? []; + const selectedProvider = status?.selected_provider ?? null; + const selectedModel = status?.selected_model ?? null; + const checkedAt = status?.checked_at + ? new Date(status.checked_at).toLocaleTimeString("zh-TW", { + hour: "2-digit", + minute: "2-digit", + }) + : "--"; + + return ( +
+
+
+
+ + {selectedProvider + ? t("selected", { provider: selectedProvider }) + : t("selectedEmpty")} + +
+ + {error ? ( +
+ {t("error", { error })} +
+ ) : !status ? ( +
+ {t("empty")} +
+ ) : ( + <> +
+
+

{t("fields.workload")}

+

+ {status.workload_type || "--"} +

+

+ {t("fields.checkedAt", { time: checkedAt })} +

+
+
+

{t("fields.primary")}

+

+ {selectedProvider ?? "--"} +

+

+ {selectedModel + ? t("fields.model", { model: selectedModel }) + : t("fields.modelEmpty")} +

+
+
+

{t("fields.reason")}

+

+ {status.route_error + ? t("fields.routeError", { error: status.route_error }) + : status.route_reason || "--"} +

+

+ {status.route_source} +

+
+
+ +
+ {policy.map((item) => { + const health = status.health?.[item.provider_name]; + const healthKey = aiRouteHealthLabelKey(health?.status); + const roleKey = aiRouteRoleLabelKey(item.role); + const isSelected = selectedProvider === item.provider_name; + const latency = typeof health?.latency_ms === "number" + ? `${health.latency_ms.toFixed(1)}ms` + : "--"; + + return ( +
+
+
+

+ {item.provider_name} +

+

+ {t(roleKey as never)} +

+
+ + {isSelected ? t("badges.active") : t("badges.standby")} + +
+
+

{t("fields.health", { status: t(healthKey as never) })}

+

{t("fields.latency", { latency })}

+

+ {item.url || t("fields.noUrl")} +

+
+
+ ); + })} +
+ + )} +
+ ); +} + // ============================================================================= // Main Component // ============================================================================= @@ -1400,6 +1591,8 @@ export default function RunsPage() { const [callbackEvents, setCallbackEvents] = useState([]); const [callbackEventsTotal, setCallbackEventsTotal] = useState(0); const [callbackEventsError, setCallbackEventsError] = useState(null); + const [aiRouteStatus, setAiRouteStatus] = useState(null); + const [aiRouteStatusError, setAiRouteStatusError] = useState(null); const [tenants, setTenants] = useState([]); const [total, setTotal] = useState(0); const [loading, setLoading] = useState(true); @@ -1532,6 +1725,25 @@ export default function RunsPage() { setCallbackEventsError(`HTTP ${callbackRes.status}`); } + try { + const routeStatusRes = await fetch( + `${API_BASE}/api/v1/platform/ai-route-status?workload_type=deep_rca` + ); + if (routeStatusRes.ok) { + const routeStatusData: AiRouteStatusResponse = await routeStatusRes.json(); + setAiRouteStatus(routeStatusData); + setAiRouteStatusError(null); + } else { + setAiRouteStatus(null); + setAiRouteStatusError(`HTTP ${routeStatusRes.status}`); + } + } catch (routeStatusError) { + setAiRouteStatus(null); + setAiRouteStatusError( + routeStatusError instanceof Error ? routeStatusError.message : "route status failed" + ); + } + setLastRefresh(new Date()); } catch (err) { setError(err instanceof Error ? err.message : "載入失敗"); @@ -1721,6 +1933,11 @@ export default function RunsPage() { })} + +