From 9bac5718da98b6cbcca663547fe82af0f00892c5 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 24 May 2026 11:44:37 +0800 Subject: [PATCH] feat(health): expose ollama provider chain --- apps/api/src/api/v1/health.py | 134 +++++++++++++++--- .../test_health_ollama_provider_chain.py | 68 +++++++++ apps/web/messages/en.json | 7 + apps/web/messages/zh-TW.json | 7 + .../src/components/shared/ai-model-status.tsx | 75 +++++++--- apps/web/src/lib/api-client.ts | 7 +- 6 files changed, 259 insertions(+), 39 deletions(-) create mode 100644 apps/api/tests/test_health_ollama_provider_chain.py diff --git a/apps/api/src/api/v1/health.py b/apps/api/src/api/v1/health.py index dc6228e6..ce284106 100644 --- a/apps/api/src/api/v1/health.py +++ b/apps/api/src/api/v1/health.py @@ -11,7 +11,7 @@ Endpoints: Components Checked: - PostgreSQL (192.168.0.188:5432) - Redis (192.168.0.188:6380) -- Ollama (settings.OLLAMA_URL / ADR-110 provider pool) +- Ollama ADR-110 provider pool (GCP-A -> GCP-B -> 111) - OpenClaw (192.168.0.188:8089) - SigNoz (192.168.0.188:3301) """ @@ -26,9 +26,11 @@ from pydantic import BaseModel from src.core.config import settings from src.core.logging import get_logger from src.services.health_check_service import get_health_check_service +from src.services.ollama_endpoint_resolver import resolve_ollama_order router = APIRouter() logger = get_logger("awoooi.health") +CORE_COMPONENTS = ("api", "postgresql", "redis", "ollama", "openclaw", "signoz") # ============================================================================= @@ -50,6 +52,7 @@ class HealthResponse(BaseModel): mock_mode: bool timestamp: datetime components: dict[str, ComponentHealth] + ollama_route_order: list[str] = [] # ============================================================================= @@ -106,8 +109,82 @@ async def check_redis() -> ComponentHealth: async def check_ollama() -> ComponentHealth: - """Async Ollama health check via /api/tags""" - return await _http_health_check("ollama", settings.OLLAMA_URL, "/api/tags") + """Async aggregate Ollama health check via ADR-110 provider chain.""" + aggregate, _details = await check_ollama_provider_chain() + return aggregate + + +async def check_ollama_provider_chain() -> tuple[ComponentHealth, dict[str, ComponentHealth]]: + """ + Check the full Ollama provider chain. + + The aggregate ``ollama`` component represents route availability: + - up: GCP-A is reachable + - degraded: GCP-A is unavailable but GCP-B or 111 is reachable + - down: no configured Ollama endpoint is reachable + """ + selections = tuple( + selection + for selection in resolve_ollama_order("healthcheck") + if selection.url and selection.provider_name != "ollama_unconfigured" + ) + if not selections: + aggregate = ComponentHealth( + status="down", + error="no Ollama endpoints configured", + ) + return aggregate, {} + + checked = await asyncio.gather( + *( + _http_health_check( + selection.provider_name, + selection.url, + "/api/tags", + ) + for selection in selections + ) + ) + details = { + selection.provider_name: result + for selection, result in zip(selections, checked, strict=False) + } + + primary = selections[0] + primary_status = details[primary.provider_name].status + if primary.provider_name == "ollama_gcp_a" and primary_status == "up": + return details[primary.provider_name], details + + first_available = next( + ( + selection + for selection in selections + if details[selection.provider_name].status == "up" + ), + None, + ) + if first_available: + fallback = details[first_available.provider_name] + return ( + ComponentHealth( + status="degraded", + latency_ms=fallback.latency_ms, + error=f"primary unavailable; fallback active: {first_available.provider_name}", + ), + details, + ) + + errors = ", ".join( + f"{provider}={health.error or health.status}" + for provider, health in details.items() + ) + return ( + ComponentHealth( + status="down", + error=f"all Ollama endpoints unavailable: {errors}", + ), + details, + ) async def check_openclaw() -> ComponentHealth: @@ -120,6 +197,30 @@ async def check_signoz() -> ComponentHealth: return await _http_health_check("signoz", settings.SIGNOZ_URL, "/api/v1/health") +def _determine_overall_status( + components: dict[str, ComponentHealth], +) -> Literal["healthy", "degraded", "unhealthy"]: + """Determine overall health from core aggregate components only.""" + statuses = [ + components[name].status + for name in CORE_COMPONENTS + if name in components + ] + down_count = statuses.count("down") + degraded_count = statuses.count("degraded") + + critical_down = ( + components.get("postgresql", ComponentHealth(status="down")).status == "down" + or components.get("redis", ComponentHealth(status="down")).status == "down" + ) + + if critical_down or down_count >= 3: + return "unhealthy" + if down_count >= 1 or degraded_count > 0: + return "degraded" + return "healthy" + + # ============================================================================= # Endpoints # ============================================================================= @@ -142,34 +243,28 @@ async def get_health() -> HealthResponse: results = await asyncio.gather( check_postgresql(), check_redis(), - check_ollama(), + check_ollama_provider_chain(), check_openclaw(), check_signoz(), ) + ollama_aggregate, ollama_details = results[2] components = { "api": ComponentHealth(status="up", latency_ms=0.0), "postgresql": results[0], "redis": results[1], - "ollama": results[2], + "ollama": ollama_aggregate, "openclaw": results[3], "signoz": results[4], } + components.update(ollama_details) - # Determine overall status - statuses = [c.status for c in components.values()] - down_count = statuses.count("down") - degraded_count = statuses.count("degraded") - - # Critical services: postgresql, redis - critical_down = components["postgresql"].status == "down" or components["redis"].status == "down" - - if critical_down or down_count >= 3: - overall_status: Literal["healthy", "degraded", "unhealthy"] = "unhealthy" - elif down_count >= 1 or degraded_count > 0: - overall_status = "degraded" - else: - overall_status = "healthy" + overall_status = _determine_overall_status(components) + ollama_route_order = [ + selection.provider_name + for selection in resolve_ollama_order("healthcheck") + if selection.url and selection.provider_name != "ollama_unconfigured" + ] logger.info( "health_check_complete", @@ -185,6 +280,7 @@ async def get_health() -> HealthResponse: mock_mode=settings.MOCK_MODE, timestamp=datetime.now(UTC), components=components, + ollama_route_order=ollama_route_order, ) diff --git a/apps/api/tests/test_health_ollama_provider_chain.py b/apps/api/tests/test_health_ollama_provider_chain.py new file mode 100644 index 00000000..0aaabece --- /dev/null +++ b/apps/api/tests/test_health_ollama_provider_chain.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +import pytest + +from src.api.v1 import health + + +def _set_ollama_settings(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(health.settings, "OLLAMA_URL", "http://gcp-a:11434") + monkeypatch.setattr(health.settings, "OLLAMA_SECONDARY_URL", "http://gcp-b:11434") + monkeypatch.setattr(health.settings, "OLLAMA_FALLBACK_URL", "http://local-111:11434") + + +@pytest.mark.asyncio +async def test_ollama_provider_chain_reports_fallback_when_primary_down(monkeypatch: pytest.MonkeyPatch) -> None: + _set_ollama_settings(monkeypatch) + + async def fake_http_check(name: str, _url: str, _path: str) -> health.ComponentHealth: + if name == "ollama_gcp_a": + return health.ComponentHealth(status="down", error="timeout") + if name == "ollama_gcp_b": + return health.ComponentHealth(status="up", latency_ms=42.0) + return health.ComponentHealth(status="up", latency_ms=9.0) + + monkeypatch.setattr(health, "_http_health_check", fake_http_check) + + aggregate, details = await health.check_ollama_provider_chain() + + assert aggregate.status == "degraded" + assert aggregate.latency_ms == 42.0 + assert aggregate.error == "primary unavailable; fallback active: ollama_gcp_b" + assert details["ollama_gcp_a"].status == "down" + assert details["ollama_gcp_b"].status == "up" + assert details["ollama_local"].status == "up" + + +@pytest.mark.asyncio +async def test_ollama_provider_chain_reports_all_endpoints_when_down(monkeypatch: pytest.MonkeyPatch) -> None: + _set_ollama_settings(monkeypatch) + + async def fake_http_check(name: str, _url: str, _path: str) -> health.ComponentHealth: + return health.ComponentHealth(status="down", error=f"{name} timeout") + + monkeypatch.setattr(health, "_http_health_check", fake_http_check) + + aggregate, details = await health.check_ollama_provider_chain() + + assert aggregate.status == "down" + assert "ollama_gcp_a=ollama_gcp_a timeout" in (aggregate.error or "") + assert "ollama_gcp_b=ollama_gcp_b timeout" in (aggregate.error or "") + assert "ollama_local=ollama_local timeout" in (aggregate.error or "") + assert set(details) == {"ollama_gcp_a", "ollama_gcp_b", "ollama_local"} + + +def test_overall_status_uses_aggregate_ollama_not_endpoint_details() -> None: + components = { + "api": health.ComponentHealth(status="up"), + "postgresql": health.ComponentHealth(status="up"), + "redis": health.ComponentHealth(status="up"), + "ollama": health.ComponentHealth(status="degraded"), + "openclaw": health.ComponentHealth(status="up"), + "signoz": health.ComponentHealth(status="up"), + "ollama_gcp_a": health.ComponentHealth(status="down"), + "ollama_gcp_b": health.ComponentHealth(status="up"), + "ollama_local": health.ComponentHealth(status="up"), + } + + assert health._determine_overall_status(components) == "degraded" diff --git a/apps/web/messages/en.json b/apps/web/messages/en.json index 0b80b3af..62d9b6c6 100644 --- a/apps/web/messages/en.json +++ b/apps/web/messages/en.json @@ -210,6 +210,13 @@ "viewAllAuth": "View All Authorizations", "viewAllReport": "View Full Report", "aiModelStatus": "AI Model Status", + "aiModelRoles": { + "primary": "Primary", + "backup": "Backup", + "local": "Local", + "agent": "Agent", + "provider": "Provider" + }, "loading": "Loading...", "trendUp": "↑{pct}%", "searchPlaceholderShort": "Search...", diff --git a/apps/web/messages/zh-TW.json b/apps/web/messages/zh-TW.json index 87a8c469..b24465d1 100644 --- a/apps/web/messages/zh-TW.json +++ b/apps/web/messages/zh-TW.json @@ -211,6 +211,13 @@ "viewAllAuth": "查看全部授權", "viewAllReport": "查看完整報表", "aiModelStatus": "AI 模型狀態", + "aiModelRoles": { + "primary": "主用", + "backup": "備援", + "local": "本機", + "agent": "Agent", + "provider": "Provider" + }, "loading": "載入中...", "trendUp": "↑{pct}%", "searchPlaceholderShort": "搜尋...", diff --git a/apps/web/src/components/shared/ai-model-status.tsx b/apps/web/src/components/shared/ai-model-status.tsx index 20df7047..2c078e9a 100644 --- a/apps/web/src/components/shared/ai-model-status.tsx +++ b/apps/web/src/components/shared/ai-model-status.tsx @@ -1,7 +1,7 @@ 'use client' /** - * AIModelStatus — AI 模型狀態 2×2 網格 + * AIModelStatus — AI provider route health grid * Sprint 5R S9: 設計稿 L531-545 * @created 2026-04-09 Claude Opus 4.6 Asia/Taipei */ @@ -13,33 +13,66 @@ const API_BASE = process.env.NEXT_PUBLIC_API_URL ?? '' interface ModelInfo { name: string - tag: string - healthy: boolean + role: 'primary' | 'backup' | 'local' | 'agent' | 'provider' + status: 'up' | 'down' | 'degraded' | 'unknown' + latencyMs?: number | null +} + +interface HealthComponent { + status?: 'up' | 'down' | 'degraded' + latency_ms?: number | null +} + +interface HealthResponse { + components?: Record + ollama_route_order?: string[] +} + +const PROVIDER_LABELS: Record = { + ollama_gcp_a: 'GCP-A', + ollama_gcp_b: 'GCP-B', + ollama_local: '111', + openclaw: 'OpenClaw', +} + +const PROVIDER_ROLES: Record = { + ollama_gcp_a: 'primary', + ollama_gcp_b: 'backup', + ollama_local: 'local', + openclaw: 'agent', +} + +function statusColor(status: ModelInfo['status']) { + if (status === 'up') return '#22C55E' + if (status === 'degraded') return '#F59E0B' + if (status === 'down') return '#cc2200' + return '#87867f' } export function AIModelStatus() { const t = useTranslations('dashboard') const [models, setModels] = useState([ - { name: 'OpenClaw Nemo', tag: 'local', healthy: false }, - { name: 'Ollama gemma3', tag: 'local', healthy: false }, - { name: 'Gemini Pro', tag: 'cloud', healthy: false }, - { name: 'NVIDIA NIM', tag: 'cloud', healthy: false }, + { name: 'GCP-A', role: 'primary', status: 'unknown' }, + { name: 'GCP-B', role: 'backup', status: 'unknown' }, + { name: '111', role: 'local', status: 'unknown' }, + { name: 'OpenClaw', role: 'agent', status: 'unknown' }, ]) useEffect(() => { fetch(`${API_BASE}/api/v1/health`) .then(r => r.ok ? r.json() : null) - .then(d => { + .then((d: HealthResponse | null) => { if (!d?.components) return - setModels(prev => prev.map(m => { - if (m.name.includes('OpenClaw') && d.components.openclaw) return { ...m, healthy: d.components.openclaw.status === 'up' } - if (m.name.includes('Ollama') && d.components.ollama) return { ...m, healthy: d.components.ollama.status === 'up' } - // 2026-04-09 Claude Sonnet 4.6: 移除假數據 — /api/v1/health 無 gemini/nvidia component - // cloud 模型狀態未知,保持 false,不顯示假綠燈 - if (m.name.includes('Gemini') && d.components.gemini) return { ...m, healthy: d.components.gemini.status === 'up' } - if (m.name.includes('NVIDIA') && d.components.nvidia) return { ...m, healthy: d.components.nvidia.status === 'up' } - return m - })) + const routeOrder = d.ollama_route_order?.length + ? d.ollama_route_order + : ['ollama_gcp_a', 'ollama_gcp_b', 'ollama_local'] + const providerKeys = [...routeOrder, 'openclaw'] + setModels(providerKeys.map(key => ({ + name: PROVIDER_LABELS[key] ?? key, + role: PROVIDER_ROLES[key] ?? 'provider', + status: d.components?.[key]?.status ?? 'unknown', + latencyMs: d.components?.[key]?.latency_ms, + }))) }) .catch(() => {}) }, []) @@ -62,9 +95,13 @@ export function AIModelStatus() { border: '0.5px solid #e0ddd4', borderRadius: 6, padding: '6px 8px', display: 'flex', alignItems: 'center', gap: 6, }}> - + {m.name} - {m.tag} + + {typeof m.latencyMs === 'number' + ? `${Math.round(m.latencyMs)}ms` + : t(`aiModelRoles.${m.role}` as never)} + ))} diff --git a/apps/web/src/lib/api-client.ts b/apps/web/src/lib/api-client.ts index 8d670426..628f8c7f 100644 --- a/apps/web/src/lib/api-client.ts +++ b/apps/web/src/lib/api-client.ts @@ -54,7 +54,12 @@ export const apiClient = { status: 'healthy' | 'degraded' | 'unhealthy' version: string timestamp: string - components: Record + components: Record + ollama_route_order?: string[] }>(res) },