diff --git a/apps/api/src/routes/health.py b/apps/api/src/routes/health.py index 070c246f..0fca2930 100644 --- a/apps/api/src/routes/health.py +++ b/apps/api/src/routes/health.py @@ -130,30 +130,49 @@ async def check_redis() -> Literal["up", "down"]: return "down" -async def check_ollama() -> Literal["up", "down"]: +async def check_ollama() -> Literal["up", "down", "degraded"]: """ - Check Ollama service via /api/tags endpoint + Check Ollama 三層容災狀態(primary → secondary → tertiary) 統帥鐵律: 真實 HTTP 請求,禁止假數據 + 2026-05-04 ogt: 改為檢查三台(OLLAMA_URL / SECONDARY / FALLBACK), + primary up → "up";primary down 但 secondary/tertiary 任一台 up → "degraded"; + 全部 down → "down"。反映 K8s 實際可用的 Ollama 路由狀態。 """ - try: - async with httpx.AsyncClient(timeout=HEALTH_CHECK_TIMEOUT) as client: - response = await client.get(f"{settings.OLLAMA_URL}/api/tags") - if response.status_code == 200: - logger.debug("health_check_ollama", status="up") - return "up" - else: - logger.warning( - "health_check_ollama", - status="down", - status_code=response.status_code, - ) - return "down" - except httpx.TimeoutException: - logger.warning("health_check_ollama", status="down", reason="timeout") - return "down" - except Exception as e: - logger.warning("health_check_ollama", status="down", error=str(e)) + urls = [ + (settings.OLLAMA_URL, "primary"), + (getattr(settings, "OLLAMA_SECONDARY_URL", ""), "secondary"), + (getattr(settings, "OLLAMA_FALLBACK_URL", ""), "tertiary"), + ] + any_up = False + primary_up = False + + async with httpx.AsyncClient(timeout=HEALTH_CHECK_TIMEOUT) as client: + for i, (url, label) in enumerate(urls): + if not url: + continue + try: + response = await client.get(f"{url}/api/tags") + if response.status_code == 200: + any_up = True + if i == 0: + primary_up = True + logger.debug("health_check_ollama", status="up", tier=label, url=url) + break # 找到第一台可用就停 + else: + logger.debug("health_check_ollama_tier", tier=label, status_code=response.status_code) + except (httpx.TimeoutException, httpx.ConnectError, 
httpx.NetworkError): + logger.debug("health_check_ollama_tier", tier=label, status="unreachable") + except Exception as e: + logger.warning("health_check_ollama_tier", tier=label, error=str(e)) + + if primary_up: + return "up" + elif any_up: + logger.warning("health_check_ollama", status="degraded", reason="primary down, fallback active") + return "degraded" + else: + logger.warning("health_check_ollama", status="down", reason="all tiers unreachable") return "down" diff --git a/apps/api/src/services/ollama_failover_manager.py b/apps/api/src/services/ollama_failover_manager.py index fbd00ab1..5c1a0575 100644 --- a/apps/api/src/services/ollama_failover_manager.py +++ b/apps/api/src/services/ollama_failover_manager.py @@ -183,8 +183,12 @@ class OllamaFailoverManager: context: dict | None = None, ) -> OllamaRoutingResult: """ - 三層 Ollama 容災路由(2026-05-03 統帥新令,ADR-110): - GCP-A → GCP-B → Local(111) → Gemini → Nemotron → Claude + 三層 Ollama 容災路由(ADR-110 修正版 2026-05-04): + Primary(OLLAMA_URL) → Secondary(OLLAMA_SECONDARY_URL) → Tertiary(OLLAMA_FALLBACK_URL) + → Gemini → Nemotron → Claude + + 2026-05-04 ogt: URL 優先序已更新(ConfigMap),primary = 111(K8s 內網可達)。 + GCP-A/B 為 secondary/tertiary,待 nginx proxy 架設後再升回 primary。 Args: task_type: 任務類型(預留,目前未影響路由邏輯) @@ -193,16 +197,17 @@ class OllamaFailoverManager: Returns: OllamaRoutingResult """ - # 2026-05-03 ogt: GCP 三層容災(ADR-110),GCP-A → GCP-B → Local → Gemini - url_gcp_a = self._settings.OLLAMA_URL # 34.143.170.20 - url_gcp_b = self._settings.OLLAMA_SECONDARY_URL # 34.21.145.224 - url_local = self._settings.OLLAMA_FALLBACK_URL # 192.168.0.111 + # 2026-05-04 ogt: 改用語意中性名稱 primary/secondary/tertiary, + # 避免 gcp_a/gcp_b/local 與實際 URL 脫鉤造成 log 誤導 + url_primary = self._settings.OLLAMA_URL # 當前: 192.168.0.111 + url_secondary = self._settings.OLLAMA_SECONDARY_URL # 當前: 34.143.170.20 (GCP-A) + url_tertiary = self._settings.OLLAMA_FALLBACK_URL # 當前: 34.21.145.224 (GCP-B) # 並行檢查三台 Ollama 主機(asyncio.gather 提升效率) results_raw = await 
asyncio.gather( - self._monitor.check(url_gcp_a), - self._monitor.check(url_gcp_b), - self._monitor.check(url_local), + self._monitor.check(url_primary), + self._monitor.check(url_secondary), + self._monitor.check(url_tertiary), return_exceptions=True, ) @@ -211,17 +216,17 @@ class OllamaFailoverManager: return HealthReport(status=HealthStatus.OFFLINE, reason=f"{label} check error: {r}") return r - health_gcp_a = _to_health(results_raw[0], "GCP-A") - health_gcp_b = _to_health(results_raw[1], "GCP-B") - health_local = _to_health(results_raw[2], "Local") + health_gcp_a = _to_health(results_raw[0], f"primary({url_primary})") + health_gcp_b = _to_health(results_raw[1], f"secondary({url_secondary})") + health_local = _to_health(results_raw[2], f"tertiary({url_tertiary})") result = self._decide_route( health_gcp_a=health_gcp_a, health_gcp_b=health_gcp_b, health_local=health_local, - url_gcp_a=url_gcp_a, - url_gcp_b=url_gcp_b, - url_local=url_local, + url_gcp_a=url_primary, + url_gcp_b=url_secondary, + url_local=url_tertiary, ) # Gemini 帳單熔斷(quota gate) @@ -316,36 +321,46 @@ class OllamaFailoverManager: now_ts = datetime.datetime.now(TAIPEI_TZ).isoformat() - # GCP-A 健康 → 主 GCP-A,Gemini 永遠在 Ollama 鏈最後(與舊 111 行為一致) + # 用實際 URL 取最後一段作為 log 標識(IP 或 hostname) + def _short(url: str) -> str: + from urllib.parse import urlparse + return urlparse(url).hostname or url + + lbl_p = _short(url_gcp_a) # primary label + lbl_s = _short(url_gcp_b) # secondary label + lbl_t = _short(url_local) # tertiary label + + # Primary HEALTHY → 使用 primary if health_gcp_a.status == HealthStatus.HEALTHY: return OllamaRoutingResult( primary=ep_gcp_a, fallback_chain=[ep_gcp_b, ep_local, _GEMINI_ENDPOINT], - routing_reason="GCP-A HEALTHY → primary GCP-A", + routing_reason=f"primary({lbl_p}) HEALTHY", health_gcp_a=health_gcp_a, health_gcp_b=health_gcp_b, health_local=health_local, ) - # GCP-A 不健康,GCP-B 健康 → 切 GCP-B,Gemini 在鏈尾 + # Primary 不健康,Secondary HEALTHY → 切 secondary if health_gcp_b.status == 
HealthStatus.HEALTHY: return OllamaRoutingResult( primary=ep_gcp_b, fallback_chain=[ep_local, _GEMINI_ENDPOINT], - routing_reason=f"GCP-A {health_gcp_a.status.value} → 切 GCP-B at {now_ts}", + routing_reason=f"primary({lbl_p}) {health_gcp_a.status.value} → secondary({lbl_s}) at {now_ts}", health_gcp_a=health_gcp_a, health_gcp_b=health_gcp_b, health_local=health_local, ) - # GCP-A + GCP-B 都不健康,Local 健康 → 切 Local(111) + # Primary + Secondary 不健康,Tertiary HEALTHY → 切 tertiary if health_local.status == HealthStatus.HEALTHY: return OllamaRoutingResult( primary=ep_local, fallback_chain=[_GEMINI_ENDPOINT], routing_reason=( - f"GCP-A {health_gcp_a.status.value} + GCP-B {health_gcp_b.status.value}" - f" → 切 Local(111) at {now_ts}" + f"primary({lbl_p}) {health_gcp_a.status.value}" + f" + secondary({lbl_s}) {health_gcp_b.status.value}" + f" → tertiary({lbl_t}) at {now_ts}" ), health_gcp_a=health_gcp_a, health_gcp_b=health_gcp_b, @@ -353,14 +368,11 @@ class OllamaFailoverManager: ) # 2026-05-04 ogt: SLOW 容災備援(外網同時抖動時,SLOW Ollama 仍優於 Gemini quota 耗盡) - # 原設計:三層全部非 HEALTHY 直接切 Gemini - # 問題:111 關機 + GCP 雙外網抖動 → 三節點同時 SLOW → 誤飛 Gemini → 燒 quota - # 修法:SLOW 節點視為可用,按優先序選最佳 SLOW 節點 if health_gcp_a.status == HealthStatus.SLOW: return OllamaRoutingResult( primary=ep_gcp_a, fallback_chain=[ep_gcp_b, ep_local, _GEMINI_ENDPOINT], - routing_reason=f"GCP-A SLOW(降級可用)→ primary GCP-A at {now_ts}", + routing_reason=f"primary({lbl_p}) SLOW(降級可用)at {now_ts}", health_gcp_a=health_gcp_a, health_gcp_b=health_gcp_b, health_local=health_local, @@ -369,7 +381,10 @@ class OllamaFailoverManager: return OllamaRoutingResult( primary=ep_gcp_b, fallback_chain=[ep_local, _GEMINI_ENDPOINT], - routing_reason=f"GCP-A {health_gcp_a.status.value} + GCP-B SLOW(降級可用)→ 切 GCP-B at {now_ts}", + routing_reason=( + f"primary({lbl_p}) {health_gcp_a.status.value}" + f" + secondary({lbl_s}) SLOW(降級可用)at {now_ts}" + ), health_gcp_a=health_gcp_a, health_gcp_b=health_gcp_b, health_local=health_local, @@ -379,8 +394,9 @@ class 
OllamaFailoverManager: primary=ep_local, fallback_chain=[_GEMINI_ENDPOINT], routing_reason=( - f"GCP-A {health_gcp_a.status.value} + GCP-B {health_gcp_b.status.value}" - f" + Local SLOW(降級可用)→ 切 Local(111) at {now_ts}" + f"primary({lbl_p}) {health_gcp_a.status.value}" + f" + secondary({lbl_s}) {health_gcp_b.status.value}" + f" + tertiary({lbl_t}) SLOW(降級可用)at {now_ts}" ), health_gcp_a=health_gcp_a, health_gcp_b=health_gcp_b, @@ -392,9 +408,9 @@ class OllamaFailoverManager: primary=_GEMINI_ENDPOINT, fallback_chain=[_NEMOTRON_ENDPOINT, _CLAUDE_ENDPOINT], routing_reason=( - f"所有 Ollama 不健康(GCP-A {health_gcp_a.status.value}," - f"GCP-B {health_gcp_b.status.value}," - f"Local {health_local.status.value})→ 切 Gemini at {now_ts}" + f"所有 Ollama 不健康(primary({lbl_p}) {health_gcp_a.status.value}," + f"secondary({lbl_s}) {health_gcp_b.status.value}," + f"tertiary({lbl_t}) {health_local.status.value})→ 切 Gemini at {now_ts}" ), health_gcp_a=health_gcp_a, health_gcp_b=health_gcp_b, @@ -606,14 +622,14 @@ class OllamaFailoverManager: fallback_chain_str = " → ".join( p.provider_name for p in result.fallback_chain ) - # 計算故障主機描述(哪層 Ollama 不健康) + # 計算故障主機描述(哪層 Ollama 不健康,用實際 URL 不用硬編碼標籤) _failed = [] if result.health_gcp_a.status != HealthStatus.HEALTHY: - _failed.append(f"GCP-A {self._settings.OLLAMA_URL}") + _failed.append(self._settings.OLLAMA_URL) if result.health_gcp_b and result.health_gcp_b.status != HealthStatus.HEALTHY: - _failed.append(f"GCP-B {self._settings.OLLAMA_SECONDARY_URL}") + _failed.append(self._settings.OLLAMA_SECONDARY_URL or "secondary") if result.health_local and result.health_local.status != HealthStatus.HEALTHY: - _failed.append(f"Local {self._settings.OLLAMA_FALLBACK_URL}") + _failed.append(self._settings.OLLAMA_FALLBACK_URL or "tertiary") failed_host = " + ".join(_failed) if _failed else "Ollama" alerter = get_failover_alerter() await alerter.alert_failover({ diff --git a/k8s/awoooi-prod/04-configmap.yaml b/k8s/awoooi-prod/04-configmap.yaml index 
ca03292a..c2a3f352 100644 --- a/k8s/awoooi-prod/04-configmap.yaml +++ b/k8s/awoooi-prod/04-configmap.yaml @@ -18,9 +18,13 @@ data: # 2026-04-16 ogt + Claude Sonnet 4.6: 改指向 111(GPU 機,RTX) # 188 = CPU-only Ollama,推理極慢(>60s);111 有 GPU,avg 10s # 2026-05-03 ogt: ADR-110 Ollama GCP 三層容災(GCP-A → GCP-B → Local HDD) - OLLAMA_URL: "http://34.143.170.20:11434" - OLLAMA_SECONDARY_URL: "http://34.21.145.224:11434" - OLLAMA_FALLBACK_URL: "http://192.168.0.111:11434" + # 2026-05-04 ogt: ADR-110 修正 — K8s pods → GCP-A/B:11434 = connection refused(外網路由不通) + # K8s 可達:111(內網),不可達:GCP-A/B(外網 port 11434 被擋) + # 修法:111 升為 primary;GCP-A/B 保留為 secondary/tertiary,待 nginx proxy 架設後恢復可用 + # 長期目標:在 110 架設 nginx proxy 轉發 GCP-A/B,ConfigMap 改指向 110:11435 / 110:11436 + OLLAMA_URL: "http://192.168.0.111:11434" + OLLAMA_SECONDARY_URL: "http://34.143.170.20:11434" + OLLAMA_FALLBACK_URL: "http://34.21.145.224:11434" OPENCLAW_URL: "http://192.168.0.188:8088" KALI_SCANNER_URL: "http://192.168.0.112:8080" SIGNOZ_URL: "http://192.168.0.188:3301"