fix(ollama): 解「111 關機 → GCP 也斷」三主機 retry 鏈 + lazy host

統帥 2026-05-04 反饋：「111 關機後，兩台 GCP Ollama 也跟著斷線不可用」

根因（雙 bug 連動）：
1. OllamaService.__init__ 凍結 self.host：
   `self.host = host or resolve_ollama_host()` 是 instance 級凍結。
   容器啟動時若 GCP cold start 觸發 fallback 111 → self.host 永遠卡 111；
   即使 cache 過期，instance 不會重新 resolve。
2. generate() 失敗無 retry 鏈：
   原邏輯只 mark_unhealthy(self.host) + return failure，
   沒有「換下一台主機再試」邏輯 → 111 死則全死。

修補（雙管齊下）：
A. self.host 改 @property：每次存取走 resolve_ollama_host()
   - caller 顯式指定（_explicit_host）才凍結
   - 內部 _RESOLVE_TTL 120s cache 控制 HTTP probe 成本
B. generate() 三主機 retry 鏈：
     for attempt in range(3):
         current_host = self.host  # property lazy resolve
         if attempted → break (避免無限迴圈)
         post → success ? return : mark_unhealthy + retry
   mark_unhealthy 自動清空 resolve cache，下次 self.host 取新主機。

行為對比：
- 戰前：GCP cold start 卡 111 → 111 關機 → 全部失敗
- 戰後：GCP cold start 卡 111 → 111 關機 → mark_unhealthy(111) → self.host 重 resolve → GCP Primary（已暖機）→ 成功

generate_embedding 同類修補延後（caller 多走 explicit host 路徑，風險較低）。

regression: 36 unit tests 全綠（test_ollama_resolve + test_ai_call_logger）

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 08:42:52 +08:00
parent c29ce83653
commit e862a9040c
1 changed files with 69 additions and 66 deletions
--- a/services/ollama_service.py
+++ b/services/ollama_service.py
@@ -167,11 +167,23 @@ class OllamaService:
    _CACHE_TTL = 60  # 快取 60 秒

    def __init__(self, host: str = None, model: str = DEFAULT_MODEL):
-        # V-New: 若未指定 host，使用 resolve_ollama_host() 自動選擇 GCP 或 111
-        self.host = host or resolve_ollama_host()
+        # HOTFIX 2026-05-04 統帥反饋「111 關機 → GCP 也斷」根因修補：
+        # 原邏輯 self.host = resolve_ollama_host() 是 __init__ 凍結，容器啟動時若
+        # GCP cold start 觸發 fallback 111，self.host 永遠卡 111。即使 cache 過期，
+        # OllamaService instance 不會重新 resolve。
+        # 修補：self._explicit_host 只在 caller 顯式指定時凍結；否則 self.host 改 property
+        # 每次 access 走 resolve_ollama_host()（內部 120s cache 控制成本）。
+        self._explicit_host = host  # None 表示走 lazy resolve
        self.model = model
        self.available_models = []

+    @property
+    def host(self) -> str:
+        """每次存取 lazy resolve（120s cache）；caller 顯式指定 host 才凍結。"""
+        if self._explicit_host:
+            return self._explicit_host
+        return resolve_ollama_host()
+
    def check_connection(self) -> bool:
        """檢查 Ollama 服務是否可用（含快取）"""
        import time
@@ -208,81 +220,72 @@ class OllamaService:
                 system_prompt: str = None, temperature: float = 0.7,
                 timeout: int = None) -> OllamaResponse:
        """
-        生成文字
+        生成文字 — 含三主機自動 retry（HOTFIX 2026-05-04）

-        Args:
-            prompt: 使用者提示
-            model: 模型名稱（預設使用 self.model）
-            system_prompt: 系統提示
-            temperature: 創意度 (0-1)
-            timeout: 自訂超時時間（秒），預設使用 TIMEOUT
-
-        Returns:
-            OllamaResponse
+        失敗時自動嘗試下一台主機（最多 3 次：Primary → Secondary → 111），
+        每次失敗 mark_unhealthy 觸發 resolve cache 失效。
        """
        model = model or self.model
        request_timeout = timeout or TIMEOUT

-        try:
-            payload = {
-                "model": model,
-                "prompt": prompt,
-                "stream": False,
-                "options": {
-                    "temperature": temperature
-                }
-            }
+        payload = {
+            "model": model,
+            "prompt": prompt,
+            "stream": False,
+            "options": {"temperature": temperature},
+        }
+        if system_prompt:
+            payload["system"] = system_prompt

-            if system_prompt:
-                payload["system"] = system_prompt
+        # HOTFIX 三主機 retry 鏈
+        attempted_hosts: List[str] = []
+        last_error: Optional[str] = None

-            logger.info(f"[Ollama] 開始生成，模型: {model}，超時: {request_timeout}秒")
+        for attempt in range(3):
+            current_host = self.host  # property 每次 lazy resolve
+            if current_host in attempted_hosts:
+                # 已試過同主機（cache 還沒過期），跳出避免無限迴圈
+                break
+            attempted_hosts.append(current_host)

-            response = requests.post(
-                f"{self.host}/api/generate",
-                json=payload,
-                timeout=request_timeout
-            )
+            logger.info(f"[Ollama] 嘗試 #{attempt+1}/3 host={current_host} model={model} timeout={request_timeout}s")

-            if response.status_code == 200:
-                data = response.json()
-                return OllamaResponse(
-                    success=True,
-                    content=data.get('response', ''),
-                    model=model,
-                    total_duration=data.get('total_duration', 0) / 1e9,  # 轉換為秒
-                    host=self.host
-                )
-            else:
-                return OllamaResponse(
-                    success=False,
-                    content='',
-                    model=model,
-                    error=f"HTTP {response.status_code}: {response.text}",
-                    host=self.host
+            try:
+                response = requests.post(
+                    f"{current_host}/api/generate",
+                    json=payload,
+                    timeout=request_timeout,
                )
+                if response.status_code == 200:
+                    data = response.json()
+                    return OllamaResponse(
+                        success=True,
+                        content=data.get('response', ''),
+                        model=model,
+                        total_duration=data.get('total_duration', 0) / 1e9,
+                        host=current_host,
+                    )
+                # HTTP 非 200：標 unhealthy + 嘗試下一台
+                last_error = f"HTTP {response.status_code}: {response.text[:200]}"
+                logger.warning(f"[Ollama] {current_host} HTTP 失敗 → mark_unhealthy + retry: {last_error}")
+                mark_unhealthy(current_host)
+            except requests.Timeout:
+                last_error = f"timeout ({request_timeout}s)"
+                logger.warning(f"[Ollama] {current_host} timeout → mark_unhealthy + retry")
+                mark_unhealthy(current_host)
+            except Exception as e:
+                last_error = f"{type(e).__name__}: {str(e)[:200]}"
+                logger.error(f"[Ollama] {current_host} error → mark_unhealthy + retry: {last_error}")
+                mark_unhealthy(current_host)

-        except requests.Timeout:
-            # B4: timeout 視為主機不健康，標記 30s 內跳過下次 resolve
-            mark_unhealthy(self.host)
-            return OllamaResponse(
-                success=False,
-                content='',
-                model=model,
-                error="請求超時",
-                host=self.host
-            )
-        except Exception as e:
-            logger.error(f"Ollama 生成錯誤: {e}")
-            # B4: 連線錯誤類例外標 unhealthy；其他例外（JSON parse 等）仍標以策安全
-            mark_unhealthy(self.host)
-            return OllamaResponse(
-                success=False,
-                content='',
-                model=model,
-                error=str(e),
-                host=self.host
-            )
+        # 三台都失敗
+        return OllamaResponse(
+            success=False,
+            content='',
+            model=model,
+            error=f"all {len(attempted_hosts)} hosts failed; last={last_error}; tried={attempted_hosts}",
+            host=attempted_hosts[-1] if attempted_hosts else 'unknown',
+        )

    def generate_sales_copy(self, product_name: str, trend_keywords: List[str] = None,
                           style: str = "吸睛", upcoming_holidays: List[Dict] = None,