diff --git a/services/ollama_service.py b/services/ollama_service.py
index d9b51fa..8173dbc 100644
--- a/services/ollama_service.py
+++ b/services/ollama_service.py
@@ -167,11 +167,23 @@ class OllamaService:
     _CACHE_TTL = 60  # 快取 60 秒
 
     def __init__(self, host: str = None, model: str = DEFAULT_MODEL):
-        # V-New: 若未指定 host，使用 resolve_ollama_host() 自動選擇 GCP 或 111
-        self.host = host or resolve_ollama_host()
+        # HOTFIX 2026-05-04: root-cause fix for the report "host 111 shut down -> GCP also lost".
+        # Previously self.host = resolve_ollama_host() was frozen in __init__: if a GCP cold
+        # start at container boot triggered the fallback to 111, self.host stayed on 111
+        # forever, because an OllamaService instance never re-resolved even after cache expiry.
+        # Fix: self._explicit_host pins the host only when the caller passes one; otherwise
+        # self.host is a property calling resolve_ollama_host() per access (120s cache, per the author).
+        self._explicit_host = host  # None means the host is resolved lazily
         self.model = model
         self.available_models = []
 
+    @property
+    def host(self) -> str:
+        """Return the explicit host if one was given, else resolve_ollama_host() per access."""
+        if self._explicit_host:  # any falsy value (None, "") falls through to lazy resolve
+            return self._explicit_host
+        return resolve_ollama_host()
+
     def check_connection(self) -> bool:
         """檢查 Ollama 服務是否可用（含快取）"""
         import time
@@ -208,81 +220,72 @@ class OllamaService:
                  system_prompt: str = None, temperature: float = 0.7,
                  timeout: int = None) -> OllamaResponse:
         """
-        生成文字
+        生成文字 — 含三主機自動 retry（HOTFIX 2026-05-04）
 
-        Args:
-            prompt: 使用者提示
-            model: 模型名稱（預設使用 self.model）
-            system_prompt: 系統提示
-            temperature: 創意度 (0-1)
-            timeout: 自訂超時時間（秒），預設使用 TIMEOUT
-
-        Returns:
-            OllamaResponse
+        失敗時自動嘗試下一台主機（最多 3 次：Primary → Secondary → 111），
+        每次失敗 mark_unhealthy 觸發 resolve cache 失效。
         """
         model = model or self.model
         request_timeout = timeout or TIMEOUT
 
-        try:
-            payload = {
-                "model": model,
-                "prompt": prompt,
-                "stream": False,
-                "options": {
-                    "temperature": temperature
-                }
-            }
+        payload = {
+            "model": model,
+            "prompt": prompt,
+            "stream": False,
+            "options": {"temperature": temperature},
+        }
+        if system_prompt:
+            payload["system"] = system_prompt
 
-            if system_prompt:
-                payload["system"] = system_prompt
+        # HOTFIX 三主機 retry 鏈
+        attempted_hosts: List[str] = []
+        last_error: Optional[str] = None
 
-            logger.info(f"[Ollama] 開始生成，模型: {model}，超時: {request_timeout}秒")
+        for attempt in range(3):
+            current_host = self.host  # property 每次 lazy resolve
+            if current_host in attempted_hosts:
+                # 已試過同主機（cache 還沒過期），跳出避免無限迴圈
+                break
+            attempted_hosts.append(current_host)
 
-            response = requests.post(
-                f"{self.host}/api/generate",
-                json=payload,
-                timeout=request_timeout
-            )
+            logger.info(f"[Ollama] 嘗試 #{attempt+1}/3 host={current_host} model={model} timeout={request_timeout}s")
 
-            if response.status_code == 200:
-                data = response.json()
-                return OllamaResponse(
-                    success=True,
-                    content=data.get('response', ''),
-                    model=model,
-                    total_duration=data.get('total_duration', 0) / 1e9,  # 轉換為秒
-                    host=self.host
-                )
-            else:
-                return OllamaResponse(
-                    success=False,
-                    content='',
-                    model=model,
-                    error=f"HTTP {response.status_code}: {response.text}",
-                    host=self.host
+            try:
+                response = requests.post(
+                    f"{current_host}/api/generate",
+                    json=payload,
+                    timeout=request_timeout,
                 )
+                if response.status_code == 200:
+                    data = response.json()
+                    return OllamaResponse(
+                        success=True,
+                        content=data.get('response', ''),
+                        model=model,
+                        total_duration=data.get('total_duration', 0) / 1e9,
+                        host=current_host,
+                    )
+                # HTTP 非 200：標 unhealthy + 嘗試下一台
+                last_error = f"HTTP {response.status_code}: {response.text[:200]}"
+                logger.warning(f"[Ollama] {current_host} HTTP 失敗 → mark_unhealthy + retry: {last_error}")
+                mark_unhealthy(current_host)
+            except requests.Timeout:
+                last_error = f"timeout ({request_timeout}s)"
+                logger.warning(f"[Ollama] {current_host} timeout → mark_unhealthy + retry")
+                mark_unhealthy(current_host)
+            except Exception as e:
+                last_error = f"{type(e).__name__}: {str(e)[:200]}"
+                logger.error(f"[Ollama] {current_host} error → mark_unhealthy + retry: {last_error}")
+                mark_unhealthy(current_host)
 
-        except requests.Timeout:
-            # B4: timeout 視為主機不健康，標記 30s 內跳過下次 resolve
-            mark_unhealthy(self.host)
-            return OllamaResponse(
-                success=False,
-                content='',
-                model=model,
-                error="請求超時",
-                host=self.host
-            )
-        except Exception as e:
-            logger.error(f"Ollama 生成錯誤: {e}")
-            # B4: 連線錯誤類例外標 unhealthy；其他例外（JSON parse 等）仍標以策安全
-            mark_unhealthy(self.host)
-            return OllamaResponse(
-                success=False,
-                content='',
-                model=model,
-                error=str(e),
-                host=self.host
-            )
+        # 三台都失敗
+        return OllamaResponse(
+            success=False,
+            content='',
+            model=model,
+            error=f"all {len(attempted_hosts)} hosts failed; last={last_error}; tried={attempted_hosts}",
+            host=attempted_hosts[-1] if attempted_hosts else 'unknown',
+        )
 
     def generate_sales_copy(self, product_name: str, trend_keywords: List[str] = None,
                            style: str = "吸睛", upcoming_holidays: List[Dict] = None,