diff --git a/services/ollama_service.py b/services/ollama_service.py
index d9b51fa..8173dbc 100644
--- a/services/ollama_service.py
+++ b/services/ollama_service.py
@@ -167,11 +167,37 @@ class OllamaService:
     _CACHE_TTL = 60  # 快取 60 秒
 
     def __init__(self, host: str = None, model: str = DEFAULT_MODEL):
-        # V-New: 若未指定 host,使用 resolve_ollama_host() 自動選擇 GCP 或 111
-        self.host = host or resolve_ollama_host()
+        # HOTFIX 2026-05-04 (root cause of "111 powered off -> GCP fails too"):
+        # the old code resolved the host once in __init__, so a fallback to 111
+        # picked during a GCP cold start stayed pinned for the lifetime of the
+        # instance, even after the resolver's cache had expired.
+        # Fix: only a host passed explicitly by the caller is pinned; otherwise
+        # `host` is a property that calls resolve_ollama_host() on every access
+        # (the resolver's own cache, 120s per the hotfix notes, bounds the cost).
+        self._explicit_host = host  # None -> resolve lazily on each access
         self.model = model
         self.available_models = []
 
+    @property
+    def host(self) -> str:
+        """Return the Ollama base URL to use for the next request.
+
+        An explicitly configured host always wins; otherwise the host is
+        re-resolved through resolve_ollama_host() on every access.
+        """
+        if self._explicit_host:
+            return self._explicit_host
+        return resolve_ollama_host()
+
+    @host.setter
+    def host(self, value: str) -> None:
+        """Pin the host; assigning a falsy value restores lazy resolution.
+
+        Kept so that code written while `host` was a plain attribute
+        (`service.host = ...`) keeps working instead of raising AttributeError.
+        """
+        self._explicit_host = value
+
     def check_connection(self) -> bool:
         """檢查 Ollama 服務是否可用(含快取)"""
         import time
@@ -208,81 +234,85 @@ class OllamaService:
                  system_prompt: str = None, temperature: float = 0.7,
                  timeout: int = None) -> OllamaResponse:
         """
-        生成文字
+        Generate text, failing over across Ollama hosts (HOTFIX 2026-05-04).
 
-        Args:
-            prompt: 使用者提示
-            model: 模型名稱(預設使用 self.model)
-            system_prompt: 系統提示
-            temperature: 創意度 (0-1)
-            timeout: 自訂超時時間(秒),預設使用 TIMEOUT
-
-        Returns:
-            OllamaResponse
+        On failure the next host is tried, for at most 3 attempts (intended
+        order per the hotfix notes: Primary -> Secondary -> 111). Each failure
+        calls mark_unhealthy() so that the next host lookup can move on.
+
+        Args:
+            prompt: User prompt.
+            model: Model name; defaults to self.model.
+            system_prompt: Optional system prompt.
+            temperature: Creativity (0-1).
+            timeout: Per-attempt timeout in seconds; defaults to TIMEOUT.
+
+        Returns:
+            OllamaResponse; on failure, error names the hosts tried and the
+            last error seen.
         """
         model = model or self.model
         request_timeout = timeout or TIMEOUT
 
-        try:
-            payload = {
-                "model": model,
-                "prompt": prompt,
-                "stream": False,
-                "options": {
-                    "temperature": temperature
-                }
-            }
+        payload = {
+            "model": model,
+            "prompt": prompt,
+            "stream": False,
+            "options": {"temperature": temperature},
+        }
+        if system_prompt:
+            payload["system"] = system_prompt
 
-            if system_prompt:
-                payload["system"] = system_prompt
+        # HOTFIX: fail-over chain across the (up to three) Ollama hosts.
+        attempted_hosts: List[str] = []
+        last_error = None  # description of the most recent failure, if any
 
-            logger.info(f"[Ollama] 開始生成,模型: {model},超時: {request_timeout}秒")
+        for attempt in range(3):
+            current_host = self.host  # property: re-resolved on every access
+            if current_host in attempted_hosts:
+                # The resolver handed back a host we already tried (its cache
+                # has not moved on yet); stop rather than retry the same host.
+                break
+            attempted_hosts.append(current_host)
 
-            response = requests.post(
-                f"{self.host}/api/generate",
-                json=payload,
-                timeout=request_timeout
-            )
+            logger.info(f"[Ollama] 嘗試 #{attempt+1}/3 host={current_host} model={model} timeout={request_timeout}s")
 
-            if response.status_code == 200:
-                data = response.json()
-                return OllamaResponse(
-                    success=True,
-                    content=data.get('response', ''),
-                    model=model,
-                    total_duration=data.get('total_duration', 0) / 1e9,  # 轉換為秒
-                    host=self.host
-                )
-            else:
-                return OllamaResponse(
-                    success=False,
-                    content='',
-                    model=model,
-                    error=f"HTTP {response.status_code}: {response.text}",
-                    host=self.host
+            try:
+                response = requests.post(
+                    f"{current_host}/api/generate",
+                    json=payload,
+                    timeout=request_timeout,
                 )
+                if response.status_code == 200:
+                    data = response.json()
+                    return OllamaResponse(
+                        success=True,
+                        content=data.get('response', ''),
+                        model=model,
+                        total_duration=data.get('total_duration', 0) / 1e9,  # convert to seconds
+                        host=current_host,
+                    )
+                # Non-200 response: mark the host unhealthy and try the next one.
+                last_error = f"HTTP {response.status_code}: {response.text[:200]}"
+                logger.warning(f"[Ollama] {current_host} HTTP 失敗 → mark_unhealthy + retry: {last_error}")
+                mark_unhealthy(current_host)
+            except requests.Timeout:
+                last_error = f"timeout ({request_timeout}s)"
+                logger.warning(f"[Ollama] {current_host} timeout → mark_unhealthy + retry")
+                mark_unhealthy(current_host)
+            except Exception as e:
+                last_error = f"{type(e).__name__}: {str(e)[:200]}"
+                logger.error(f"[Ollama] {current_host} error → mark_unhealthy + retry: {last_error}")
+                mark_unhealthy(current_host)
 
-        except requests.Timeout:
-            # B4: timeout 視為主機不健康,標記 30s 內跳過下次 resolve
-            mark_unhealthy(self.host)
-            return OllamaResponse(
-                success=False,
-                content='',
-                model=model,
-                error="請求超時",
-                host=self.host
-            )
-        except Exception as e:
-            logger.error(f"Ollama 生成錯誤: {e}")
-            # B4: 連線錯誤類例外標 unhealthy;其他例外(JSON parse 等)仍標以策安全
-            mark_unhealthy(self.host)
-            return OllamaResponse(
-                success=False,
-                content='',
-                model=model,
-                error=str(e),
-                host=self.host
-            )
+        # Every attempted host failed.
+        return OllamaResponse(
+            success=False,
+            content='',
+            model=model,
+            error=f"all {len(attempted_hosts)} hosts failed; last={last_error}; tried={attempted_hosts}",
+            host=attempted_hosts[-1] if attempted_hosts else 'unknown',
+        )
 
     def generate_sales_copy(self, product_name: str, trend_keywords: List[str] = None,
                             style: str = "吸睛", upcoming_holidays: List[Dict] = None,