fix(ollama): 修復「111 關機 → GCP 也斷」— generate() 三主機 retry 鏈 + lazy host resolve
All checks were successful
CD Pipeline / deploy (push) Successful in 9m14s
All checks were successful
CD Pipeline / deploy (push) Successful in 9m14s
統帥 2026-05-04 反饋:「111 關機後,兩台 GCP Ollama 也跟著斷線不可用」
根因(雙 bug 連動):
1. OllamaService.__init__ 凍結 self.host:
`self.host = host or resolve_ollama_host()` 是 instance 級凍結
容器啟動時若 GCP cold start 觸發 fallback 111 → self.host 永遠卡 111
即使 cache 過期,instance 不會重新 resolve
2. generate() 失敗無 retry 鏈:
原邏輯只 mark_unhealthy(self.host) + return failure
沒有「換下一台主機再試」邏輯 → 111 死則全死
修補(雙管齊下):
A. self.host 改 @property:每次存取走 resolve_ollama_host()
- caller 顯式指定(_explicit_host)才凍結
- 內部 _RESOLVE_TTL 120s cache 控制 HTTP probe 成本
B. generate() 三主機 retry 鏈:
for attempt in range(3):
current_host = self.host # property lazy resolve
if current_host in attempted_hosts → break(resolve 回傳已試過的主機時提前結束,避免對同一台重複重試;迴圈本身由 range(3) 限制次數)
post → success ? return : mark_unhealthy + retry
mark_unhealthy 自動清空 resolve cache,下次 self.host 取新主機
行為對比:
戰前:GCP cold start 卡 111 → 111 關機 → 全部失敗
戰後:GCP cold start 卡 111 → 111 關機 → mark_unhealthy(111) →
self.host 重 resolve → GCP Primary(已暖機)→ 成功
generate_embedding 的同類修補延後處理(其 caller 多走 explicit host 路徑,風險較低)
regression: 36 unit tests 全綠(test_ollama_resolve + test_ai_call_logger)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -167,11 +167,23 @@ class OllamaService:
|
||||
_CACHE_TTL = 60 # 快取 60 秒
|
||||
|
||||
def __init__(self, host: str = None, model: str = DEFAULT_MODEL):
|
||||
# V-New: 若未指定 host,使用 resolve_ollama_host() 自動選擇 GCP 或 111
|
||||
self.host = host or resolve_ollama_host()
|
||||
# HOTFIX 2026-05-04 統帥反饋「111 關機 → GCP 也斷」根因修補:
|
||||
# 原邏輯 self.host = resolve_ollama_host() 是 __init__ 凍結,容器啟動時若
|
||||
# GCP cold start 觸發 fallback 111,self.host 永遠卡 111。即使 cache 過期,
|
||||
# OllamaService instance 不會重新 resolve。
|
||||
# 修補:self._explicit_host 只在 caller 顯式指定時凍結;否則 self.host 改 property
|
||||
# 每次 access 走 resolve_ollama_host()(內部 120s cache 控制成本)。
|
||||
self._explicit_host = host # None 表示走 lazy resolve
|
||||
self.model = model
|
||||
self.available_models = []
|
||||
|
||||
@property
|
||||
def host(self) -> str:
|
||||
"""每次存取 lazy resolve(120s cache);caller 顯式指定 host 才凍結。"""
|
||||
if self._explicit_host:
|
||||
return self._explicit_host
|
||||
return resolve_ollama_host()
|
||||
|
||||
def check_connection(self) -> bool:
|
||||
"""檢查 Ollama 服務是否可用(含快取)"""
|
||||
import time
|
||||
@@ -208,81 +220,72 @@ class OllamaService:
|
||||
system_prompt: str = None, temperature: float = 0.7,
|
||||
timeout: int = None) -> OllamaResponse:
|
||||
"""
|
||||
生成文字
|
||||
生成文字 — 含三主機自動 retry(HOTFIX 2026-05-04)
|
||||
|
||||
Args:
|
||||
prompt: 使用者提示
|
||||
model: 模型名稱(預設使用 self.model)
|
||||
system_prompt: 系統提示
|
||||
temperature: 創意度 (0-1)
|
||||
timeout: 自訂超時時間(秒),預設使用 TIMEOUT
|
||||
|
||||
Returns:
|
||||
OllamaResponse
|
||||
失敗時自動嘗試下一台主機(最多 3 次:Primary → Secondary → 111),
|
||||
每次失敗 mark_unhealthy 觸發 resolve cache 失效。
|
||||
"""
|
||||
model = model or self.model
|
||||
request_timeout = timeout or TIMEOUT
|
||||
|
||||
try:
|
||||
payload = {
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"options": {
|
||||
"temperature": temperature
|
||||
}
|
||||
}
|
||||
payload = {
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"options": {"temperature": temperature},
|
||||
}
|
||||
if system_prompt:
|
||||
payload["system"] = system_prompt
|
||||
|
||||
if system_prompt:
|
||||
payload["system"] = system_prompt
|
||||
# HOTFIX 三主機 retry 鏈
|
||||
attempted_hosts: List[str] = []
|
||||
last_error: Optional[str] = None
|
||||
|
||||
logger.info(f"[Ollama] 開始生成,模型: {model},超時: {request_timeout}秒")
|
||||
for attempt in range(3):
|
||||
current_host = self.host # property 每次 lazy resolve
|
||||
if current_host in attempted_hosts:
|
||||
# 已試過同主機(cache 還沒過期),跳出避免無限迴圈
|
||||
break
|
||||
attempted_hosts.append(current_host)
|
||||
|
||||
response = requests.post(
|
||||
f"{self.host}/api/generate",
|
||||
json=payload,
|
||||
timeout=request_timeout
|
||||
)
|
||||
logger.info(f"[Ollama] 嘗試 #{attempt+1}/3 host={current_host} model={model} timeout={request_timeout}s")
|
||||
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
return OllamaResponse(
|
||||
success=True,
|
||||
content=data.get('response', ''),
|
||||
model=model,
|
||||
total_duration=data.get('total_duration', 0) / 1e9, # 轉換為秒
|
||||
host=self.host
|
||||
)
|
||||
else:
|
||||
return OllamaResponse(
|
||||
success=False,
|
||||
content='',
|
||||
model=model,
|
||||
error=f"HTTP {response.status_code}: {response.text}",
|
||||
host=self.host
|
||||
try:
|
||||
response = requests.post(
|
||||
f"{current_host}/api/generate",
|
||||
json=payload,
|
||||
timeout=request_timeout,
|
||||
)
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
return OllamaResponse(
|
||||
success=True,
|
||||
content=data.get('response', ''),
|
||||
model=model,
|
||||
total_duration=data.get('total_duration', 0) / 1e9,
|
||||
host=current_host,
|
||||
)
|
||||
# HTTP 非 200:標 unhealthy + 嘗試下一台
|
||||
last_error = f"HTTP {response.status_code}: {response.text[:200]}"
|
||||
logger.warning(f"[Ollama] {current_host} HTTP 失敗 → mark_unhealthy + retry: {last_error}")
|
||||
mark_unhealthy(current_host)
|
||||
except requests.Timeout:
|
||||
last_error = f"timeout ({request_timeout}s)"
|
||||
logger.warning(f"[Ollama] {current_host} timeout → mark_unhealthy + retry")
|
||||
mark_unhealthy(current_host)
|
||||
except Exception as e:
|
||||
last_error = f"{type(e).__name__}: {str(e)[:200]}"
|
||||
logger.error(f"[Ollama] {current_host} error → mark_unhealthy + retry: {last_error}")
|
||||
mark_unhealthy(current_host)
|
||||
|
||||
except requests.Timeout:
|
||||
# B4: timeout 視為主機不健康,標記 30s 內跳過下次 resolve
|
||||
mark_unhealthy(self.host)
|
||||
return OllamaResponse(
|
||||
success=False,
|
||||
content='',
|
||||
model=model,
|
||||
error="請求超時",
|
||||
host=self.host
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Ollama 生成錯誤: {e}")
|
||||
# B4: 連線錯誤類例外標 unhealthy;其他例外(JSON parse 等)仍標以策安全
|
||||
mark_unhealthy(self.host)
|
||||
return OllamaResponse(
|
||||
success=False,
|
||||
content='',
|
||||
model=model,
|
||||
error=str(e),
|
||||
host=self.host
|
||||
)
|
||||
# 三台都失敗
|
||||
return OllamaResponse(
|
||||
success=False,
|
||||
content='',
|
||||
model=model,
|
||||
error=f"all {len(attempted_hosts)} hosts failed; last={last_error}; tried={attempted_hosts}",
|
||||
host=attempted_hosts[-1] if attempted_hosts else 'unknown',
|
||||
)
|
||||
|
||||
def generate_sales_copy(self, product_name: str, trend_keywords: List[str] = None,
|
||||
style: str = "吸睛", upcoming_holidays: List[Dict] = None,
|
||||
|
||||
Reference in New Issue
Block a user