From 6572d521bade6eff091f27a9caa0b110616c3004 Mon Sep 17 00:00:00 2001 From: OoO Date: Mon, 4 May 2026 08:56:38 +0800 Subject: [PATCH] =?UTF-8?q?fix(embed):=20generate=5Fembedding=20=E4=B8=89?= =?UTF-8?q?=E4=B8=BB=E6=A9=9F=20retry=20=E2=80=94=20=E4=BF=AE=E5=90=8C?= =?UTF-8?q?=E9=A1=9E=E3=80=8C111=20=E6=AD=BB=E5=89=87=E5=85=A8=E6=AD=BB?= =?UTF-8?q?=E3=80=8Dbug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 承前 commit e862a90(generate retry)的同類修補: generate_embedding 之前邏輯: target_host = host or env or resolve # 一次解析 try: post → mark_unhealthy + return [] # 失敗無 retry 修補後: caller 顯式 host → 凍結不 retry(向下相容) caller 走 lazy → 三主機 retry 鏈: 每次以 EMBEDDING_HOST(若有設定)或 resolve_ollama_host() 取得主機 失敗 mark_unhealthy + cache 失效 + 取新主機 最多 3 次;若解析到已嘗試過的主機即提前結束(避免同主機無限迴圈;因此設定 EMBEDDING_HOST 時實際只會嘗試一次) 影響範圍:KM embedding worker / RAG query embedding / openclaw_learning regression: 57 unit tests 全綠 Co-Authored-By: Claude Opus 4.7 (1M context) --- services/ollama_service.py | 100 +++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 43 deletions(-) diff --git a/services/ollama_service.py b/services/ollama_service.py index 8173dbc..c31f1ac 100644 --- a/services/ollama_service.py +++ b/services/ollama_service.py @@ -663,55 +663,69 @@ class OllamaService: def generate_embedding(self, text: str, model: str = "bge-m3:latest", host: str = None, timeout: int = None) -> List[float]: """ - [ADR-007, Step 3] 呼叫 Ollama API 將文字轉換為向量 Embedding + [ADR-007] Embedding — 含三主機自動 retry(HOTFIX 2026-05-04) - 2026-04-19 更新(ADR-003 對齊): - embedding 預設走 Hermes 主機 `EMBEDDING_HOST`(env: EMBEDDING_HOST - → fallback http://192.168.0.111:11434,內網免認證), - 避免 self.host 若指向公開 ollama.wooo.work 時回 401。 - 可透過 host 參數 override。 + 失敗時自動嘗試下一台主機(最多 3 次:Primary → Secondary → 111), + 每次失敗 mark_unhealthy 觸發 resolve cache 失效,下次 resolve 取新主機。 + caller 顯式 host=... 
時凍結(不 retry)。 """ - # V-New: Embedding 也遵循 GCP 優先、111 備援邏輯 - # EMBEDDING_HOST 若有明確設定則優先使用;否則透過 resolve_ollama_host 自動決定 - target_host = (host or os.getenv("EMBEDDING_HOST") or resolve_ollama_host()).rstrip("/") request_timeout = timeout or EMBED_TIMEOUT - try: - payload = {"model": model, "input": text} - response = requests.post( - f"{target_host}/api/embed", - json=payload, - timeout=request_timeout, - ) - if response.status_code == 200: - vec = self._extract_embedding(response.json()) - if vec: - return vec - logger.warning(f"Ollama Embed Empty Response @ {target_host}/api/embed") - elif response.status_code not in (404, 405): - logger.error( - f"Ollama Embed Error HTTP {response.status_code} @ {target_host}/api/embed: {response.text[:200]}" + + def _embed_one(target_host: str) -> List[float]: + """單次 embedding 嘗試 — 成功回 vec,失敗回 [] + mark_unhealthy""" + try: + # /api/embed 主路徑 + response = requests.post( + f"{target_host}/api/embed", + json={"model": model, "input": text}, + timeout=request_timeout, ) + if response.status_code == 200: + vec = self._extract_embedding(response.json()) + if vec: + return vec + logger.warning(f"[Embed] empty response @ {target_host}/api/embed") + elif response.status_code not in (404, 405): + logger.warning(f"[Embed] HTTP {response.status_code} @ {target_host}/api/embed: {response.text[:200]}") + mark_unhealthy(target_host) + return [] + + # /api/embeddings legacy fallback + legacy = requests.post( + f"{target_host}/api/embeddings", + json={"model": model, "prompt": text}, + timeout=request_timeout, + ) + if legacy.status_code == 200: + return self._extract_embedding(legacy.json()) + + logger.warning(f"[Embed] both endpoints failed @ {target_host}: {legacy.status_code}") + mark_unhealthy(target_host) + return [] + except Exception as e: + logger.warning(f"[Embed] exception @ {target_host}: {e}") + mark_unhealthy(target_host) return [] - # V-Fix: 舊 Ollama 相容;/api/embeddings 已 deprecated,但仍是部分舊節點唯一可用路徑。 - legacy_response = 
requests.post( - f"{target_host}/api/embeddings", - json={"model": model, "prompt": text}, - timeout=request_timeout, - ) - if legacy_response.status_code == 200: - return self._extract_embedding(legacy_response.json()) - logger.error( - f"Ollama Embed Error HTTP {legacy_response.status_code} @ {target_host}/api/embeddings: {legacy_response.text[:200]}" - ) - # B4: 兩個 endpoint 都失敗,標 unhealthy 讓下次 resolve 跳過 - mark_unhealthy(target_host) - return [] - except Exception as e: - logger.error(f"Ollama Embed Exception @ {target_host}: {e}") - # B4: 連線/timeout 例外標 unhealthy - mark_unhealthy(target_host) - return [] + # caller 顯式指定 host → 凍結不 retry + if host: + return _embed_one(host.rstrip("/")) + + # HOTFIX 三主機 retry 鏈(與 generate() 同模式) + attempted_hosts: List[str] = [] + for attempt in range(3): + target_host = (os.getenv("EMBEDDING_HOST") or resolve_ollama_host()).rstrip("/") + if target_host in attempted_hosts: + break # cache 還沒過期或同主機,避免無限迴圈 + attempted_hosts.append(target_host) + + vec = _embed_one(target_host) + if vec: + return vec + logger.info(f"[Embed] retry #{attempt+1}/3 — {target_host} failed, mark_unhealthy + 取新主機") + + logger.error(f"[Embed] all {len(attempted_hosts)} hosts failed; tried={attempted_hosts}") + return [] # 建立全域服務實例