From 353e565e52f990182a3705924974e8fa76805155 Mon Sep 17 00:00:00 2001 From: OoO Date: Sun, 24 May 2026 14:51:36 +0800 Subject: [PATCH] V10.417 protect embedding fallback routing --- .env.example | 12 +++-- config.py | 2 +- docs/AI_INTELLIGENCE_MODULE_SOT.md | 7 +-- docs/memory/history_logs.md | 1 + services/ollama_service.py | 59 ++++++++++++++++++++--- services/openclaw_learning_service.py | 8 ++- services/rag_service.py | 6 ++- tests/test_ai_insight_embedding_bridge.py | 2 +- tests/test_ollama_embedding.py | 40 ++++++++++++++- tests/test_ollama_retry_chain.py | 45 +++++++++++++++++ tests/test_rag_service.py | 18 +++---- 11 files changed, 170 insertions(+), 30 deletions(-) diff --git a/.env.example b/.env.example index 197359e..25fadb9 100644 --- a/.env.example +++ b/.env.example @@ -140,8 +140,11 @@ HERMES_ALLOW_111_FALLBACK=false # [選填] Embedding 服務主機;留空時自動走同一條 Ollama 三主機級聯 EMBEDDING_HOST= -# [預設 45] Embedding API timeout;優先使用 Ollama /api/embed,舊節點 fallback /api/embeddings -EMBEDDING_TIMEOUT=45 +# [預設 15] Embedding API timeout;優先使用 Ollama /api/embed,舊節點 fallback /api/embeddings +EMBEDDING_TIMEOUT=15 +OLLAMA_EMBED_MAX_TIMEOUT=15 +OLLAMA_EMBED_KEEP_ALIVE=1m +OLLAMA_EMBED_MAX_CHARS=4000 # ========================================== # Elephant Alpha AI Agent Super Orchestrator Settings @@ -368,7 +371,10 @@ OLLAMA_HOST_FALLBACK=http://192.168.0.111:11434 OLLAMA_MODEL=gemma3:4b OLLAMA_TIMEOUT=120 OLLAMA_COPY_TIMEOUT=180 -OLLAMA_EMBED_TIMEOUT=45 +OLLAMA_EMBED_TIMEOUT=15 +OLLAMA_EMBED_MAX_TIMEOUT=15 +OLLAMA_EMBED_KEEP_ALIVE=1m +OLLAMA_EMBED_MAX_CHARS=4000 # 111 是 Mac final fallback,不承接 7B+ / vision / long-context / 長輸出任務;落到 111 時自動降級與縮短常駐。 OLLAMA_111_MODEL_FALLBACK=llama3.2:latest OLLAMA_111_MODEL_DOWNGRADE_PATTERNS=qwen3:*,deepseek-r1:*,hermes3:*,llama3.1:*,qwen2.5:*,qwen2.5-coder:*,gemma3:*,minicpm-v:*,llava:*,*:7b*,*:8b*,*:14b*,*:32b*,*:70b* diff --git a/config.py b/config.py index de10723..cc5974e 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.416" +SYSTEM_VERSION = "V10.417" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index a87951b..6002f4e 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -1,8 +1,8 @@ # MOMO PRO — AI 競價情報模組 Single Source of Truth -> **最後更新**: 2026-05-21 (台北時間) +> **最後更新**: 2026-05-24 (台北時間) > **狀態**: 🟢 四 AI Agent 自動化閉環已落地;LLM 路由紅線升級為 Ollama-first 三主機級聯,Gemini 備援預設關閉 -> **適用版本**: V10.387 +> **適用版本**: V10.417 --- @@ -20,6 +20,7 @@ - Code Review Hermes scan 預設不呼叫 LLM,改用 deterministic fast static scan,避免部署後先卡三段 Ollama timeout;需要 LLM 掃描時才以 `CODE_REVIEW_HERMES_LLM_SCAN_ENABLED=true` 啟用本地矩陣。 - Code Review Hermes LLM scan 啟用時才使用本地模型矩陣,且預設只跑 GCP-A `qwen2.5-coder:7b` → GCP-B `gemma3:4b`;`CODE_REVIEW_ALLOW_111_FALLBACK=true` 時才允許落到 111,並由 `OllamaService` 降級到 `llama3.2:latest`。不啟用 Gemini 備援,本地掃描失敗時只回空 findings 並交由 OpenClaw 本地矩陣續跑。 - Code Review OpenClaw assessment 預設只跑 GCP-A → GCP-B:GCP-A `qwen2.5-coder:7b`、GCP-B `gemma3:4b`;primary timeout 預設 `15s`、secondary timeout 預設 `60s`,讓 A 掛時快速讓位給 B,且 B 有足夠時間完成審查 prompt。111 是最後救急節點,但部署後重分析預設不打 111;只有 `CODE_REVIEW_ALLOW_111_FALLBACK=true` 才允許 111 接手,並降級到 `llama3.2:latest`。Code Review 的 Ollama `keep_alive` 預設為 `5m`,不得再用 `24h` 長駐 runner 壓住 GCP-B/111。GCP-A/GCP-B 都失敗且 Claude/Gemini 未顯式開啟時,必須回 deterministic 本地降級摘要,不呼叫 Gemini、不落 111、不走其他雲端模型。 +- Embedding / semantic RAG 背景任務預設只跑 GCP-A → GCP-B:`OpenClawLearningService` embedding worker 與 `RAGService` 查詢 embedding 呼叫 `OllamaService.generate_embedding(..., allow_111_fallback=False)`;111 只可作人工明確指定的救急路徑,不承接 `bge-m3` 背景批次。`OLLAMA_EMBED_TIMEOUT` / `OLLAMA_EMBED_MAX_TIMEOUT` 預設 `15s`、`OLLAMA_EMBED_KEEP_ALIVE=1m`、`OLLAMA_EMBED_MAX_CHARS=4000`,避免 embedding worker 長時間卡住 GCP-B 或 111。 - OpenClaw Telegram Q&A 主路徑也不得綁單一 host:`_call_qwen3_qa()` 必須透過 `OllamaService` 跑 GCP-A → GCP-B → 111,並把實際落點寫入 `ai_calls.provider`。 - OpenClaw Telegram 圖片商品辨識也必須 Ollama-first:`_identify_product_name_with_ollama_vision()` 透過 `OllamaService` 嘗試 GCP-A → GCP-B → 111;Gemini 只允許以 `openclaw_bot_image_gemini` caller 作為失敗後備援。 - OpenClaw 週報、月報、Meta analysis、日報洞察、Telegram PPT 分析與 MCP fallback 也必須 Ollama-first;Gemini caller 只能帶 `_gemini_fallback` 或明確 fallback caller 語意,且不得先於 Ollama/NIM 被呼叫。OpenClaw strategy 的 Ollama `keep_alive` 預設為 `5m`,避免報告型任務把 GCP-B/111 runner 長駐 24h。 @@ -124,7 +125,7 @@ SQL漏斗(~300筆) - `resource_optimization` 會先執行 `ActionPlanHygieneService` 清理過期噪音:只關閉超過 72 小時的 `code_review_fix` / `openclaw_recommendation` 類 advisory action_plans,以及 NemoTron `direct_response/reply_simple` 舊聊天回覆計畫;將狀態改為 `auto_disabled` 或 `rejected` 並寫入 `metadata_json.hygiene_history`。不刪資料,也不碰 NemoTron human_review / pricing / tool action 類業務行動。 - `momo-scheduler` 每 6 小時固定執行 `run_action_plan_hygiene_task()`,讓過期 advisory action_plans 的關閉不再依賴 `resource_optimization` 告警觸發;排程失敗會經 EventRouter 發送 `action_plan_hygiene_failure`。 - `action_plans` 產生端必須防重:Code Review 同一檔案已有 active `code_review_fix` 時不重建;OpenClaw recommendation 會寫入文字 fingerprint 並跳過同一建議;AIOrchestrator 不再把 NemoTron `direct_response/reply_simple` 聊天回覆存成 action plan,真正需工具、審核或執行的 NemoTron action 才能進 queue。 -- OpenClaw/Hermes embedding 優先呼叫 Ollama `/api/embed`,只在舊節點不支援時 fallback `/api/embeddings`;timeout 由 `EMBEDDING_TIMEOUT` / `OLLAMA_EMBED_TIMEOUT` 控制。 +- OpenClaw/Hermes embedding 優先呼叫 Ollama `/api/embed`,只在舊節點不支援時 fallback `/api/embeddings`;timeout 由 `EMBEDDING_TIMEOUT` / `OLLAMA_EMBED_TIMEOUT` 控制,並受 `OLLAMA_EMBED_MAX_TIMEOUT` 封頂。背景 worker / RAG 查詢不得落 111,除非 caller 顯式允許 `allow_111_fallback=True`。 - PPT 自動產線由 `momo-scheduler` 依節奏執行 `run_ppt_auto_generation_task(schedule_kind)`:每日 20:30 產日報、週一 20:40 產週報/市場情報、每月 1 日 20:50 產月報與管理型簡報、季初 21:00 產季報、半年初 21:10 產半年報、年初 21:20 產年報,再交給 22:00 `ppt_vision_audit` 做視覺審核;每次嘗試會寫入 `ppt_generation_runs`,`/observability/ppt_audit_history` 以精準參數檢查目標版本是否已產生,並可用 `/observability/ppt_audit/generate_missing` 手動補齊缺漏,總開關為 `PPT_AUTO_GENERATION_ENABLED`。PPT vision 需 `PPT_VISION_ENABLED=true` 與容器內 LibreOffice;`/observability/ppt_audit_file/` 會把 PPTX 轉成 PDF 快取供站內線上預覽,原始 PPTX 仍保留下載。 --- diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index 2f7579e..11d507e 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,7 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-24:PChome 近門檻身份回收第二輪 +- **V10.417 Embedding/RAG 背景負載保護**: `OllamaService.generate_embedding()` 新增 `allow_111_fallback`、timeout cap、輸入長度 cap 與 `/api/embed keep_alive=1m`;OpenClaw learning worker 與 RAG 查詢預設只跑 GCP-A → GCP-B,不再把 `bge-m3` 背景 embedding / semantic RAG 轉嫁到 111。預設 `OLLAMA_EMBED_TIMEOUT=15`、`OLLAMA_EMBED_MAX_TIMEOUT=15`、`OLLAMA_EMBED_MAX_CHARS=4000`,避免 embedding worker 在 GCP-B/111 長時間常駐或拖住 runner。 - **V10.416 私密清潔 / 彩妝用途 / 棉棒 / 蘭蔻品線防錯配**: marketplace matcher 追加窄範圍 hard-veto guard,讓 SAUGELLA 日用/加強 vs 黃金女郎型、Lactacyd 清新舒涼 vs 生理呵護、LUNASOL 頰彩 vs 眼彩、MUJI 細軸棉棒 vs 黑色棉棒、LANCOME 超極光晶露 vs 超極限肌因精華露不再停留在模糊 `true_low_confidence`,而是以 `*_variant_conflict` / `makeup_usage_conflict` / `lancome_line_conflict` 明確拒絕;不調整 `MIN_MATCH_SCORE`,也不放寬真同款進 matched 的門檻。 - **V10.416 production pilot**: 正式回刷 7 筆近門檻錯配樣本,SAUGELLA 2 筆、LUNASOL 頰彩 vs 眼彩、LANCOME 超極光 vs 超極限、我的心機兒童防曬 vs 海洋友善防曬、Lactacyd 清新舒涼 vs 生理呵護、MUJI 細軸棉棒 vs 黑色棉棒皆更新為 `identity_veto`;`matched` 維持 1619、`true_low_confidence` 759→753、`recoverable_low_score` 1→0、`identity_veto` 4004→4011,無正式 `competitor_prices` 覆寫。 - **V10.415 Hermes 預設不落 111 + 比對保護**: `OllamaService.generate()` 新增 `allow_111_fallback` 參數,預設維持三主機相容;Hermes intent / competitor analyst 改以 `HERMES_ALLOW_111_FALLBACK=false` 預設只跑 GCP-A → GCP-B,兩台都不可用時交給規則引擎或 DB 證據 fallback,不再把批量價格分析與意圖分類轉嫁到 111。同版 marketplace matcher 將防曬類列入 variant-sensitive,排除 SPF/PA/UVA/UVB 這類規格 token 被誤當型號,避免「兒童防曬乳」與「海洋友善保濕防曬乳」誤配;Recipe Box 兒童防曬氣墊粉餅保留精準同品線例外;另新增 `pack_quantity_difference`,讓 Beauty Foot 足膜 5入 vs 4入走 unit comparable,不再卡在低信心。 diff --git a/services/ollama_service.py b/services/ollama_service.py index faecf76..ff9bb98 100644 --- a/services/ollama_service.py +++ b/services/ollama_service.py @@ -56,6 +56,9 @@ DEFAULT_MODEL = os.getenv('OLLAMA_MODEL', 'llama3.1:8b') # 較快速的模型 TIMEOUT = int(os.getenv('OLLAMA_TIMEOUT', '120')) # 秒 - 2 分鐘 COPY_TIMEOUT = int(os.getenv('OLLAMA_COPY_TIMEOUT', '180')) # 文案生成專用超時 - 3 分鐘 EMBED_TIMEOUT = int(os.getenv('OLLAMA_EMBED_TIMEOUT', os.getenv('EMBEDDING_TIMEOUT', '45'))) +EMBED_MAX_TIMEOUT = int(os.getenv('OLLAMA_EMBED_MAX_TIMEOUT', '15')) +EMBED_KEEP_ALIVE = os.getenv('OLLAMA_EMBED_KEEP_ALIVE', '1m') +EMBED_MAX_CHARS = int(os.getenv('OLLAMA_EMBED_MAX_CHARS', '4000')) FALLBACK_111_KEEP_ALIVE = os.getenv('OLLAMA_111_KEEP_ALIVE', '5m') FALLBACK_111_MAX_TIMEOUT = int(os.getenv('OLLAMA_111_MAX_TIMEOUT', '20')) FALLBACK_111_NUM_CTX = int(os.getenv('OLLAMA_111_NUM_CTX', '4096')) @@ -881,7 +884,8 @@ class OllamaService: return [] def generate_embedding(self, text: str, model: str = "bge-m3:latest", - host: str = None, timeout: int = None) -> List[float]: + host: str = None, timeout: int = None, + allow_111_fallback: bool = True) -> List[float]: """ [ADR-007] Embedding — 含三主機自動 retry(HOTFIX 2026-05-04) @@ -889,7 +893,18 @@ class OllamaService: 每次失敗 mark_unhealthy 觸發 resolve cache 失效,下次 resolve 取新主機。 caller 顯式 host=... 時凍結(不 retry)。 """ - request_timeout = timeout or EMBED_TIMEOUT + clean_text = (text or "").strip() + if not clean_text: + return [] + if len(clean_text) > EMBED_MAX_CHARS: + logger.info( + "[Embed] input clipped from %s to %s chars for model=%s", + len(clean_text), + EMBED_MAX_CHARS, + model, + ) + clean_text = clean_text[:EMBED_MAX_CHARS] + request_timeout = min(timeout or EMBED_TIMEOUT, EMBED_MAX_TIMEOUT) def _embed_one(target_host: str) -> List[float]: """單次 embedding 嘗試 — 成功回 vec,失敗回 [] + mark_unhealthy""" @@ -897,7 +912,7 @@ class OllamaService: # /api/embed 主路徑 response = requests.post( f"{target_host}/api/embed", - json={"model": model, "input": text}, + json={"model": model, "input": clean_text, "keep_alive": EMBED_KEEP_ALIVE}, timeout=request_timeout, ) if response.status_code == 200: @@ -913,7 +928,7 @@ class OllamaService: # /api/embeddings legacy fallback legacy = requests.post( f"{target_host}/api/embeddings", - json={"model": model, "prompt": text}, + json={"model": model, "prompt": clean_text}, timeout=request_timeout, ) if legacy.status_code == 200: @@ -929,20 +944,48 @@ class OllamaService: # caller 顯式指定 host → 凍結不 retry if host: + if not allow_111_fallback and _is_111_fallback_host(host): + logger.warning("[Embed] 111 fallback disabled; explicit host skipped: %s", host) + return [] return _embed_one(host.rstrip("/")) # HOTFIX 三主機 retry 鏈(與 generate() 同模式) attempted_hosts: List[str] = [] - for attempt in range(3): - target_host = (approved_ollama_env("EMBEDDING_HOST") or resolve_ollama_host()).rstrip("/") + canonical_hosts = _canonical_host_chain() + allowed_hosts = [ + candidate for candidate in canonical_hosts + if allow_111_fallback or not _is_111_fallback_host(candidate) + ] + max_attempts = len(canonical_hosts) if allow_111_fallback else max(1, len(allowed_hosts)) + for attempt in range(max_attempts): + configured_host = (approved_ollama_env("EMBEDDING_HOST") or "").rstrip("/") + if configured_host and (allow_111_fallback or not _is_111_fallback_host(configured_host)): + target_host = configured_host + else: + if configured_host and _is_111_fallback_host(configured_host): + logger.warning("[Embed] 111 fallback disabled; ignoring EMBEDDING_HOST=%s", configured_host) + target_host = resolve_ollama_host().rstrip("/") + if not allow_111_fallback and _is_111_fallback_host(target_host): + logger.warning("[Embed] 111 fallback disabled; no approved GCP embedding host available") + break if target_host in attempted_hosts: - break # cache 還沒過期或同主機,避免無限迴圈 + next_host = None + if target_host in allowed_hosts: + next_host = next((candidate for candidate in allowed_hosts if candidate not in attempted_hosts), None) + if not next_host: + break # cache 還沒過期或同主機,避免無限迴圈 + logger.info( + "[Embed] resolver returned attempted host=%s; forcing next host=%s", + target_host, + next_host, + ) + target_host = next_host attempted_hosts.append(target_host) vec = _embed_one(target_host) if vec: return vec - logger.info(f"[Embed] retry #{attempt+1}/3 — {target_host} failed, mark_unhealthy + 取新主機") + logger.info(f"[Embed] retry #{attempt+1}/{max_attempts} — {target_host} failed, mark_unhealthy + 取新主機") logger.error(f"[Embed] all {len(attempted_hosts)} hosts failed; tried={attempted_hosts}") return [] diff --git a/services/openclaw_learning_service.py b/services/openclaw_learning_service.py index 2b7add3..c1ac976 100644 --- a/services/openclaw_learning_service.py +++ b/services/openclaw_learning_service.py @@ -119,7 +119,11 @@ def _process_one_embedding(row_id: int, target_table: str, target_id: int, ) session.commit() - vec = ollama_service.generate_embedding(text_content, model=model) + vec = ollama_service.generate_embedding( + text_content, + model=model, + allow_111_fallback=False, + ) if not vec: raise RuntimeError("embedding 回傳空值") @@ -441,7 +445,7 @@ def _build_semantic_rag_context(session, query: str, insight_type: str = None, if not query: return "" try: - vec = ollama_service.generate_embedding(query) + vec = ollama_service.generate_embedding(query, allow_111_fallback=False) if not vec: return "" filters = ["embedding IS NOT NULL", "status IN ('approved', 'active', 'executed')"] diff --git a/services/rag_service.py b/services/rag_service.py index be1a608..df24714 100644 --- a/services/rag_service.py +++ b/services/rag_service.py @@ -349,7 +349,11 @@ class RAGService: query_vec: Optional[List[float]] = None try: from services.ollama_service import ollama_service - query_vec = ollama_service.generate_embedding(text, model=RAG_EMBED_MODEL) + query_vec = ollama_service.generate_embedding( + text, + model=RAG_EMBED_MODEL, + allow_111_fallback=False, + ) if not query_vec: logger.warning( "[RAGService] embedding empty (caller=%s, len=%d) — fallback LLM", diff --git a/tests/test_ai_insight_embedding_bridge.py b/tests/test_ai_insight_embedding_bridge.py index 75f3eb2..95127d2 100644 --- a/tests/test_ai_insight_embedding_bridge.py +++ b/tests/test_ai_insight_embedding_bridge.py @@ -82,7 +82,7 @@ def test_process_one_embedding_writes_signature(monkeypatch): monkeypatch.setattr( learning.ollama_service, "generate_embedding", - lambda text, model="bge-m3:latest": [0.1] * 1024, + lambda text, model="bge-m3:latest", **_kwargs: [0.1] * 1024, ) ok = learning._process_one_embedding( diff --git a/tests/test_ollama_embedding.py b/tests/test_ollama_embedding.py index 940420e..29f1400 100644 --- a/tests/test_ollama_embedding.py +++ b/tests/test_ollama_embedding.py @@ -24,7 +24,11 @@ def test_generate_embedding_uses_current_embed_endpoint(monkeypatch): assert vec == [0.1, 0.2, 0.3] assert calls == [ - ("http://ollama/api/embed", {"model": "bge-m3:latest", "input": "hello"}, 7), + ( + "http://ollama/api/embed", + {"model": "bge-m3:latest", "input": "hello", "keep_alive": "1m"}, + 7, + ), ] @@ -43,10 +47,42 @@ def test_generate_embedding_falls_back_to_legacy_embeddings_endpoint(monkeypatch assert vec == [0.4, 0.5] assert calls == [ - ("http://ollama/api/embed", {"model": "bge-m3:latest", "input": "hello"}, 9), + ( + "http://ollama/api/embed", + {"model": "bge-m3:latest", "input": "hello", "keep_alive": "1m"}, + 9, + ), ("http://ollama/api/embeddings", {"model": "bge-m3:latest", "prompt": "hello"}, 9), ] def test_extract_embedding_accepts_flat_embeddings_shape(): assert OllamaService._extract_embedding({"embeddings": [0.1, 0.2]}) == [0.1, 0.2] + + +def test_generate_embedding_caps_timeout_and_clips_input(monkeypatch): + calls = [] + + def fake_post(url, json, timeout): + calls.append((url, json, timeout)) + return FakeResponse(200, {"embeddings": [[0.1, 0.2, 0.3]]}) + + monkeypatch.setattr("services.ollama_service.EMBED_MAX_TIMEOUT", 3) + monkeypatch.setattr("services.ollama_service.EMBED_MAX_CHARS", 5) + monkeypatch.setattr("services.ollama_service.requests.post", fake_post) + + vec = OllamaService().generate_embedding( + "hello world", + model="bge-m3:latest", + host="http://ollama", + timeout=45, + ) + + assert vec == [0.1, 0.2, 0.3] + assert calls == [ + ( + "http://ollama/api/embed", + {"model": "bge-m3:latest", "input": "hello", "keep_alive": "1m"}, + 3, + ), + ] diff --git a/tests/test_ollama_retry_chain.py b/tests/test_ollama_retry_chain.py index bb4d5a6..efd4273 100644 --- a/tests/test_ollama_retry_chain.py +++ b/tests/test_ollama_retry_chain.py @@ -279,6 +279,51 @@ def test_embedding_all_three_hosts_fail_returns_empty(): assert mock_post.call_count == 3 +def test_embedding_can_disable_111_fallback_for_background_rag_work(): + """背景 embedding/RAG 任務只跑 GCP-A/GCP-B,避免 111 承接 bge-m3 長任務。""" + import requests + from services import ollama_service as oss + from services.ollama_service import OllamaService + + svc = OllamaService() + hosts = [ + oss.OLLAMA_HOST_SECONDARY, + oss.OLLAMA_HOST_FALLBACK, + ] + + with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \ + patch.dict('os.environ', {}, clear=False), \ + patch('services.ollama_service.requests.post', + side_effect=requests.Timeout('secondary timeout')) as mock_post: + import os + os.environ.pop('EMBEDDING_HOST', None) + vec = svc.generate_embedding('test text', allow_111_fallback=False) + + posted_hosts = [call.args[0].split('/api/embed')[0] for call in mock_post.call_args_list] + assert vec == [] + assert posted_hosts == [oss.OLLAMA_HOST_SECONDARY] + assert oss.OLLAMA_HOST_FALLBACK not in posted_hosts + + +def test_embedding_ignores_111_embedding_host_when_fallback_disabled(): + """EMBEDDING_HOST 若誤設 111,背景 embedding 仍回 GCP resolver,不直接棄跑。""" + from services import ollama_service as oss + from services.ollama_service import OllamaService + + svc = OllamaService() + fake_ok = MagicMock(status_code=200) + fake_ok.json.return_value = {'embeddings': [[0.7, 0.8]]} + + with patch('services.ollama_service.resolve_ollama_host', return_value=oss.OLLAMA_HOST_SECONDARY), \ + patch.dict('os.environ', {'EMBEDDING_HOST': oss.OLLAMA_HOST_FALLBACK}, clear=False), \ + patch('services.ollama_service.requests.post', return_value=fake_ok) as mock_post: + vec = svc.generate_embedding('test text', allow_111_fallback=False) + + posted_hosts = [call.args[0].split('/api/embed')[0] for call in mock_post.call_args_list] + assert vec == [0.7, 0.8] + assert posted_hosts == [oss.OLLAMA_HOST_SECONDARY] + + # ═══════════════════════════════════════════════════════════════════════════ # T4: mark_unhealthy 觸發 cache 失效(驗 self.host 取新主機) # ═══════════════════════════════════════════════════════════════════════════ diff --git a/tests/test_rag_service.py b/tests/test_rag_service.py index 182af32..028044e 100644 --- a/tests/test_rag_service.py +++ b/tests/test_rag_service.py @@ -99,7 +99,7 @@ class TestRagEnabledHits: monkeypatch.setattr( 'services.ollama_service.ollama_service.generate_embedding', - lambda text, model="bge-m3:latest": _fake_embedding(), + lambda text, model="bge-m3:latest", **_kwargs: _fake_embedding(), ) sig = rs.get_embedding_signature() @@ -132,7 +132,7 @@ class TestRagEnabledHits: monkeypatch.setattr( 'services.ollama_service.ollama_service.generate_embedding', - lambda text, model="bge-m3:latest": _fake_embedding(), + lambda text, model="bge-m3:latest", **_kwargs: _fake_embedding(), ) monkeypatch.setattr( rs.rag_service, @@ -170,7 +170,7 @@ class TestRagEnabledHits: monkeypatch.setattr( 'services.ollama_service.ollama_service.generate_embedding', - lambda text, model="bge-m3:latest": _fake_embedding(), + lambda text, model="bge-m3:latest", **_kwargs: _fake_embedding(), ) fake_session = MagicMock() fake_session.execute.return_value.fetchall.return_value = [] @@ -227,7 +227,7 @@ class TestEmbeddingSignature: monkeypatch.setattr( 'services.ollama_service.ollama_service.generate_embedding', - lambda text, model="bge-m3:latest": _fake_embedding(), + lambda text, model="bge-m3:latest", **_kwargs: _fake_embedding(), ) sig = rs.get_embedding_signature() fake_rows = [ @@ -261,7 +261,7 @@ class TestEmbeddingSignature: monkeypatch.setattr( 'services.ollama_service.ollama_service.generate_embedding', - lambda text, model="bge-m3:latest": _fake_embedding(), + lambda text, model="bge-m3:latest", **_kwargs: _fake_embedding(), ) fake_rows = [ _make_row_obj( @@ -288,7 +288,7 @@ class TestFireAndForgetLog: monkeypatch.setattr( 'services.ollama_service.ollama_service.generate_embedding', - lambda text, model="bge-m3:latest": _fake_embedding(), + lambda text, model="bge-m3:latest", **_kwargs: _fake_embedding(), ) # SELECT session 正常;INSERT session 故意 raise select_session = MagicMock() @@ -344,7 +344,7 @@ class TestFireAndForgetLog: monkeypatch.setattr( 'services.ollama_service.ollama_service.generate_embedding', - lambda text, model="bge-m3:latest": [], + lambda text, model="bge-m3:latest", **_kwargs: [], ) # DB 不應被呼叫 called = {'count': 0} @@ -466,7 +466,7 @@ class TestParamGuards: monkeypatch.setattr( 'services.ollama_service.ollama_service.generate_embedding', - lambda text, model="bge-m3:latest": _fake_embedding(), + lambda text, model="bge-m3:latest", **_kwargs: _fake_embedding(), ) fake_session = MagicMock() fake_session.execute.return_value.fetchall.return_value = [] @@ -487,7 +487,7 @@ class TestParamGuards: monkeypatch.setattr( 'services.ollama_service.ollama_service.generate_embedding', - lambda text, model="bge-m3:latest": _fake_embedding(), + lambda text, model="bge-m3:latest", **_kwargs: _fake_embedding(), ) fake_session = MagicMock() captured = {}