From ed29e66fde3921acf035095668cdea84f5485b72 Mon Sep 17 00:00:00 2001 From: OoO Date: Wed, 13 May 2026 21:21:05 +0800 Subject: [PATCH] fix: route hermes through ollama cascade --- docs/AI_INTELLIGENCE_MODULE_SOT.md | 1 + .../claude_inventory_validation_20260513.md | 1 + services/hermes_analyst_service.py | 86 ++++++------ services/ollama_service.py | 7 +- tests/test_hermes_ollama_cascade.py | 132 ++++++++++++++++++ 5 files changed, 180 insertions(+), 47 deletions(-) create mode 100644 tests/test_hermes_ollama_cascade.py diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index e9d975b..95df0b4 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -13,6 +13,7 @@ - Gemini 只能作為 Ollama 主路徑失敗後的備援,或 ADR-028 明確鎖定的 MCP Grounding、PPT/vision、週/月報、Code Review、EA HITL、複雜 SKU 升級等低頻場景。 - 188 `192.168.0.188` 僅是 App / DB / scheduler / Telegram bot 容器宿主與 AutoHeal target,不可作為 Ollama 節點。 - 通用 AI 文案、關鍵字、商品洞察與 Telegram Q&A 第一響應不得 Gemini-first。 +- Hermes intent / analyst 路徑不得手刻 `/api/generate` 或只 resolve 單次 host;必須走 `OllamaService`,讓同一請求可依序 retry GCP-A → GCP-B → 111。 - Code Review pipeline 也必須 Ollama-first:Hermes scan 與 OpenClaw assessment 都走 `OllamaService` 三主機 retry;Gemini telemetry 只能以 `code_review_openclaw_gemini` 出現,表示 Ollama/可選 Claude 備援都失敗後才啟用。 - OpenClaw Telegram Q&A 主路徑也不得綁單一 host:`_call_qwen3_qa()` 必須透過 `OllamaService` 跑 GCP-A → GCP-B → 111,並把實際落點寫入 `ai_calls.provider`。 diff --git a/docs/memory/claude_inventory_validation_20260513.md b/docs/memory/claude_inventory_validation_20260513.md index 579a6ea..9060f45 100644 --- a/docs/memory/claude_inventory_validation_20260513.md +++ b/docs/memory/claude_inventory_validation_20260513.md @@ -51,6 +51,7 @@ - `services/pg_sync_service.py` 是顯式 opt-in legacy CLI,不是生產自動同步路徑;`tests/test_pg_sync_contract.py` 已守住預設 OFF 與 runtime paths 不自動 import。 - `qwen3:14b` 不是未使用 Ollama 模型:OpenClaw QA、NemoTron dispatch 與 LLM model router 仍有現役路徑;`tests/test_qwen3_runtime_usage.py` 已守住,不能只因體積大就三主機移除。 - Ollama host env 已加白名單護欄:`OLLAMA_HOST*` / `EMBEDDING_HOST` 只接受 GCP-A、GCP-B、111 或 110 proxy,誤設 188/localhost 會回到核准主機。 +- Hermes intent 與批量 analyst 已從單次 `resolve_ollama_host()` + raw `requests.post('/api/generate')` 改為 `OllamaService.generate()`,同一請求會依序 retry GCP-A → GCP-B → 111,並保留 `HERMES_KEEP_ALIVE` 與實際 provider 回寫測試。 - OpenClaw QA / daily Hermes template / NemoTron qwen3 的 flag 文件與測試已對齊 Ollama-first 預設 ON;顯式 `false` 才是 Gemini/NIM legacy 緊急退路。OpenClaw QA 已移除單一 `OPENCLAW_QA_OLLAMA_HOST` 主機覆寫,`_call_qwen3_qa()` 改走 `OllamaService` 的 GCP-A → GCP-B → 111 retry 並回寫實際 provider。 - Code Review pipeline 已對齊 Ollama-first:`_hermes_scan()` 與 `_openclaw_assess()` 都先走 `OllamaService` 的 GCP-A → GCP-B → 111 retry;Gemini 僅在 Ollama(與可選 Claude)失敗後以 `code_review_openclaw_gemini` caller 記錄備援,不再以 `code_review_openclaw` 直接 Gemini-first。 - `.env.example` 已補齊 Python runtime 實際讀取的環境變數,`tests/test_phase3f_cleanup_contracts.py::test_env_example_documents_runtime_os_env_keys` 會掃 `app.py/config.py/scheduler.py/run_scheduler.py/routes/services/utils` 的 `os.getenv()` / `os.environ.get()`;只允許 `PYTEST_CURRENT_TEST` 與 `MOMO_ALLOW_INSECURE_CONFIG_FOR_TESTS` 兩個測試內部 key 不進範例。 diff --git a/services/hermes_analyst_service.py b/services/hermes_analyst_service.py index b4be5a7..842aae9 100644 --- a/services/hermes_analyst_service.py +++ b/services/hermes_analyst_service.py @@ -4,7 +4,7 @@ Hermes 3 競價情報分析服務 (Module 2) 角色:分析師 (Analyst) -模型:hermes3:latest @ HERMES_URL(預設 192.168.0.111:11434) +模型:hermes3:latest @ OllamaService 三主機級聯(GCP-A → GCP-B → 111) 輸入:SQL 漏斗篩選後的候選商品(~300筆) 輸出:Top N 威脅清單(結構化 JSON)→ 交給 NemoTron dispatcher @@ -21,16 +21,15 @@ import uuid from dataclasses import dataclass from typing import Optional -import requests from sqlalchemy import text from services.mcp_context_service import build_mcp_context -from services.ollama_service import resolve_ollama_host, get_host_label +from services.ollama_service import OllamaService, get_host_label, get_provider_tag from services.ai_call_logger import log_ai_call # Operation Ollama-First v5.0 P1 from services.rag_service import rag_service, is_rag_enabled # Phase 11: RAG-first 快取 logger = logging.getLogger(__name__) -from config import HERMES_URL, HERMES_TIMEOUT +from config import HERMES_TIMEOUT HERMES_MODEL = "hermes3:latest" HERMES_KEEP_ALIVE = "24h" # ADR-012:保持模型熱駐留,避免被別模型擠下後冷啟動 30+s timeout @@ -219,35 +218,34 @@ class HermesAnalystService: "規則:greeting/help 類 complexity_score<=0.3;涉及數據、報告、日期、" "品牌、競品對比者 complexity_score>=0.7 且 requires_data_fetch=true。" ) - payload = { - "model": HERMES_MODEL, - "system": system, - "prompt": f"使用者訊息:{message}\n輸出 JSON:", - "stream": False, - "keep_alive": HERMES_KEEP_ALIVE, # ADR-012:避免冷啟動 timeout - "options": {"temperature": 0.1}, - } - target_host = resolve_ollama_host() + prompt = f"使用者訊息:{message}\n輸出 JSON:" # Phase 1 v5.0: 包 ai_call_logger 追蹤 Hermes 意圖分類 token / fallback with log_ai_call( caller='hermes_intent', provider='gcp_ollama', model=HERMES_MODEL, - meta={'host_label': get_host_label(target_host)}, + meta={'route': 'ollama_first'}, ) as _ctx: try: - resp = requests.post( - f"{target_host}/api/generate", - json=payload, - timeout=HERMES_TIMEOUT, # 統一 config 集中讀取(ADR-008);keep_alive 確保熱駐留時實測 < 10s + ollama = OllamaService(model=HERMES_MODEL) + resp = ollama.generate( + prompt=prompt, + model=HERMES_MODEL, + system_prompt=system, + temperature=0.1, + timeout=HERMES_TIMEOUT, + keep_alive=HERMES_KEEP_ALIVE, # ADR-012:避免冷啟動 timeout ) - resp.raise_for_status() - body = resp.json() + _ctx.set_provider(get_provider_tag(resp.host or '')) _ctx.set_tokens( - input=body.get("prompt_eval_count", 0), - output=body.get("eval_count", 0), + input=resp.input_tokens, + output=resp.output_tokens, ) - raw = (body.get("response", "") or "").strip() + _ctx.add_meta('host', resp.host) + _ctx.add_meta('host_label', get_host_label(resp.host or '')) + if not resp.success: + raise RuntimeError(resp.error or "ollama generate failed") + raw = (resp.content or "").strip() if raw.startswith("```"): raw = re.sub(r"^```(?:json)?\s*", "", raw, flags=re.MULTILINE) raw = re.sub(r"\s*```\s*$", "", raw.strip(), flags=re.MULTILINE).strip() @@ -495,44 +493,40 @@ class HermesAnalystService: f'"risk": "HIGH|MED|LOW", "recommended_action": string, "confidence": number}}]' ) - payload = { - "model": HERMES_MODEL, - "system": self.SYSTEM_PROMPT, - "prompt": prompt, - "stream": False, - "keep_alive": HERMES_KEEP_ALIVE, # ADR-012:避免冷啟動 timeout - "options": {"temperature": 0.1}, - } - - target_host = resolve_ollama_host() # Phase 1 v5.0: 包 ai_call_logger 追蹤 Hermes 競價分析 token / fallback with log_ai_call( caller='hermes_analyst', provider='gcp_ollama', model=HERMES_MODEL, meta={ - 'host_label': get_host_label(target_host), + 'route': 'ollama_first', 'item_count': len(items), 'top_n': TOP_N, }, ) as _ctx: try: - resp = requests.post( - f"{target_host}/api/generate", - json=payload, + ollama = OllamaService(model=HERMES_MODEL) + resp = ollama.generate( + prompt=prompt, + model=HERMES_MODEL, + system_prompt=self.SYSTEM_PROMPT, + temperature=0.1, timeout=HERMES_TIMEOUT, + keep_alive=HERMES_KEEP_ALIVE, ) - resp.raise_for_status() + _ctx.set_provider(get_provider_tag(resp.host or '')) + _ctx.set_tokens(input=resp.input_tokens, output=resp.output_tokens) + _ctx.add_meta('host', resp.host) + _ctx.add_meta('host_label', get_host_label(resp.host or '')) + if not resp.success: + raise RuntimeError(resp.error or "ollama generate failed") except Exception as e: _ctx.set_error(f"{type(e).__name__}: {e}") raise - data = resp.json() - raw = data.get("response", "").strip() - duration_sec = round(data.get("total_duration", 0) / 1e9, 1) - eval_tokens_raw = data.get("eval_count", 0) # Ollama 推理 token 數 - prompt_tokens_raw = data.get("prompt_eval_count", 0) - _ctx.set_tokens(input=prompt_tokens_raw, output=eval_tokens_raw) + raw = (resp.content or "").strip() + duration_sec = round(resp.total_duration or 0, 1) + eval_tokens_raw = resp.output_tokens logger.info( f"[Hermes] 推理耗時 {duration_sec}s," f"輸入 {len(items)} 筆,tokens={eval_tokens_raw},回應長度 {len(raw)}" @@ -541,8 +535,8 @@ class HermesAnalystService: self._last_stats = { "duration_sec": duration_sec, "tokens": eval_tokens_raw, - "host": target_host, - "host_label": get_host_label(target_host) + "host": resp.host, + "host_label": get_host_label(resp.host or '') } # P0-1 修復:剝除 Hermes 可能輸出的 markdown code fence diff --git a/services/ollama_service.py b/services/ollama_service.py index 6d7aa64..10ed9e2 100644 --- a/services/ollama_service.py +++ b/services/ollama_service.py @@ -286,7 +286,8 @@ class OllamaService: def generate(self, prompt: str, model: str = None, system_prompt: str = None, temperature: float = 0.7, - timeout: int = None) -> OllamaResponse: + timeout: int = None, keep_alive: str = None, + options: Optional[Dict[str, Any]] = None) -> OllamaResponse: """ 生成文字 — 含三主機自動 retry(HOTFIX 2026-05-04) @@ -302,8 +303,12 @@ class OllamaService: "stream": False, "options": {"temperature": temperature}, } + if options: + payload["options"].update(options) if system_prompt: payload["system"] = system_prompt + if keep_alive: + payload["keep_alive"] = keep_alive # HOTFIX 三主機 retry 鏈 attempted_hosts: List[str] = [] diff --git a/tests/test_hermes_ollama_cascade.py b/tests/test_hermes_ollama_cascade.py new file mode 100644 index 0000000..43bdee6 --- /dev/null +++ b/tests/test_hermes_ollama_cascade.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +"""Hermes 分析師必須透過 OllamaService 三主機級聯。""" + +import time +from types import SimpleNamespace + +import pytest + +import services.ai_call_logger as logger_mod +import services.hermes_analyst_service as hermes_mod +from services.ai_call_logger import _reset_kill_switch + + +@pytest.fixture(autouse=True) +def reset_ai_logger(monkeypatch): + _reset_kill_switch() + captured = [] + + def fake_write(state): + captured.append({ + 'caller': state.caller, + 'provider': state.provider, + 'model': state.model, + 'status': state.status, + 'fallback_to': state.fallback_to, + 'error': state.error, + 'meta': dict(state.meta), + }) + + monkeypatch.setattr(logger_mod, '_write_to_db', fake_write) + monkeypatch.setenv('AI_CALL_LOGGING_ENABLED', 'true') + yield captured + + +def _wait_for(captured, n=1, timeout=2.0): + deadline = time.time() + timeout + while time.time() < deadline: + if len(captured) >= n: + return True + time.sleep(0.01) + return False + + +def _stub_ollama(monkeypatch, *, content: str, host: str): + fake_resp = SimpleNamespace( + success=True, + content=content, + model=hermes_mod.HERMES_MODEL, + error=None, + total_duration=1.2, + host=host, + input_tokens=33, + output_tokens=22, + ) + + class FakeOllamaService: + instances = [] + + def __init__(self, *args, **kwargs): + self.init_args = args + self.init_kwargs = kwargs + self.generate_calls = [] + FakeOllamaService.instances.append(self) + + def generate(self, **kwargs): + self.generate_calls.append(kwargs) + return fake_resp + + monkeypatch.setattr(hermes_mod, 'OllamaService', FakeOllamaService) + return FakeOllamaService + + +def test_hermes_intent_uses_ollama_service_and_logs_actual_host(monkeypatch, reset_ai_logger): + fake_service = _stub_ollama( + monkeypatch, + content='{"intent":"query_sales","confidence":0.9,"complexity_score":0.8,' + '"requires_data_fetch":true,"preliminary_answer":""}', + host='http://192.168.0.111:11434', + ) + + svc = hermes_mod.HermesAnalystService() + result = svc._call_hermes_intent("本週業績如何?") + + assert result['intent'] == 'query_sales' + assert result['metadata']['source'] == 'hermes_llm' + call_kwargs = fake_service.instances[0].generate_calls[0] + assert call_kwargs['model'] == hermes_mod.HERMES_MODEL + assert call_kwargs['keep_alive'] == hermes_mod.HERMES_KEEP_ALIVE + + assert _wait_for(reset_ai_logger, 1) + rec = reset_ai_logger[0] + assert rec['caller'] == 'hermes_intent' + assert rec['provider'] == 'ollama_111' + assert rec['meta']['host_label'] == '111 備援' + + +def test_hermes_batch_analyze_uses_ollama_service_and_logs_secondary(monkeypatch, reset_ai_logger): + fake_service = _stub_ollama( + monkeypatch, + content='[{"sku":"A1","name":"測試商品","category":"家電","momo_price":120,' + '"pchome_price":100,"gap_pct":20,"sales_7d_delta_pct":-30,' + '"risk":"HIGH","recommended_action":"建議人工評估","confidence":0.8}]', + host='http://34.21.145.224:11434', + ) + monkeypatch.setattr(hermes_mod, 'build_mcp_context', lambda *args, **kwargs: 'MCP context') + + candidates = [{ + 'sku': 'A1', + 'name': '測試商品', + 'category': '家電', + 'momo_price': 120, + 'pchome_price': 100, + 'sales_7d_prev': 1000, + 'sales_7d_curr': 700, + 'competitor_tags': [], + }] + + svc = hermes_mod.HermesAnalystService() + raw_threats, items = svc._batch_analyze(candidates) + + assert raw_threats[0]['sku'] == 'A1' + assert items[0]['gap_pct'] == 20.0 + call_kwargs = fake_service.instances[0].generate_calls[0] + assert call_kwargs['system_prompt'] == svc.SYSTEM_PROMPT + assert call_kwargs['keep_alive'] == hermes_mod.HERMES_KEEP_ALIVE + + assert _wait_for(reset_ai_logger, 1) + rec = reset_ai_logger[0] + assert rec['caller'] == 'hermes_analyst' + assert rec['provider'] == 'ollama_secondary' + assert rec['meta']['host_label'] == 'GCP-SSD-2'