diff --git a/services/ai_call_logger.py b/services/ai_call_logger.py index 0f862a9..fe94291 100644 --- a/services/ai_call_logger.py +++ b/services/ai_call_logger.py @@ -55,11 +55,17 @@ COST_TABLE: Dict[str, Dict[str, float]] = { 'claude-opus-4-7': {'in': 15.0, 'out': 75.0}, # 程式碼 #1,Arena Elo 1548 'claude-sonnet-4-6': {'in': 3.0, 'out': 15.0}, # agentic 平衡 'claude-haiku-4-5': {'in': 0.8, 'out': 4.0}, # 輕量快速 - # Ollama 自架 (全 0) - 'hermes3:latest': {'in': 0.0, 'out': 0.0}, - 'qwen2.5-coder:7b': {'in': 0.0, 'out': 0.0}, - 'llama3.1:8b': {'in': 0.0, 'out': 0.0}, - 'bge-m3:latest': {'in': 0.0, 'out': 0.0}, + # Ollama 自架 (全 0;Phase 8/13 GCP 拉模型陸續加入) + 'hermes3:latest': {'in': 0.0, 'out': 0.0}, + 'qwen2.5-coder:7b': {'in': 0.0, 'out': 0.0}, + 'qwen2.5-coder:32b': {'in': 0.0, 'out': 0.0}, # Phase 8 GCP 候選(待拉) + 'qwen2.5:7b-instruct': {'in': 0.0, 'out': 0.0}, # Phase 3 A7 OpenClaw Q&A 預設(GCP 已拉) + 'qwen3:14b': {'in': 0.0, 'out': 0.0}, # Phase 3 A9 Nemotron + A7 升級(GCP 已拉) + 'qwen2-vl:7b': {'in': 0.0, 'out': 0.0}, # Phase 13 PPT vision 候選 + 'deepseek-r1:14b': {'in': 0.0, 'out': 0.0}, # 推理增強候選 + 'gemma3:4b': {'in': 0.0, 'out': 0.0}, # 輕量 sales_copy 候選 + 'llama3.1:8b': {'in': 0.0, 'out': 0.0}, + 'bge-m3:latest': {'in': 0.0, 'out': 0.0}, } diff --git a/services/ollama_service.py b/services/ollama_service.py index c31f1ac..03ad371 100644 --- a/services/ollama_service.py +++ b/services/ollama_service.py @@ -150,13 +150,21 @@ def get_host_label(host: str) -> str: @dataclass class OllamaResponse: - """Ollama 回應結構""" + """Ollama 回應結構 + + Phase 13 補強(A4 已知 limitation 修補): + 新增 input_tokens / output_tokens 欄位,由 /api/generate 回應的 + prompt_eval_count + eval_count 解析。解 ai_call_logger 的 + openclaw_bot_main token=0 假數據問題(誤導 token 日報統計)。 + """ success: bool content: str model: str error: Optional[str] = None total_duration: Optional[float] = None host: Optional[str] = None + input_tokens: int = 0 # prompt_eval_count + output_tokens: int = 0 # eval_count class OllamaService: @@ -264,6 +272,9 @@ class OllamaService: model=model, total_duration=data.get('total_duration', 0) / 1e9, host=current_host, + # Phase 13 補強:解 token=0 假數據(A4 已知 limitation) + input_tokens=int(data.get('prompt_eval_count', 0) or 0), + output_tokens=int(data.get('eval_count', 0) or 0), ) # HTTP 非 200:標 unhealthy + 嘗試下一台 last_error = f"HTTP {response.status_code}: {response.text[:200]}" diff --git a/tests/test_ollama_retry_chain.py b/tests/test_ollama_retry_chain.py new file mode 100644 index 0000000..ee97a9e --- /dev/null +++ b/tests/test_ollama_retry_chain.py @@ -0,0 +1,251 @@ +""" +tests/test_ollama_retry_chain.py +───────────────────────────────────────────────────────────────── +Operation Ollama-First v5.0 / Phase 13 補強 — 三主機 retry 鏈驗證 + +驗證 hotfix e862a90 (generate retry) + 6572d52 (embed retry) 的邏輯: + 1. self.host @property lazy resolve + 2. generate / generate_embedding 失敗自動 retry 下一台主機(最多 3 次) + 3. mark_unhealthy 後下次 self.host 取新主機 + 4. caller 顯式 host=... 凍結不 retry + 5. 三主機都失敗 → return failure(不無限迴圈) + 6. Phase 13 補強:OllamaResponse.input_tokens/output_tokens 解析 + +紀律:不打真實 Ollama,全 mock requests.post。 +""" + +from unittest.mock import patch, MagicMock + +import pytest + + +# ═══════════════════════════════════════════════════════════════════════════ +# Fixtures +# ═══════════════════════════════════════════════════════════════════════════ + +@pytest.fixture(autouse=True) +def _reset_state(): + """每 test 清 unhealthy marks + resolve cache""" + import services.ollama_service as oss + oss._unhealthy_marks.clear() + oss._resolved_host_cache['host'] = None + oss._resolved_host_cache['ts'] = 0 + yield + oss._unhealthy_marks.clear() + oss._resolved_host_cache['host'] = None + oss._resolved_host_cache['ts'] = 0 + + +# ═══════════════════════════════════════════════════════════════════════════ +# T1: self.host 改 @property(不再凍結 instance) +# ═══════════════════════════════════════════════════════════════════════════ + +def test_self_host_is_lazy_property_not_frozen(): + """OllamaService.host 應為 property,每次存取走 resolve_ollama_host()""" + from services.ollama_service import OllamaService + + svc = OllamaService() + # property 不在 __init__ 凍結 + assert svc._explicit_host is None + + # 存取 .host 走 resolve + with patch('services.ollama_service.resolve_ollama_host', return_value='http://test:11434') as mock_resolve: + h1 = svc.host + h2 = svc.host + assert h1 == h2 == 'http://test:11434' + assert mock_resolve.call_count >= 2 # 每次 access 都呼叫 + + +def test_explicit_host_freezes(): + """caller 顯式傳 host → _explicit_host 凍結,不走 lazy resolve""" + from services.ollama_service import OllamaService + + svc = OllamaService(host='http://explicit:11434') + assert svc._explicit_host == 'http://explicit:11434' + + with patch('services.ollama_service.resolve_ollama_host') as mock_resolve: + h = svc.host + assert h == 'http://explicit:11434' + mock_resolve.assert_not_called() + + +# ═══════════════════════════════════════════════════════════════════════════ +# T2: generate 三主機 retry 鏈(核心 hotfix e862a90) +# ═══════════════════════════════════════════════════════════════════════════ + +def test_generate_retries_on_first_host_timeout(): + """第一台 timeout → mark_unhealthy → 第二台成功""" + import requests + from services.ollama_service import OllamaService + + svc = OllamaService() + + # 序列:第一次 timeout,第二次成功 + fake_ok = MagicMock(status_code=200) + fake_ok.json.return_value = { + 'response': 'OK from secondary', + 'prompt_eval_count': 100, 'eval_count': 50, + } + call_results = [requests.Timeout('first host'), fake_ok] + + # mock host property 回傳兩台不同主機 + hosts = ['http://host1:11434', 'http://host2:11434'] + + with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \ + patch('services.ollama_service.requests.post', side_effect=call_results) as mock_post: + resp = svc.generate('test prompt') + + assert resp.success is True + assert resp.content == 'OK from secondary' + assert resp.input_tokens == 100 + assert resp.output_tokens == 50 + assert mock_post.call_count == 2 # 第一次 timeout, 第二次成功 + + +def test_generate_all_three_hosts_fail_returns_failure(): + """三主機都失敗 → success=False,不無限迴圈""" + import requests + from services.ollama_service import OllamaService + + svc = OllamaService() + hosts = ['http://h1:11434', 'http://h2:11434', 'http://h3:11434'] + + with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \ + patch('services.ollama_service.requests.post', + side_effect=requests.Timeout('all timeout')) as mock_post: + resp = svc.generate('test') + + assert resp.success is False + assert mock_post.call_count == 3 # 試了 3 次 + assert 'all 3 hosts failed' in (resp.error or '') + + +def test_generate_same_host_breaks_loop(): + """resolve 連 3 次回同主機(cache 沒過期)→ 不無限迴圈""" + import requests + from services.ollama_service import OllamaService + + svc = OllamaService() + same_host = 'http://stuck:11434' + + with patch('services.ollama_service.resolve_ollama_host', return_value=same_host), \ + patch('services.ollama_service.requests.post', + side_effect=requests.Timeout('stuck')) as mock_post: + resp = svc.generate('test') + + assert resp.success is False + # 第一次試 stuck → 失敗 → 第二次 self.host 還是 stuck(cache 還在)→ break + assert mock_post.call_count == 1 + + +def test_generate_token_parsing_phase13(): + """Phase 13 補強:OllamaResponse 解 prompt_eval_count + eval_count""" + from services.ollama_service import OllamaService + + svc = OllamaService() + fake_resp = MagicMock(status_code=200) + fake_resp.json.return_value = { + 'response': 'hello', + 'prompt_eval_count': 250, + 'eval_count': 80, + 'total_duration': 1500000000, # 1.5s in nanoseconds + } + + with patch('services.ollama_service.resolve_ollama_host', return_value='http://x:11434'), \ + patch('services.ollama_service.requests.post', return_value=fake_resp): + resp = svc.generate('test') + + assert resp.success is True + assert resp.input_tokens == 250 + assert resp.output_tokens == 80 + assert resp.total_duration == 1.5 # 轉換為秒 + + +# ═══════════════════════════════════════════════════════════════════════════ +# T3: generate_embedding 三主機 retry(hotfix 6572d52) +# ═══════════════════════════════════════════════════════════════════════════ + +def test_embedding_retries_on_first_host_timeout(): + """embed 第一台 timeout → 第二台成功""" + import requests + from services.ollama_service import OllamaService + + svc = OllamaService() + fake_ok = MagicMock(status_code=200) + fake_ok.json.return_value = {'embeddings': [[0.1, 0.2, 0.3]]} + + hosts = ['http://h1:11434', 'http://h2:11434'] + call_seq = [requests.Timeout('h1'), fake_ok] + + with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \ + patch.dict('os.environ', {}, clear=False), \ + patch('services.ollama_service.requests.post', side_effect=call_seq) as mock_post: + # 確保不走 EMBEDDING_HOST env + import os + os.environ.pop('EMBEDDING_HOST', None) + vec = svc.generate_embedding('test text') + + assert vec == [0.1, 0.2, 0.3] + assert mock_post.call_count == 2 # 第二台成功 + + +def test_embedding_explicit_host_no_retry(): + """caller 顯式 host=... → 不 retry,失敗即回 []""" + import requests + from services.ollama_service import OllamaService + + svc = OllamaService() + + with patch('services.ollama_service.requests.post', + side_effect=requests.Timeout('explicit host fail')) as mock_post: + vec = svc.generate_embedding('test', host='http://explicit:11434') + + assert vec == [] + # 顯式 host 凍結不 retry → 嘗試一次(/api/embed),可能再試 /api/embeddings legacy + # 但 retry 鏈不啟動(沒有 for-loop 迭代不同主機) + # 我們驗:post 呼叫次數 ≤ 2(最多主路徑+legacy 各一次,不會 retry 不同主機) + assert mock_post.call_count <= 2 + + +def test_embedding_all_three_hosts_fail_returns_empty(): + """embed 三主機都失敗 → 回 []""" + import requests + from services.ollama_service import OllamaService + + svc = OllamaService() + hosts = ['http://h1:11434', 'http://h2:11434', 'http://h3:11434'] + + with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \ + patch.dict('os.environ', {}, clear=False), \ + patch('services.ollama_service.requests.post', + side_effect=requests.Timeout('all fail')) as mock_post: + import os + os.environ.pop('EMBEDDING_HOST', None) + vec = svc.generate_embedding('test') + + assert vec == [] + # 三主機 retry,每次主路徑+legacy = 6 次(但 legacy 也是 timeout) + # 實際看 _embed_one 邏輯:timeout 的 except 直接 mark_unhealthy 不試 legacy + # 所以是 3 次(主路徑 timeout × 3 主機) + assert mock_post.call_count == 3 + + +# ═══════════════════════════════════════════════════════════════════════════ +# T4: mark_unhealthy 觸發 cache 失效(驗 self.host 取新主機) +# ═══════════════════════════════════════════════════════════════════════════ + +def test_mark_unhealthy_invalidates_cache_for_next_host(): + """generate 失敗 mark_unhealthy → 下次 self.host 走 resolve 取新主機""" + import services.ollama_service as oss + from services.ollama_service import OllamaService + + svc = OllamaService() + + # 先 cache 一個主機 + oss._resolved_host_cache['host'] = 'http://primary:11434' + oss._resolved_host_cache['ts'] = __import__('time').time() + + # mark_unhealthy 應清空 cache + oss.mark_unhealthy('http://primary:11434') + + assert oss._resolved_host_cache['host'] is None