Files
ewoooc/tests/test_ollama_retry_chain.py
OoO 942193db2a
All checks were successful
CD Pipeline / deploy (push) Successful in 2m41s
feat(p13): OllamaResponse token 補欄 + COST_TABLE 補新模型 + retry 鏈 unit test
Operation Ollama-First v5.0 / Phase 13 補強

(A) services/ollama_service.py — OllamaResponse 加 input_tokens/output_tokens
- A4 Phase 1 已知 limitation 修補:openclaw_bot_main token=0 假數據誤導日報
- generate() 解 prompt_eval_count + eval_count 寫 OllamaResponse
- 影響:ai_call_logger 收到正確 token 數,token 日報 Ollama 占比準確

(B) services/ai_call_logger.py — COST_TABLE 補 GCP 已拉/候選模型
- qwen2.5:7b-instruct (Phase 3 A7 OpenClaw Q&A 預設)
- qwen3:14b (Phase 3 A9 Nemotron + A7 升級候選)
- qwen2.5-coder:32b (Phase 8 候選)
- qwen2-vl:7b (Phase 13+ PPT vision 候選)
- deepseek-r1:14b / gemma3:4b (推理增強 / 輕量)
- 全部 cost=0(Ollama 自架)
- 解 logger.warning「unknown model cost」誤報

(J) tests/test_ollama_retry_chain.py (10 unit tests) — 驗 hotfix e862a90/6572d52
- T1 self.host @property lazy resolve
- T2 explicit host 凍結不 retry
- T3 generate 第一台 timeout → 第二台成功(核心 retry 鏈)
- T4 三主機都失敗 → success=False
- T5 cache 卡同主機 → break 不無限迴圈
- T6 Phase 13 token 解析驗證
- T7-T9 generate_embedding 同類驗證
- T10 mark_unhealthy 清 resolve cache

regression: 全戰役 14 test 檔仍 zero regression

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 10:07:33 +08:00

252 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
tests/test_ollama_retry_chain.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 13 補強 — 三主機 retry 鏈驗證
驗證 hotfix e862a90 (generate retry) + 6572d52 (embed retry) 的邏輯:
1. self.host @property lazy resolve
2. generate / generate_embedding 失敗自動 retry 下一台主機(最多 3 次)
3. mark_unhealthy 後下次 self.host 取新主機
4. caller 顯式 host=... 凍結不 retry
5. 三主機都失敗 → return failure,不無限迴圈
6. Phase 13 補強:OllamaResponse.input_tokens/output_tokens 解析
紀律:不打真實 Ollama,全 mock requests.post。
"""
from unittest.mock import patch, MagicMock
import pytest
# ═══════════════════════════════════════════════════════════════════════════
# Fixtures
# ═══════════════════════════════════════════════════════════════════════════
@pytest.fixture(autouse=True)
def _reset_state():
    """Reset the module-level host state before and after every test.

    Empties ``_unhealthy_marks`` and blanks the ``host`` / ``ts`` entries of
    ``_resolved_host_cache`` in ``services.ollama_service``, once before the
    test body runs and once more after it finishes.
    """
    import services.ollama_service as oss

    def _wipe():
        # Same reset is needed on both sides of the yield.
        oss._unhealthy_marks.clear()
        for key, blank in (('host', None), ('ts', 0)):
            oss._resolved_host_cache[key] = blank

    _wipe()
    yield
    _wipe()
# ═══════════════════════════════════════════════════════════════════════════
# T1: self.host 改 @property,不再凍結 instance
# ═══════════════════════════════════════════════════════════════════════════
def test_self_host_is_lazy_property_not_frozen():
    """``OllamaService.host`` calls ``resolve_ollama_host()`` on every access."""
    from services.ollama_service import OllamaService

    service = OllamaService()
    # Nothing is pinned on the instance when no host is passed in.
    assert service._explicit_host is None

    with patch('services.ollama_service.resolve_ollama_host',
               return_value='http://test:11434') as resolver:
        first = service.host
        second = service.host

    assert first == 'http://test:11434'
    assert second == 'http://test:11434'
    # Two reads of .host must have gone through the resolver at least twice.
    assert resolver.call_count >= 2
def test_explicit_host_freezes():
    """A host passed to the constructor is returned as-is, bypassing the resolver."""
    from services.ollama_service import OllamaService

    service = OllamaService(host='http://explicit:11434')
    assert service._explicit_host == 'http://explicit:11434'

    with patch('services.ollama_service.resolve_ollama_host') as resolver:
        observed = service.host

    assert observed == 'http://explicit:11434'
    resolver.assert_not_called()
# ═══════════════════════════════════════════════════════════════════════════
# T2: generate 三主機 retry 鏈(核心 hotfix e862a90)
# ═══════════════════════════════════════════════════════════════════════════
def test_generate_retries_on_first_host_timeout():
    """generate() succeeds on a second request after the first one times out."""
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()

    # Reply served by the second request.
    second_reply = MagicMock(status_code=200)
    second_reply.json.return_value = {
        'response': 'OK from secondary',
        'prompt_eval_count': 100,
        'eval_count': 50,
    }

    # The resolver hands out a different host on each call; the first post
    # raises a timeout and the second returns the canned reply.
    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=['http://host1:11434', 'http://host2:11434'],
    )
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=[requests.Timeout('first host'), second_reply],
    )
    with resolver_patch, post_patch as post_mock:
        result = service.generate('test prompt')

    assert result.success is True
    assert result.content == 'OK from secondary'
    assert result.input_tokens == 100
    assert result.output_tokens == 50
    # One timed-out request followed by one successful request.
    assert post_mock.call_count == 2
def test_generate_all_three_hosts_fail_returns_failure():
    """generate() reports failure after three timed-out hosts instead of looping."""
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()

    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=['http://h1:11434', 'http://h2:11434', 'http://h3:11434'],
    )
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=requests.Timeout('all timeout'),
    )
    with resolver_patch, post_patch as post_mock:
        result = service.generate('test')

    assert result.success is False
    # Exactly three attempts were made.
    assert post_mock.call_count == 3
    assert 'all 3 hosts failed' in (result.error or '')
def test_generate_same_host_breaks_loop():
    """generate() stops after one request when the resolver keeps returning the same host."""
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()

    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        return_value='http://stuck:11434',
    )
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=requests.Timeout('stuck'),
    )
    with resolver_patch, post_patch as post_mock:
        result = service.generate('test')

    assert result.success is False
    # The first attempt on the stuck host fails; the next lookup yields the
    # same host again, so the retry loop is expected to break rather than
    # issue another request.
    assert post_mock.call_count == 1
def test_generate_token_parsing_phase13():
    """generate() copies token counts and duration from the Ollama reply.

    Checks that ``prompt_eval_count`` / ``eval_count`` surface as
    ``input_tokens`` / ``output_tokens`` and that ``total_duration``
    (nanoseconds in the reply) is exposed in seconds.
    """
    from services.ollama_service import OllamaService

    svc = OllamaService()
    fake_resp = MagicMock(status_code=200)
    fake_resp.json.return_value = {
        'response': 'hello',
        'prompt_eval_count': 250,
        'eval_count': 80,
        'total_duration': 1500000000,  # 1.5s in nanoseconds
    }
    with patch('services.ollama_service.resolve_ollama_host', return_value='http://x:11434'), \
         patch('services.ollama_service.requests.post', return_value=fake_resp):
        resp = svc.generate('test')

    assert resp.success is True
    assert resp.input_tokens == 250
    assert resp.output_tokens == 80
    # Compare with a tolerance: the ns -> s conversion is floating-point and
    # e.g. multiplying by 1e-9 does not yield exactly 1.5.
    assert resp.total_duration == pytest.approx(1.5)
# ═══════════════════════════════════════════════════════════════════════════
# T3: generate_embedding 三主機 retry(hotfix 6572d52)
# ═══════════════════════════════════════════════════════════════════════════
def test_embedding_retries_on_first_host_timeout():
    """generate_embedding() returns the vector from a second request after a timeout."""
    import os
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()

    ok_reply = MagicMock(status_code=200)
    ok_reply.json.return_value = {'embeddings': [[0.1, 0.2, 0.3]]}

    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=['http://h1:11434', 'http://h2:11434'],
    )
    # patch.dict snapshots os.environ and restores it on exit, so the pop
    # below only affects this test.
    env_patch = patch.dict('os.environ', {}, clear=False)
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=[requests.Timeout('h1'), ok_reply],
    )
    with resolver_patch, env_patch, post_patch as post_mock:
        # Make sure an EMBEDDING_HOST environment override is not in play.
        os.environ.pop('EMBEDDING_HOST', None)
        embedding = service.generate_embedding('test text')

    assert embedding == [0.1, 0.2, 0.3]
    # First request timed out, second one succeeded.
    assert post_mock.call_count == 2
def test_embedding_explicit_host_no_retry():
    """generate_embedding(host=...) returns [] on failure without walking other hosts."""
    import requests
    from services.ollama_service import OllamaService

    svc = OllamaService()
    with patch('services.ollama_service.requests.post',
               side_effect=requests.Timeout('explicit host fail')) as mock_post:
        vec = svc.generate_embedding('test', host='http://explicit:11434')

    assert vec == []
    # With an explicit host the multi-host retry chain must not start. Per the
    # original author's note the host is tried once on /api/embed and possibly
    # once more on the legacy /api/embeddings path, so at most two posts.
    # The lower bound guards against the check passing vacuously when no
    # request was attempted at all.
    assert 1 <= mock_post.call_count <= 2
def test_embedding_all_three_hosts_fail_returns_empty():
    """generate_embedding() returns [] once three hosts in a row have timed out."""
    import os
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()

    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=['http://h1:11434', 'http://h2:11434', 'http://h3:11434'],
    )
    # patch.dict restores os.environ on exit, scoping the pop below to this test.
    env_patch = patch.dict('os.environ', {}, clear=False)
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=requests.Timeout('all fail'),
    )
    with resolver_patch, env_patch, post_patch as post_mock:
        os.environ.pop('EMBEDDING_HOST', None)
        embedding = service.generate_embedding('test')

    assert embedding == []
    # Primary + legacy endpoint per host would give six posts. According to
    # the original author's reading of _embed_one, a timeout marks the host
    # unhealthy without trying the legacy endpoint, hence one post per host.
    assert post_mock.call_count == 3
# ═══════════════════════════════════════════════════════════════════════════
# T4: mark_unhealthy 觸發 cache 失效(驗 self.host 取新主機)
# ═══════════════════════════════════════════════════════════════════════════
def test_mark_unhealthy_invalidates_cache_for_next_host():
    """mark_unhealthy() on the cached host clears the resolved-host cache entry."""
    import time

    import services.ollama_service as oss

    primary = 'http://primary:11434'

    # Seed the cache as if `primary` had just been resolved.
    oss._resolved_host_cache['host'] = primary
    oss._resolved_host_cache['ts'] = time.time()

    # Marking that host unhealthy should drop it from the cache.
    oss.mark_unhealthy(primary)
    assert oss._resolved_host_cache['host'] is None