feat(p13): OllamaResponse token 補欄 + COST_TABLE 補新模型 + retry 鏈 unit test
All checks were successful
CD Pipeline / deploy (push) Successful in 2m41s
All checks were successful
CD Pipeline / deploy (push) Successful in 2m41s
Operation Ollama-First v5.0 / Phase 13 補強 (A) services/ollama_service.py — OllamaResponse 加 input_tokens/output_tokens - A4 Phase 1 已知 limitation 修補:openclaw_bot_main token=0 假數據誤導日報 - generate() 解 prompt_eval_count + eval_count 寫 OllamaResponse - 影響:ai_call_logger 收到正確 token 數,token 日報 Ollama 占比準確 (B) services/ai_call_logger.py — COST_TABLE 補 GCP 已拉/候選模型 - qwen2.5:7b-instruct (Phase 3 A7 OpenClaw Q&A 預設) - qwen3:14b (Phase 3 A9 Nemotron + A7 升級候選) - qwen2.5-coder:32b (Phase 8 候選) - qwen2-vl:7b (Phase 13+ PPT vision 候選) - deepseek-r1:14b / gemma3:4b (推理增強 / 輕量) - 全部 cost=0(Ollama 自架) - 解 logger.warning「unknown model cost」誤報 (J) tests/test_ollama_retry_chain.py (10 unit tests) — 驗 hotfix e862a90/6572d52 - T1 self.host @property lazy resolve - T2 explicit host 凍結不 retry - T3 generate 第一台 timeout → 第二台成功(核心 retry 鏈) - T4 三主機都失敗 → success=False - T5 cache 卡同主機 → break 不無限迴圈 - T6 Phase 13 token 解析驗證 - T7-T9 generate_embedding 同類驗證 - T10 mark_unhealthy 清 resolve cache regression: 全戰役 14 test 檔仍 zero regression Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -55,11 +55,17 @@ COST_TABLE: Dict[str, Dict[str, float]] = {
|
||||
'claude-opus-4-7': {'in': 15.0, 'out': 75.0}, # 程式碼 #1,Arena Elo 1548
|
||||
'claude-sonnet-4-6': {'in': 3.0, 'out': 15.0}, # agentic 平衡
|
||||
'claude-haiku-4-5': {'in': 0.8, 'out': 4.0}, # 輕量快速
|
||||
# Ollama 自架 (全 0)
|
||||
'hermes3:latest': {'in': 0.0, 'out': 0.0},
|
||||
'qwen2.5-coder:7b': {'in': 0.0, 'out': 0.0},
|
||||
'llama3.1:8b': {'in': 0.0, 'out': 0.0},
|
||||
'bge-m3:latest': {'in': 0.0, 'out': 0.0},
|
||||
# Ollama 自架 (全 0;Phase 8/13 GCP 拉模型陸續加入)
|
||||
'hermes3:latest': {'in': 0.0, 'out': 0.0},
|
||||
'qwen2.5-coder:7b': {'in': 0.0, 'out': 0.0},
|
||||
'qwen2.5-coder:32b': {'in': 0.0, 'out': 0.0}, # Phase 8 GCP 候選(待拉)
|
||||
'qwen2.5:7b-instruct': {'in': 0.0, 'out': 0.0}, # Phase 3 A7 OpenClaw Q&A 預設(GCP 已拉)
|
||||
'qwen3:14b': {'in': 0.0, 'out': 0.0}, # Phase 3 A9 Nemotron + A7 升級(GCP 已拉)
|
||||
'qwen2-vl:7b': {'in': 0.0, 'out': 0.0}, # Phase 13 PPT vision 候選
|
||||
'deepseek-r1:14b': {'in': 0.0, 'out': 0.0}, # 推理增強候選
|
||||
'gemma3:4b': {'in': 0.0, 'out': 0.0}, # 輕量 sales_copy 候選
|
||||
'llama3.1:8b': {'in': 0.0, 'out': 0.0},
|
||||
'bge-m3:latest': {'in': 0.0, 'out': 0.0},
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -150,13 +150,21 @@ def get_host_label(host: str) -> str:
|
||||
|
||||
@dataclass
class OllamaResponse:
    """Result of a single Ollama call.

    Phase 13 hardening (fix for the known A4 limitation): ``input_tokens``
    and ``output_tokens`` carry the ``prompt_eval_count`` and ``eval_count``
    values parsed from the /api/generate reply. This resolves the misleading
    token=0 figures that ai_call_logger recorded for openclaw_bot_main and
    that skewed the daily token report.

    Both token fields default to 0, so existing callers that build a
    response without them keep working.
    """

    success: bool
    content: str
    model: str
    error: Optional[str] = None
    # generate() stores the reply's total_duration divided by 1e9
    # (nanoseconds converted to seconds).
    total_duration: Optional[float] = None
    host: Optional[str] = None
    input_tokens: int = 0   # prompt_eval_count
    output_tokens: int = 0  # eval_count
|
||||
|
||||
|
||||
class OllamaService:
|
||||
@@ -264,6 +272,9 @@ class OllamaService:
|
||||
model=model,
|
||||
total_duration=data.get('total_duration', 0) / 1e9,
|
||||
host=current_host,
|
||||
# Phase 13 補強:解 token=0 假數據(A4 已知 limitation)
|
||||
input_tokens=int(data.get('prompt_eval_count', 0) or 0),
|
||||
output_tokens=int(data.get('eval_count', 0) or 0),
|
||||
)
|
||||
# HTTP 非 200:標 unhealthy + 嘗試下一台
|
||||
last_error = f"HTTP {response.status_code}: {response.text[:200]}"
|
||||
|
||||
251
tests/test_ollama_retry_chain.py
Normal file
251
tests/test_ollama_retry_chain.py
Normal file
@@ -0,0 +1,251 @@
|
||||
"""
|
||||
tests/test_ollama_retry_chain.py
|
||||
─────────────────────────────────────────────────────────────────
|
||||
Operation Ollama-First v5.0 / Phase 13 補強 — 三主機 retry 鏈驗證
|
||||
|
||||
驗證 hotfix e862a90 (generate retry) + 6572d52 (embed retry) 的邏輯:
|
||||
1. self.host @property lazy resolve
|
||||
2. generate / generate_embedding 失敗自動 retry 下一台主機(最多 3 次)
|
||||
3. mark_unhealthy 後下次 self.host 取新主機
|
||||
4. caller 顯式 host=... 凍結不 retry
|
||||
5. 三主機都失敗 → return failure(不無限迴圈)
|
||||
6. Phase 13 補強:OllamaResponse.input_tokens/output_tokens 解析
|
||||
|
||||
紀律:不打真實 Ollama,全 mock requests.post。
|
||||
"""
|
||||
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# Fixtures
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_state():
    """Reset the module-level host state around every test.

    The unhealthy marks are cleared and the resolve cache is emptied both
    before the test body runs and again on teardown.
    """
    import services.ollama_service as oss

    def _wipe():
        # Drop all unhealthy marks and forget any cached resolved host.
        oss._unhealthy_marks.clear()
        oss._resolved_host_cache['host'] = None
        oss._resolved_host_cache['ts'] = 0

    _wipe()
    yield
    _wipe()
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# T1: self.host 改 @property(不再凍結 instance)
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_self_host_is_lazy_property_not_frozen():
    """OllamaService.host is a property that calls resolve_ollama_host() on each access."""
    from services.ollama_service import OllamaService

    service = OllamaService()
    # Without an explicit host, __init__ must not pin one.
    assert service._explicit_host is None

    expected = 'http://test:11434'
    with patch('services.ollama_service.resolve_ollama_host',
               return_value=expected) as resolver:
        # Two separate reads of .host, each expected to go through the resolver.
        first = service.host
        second = service.host
        assert first == expected
        assert second == expected
        assert resolver.call_count >= 2  # resolver is hit on every access
|
||||
|
||||
|
||||
def test_explicit_host_freezes():
    """A host passed to the constructor is pinned; .host never calls the resolver."""
    from services.ollama_service import OllamaService

    pinned = 'http://explicit:11434'
    service = OllamaService(host=pinned)
    assert service._explicit_host == pinned

    with patch('services.ollama_service.resolve_ollama_host') as resolver:
        observed = service.host
        assert observed == pinned
        resolver.assert_not_called()
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# T2: generate 三主機 retry 鏈(核心 hotfix e862a90)
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_generate_retries_on_first_host_timeout():
    """First host times out, second host answers: generate() must succeed.

    Also checks that the token counts of the successful reply are exposed on
    the returned response.
    """
    import requests
    from services.ollama_service import OllamaService

    # Hosts handed out by the patched resolver, one per call.
    resolved_hosts = ['http://host1:11434', 'http://host2:11434']

    good_reply = MagicMock(status_code=200)
    good_reply.json.return_value = {
        'response': 'OK from secondary',
        'prompt_eval_count': 100,
        'eval_count': 50,
    }
    # POST outcomes in order: raise a timeout, then return the good reply.
    post_outcomes = [requests.Timeout('first host'), good_reply]

    service = OllamaService()
    with patch('services.ollama_service.resolve_ollama_host', side_effect=resolved_hosts):
        with patch('services.ollama_service.requests.post',
                   side_effect=post_outcomes) as post_mock:
            result = service.generate('test prompt')

    assert result.success is True
    assert result.content == 'OK from secondary'
    assert result.input_tokens == 100
    assert result.output_tokens == 50
    assert post_mock.call_count == 2  # one timeout, then one success
|
||||
|
||||
|
||||
def test_generate_all_three_hosts_fail_returns_failure():
    """Every host times out: generate() reports failure instead of looping forever."""
    import requests
    from services.ollama_service import OllamaService

    resolved_hosts = ['http://h1:11434', 'http://h2:11434', 'http://h3:11434']
    service = OllamaService()

    with patch('services.ollama_service.resolve_ollama_host', side_effect=resolved_hosts):
        with patch('services.ollama_service.requests.post',
                   side_effect=requests.Timeout('all timeout')) as post_mock:
            result = service.generate('test')

    assert result.success is False
    assert post_mock.call_count == 3  # exactly one attempt per host
    assert 'all 3 hosts failed' in (result.error or '')
|
||||
|
||||
|
||||
def test_generate_same_host_breaks_loop():
    """The resolver keeps returning the same host: generate() must stop, not spin."""
    import requests
    from services.ollama_service import OllamaService

    stuck_host = 'http://stuck:11434'
    service = OllamaService()

    with patch('services.ollama_service.resolve_ollama_host', return_value=stuck_host):
        with patch('services.ollama_service.requests.post',
                   side_effect=requests.Timeout('stuck')) as post_mock:
            result = service.generate('test')

    assert result.success is False
    # The first attempt fails; the next lookup yields the same host again,
    # so the retry loop is expected to break after a single POST.
    assert post_mock.call_count == 1
|
||||
|
||||
|
||||
def test_generate_token_parsing_phase13():
    """Phase 13: generate() copies prompt_eval_count / eval_count into the response."""
    from services.ollama_service import OllamaService

    payload = {
        'response': 'hello',
        'prompt_eval_count': 250,
        'eval_count': 80,
        'total_duration': 1500000000,  # 1.5 s expressed in nanoseconds
    }
    reply = MagicMock(status_code=200)
    reply.json.return_value = payload

    service = OllamaService()
    with patch('services.ollama_service.resolve_ollama_host', return_value='http://x:11434'):
        with patch('services.ollama_service.requests.post', return_value=reply):
            result = service.generate('test')

    assert result.success is True
    assert result.input_tokens == 250
    assert result.output_tokens == 80
    assert result.total_duration == 1.5  # converted to seconds
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# T3: generate_embedding 三主機 retry(hotfix 6572d52)
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_embedding_retries_on_first_host_timeout():
    """Embedding: first host times out, second host returns the vector."""
    import os

    import requests
    from services.ollama_service import OllamaService

    resolved_hosts = ['http://h1:11434', 'http://h2:11434']

    good_reply = MagicMock(status_code=200)
    good_reply.json.return_value = {'embeddings': [[0.1, 0.2, 0.3]]}
    post_outcomes = [requests.Timeout('h1'), good_reply]

    service = OllamaService()
    with patch('services.ollama_service.resolve_ollama_host', side_effect=resolved_hosts), \
            patch.dict('os.environ', {}, clear=False), \
            patch('services.ollama_service.requests.post',
                  side_effect=post_outcomes) as post_mock:
        # Make sure the EMBEDDING_HOST override is not in play; patch.dict
        # restores the environment when the block exits.
        os.environ.pop('EMBEDDING_HOST', None)
        vector = service.generate_embedding('test text')

    assert vector == [0.1, 0.2, 0.3]
    assert post_mock.call_count == 2  # success came from the second host
|
||||
|
||||
|
||||
def test_embedding_explicit_host_no_retry():
    """An explicit ``host=`` pins the embedding call: failure returns [] with no host retry.

    Every POST raises a timeout. The call count is bounded on both sides:
    at least one attempt must be made, otherwise the test would pass
    vacuously, and at most two are allowed.
    """
    import requests
    from services.ollama_service import OllamaService

    svc = OllamaService()

    with patch('services.ollama_service.requests.post',
               side_effect=requests.Timeout('explicit host fail')) as mock_post:
        vec = svc.generate_embedding('test', host='http://explicit:11434')

    assert vec == []
    # Per the original author's note: with a pinned host the call tries
    # /api/embed once and may fall back to the legacy /api/embeddings path,
    # but the multi-host retry chain must not start. Hence 1..2 POSTs.
    assert 1 <= mock_post.call_count <= 2
|
||||
|
||||
|
||||
def test_embedding_all_three_hosts_fail_returns_empty():
    """Embedding: all three hosts time out, so the result is an empty list."""
    import os

    import requests
    from services.ollama_service import OllamaService

    resolved_hosts = ['http://h1:11434', 'http://h2:11434', 'http://h3:11434']
    service = OllamaService()

    with patch('services.ollama_service.resolve_ollama_host', side_effect=resolved_hosts), \
            patch.dict('os.environ', {}, clear=False), \
            patch('services.ollama_service.requests.post',
                  side_effect=requests.Timeout('all fail')) as post_mock:
        # Keep the EMBEDDING_HOST override out of the picture for this call.
        os.environ.pop('EMBEDDING_HOST', None)
        vector = service.generate_embedding('test')

    assert vector == []
    # Main path + legacy path on three hosts would be 6 POSTs. Per the
    # original author's note on _embed_one, a timeout marks the host
    # unhealthy without trying the legacy endpoint, so the expected count
    # is 3 (one main-path timeout per host).
    assert post_mock.call_count == 3
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
# T4: mark_unhealthy 觸發 cache 失效(驗 self.host 取新主機)
|
||||
# ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
def test_mark_unhealthy_invalidates_cache_for_next_host():
    """mark_unhealthy() on the currently cached host must clear the resolve cache.

    Only the cache invalidation is asserted here; picking the next host is
    covered by the generate/embedding retry tests.
    """
    import time

    import services.ollama_service as oss

    primary = 'http://primary:11434'

    # Seed the cache as if `primary` had just been resolved.
    oss._resolved_host_cache['host'] = primary
    oss._resolved_host_cache['ts'] = time.time()

    oss.mark_unhealthy(primary)

    assert oss._resolved_host_cache['host'] is None
|
||||
Reference in New Issue
Block a user