feat(p13): OllamaResponse token 補欄 + COST_TABLE 補新模型 + retry 鏈 unit test
All checks were successful
CD Pipeline / deploy (push) Successful in 2m41s

Operation Ollama-First v5.0 / Phase 13 補強

(A) services/ollama_service.py — OllamaResponse 加 input_tokens/output_tokens
- A4 Phase 1 已知 limitation 修補:openclaw_bot_main token=0 假數據誤導日報
- generate() 解 prompt_eval_count + eval_count 寫 OllamaResponse
- 影響:ai_call_logger 收到正確 token 數,token 日報 Ollama 占比準確

(B) services/ai_call_logger.py — COST_TABLE 補 GCP 已拉/候選模型
- qwen2.5:7b-instruct (Phase 3 A7 OpenClaw Q&A 預設)
- qwen3:14b (Phase 3 A9 Nemotron + A7 升級候選)
- qwen2.5-coder:32b (Phase 8 候選)
- qwen2-vl:7b (Phase 13+ PPT vision 候選)
- deepseek-r1:14b / gemma3:4b (推理增強 / 輕量)
- 全部 cost=0(Ollama 自架)
- 解 logger.warning「unknown model cost」誤報

(J) tests/test_ollama_retry_chain.py (10 unit tests) — 驗 hotfix e862a90/6572d52
- T1 self.host @property lazy resolve
- T2 explicit host 凍結不 retry
- T3 generate 第一台 timeout → 第二台成功(核心 retry 鏈)
- T4 三主機都失敗 → success=False
- T5 cache 卡同主機 → break 不無限迴圈
- T6 Phase 13 token 解析驗證
- T7-T9 generate_embedding 同類驗證
- T10 mark_unhealthy 清 resolve cache

regression: 全戰役 14 test 檔仍 zero regression

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
OoO
2026-05-04 10:07:33 +08:00
parent c1fd913a35
commit 942193db2a
3 changed files with 274 additions and 6 deletions

View File

@@ -55,11 +55,17 @@ COST_TABLE: Dict[str, Dict[str, float]] = {
'claude-opus-4-7': {'in': 15.0, 'out': 75.0}, # 程式碼 #1Arena Elo 1548
'claude-sonnet-4-6': {'in': 3.0, 'out': 15.0}, # agentic 平衡
'claude-haiku-4-5': {'in': 0.8, 'out': 4.0}, # 輕量快速
# Ollama 自架 (全 0)
'hermes3:latest': {'in': 0.0, 'out': 0.0},
'qwen2.5-coder:7b': {'in': 0.0, 'out': 0.0},
'llama3.1:8b': {'in': 0.0, 'out': 0.0},
'bge-m3:latest': {'in': 0.0, 'out': 0.0},
# Ollama 自架 (全 0Phase 8/13 GCP 拉模型陸續加入)
'hermes3:latest': {'in': 0.0, 'out': 0.0},
'qwen2.5-coder:7b': {'in': 0.0, 'out': 0.0},
'qwen2.5-coder:32b': {'in': 0.0, 'out': 0.0}, # Phase 8 GCP 候選(待拉)
'qwen2.5:7b-instruct': {'in': 0.0, 'out': 0.0}, # Phase 3 A7 OpenClaw Q&A 預設GCP 已拉)
'qwen3:14b': {'in': 0.0, 'out': 0.0}, # Phase 3 A9 Nemotron + A7 升級GCP 已拉)
'qwen2-vl:7b': {'in': 0.0, 'out': 0.0}, # Phase 13 PPT vision 候選
'deepseek-r1:14b': {'in': 0.0, 'out': 0.0}, # 推理增強候選
'gemma3:4b': {'in': 0.0, 'out': 0.0}, # 輕量 sales_copy 候選
'llama3.1:8b': {'in': 0.0, 'out': 0.0},
'bge-m3:latest': {'in': 0.0, 'out': 0.0},
}

View File

@@ -150,13 +150,21 @@ def get_host_label(host: str) -> str:
@dataclass
class OllamaResponse:
    """Structured result of an Ollama call.

    Phase 13 hardening (fixes the known A4 limitation): ``input_tokens`` and
    ``output_tokens`` are filled from the ``prompt_eval_count`` and
    ``eval_count`` fields of the /api/generate response, so ai_call_logger no
    longer receives the misleading token=0 figures for openclaw_bot_main that
    skewed the daily token report.
    """

    success: bool
    content: str
    model: str
    error: Optional[str] = None
    # Seconds; generate() divides the API's ``total_duration`` value by 1e9.
    total_duration: Optional[float] = None
    host: Optional[str] = None
    input_tokens: int = 0   # prompt_eval_count
    output_tokens: int = 0  # eval_count
class OllamaService:
@@ -264,6 +272,9 @@ class OllamaService:
model=model,
total_duration=data.get('total_duration', 0) / 1e9,
host=current_host,
# Phase 13 補強:解 token=0 假數據A4 已知 limitation
input_tokens=int(data.get('prompt_eval_count', 0) or 0),
output_tokens=int(data.get('eval_count', 0) or 0),
)
# HTTP 非 200標 unhealthy + 嘗試下一台
last_error = f"HTTP {response.status_code}: {response.text[:200]}"

View File

@@ -0,0 +1,251 @@
"""
tests/test_ollama_retry_chain.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 13 補強 — 三主機 retry 鏈驗證
驗證 hotfix e862a90 (generate retry) + 6572d52 (embed retry) 的邏輯:
1. self.host @property lazy resolve
2. generate / generate_embedding 失敗自動 retry 下一台主機(最多 3 次)
3. mark_unhealthy 後下次 self.host 取新主機
4. caller 顯式 host=... 凍結不 retry
5. 三主機都失敗 → return failure(不無限迴圈)
6. Phase 13 補強OllamaResponse.input_tokens/output_tokens 解析
紀律:不打真實 Ollama,全 mock requests.post。
"""
from unittest.mock import patch, MagicMock
import pytest
# ═══════════════════════════════════════════════════════════════════════════
# Fixtures
# ═══════════════════════════════════════════════════════════════════════════
@pytest.fixture(autouse=True)
def _reset_state():
    """Reset the module-level host state around every test.

    The unhealthy marks are cleared and the resolved-host cache is emptied
    both before the test body runs and again after it finishes.
    """
    import services.ollama_service as oss

    def _wipe():
        # Same three resets are needed on setup and on teardown.
        oss._unhealthy_marks.clear()
        oss._resolved_host_cache['host'] = None
        oss._resolved_host_cache['ts'] = 0

    _wipe()
    yield
    _wipe()
# ═══════════════════════════════════════════════════════════════════════════
# T1: self.host 改 @property,不再凍結 instance
# ═══════════════════════════════════════════════════════════════════════════
def test_self_host_is_lazy_property_not_frozen():
    """``OllamaService.host`` is a property that calls resolve_ollama_host() on every access."""
    from services.ollama_service import OllamaService

    service = OllamaService()
    # Without an explicit host, nothing is pinned at construction time.
    assert service._explicit_host is None

    resolver_path = 'services.ollama_service.resolve_ollama_host'
    with patch(resolver_path, return_value='http://test:11434') as resolver:
        first = service.host
        second = service.host

    assert first == second == 'http://test:11434'
    # Each attribute access must have gone through the resolver.
    assert resolver.call_count >= 2
def test_explicit_host_freezes():
    """An explicitly passed host is pinned in ``_explicit_host`` and bypasses lazy resolution."""
    from services.ollama_service import OllamaService

    pinned = 'http://explicit:11434'
    service = OllamaService(host=pinned)
    assert service._explicit_host == pinned

    with patch('services.ollama_service.resolve_ollama_host') as resolver:
        observed = service.host

    assert observed == pinned
    # The resolver must never be consulted when the host was given explicitly.
    resolver.assert_not_called()
# ═══════════════════════════════════════════════════════════════════════════
# T2: generate 三主機 retry 鏈(核心 hotfix e862a90)
# ═══════════════════════════════════════════════════════════════════════════
def test_generate_retries_on_first_host_timeout():
    """First host times out, then the second host answers successfully."""
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()

    # Successful HTTP reply returned by the second attempt.
    success_reply = MagicMock(status_code=200)
    success_reply.json.return_value = {
        'response': 'OK from secondary',
        'prompt_eval_count': 100, 'eval_count': 50,
    }
    # requests.post: first call raises Timeout, second call succeeds.
    post_outcomes = [requests.Timeout('first host'), success_reply]
    # The resolver hands out two different hosts, one per lookup.
    resolved_hosts = ['http://host1:11434', 'http://host2:11434']

    with patch('services.ollama_service.resolve_ollama_host',
               side_effect=resolved_hosts):
        with patch('services.ollama_service.requests.post',
                   side_effect=post_outcomes) as post_mock:
            result = service.generate('test prompt')

    assert result.success is True
    assert result.content == 'OK from secondary'
    assert result.input_tokens == 100
    assert result.output_tokens == 50
    # One timed-out attempt plus one successful attempt.
    assert post_mock.call_count == 2
def test_generate_all_three_hosts_fail_returns_failure():
    """When all three hosts time out, generate() reports failure instead of looping forever."""
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()
    resolved_hosts = ['http://h1:11434', 'http://h2:11434', 'http://h3:11434']

    with patch('services.ollama_service.resolve_ollama_host',
               side_effect=resolved_hosts):
        with patch('services.ollama_service.requests.post',
                   side_effect=requests.Timeout('all timeout')) as post_mock:
            result = service.generate('test')

    assert result.success is False
    # Exactly one attempt per host.
    assert post_mock.call_count == 3
    assert 'all 3 hosts failed' in (result.error or '')
def test_generate_same_host_breaks_loop():
    """If the resolver keeps returning the same host, generate() stops instead of retrying it."""
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()
    stuck_host = 'http://stuck:11434'

    with patch('services.ollama_service.resolve_ollama_host',
               return_value=stuck_host):
        with patch('services.ollama_service.requests.post',
                   side_effect=requests.Timeout('stuck')) as post_mock:
            result = service.generate('test')

    assert result.success is False
    # Expected flow: the first attempt on the stuck host fails, the next
    # lookup yields the same host again, and the retry loop breaks.
    assert post_mock.call_count == 1
def test_generate_token_parsing_phase13():
    """Phase 13: generate() copies prompt_eval_count / eval_count into the response."""
    from services.ollama_service import OllamaService

    service = OllamaService()

    api_payload = {
        'response': 'hello',
        'prompt_eval_count': 250,
        'eval_count': 80,
        'total_duration': 1500000000,  # 1.5 s expressed in nanoseconds
    }
    http_reply = MagicMock(status_code=200)
    http_reply.json.return_value = api_payload

    with patch('services.ollama_service.resolve_ollama_host',
               return_value='http://x:11434'):
        with patch('services.ollama_service.requests.post',
                   return_value=http_reply):
            result = service.generate('test')

    assert result.success is True
    assert result.input_tokens == 250
    assert result.output_tokens == 80
    # The nanosecond duration is exposed in seconds.
    assert result.total_duration == 1.5
# ═══════════════════════════════════════════════════════════════════════════
# T3: generate_embedding 三主機 retry(hotfix 6572d52)
# ═══════════════════════════════════════════════════════════════════════════
def test_embedding_retries_on_first_host_timeout():
    """Embedding: the first host times out, then the second host returns a vector."""
    import os
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()

    embed_reply = MagicMock(status_code=200)
    embed_reply.json.return_value = {'embeddings': [[0.1, 0.2, 0.3]]}
    resolved_hosts = ['http://h1:11434', 'http://h2:11434']
    post_outcomes = [requests.Timeout('h1'), embed_reply]

    with patch('services.ollama_service.resolve_ollama_host',
               side_effect=resolved_hosts):
        # patch.dict restores the environment on exit, so the pop is temporary.
        with patch.dict('os.environ', {}, clear=False):
            with patch('services.ollama_service.requests.post',
                       side_effect=post_outcomes) as post_mock:
                # Make sure the EMBEDDING_HOST env override is not in play.
                os.environ.pop('EMBEDDING_HOST', None)
                vector = service.generate_embedding('test text')

    assert vector == [0.1, 0.2, 0.3]
    # The second host answered, so exactly two posts were made.
    assert post_mock.call_count == 2
def test_embedding_explicit_host_no_retry():
    """Embedding with an explicit ``host=`` does not walk the retry chain; failure yields []."""
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()

    with patch('services.ollama_service.requests.post',
               side_effect=requests.Timeout('explicit host fail')) as post_mock:
        vector = service.generate_embedding('test', host='http://explicit:11434')

    assert vector == []
    # Per the original author's note: a pinned host is tried once on
    # /api/embed and possibly once more on the legacy /api/embeddings route,
    # but no other host is attempted. Hence at most two posts.
    assert post_mock.call_count <= 2
def test_embedding_all_three_hosts_fail_returns_empty():
    """Embedding: when all three hosts time out the result is an empty list."""
    import os
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()
    resolved_hosts = ['http://h1:11434', 'http://h2:11434', 'http://h3:11434']

    with patch('services.ollama_service.resolve_ollama_host',
               side_effect=resolved_hosts):
        # patch.dict restores the environment on exit, so the pop is temporary.
        with patch.dict('os.environ', {}, clear=False):
            with patch('services.ollama_service.requests.post',
                       side_effect=requests.Timeout('all fail')) as post_mock:
                os.environ.pop('EMBEDDING_HOST', None)
                vector = service.generate_embedding('test')

    assert vector == []
    # Per the original author's note on _embed_one (not visible here): a
    # timeout marks the host unhealthy without trying the legacy route, so the
    # count is 3 (one main-route post per host) rather than 6.
    assert post_mock.call_count == 3
# ═══════════════════════════════════════════════════════════════════════════
# T4: mark_unhealthy 觸發 cache 失效(驗 self.host 取新主機)
# ═══════════════════════════════════════════════════════════════════════════
def test_mark_unhealthy_invalidates_cache_for_next_host():
    """mark_unhealthy() on the cached host must empty the resolved-host cache.

    With the cache emptied, the next ``self.host`` lookup has to resolve again
    rather than reuse the failed host. This test asserts only the cache reset.
    """
    import time

    import services.ollama_service as oss

    primary = 'http://primary:11434'
    # Seed the cache as if the primary host had just been resolved.
    oss._resolved_host_cache['host'] = primary
    oss._resolved_host_cache['ts'] = time.time()

    # Marking the cached host unhealthy should drop it from the cache.
    oss.mark_unhealthy(primary)
    assert oss._resolved_host_cache['host'] is None