test(p19): 補 Phase 14/15/16/17 unit test — 27 tests 全綠

Operation Ollama-First v5.0 / Phase 19 — 補完戰役紀律

tests/test_caller_registry.py (8 tests)
- registry 含 30+ 核心 caller (ADR-028 對齊)
- is_known_caller / assert_known_caller strict=False/True 行為
- ai_call_logger 整合:未知 caller log warning 不阻擋
- frozenset 不可變動

tests/test_deepseek_service.py (6 tests)
- is_available() 需 KEY + flag 雙條件
- generate flag OFF / 200 success / 500 / timeout
- usage tokens 解析(prompt_tokens / completion_tokens)

tests/test_ppt_vision_service.py (6 tests)
- flag OFF 不打 HTTP / 檔不存在
- ✅ 無視覺異常 / ⚠️ marker 解析
- HTTP 500 觸發 mark_unhealthy / timeout fail-safe

tests/test_low_quality_response_v2.py (7 tests)
- 規則 5 純英文回應 (中文 < 30%)
- 規則 6 thinking-mode 漏洞 <think>...</think>
- 規則 7 重複迴圈 (前 50 字 ≥ 3 次)
- 規則 8 佔位符 ({{var}} / [TODO] / <待填>)
- 合法繁中商業文字應通過 8 條規則

regression: 全戰役 unit test 累計 241 tests
- Phase 1: 52 (logger + report)
- Phase 2: 14 (ollama_resolve)
- Phase 3: 36 (qa/golden/nemotron/daily)
- Phase 7: 23 (anthropic + code_review)
- Phase 11: 70 (rag + learning + promotion)
- Phase 10.5: 8 (mcp_router)
- Phase 13: 10 (retry chain)
- Phase 19: 27 (caller_registry + deepseek + ppt_vision + lq_v2)  新

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
OoO
2026-05-04 10:27:35 +08:00
parent 98063059c2
commit f11b0cc732
4 changed files with 505 additions and 0 deletions

View File

@@ -0,0 +1,136 @@
"""
tests/test_caller_registry.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 16 — caller_registry 集中管理驗證
驗證面:
T1. CALLER_REGISTRY 含 ADR-028 列舉的 30+ caller
T2. is_known_caller 對白名單回 True / 未知回 False
T3. assert_known_caller(strict=False) → log warning 不 raise
T4. assert_known_caller(strict=True) → 未知 raise ValueError
T5. list_callers_by_service 分組正確
T6. ai_call_logger 整合:未知 caller 不阻擋,只 log warning
"""
import logging
import pytest
def test_registry_contains_core_callers():
    """Every core caller on the ADR-028 whitelist must be present in the registry."""
    from services.llm_caller_registry import CALLER_REGISTRY

    # Core caller names, grouped by the subsystem that owns them.
    core_by_area = {
        'hermes': ('hermes_analyst', 'hermes_intent'),
        'openclaw': (
            'openclaw_daily', 'openclaw_weekly', 'openclaw_monthly',
            'openclaw_meta', 'openclaw_qa',
        ),
        'mcp': ('mcp_l1_grounding', 'mcp_collector'),
        'code_review': (
            'code_review_hermes', 'code_review_openclaw', 'code_review_elephant',
        ),
        'nemotron_ea': ('nemotron_dispatch', 'ea_engine'),
        'ppt': ('ppt_gemini', 'ppt_ollama', 'ppt_nim', 'ppt_vision'),
        'km_embedding': ('km_embedding_worker', 'km_embedding_realtime'),
        'sales_trend': (
            'sales_copy', 'trend_match', 'trend_qa', 'product_insights',
        ),
        'bot': ('openclaw_bot_main', 'openclaw_bot_gemini', 'openclaw_bot_nim'),
    }
    required = {name for names in core_by_area.values() for name in names}

    absent = required - CALLER_REGISTRY
    assert not absent, f"registry 缺 {len(absent)} 個關鍵 caller: {absent}"
def test_is_known_caller():
    """Whitelisted names are recognised; unknown, empty, and odd-case names are not."""
    from services.llm_caller_registry import is_known_caller

    expectations = (
        ('hermes_analyst', True),
        ('openclaw_qa', True),
        ('not_a_real_caller', False),
        ('', False),
        ('GeMiNi_Caller_Wrong_Case', False),
    )
    for caller, expected in expectations:
        assert is_known_caller(caller) is expected
def test_assert_known_caller_strict_false_only_warns(caplog):
    """With strict=False an unregistered caller is only logged, never raised.

    The call must return normally, and at least one captured record at
    WARNING level or above must mention either the phrase "unknown caller"
    or the offending caller name.
    """
    from services.llm_caller_registry import assert_known_caller

    caller = 'totally_made_up_caller'
    with caplog.at_level(logging.WARNING):
        assert_known_caller(caller, strict=False)

    # getMessage() renders the message without relying on a handler having
    # formatted the record first.
    messages = [
        record.getMessage()
        for record in caplog.records
        if record.levelno >= logging.WARNING
    ]
    assert messages, "expected at least one warning for an unregistered caller"

    # Scan every warning instead of only the first one, so an unrelated
    # warning emitted earlier cannot make this test fail spuriously.
    assert any(
        'unknown caller' in text.lower() or caller in text
        for text in messages
    ), f"no warning mentions the unknown caller; got: {messages}"
def test_assert_known_caller_strict_true_raises():
    """With strict=True an unregistered caller raises ValueError ("unknown caller")."""
    from services.llm_caller_registry import assert_known_caller as check_caller

    expected_failure = pytest.raises(ValueError, match='unknown caller')
    with expected_failure:
        check_caller('definitely_not_real', strict=True)
def test_assert_known_caller_passes_for_real_caller(caplog):
    """A registered caller neither raises nor triggers an unknown-caller warning.

    Both strict modes are exercised. The captured log is then checked so the
    "does not warn" half of the contract is actually verified, not just the
    "does not raise" half.
    """
    from services.llm_caller_registry import assert_known_caller

    registered = {'hermes_analyst': True, 'openclaw_qa': False}  # name -> strict
    with caplog.at_level(logging.WARNING):
        for caller, strict in registered.items():
            # Must return normally for a whitelisted caller.
            assert_known_caller(caller, strict=strict)

    complaints = [
        record.getMessage()
        for record in caplog.records
        if record.levelno >= logging.WARNING
        and (
            'unknown caller' in record.getMessage().lower()
            or any(caller in record.getMessage() for caller in registered)
        )
    ]
    assert not complaints, f"registered caller triggered a warning: {complaints}"
def test_list_callers_by_service_structure():
    """list_callers_by_service() returns the expected groups, each a non-empty list.

    Checks that every expected group key is present, that each group value is
    a list with at least one caller, and that 'hermes_analyst' is filed under
    the 'hermes' group.
    """
    # Only the grouping helper is needed here; the previously imported
    # CALLER_REGISTRY was never used by this test.
    from services.llm_caller_registry import list_callers_by_service

    grouped = list_callers_by_service()

    expected_groups = {
        'hermes', 'openclaw', 'openclaw_bot', 'mcp', 'code_review',
        'ppt', 'tg_bot', 'km_embedding', 'sales_trend', 'misc',
    }
    assert expected_groups.issubset(grouped.keys())

    # Every group must hold at least one caller.
    for group, callers in grouped.items():
        assert isinstance(callers, list)
        assert len(callers) >= 1, f"group '{group}' 是空的"

    # The hermes group must at least contain hermes_analyst.
    assert 'hermes_analyst' in grouped['hermes']
def test_ai_call_logger_integration_does_not_block_unknown_caller(caplog, monkeypatch):
    """log_ai_call() warns about an unregistered caller but still runs normally.

    AI_CALL_LOGGING_ENABLED is set to 'false' for the duration of the test
    (per the original note, this skips the DB write). The variable is set
    through monkeypatch so that any value already present in the environment
    is restored afterwards instead of being removed.
    """
    monkeypatch.setenv('AI_CALL_LOGGING_ENABLED', 'false')

    from services.ai_call_logger import log_ai_call

    with caplog.at_level(logging.WARNING):
        with log_ai_call(
            caller='unknown_test_caller_xyz',  # deliberately not in the registry
            provider='gcp_ollama',
            model='hermes3:latest',
        ) as ctx:
            ctx.set_tokens(input=100, output=50)
        # Reaching this point means the context manager exited without raising.

    # A warning about the unregistered caller must have been logged.
    messages = [
        record.getMessage()
        for record in caplog.records
        if record.levelno >= logging.WARNING
    ]
    assert any(
        'unknown_test_caller' in text.lower() or 'unknown caller' in text.lower()
        for text in messages
    ), f"應 warn unknown caller實際 warnings: {messages}"
def test_registry_is_immutable_frozenset():
    """CALLER_REGISTRY is a frozenset, so in-place mutation is impossible."""
    from services.llm_caller_registry import CALLER_REGISTRY as registry

    assert isinstance(registry, frozenset)

    # frozenset has no .add attribute, hence AttributeError rather than TypeError.
    mutation_rejected = pytest.raises(AttributeError)
    with mutation_rejected:
        registry.add('attempted_mutation')  # type: ignore

View File

@@ -0,0 +1,118 @@
"""
tests/test_deepseek_service.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 15 — DeepSeek 直連 service 驗證
"""
from unittest.mock import patch, MagicMock
import pytest
@pytest.fixture(autouse=True)
def _reset_env(monkeypatch):
    """Give every test a clean DeepSeek environment and a freshly loaded module.

    Several tests set DEEPSEEK_API_KEY / DEEPSEEK_DIRECT_ENABLED and then
    reload services.deepseek_service so the module picks the values up.
    Removing the variables alone would leave that reloaded state behind for
    the next test, so the module is also reloaded under the scrubbed
    environment, both before and after each test.
    """
    import importlib

    import services.deepseek_service as ds

    def _scrub():
        # Drop both switches, then rebuild module-level state without them.
        for var in ('DEEPSEEK_DIRECT_ENABLED', 'DEEPSEEK_API_KEY'):
            monkeypatch.delenv(var, raising=False)
        importlib.reload(ds)

    _scrub()
    yield
    # Runs before monkeypatch restores the original environment, so state
    # loaded from test-only values does not leak into later tests.
    _scrub()
def test_is_available_requires_key_and_flag(monkeypatch):
    """is_available() is True only when both the API key and the flag are set."""
    from services.deepseek_service import DeepSeekService

    service = DeepSeekService()

    # Neither key nor flag: unavailable.
    without_anything = service.is_available()
    assert without_anything is False

    # Flag alone: still unavailable.
    monkeypatch.setenv('DEEPSEEK_DIRECT_ENABLED', 'true')
    with_flag_only = service.is_available()
    assert with_flag_only is False

    # Flag plus key: available. The module is reloaded so it sees the new
    # environment, and the module-level singleton is queried.
    monkeypatch.setenv('DEEPSEEK_API_KEY', 'sk-test')
    import importlib
    import services.deepseek_service as module

    importlib.reload(module)
    assert module.deepseek_service.is_available() is True
def test_generate_returns_failure_when_unavailable(monkeypatch):
    """With the flag off, generate() fails immediately and sends no HTTP request."""
    monkeypatch.setenv('DEEPSEEK_DIRECT_ENABLED', 'false')
    from services.deepseek_service import DeepSeekService

    service = DeepSeekService()
    with patch('services.deepseek_service.requests.post') as http_post:
        outcome = service.generate('test prompt')

    assert outcome.success is False

    # Either the disabled-flag message or the missing-key message is accepted.
    reason = outcome.error or ''
    accepted_reasons = ('DEEPSEEK_DIRECT_ENABLED=false', 'API_KEY 未設')
    assert any(fragment in reason for fragment in accepted_reasons)

    http_post.assert_not_called()
def test_generate_success_parses_usage(monkeypatch):
    """A normal HTTP 200 reply yields the content, model, and usage token counts."""
    for name, value in (
        ('DEEPSEEK_DIRECT_ENABLED', 'true'),
        ('DEEPSEEK_API_KEY', 'sk-test'),
    ):
        monkeypatch.setenv(name, value)

    import importlib
    import services.deepseek_service as module

    importlib.reload(module)  # reload after the environment has been set

    payload = {
        'model': 'deepseek-chat',
        'choices': [{'message': {'content': 'Hello from DeepSeek'}}],
        'usage': {'prompt_tokens': 100, 'completion_tokens': 50},
    }
    http_ok = MagicMock(status_code=200)
    http_ok.json.return_value = payload

    with patch('services.deepseek_service.requests.post', return_value=http_ok):
        reply = module.deepseek_service.generate('hi', system_prompt='you are helpful')

    assert reply.success is True
    assert reply.content == 'Hello from DeepSeek'
    # prompt_tokens / completion_tokens surface as input_tokens / output_tokens.
    assert reply.input_tokens == 100
    assert reply.output_tokens == 50
    assert reply.model == 'deepseek-chat'
def test_generate_http_500_returns_failure(monkeypatch):
    """An HTTP 500 reply is reported as a failure whose error mentions 'HTTP 500'."""
    for name, value in (
        ('DEEPSEEK_DIRECT_ENABLED', 'true'),
        ('DEEPSEEK_API_KEY', 'sk-test'),
    ):
        monkeypatch.setenv(name, value)

    import importlib
    import services.deepseek_service as module

    importlib.reload(module)  # reload after the environment has been set

    server_error = MagicMock(status_code=500)
    server_error.text = 'Internal Server Error'

    with patch('services.deepseek_service.requests.post', return_value=server_error):
        reply = module.deepseek_service.generate('test')

    assert reply.success is False
    reason = reply.error or ''
    assert 'HTTP 500' in reason
def test_generate_timeout_returns_failure(monkeypatch):
    """A requests.Timeout from the HTTP call becomes a failure mentioning 'timeout'."""
    for name, value in (
        ('DEEPSEEK_DIRECT_ENABLED', 'true'),
        ('DEEPSEEK_API_KEY', 'sk-test'),
    ):
        monkeypatch.setenv(name, value)

    import importlib
    import services.deepseek_service as module

    importlib.reload(module)  # reload after the environment has been set

    import requests

    timed_out = requests.Timeout('60s')
    with patch('services.deepseek_service.requests.post', side_effect=timed_out):
        reply = module.deepseek_service.generate('test')

    assert reply.success is False
    reason = (reply.error or '').lower()
    assert 'timeout' in reason
def test_check_connection_when_unavailable():
    """check_connection() returns False when neither key nor flag is configured."""
    from services.deepseek_service import DeepSeekService

    # The autouse fixture has removed both the key and the flag from the env.
    connected = DeepSeekService().check_connection()
    assert connected is False

View File

@@ -0,0 +1,116 @@
"""
tests/test_low_quality_response_v2.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 17 — _is_low_quality_response 4 條新規則驗證
驗證面(規則 5-8 是 Phase 17 新增):
規則 1-4(既有,test_openclaw_qa_routing 已驗)
規則 5: 純英文回應(中文 < 30%)
規則 6: thinking-mode 漏洞(<think>...</think>)
規則 7: 重複迴圈(前 50 字 ≥ 3 次)
規則 8: 佔位符未填充({{var}} / [TODO] / <待填>)
"""
def test_rule5_pure_english_response_rejected():
    """Rule 5: a long, purely English answer (Chinese share < 30%) is rejected."""
    from services.openclaw_strategist_service import _is_low_quality_response

    sentences = (
        "This is a long English response from the LLM model that does not "
        "have any traditional Chinese characters in it.",
        "We expect this kind "
        "of response to be rejected as low quality because the user is asking "
        "in Traditional Chinese and expects an answer in Traditional Chinese.",
    )
    english_only = " ".join(sentences)

    verdict = _is_low_quality_response(english_only)
    assert verdict is True
def test_rule5_mixed_chinese_english_acceptable():
    """Rule 5: text whose Chinese share is at least 30% is accepted."""
    from services.openclaw_strategist_service import _is_low_quality_response

    # Densely Chinese sample (the original note puts it at 40%+ Chinese characters).
    paragraphs = [
        "本週業績分析報告:總營收較上週成長百分之十二,主要來自家電類別與生活雜貨。",
        "競品動向監控:對手實施大規模補貼戰,預估壓縮我方百分之三毛利率。",
        "建議行動:(一) 加碼家電促銷檔期 (二) 觀察補貼是否延續至下週。",
    ]
    mostly_chinese = "\n".join(paragraphs)

    verdict = _is_low_quality_response(mostly_chinese)
    assert verdict is False
def test_rule6_thinking_block_leak_rejected():
    """Rule 6: a leaked reasoning-model <think> block, or a stray </think>, is rejected.

    Both samples share a business-text body longer than 50 characters. A
    sibling test notes that rule 1 rejects text under 50 characters; keeping
    the samples above that length means the short-text rule cannot be what
    rejects them.
    """
    from services.openclaw_strategist_service import _is_low_quality_response

    body = (
        "本週業績分析:總營收成長 12% YoY主要來自家電類別的銷售提升。"
        "競品動向:對手在家電類別發動補貼戰,建議加碼促銷檔期並持續觀察毛利率變化。"
    )
    # Precondition: the body alone already clears the 50-character minimum.
    assert len(body) > 50

    # Case 1: a complete <think>...</think> block precedes the answer.
    full_block_leak = "<think>讓我思考一下這個問題...</think>\n" + body
    assert _is_low_quality_response(full_block_leak) is True

    # Case 2: only a dangling closing tag survives after the answer.
    closing_tag_only = body + "\n</think>"
    assert _is_low_quality_response(closing_tag_only) is True
def test_rule7_repetition_loop_rejected():
    """Rule 7: text whose first 50 characters recur at least 3 times is a loop."""
    from services.openclaw_strategist_service import _is_low_quality_response

    unit = "本週業績有顯著成長,主要驅動類別是家電與生活雜貨大類別。額外文字。"
    # Eight copies push the sample past 200 characters, with the opening
    # 50 characters repeated well over three times.
    looped = "".join([unit] * 8)

    assert len(looped) > 200
    verdict = _is_low_quality_response(looped)
    assert verdict is True
def test_rule7_normal_long_text_acceptable():
    """Rule 7: ordinary long text is not mistaken for a repetition loop."""
    from services.openclaw_strategist_service import _is_low_quality_response

    paragraphs = [
        "本週業績整體呈現上升趨勢,主要驅動類別為家電與生活雜貨大類別。",
        "競品動向PChome 在 3C 類發動大規模補貼戰,預估壓縮我方 3-5 個百分點毛利率。",
        "蝦皮也在母嬰用品加碼免運券促銷,需密切觀察跟降節奏。",
        "建議行動:(1) 加碼家電促銷檔期 (2) 觀察 PChome 補貼是否延續至下週 "
        "(3) 對價差大於 5% 的 SKU 主動啟動 EA 流程。",
    ]
    ordinary_report = "\n".join(paragraphs)

    # Individual words may recur, but the opening 50-character substring as a
    # whole does not appear three or more times.
    verdict = _is_low_quality_response(ordinary_report)
    assert verdict is False
def test_rule8_placeholder_markers_rejected():
    """Rule 8: unfilled placeholders and "not implemented" markers are rejected.

    Each sample is padded with neutral Chinese filler long enough to clear
    the 50-character minimum that, per the original note, rule 1 enforces.
    The previous 18-character padding left most samples under that length,
    so the short-text rule could reject them before the placeholder rule was
    ever reached.
    """
    from services.openclaw_strategist_service import _is_low_quality_response

    samples = [
        # Unfilled Jinja-style {{var}} placeholders.
        "本週業績:{{revenue}} 元,較上週成長 {{wow_pct}}%。建議行動 ...",
        # [TODO] in upper and lower case.
        "本週銷售分析:[TODO] 補上具體數字後完成此段。",
        "業績檢討:競品分析 [todo] 待補充。",
        # <待填> ("to be filled in") marker.
        "本週概況:營收 NT$<待填>,毛利率 <待填>。",
        # 尚未實作 ("not yet implemented") marker.
        "策略建議:(尚未實作) 請統帥手動補充。",
    ]
    filler = "(此段為測試用的補充說明文字,僅用來確保整體長度明顯超過五十個字元,避免被長度規則提前攔截。)"

    for sample in samples:
        padded = sample + filler
        # Precondition: long enough that the short-text rule cannot fire.
        assert len(padded) > 50, f"padding too short for: {sample[:30]}"
        assert _is_low_quality_response(padded) is True, f"應拒絕含佔位符: {padded[:30]}"
def test_legitimate_chinese_business_text_passes():
    """Well-formed Traditional Chinese business text passes all eight rules."""
    from services.openclaw_strategist_service import _is_low_quality_response

    report_lines = [
        "本週業績分析2026-05-04",
        "總營收 NT$ 4,230,000較上週成長 12.3% WoW。",
        "主要驅動類別:家電(+18%)、生活雜貨(+9%)。",
        "競品動向PChome 在 3C 類補貼戰預估壓縮我方 3-5pp 毛利率。",
        "建議行動:加碼家電檔期,對價差 > 5% SKU 啟動 EA 流程。",
    ]
    report = "\n".join(report_lines)

    verdict = _is_low_quality_response(report)
    assert verdict is False

View File

@@ -0,0 +1,135 @@
"""
tests/test_ppt_vision_service.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 14 — PPT vision (minicpm-v) 驗證
"""
import os
import tempfile
from unittest.mock import patch, MagicMock
import pytest
@pytest.fixture(autouse=True)
def _reset_env(monkeypatch):
    """Remove PPT_VISION_ENABLED before each test so the flag starts unset."""
    flag = 'PPT_VISION_ENABLED'
    monkeypatch.delenv(flag, raising=False)
    # No teardown of its own: monkeypatch restores the environment afterwards.
    return None
@pytest.fixture
def fake_image(tmp_path):
    """Create a small fake PNG file and return its path as a string.

    The file holds the 8-byte PNG signature followed by 1000 zero bytes. It
    lives under pytest's per-test tmp_path directory, so cleanup is handled
    by pytest and needs no manual unlink or swallowed exception.
    """
    image_path = tmp_path / 'fake_slide.png'
    image_path.write_bytes(b'\x89PNG\r\n\x1a\n' + b'\x00' * 1000)  # PNG magic + padding
    # Returned as str to match the path string the tests pass to check_image().
    return str(image_path)
def test_flag_off_returns_disabled_error(fake_image):
    """With the flag off, check_image() fails immediately and sends no HTTP request."""
    from services.ppt_vision_service import PPTVisionService

    service = PPTVisionService()
    with patch('services.ppt_vision_service.requests.post') as http_post:
        outcome = service.check_image(fake_image)

    assert outcome.success is False
    reason = outcome.error or ''
    assert 'PPT_VISION_ENABLED=false' in reason
    http_post.assert_not_called()
def test_missing_image_file(monkeypatch, tmp_path):
    """check_image() on a path that does not exist fails with 'image not found'.

    The missing path sits inside pytest's fresh tmp_path directory, so it is
    guaranteed not to exist and does not depend on a POSIX-style /tmp.
    """
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import PPTVisionService

    missing = tmp_path / 'this_file_does_not_exist_xyz.png'
    assert not missing.exists()

    result = PPTVisionService().check_image(str(missing))

    assert result.success is False
    assert 'image not found' in (result.error or '')
def test_no_issues_response(fake_image, monkeypatch):
    """A '✅ 無視覺異常' reply from the model yields an empty issues_found list."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import PPTVisionService

    clean_reply = MagicMock(status_code=200)
    clean_reply.json.return_value = {'response': '✅ 無視覺異常'}

    host_patch = patch('services.ollama_service.resolve_ollama_host',
                       return_value='http://test:11434')
    post_patch = patch('services.ppt_vision_service.requests.post',
                       return_value=clean_reply)
    with host_patch:
        with post_patch:
            outcome = PPTVisionService().check_image(fake_image)

    assert outcome.success is True
    assert outcome.issues_found == []
    assert outcome.confidence == 1.0
def test_issues_detected(fake_image, monkeypatch):
    """Each ⚠️-marked line in the model reply is parsed into issues_found."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import PPTVisionService

    reply_lines = [
        '⚠️ 圖表被切掉:右側長條圖超出邊界',
        '⚠️ 文字溢出:商品標題被遮擋',
        '其他無問題',
    ]
    flagged_reply = MagicMock(status_code=200)
    flagged_reply.json.return_value = {'response': '\n'.join(reply_lines)}

    host_patch = patch('services.ollama_service.resolve_ollama_host',
                       return_value='http://test:11434')
    post_patch = patch('services.ppt_vision_service.requests.post',
                       return_value=flagged_reply)
    with host_patch:
        with post_patch:
            outcome = PPTVisionService().check_image(fake_image)

    assert outcome.success is True
    # Two marked lines in the reply, so exactly two issues are expected.
    assert len(outcome.issues_found) == 2
    for keyword in ('圖表被切掉', '文字溢出'):
        assert any(keyword in issue for issue in outcome.issues_found)
    assert outcome.confidence > 0.5
def test_http_500_marks_unhealthy(fake_image, monkeypatch):
    """HTTP 500 gives success=False and calls mark_unhealthy with the resolved host."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import PPTVisionService

    host = 'http://test:11434'
    server_error = MagicMock(status_code=500)
    server_error.text = 'oops'

    host_patch = patch('services.ollama_service.resolve_ollama_host',
                       return_value=host)
    post_patch = patch('services.ppt_vision_service.requests.post',
                       return_value=server_error)
    with host_patch:
        with patch('services.ollama_service.mark_unhealthy') as mark_unhealthy:
            with post_patch:
                outcome = PPTVisionService().check_image(fake_image)

    assert outcome.success is False
    reason = outcome.error or ''
    assert 'HTTP 500' in reason
    mark_unhealthy.assert_called_once_with('http://test:11434')
def test_timeout_returns_failure(fake_image, monkeypatch):
    """A requests.Timeout from the HTTP call becomes a failure mentioning 'timeout'."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import PPTVisionService
    import requests

    timed_out = requests.Timeout('60s')
    host_patch = patch('services.ollama_service.resolve_ollama_host',
                       return_value='http://test:11434')
    health_patch = patch('services.ollama_service.mark_unhealthy')
    post_patch = patch('services.ppt_vision_service.requests.post',
                       side_effect=timed_out)
    with host_patch:
        with health_patch:
            with post_patch:
                outcome = PPTVisionService().check_image(fake_image)

    assert outcome.success is False
    reason = (outcome.error or '').lower()
    assert 'timeout' in reason