From f11b0cc732badef544016d56a3571d0fca4fd5e3 Mon Sep 17 00:00:00 2001 From: OoO Date: Mon, 4 May 2026 10:27:35 +0800 Subject: [PATCH] =?UTF-8?q?test(p19):=20=E8=A3=9C=20Phase=2014/15/16/17=20?= =?UTF-8?q?unit=20test=20=E2=80=94=2027=20tests=20=E5=85=A8=E7=B6=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operation Ollama-First v5.0 / Phase 19 — 補完戰役紀律 tests/test_caller_registry.py (7 tests) - registry 含 30+ 核心 caller (ADR-028 對齊) - is_known_caller / assert_known_caller strict=False/True 行為 - ai_call_logger 整合:未知 caller log warning 不阻擋 - frozenset 不可變動 tests/test_deepseek_service.py (6 tests) - is_available() 需 KEY + flag 雙條件 - generate flag OFF / 200 success / 500 / timeout - usage tokens 解析(prompt_tokens / completion_tokens) tests/test_ppt_vision_service.py (6 tests) - flag OFF 不打 HTTP / 檔不存在 - ✅ 無視覺異常 / ⚠️ marker 解析 - HTTP 500 觸發 mark_unhealthy / timeout fail-safe tests/test_low_quality_response_v2.py (8 tests) - 規則 5 純英文回應 (中文 < 30%) - 規則 6 thinking-mode 漏洞 <think>...</think> 
- 規則 7 重複迴圈 (前 50 字 ≥ 3 次) - 規則 8 佔位符 ({{var}} / [TODO] / <待填>) - 合法繁中商業文字應通過 8 條規則 regression: 全戰役 unit test 累計 241 tests - Phase 1: 52 (logger + report) - Phase 2: 14 (ollama_resolve) - Phase 3: 36 (qa/golden/nemotron/daily) - Phase 7: 23 (anthropic + code_review) - Phase 11: 70 (rag + learning + promotion) - Phase 10.5: 8 (mcp_router) - Phase 13: 10 (retry chain) - Phase 19: 27 (caller_registry + deepseek + ppt_vision + lq_v2) ⭐ 新 Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/test_caller_registry.py | 136 ++++++++++++++++++++++++++ tests/test_deepseek_service.py | 118 ++++++++++++++++++++++ tests/test_low_quality_response_v2.py | 116 ++++++++++++++++++++++ tests/test_ppt_vision_service.py | 135 +++++++++++++++++++++++++ 4 files changed, 505 insertions(+) create mode 100644 tests/test_caller_registry.py create mode 100644 tests/test_deepseek_service.py create mode 100644 tests/test_low_quality_response_v2.py create mode 100644 tests/test_ppt_vision_service.py diff --git a/tests/test_caller_registry.py b/tests/test_caller_registry.py new file mode 100644 index 0000000..2541481 --- /dev/null +++ b/tests/test_caller_registry.py @@ -0,0 +1,136 @@ +""" +tests/test_caller_registry.py +───────────────────────────────────────────────────────────────── +Operation Ollama-First v5.0 / Phase 16 — caller_registry 集中管理驗證 + +驗證面: + T1. CALLER_REGISTRY 含 ADR-028 列舉的 30+ caller + T2. is_known_caller 對白名單回 True / 未知回 False + T3. assert_known_caller(strict=False) → log warning 不 raise + T4. assert_known_caller(strict=True) → 未知 raise ValueError + T5. list_callers_by_service 分組正確 + T6. 
ai_call_logger 整合:未知 caller 不阻擋(log warning) +""" + +import logging + +import pytest + + +def test_registry_contains_core_callers(): + """ADR-028 白名單核心 caller 必在 registry""" + from services.llm_caller_registry import CALLER_REGISTRY + + must_have = { + # Hermes + 'hermes_analyst', 'hermes_intent', + # OpenClaw + 'openclaw_daily', 'openclaw_weekly', 'openclaw_monthly', + 'openclaw_meta', 'openclaw_qa', + # MCP + 'mcp_l1_grounding', 'mcp_collector', + # Code Review + 'code_review_hermes', 'code_review_openclaw', 'code_review_elephant', + # NemoTron / EA + 'nemotron_dispatch', 'ea_engine', + # PPT + 'ppt_gemini', 'ppt_ollama', 'ppt_nim', 'ppt_vision', + # KM Embedding + 'km_embedding_worker', 'km_embedding_realtime', + # Sales / Trend + 'sales_copy', 'trend_match', 'trend_qa', 'product_insights', + # Bot + 'openclaw_bot_main', 'openclaw_bot_gemini', 'openclaw_bot_nim', + } + + missing = must_have - CALLER_REGISTRY + assert not missing, f"registry 缺 {len(missing)} 個關鍵 caller: {missing}" + + +def test_is_known_caller(): + from services.llm_caller_registry import is_known_caller + + assert is_known_caller('hermes_analyst') is True + assert is_known_caller('openclaw_qa') is True + assert is_known_caller('not_a_real_caller') is False + assert is_known_caller('') is False + assert is_known_caller('GeMiNi_Caller_Wrong_Case') is False + + +def test_assert_known_caller_strict_false_only_warns(caplog): + """strict=False(預設)→ 不在 registry 只 log warning""" + from services.llm_caller_registry import assert_known_caller + + with caplog.at_level(logging.WARNING): + assert_known_caller('totally_made_up_caller', strict=False) + # 應有 warning + warnings = [r for r in caplog.records if r.levelno >= logging.WARNING] + assert len(warnings) >= 1 + assert 'unknown caller' in warnings[0].message.lower() or \ + 'totally_made_up_caller' in warnings[0].message + + +def test_assert_known_caller_strict_true_raises(): + from services.llm_caller_registry import assert_known_caller + + with 
pytest.raises(ValueError, match='unknown caller'): + assert_known_caller('definitely_not_real', strict=True) + + +def test_assert_known_caller_passes_for_real_caller(): + """合法 caller → 不 raise / 不 warn""" + from services.llm_caller_registry import assert_known_caller + # 不該 raise + assert_known_caller('hermes_analyst', strict=True) + assert_known_caller('openclaw_qa', strict=False) + + +def test_list_callers_by_service_structure(): + from services.llm_caller_registry import list_callers_by_service, CALLER_REGISTRY + + grouped = list_callers_by_service() + expected_groups = {'hermes', 'openclaw', 'openclaw_bot', 'mcp', 'code_review', + 'ppt', 'tg_bot', 'km_embedding', 'sales_trend', 'misc'} + assert expected_groups.issubset(grouped.keys()) + + # 每組至少 1 個 + for group, callers in grouped.items(): + assert isinstance(callers, list) + assert len(callers) >= 1, f"group '{group}' 是空的" + + # hermes 至少含 hermes_analyst + assert 'hermes_analyst' in grouped['hermes'] + + +def test_ai_call_logger_integration_does_not_block_unknown_caller(caplog): + """ai_call_logger 收到 unknown caller 應 log warning 但不阻擋 context manager""" + import os + os.environ['AI_CALL_LOGGING_ENABLED'] = 'false' # 跳過 DB 寫入 + try: + from services.ai_call_logger import log_ai_call + + with caplog.at_level(logging.WARNING): + with log_ai_call( + caller='unknown_test_caller_xyz', # 故意不在 registry + provider='gcp_ollama', + model='hermes3:latest', + ) as ctx: + ctx.set_tokens(input=100, output=50) + # context manager 不 raise,正常結束 + # 應該有 warning(caller 不在 registry) + warnings = [r for r in caplog.records if r.levelno >= logging.WARNING] + assert any('unknown_test_caller' in r.message.lower() or + 'unknown caller' in r.message.lower() + for r in warnings), \ + f"應 warn unknown caller,實際 warnings: {[r.message for r in warnings]}" + finally: + os.environ.pop('AI_CALL_LOGGING_ENABLED', None) + + +def test_registry_is_immutable_frozenset(): + """CALLER_REGISTRY 是 frozenset 不可變動""" + from services.llm_caller_registry 
import CALLER_REGISTRY + + assert isinstance(CALLER_REGISTRY, frozenset) + with pytest.raises(AttributeError): + CALLER_REGISTRY.add('attempted_mutation') # type: ignore diff --git a/tests/test_deepseek_service.py b/tests/test_deepseek_service.py new file mode 100644 index 0000000..52bea5d --- /dev/null +++ b/tests/test_deepseek_service.py @@ -0,0 +1,118 @@ +""" +tests/test_deepseek_service.py +───────────────────────────────────────────────────────────────── +Operation Ollama-First v5.0 / Phase 15 — DeepSeek 直連 service 驗證 +""" + +from unittest.mock import patch, MagicMock + +import pytest + + +@pytest.fixture(autouse=True) +def _reset_env(monkeypatch): + """每 test 清 env""" + monkeypatch.delenv('DEEPSEEK_DIRECT_ENABLED', raising=False) + monkeypatch.delenv('DEEPSEEK_API_KEY', raising=False) + yield + + +def test_is_available_requires_key_and_flag(monkeypatch): + from services.deepseek_service import DeepSeekService + + svc = DeepSeekService() + # 無 key 無 flag → False + assert svc.is_available() is False + + # 只有 flag → False + monkeypatch.setenv('DEEPSEEK_DIRECT_ENABLED', 'true') + assert svc.is_available() is False + + # flag + key → True(需 reload module 取新 env) + monkeypatch.setenv('DEEPSEEK_API_KEY', 'sk-test') + import importlib + import services.deepseek_service as ds + importlib.reload(ds) + assert ds.deepseek_service.is_available() is True + + +def test_generate_returns_failure_when_unavailable(monkeypatch): + """flag OFF 時 generate 直接 return failure,不打 HTTP""" + monkeypatch.setenv('DEEPSEEK_DIRECT_ENABLED', 'false') + from services.deepseek_service import DeepSeekService + + svc = DeepSeekService() + with patch('services.deepseek_service.requests.post') as mock_post: + resp = svc.generate('test prompt') + + assert resp.success is False + assert 'DEEPSEEK_DIRECT_ENABLED=false' in (resp.error or '') or \ + 'API_KEY 未設' in (resp.error or '') + mock_post.assert_not_called() + + +def test_generate_success_parses_usage(monkeypatch): + """正常 200 回應應解 usage 
tokens""" + monkeypatch.setenv('DEEPSEEK_DIRECT_ENABLED', 'true') + monkeypatch.setenv('DEEPSEEK_API_KEY', 'sk-test') + + import importlib + import services.deepseek_service as ds + importlib.reload(ds) + + fake_resp = MagicMock(status_code=200) + fake_resp.json.return_value = { + 'model': 'deepseek-chat', + 'choices': [{'message': {'content': 'Hello from DeepSeek'}}], + 'usage': {'prompt_tokens': 100, 'completion_tokens': 50}, + } + with patch('services.deepseek_service.requests.post', return_value=fake_resp): + resp = ds.deepseek_service.generate('hi', system_prompt='you are helpful') + + assert resp.success is True + assert resp.content == 'Hello from DeepSeek' + assert resp.input_tokens == 100 + assert resp.output_tokens == 50 + assert resp.model == 'deepseek-chat' + + +def test_generate_http_500_returns_failure(monkeypatch): + monkeypatch.setenv('DEEPSEEK_DIRECT_ENABLED', 'true') + monkeypatch.setenv('DEEPSEEK_API_KEY', 'sk-test') + + import importlib + import services.deepseek_service as ds + importlib.reload(ds) + + fake_resp = MagicMock(status_code=500) + fake_resp.text = 'Internal Server Error' + with patch('services.deepseek_service.requests.post', return_value=fake_resp): + resp = ds.deepseek_service.generate('test') + + assert resp.success is False + assert 'HTTP 500' in (resp.error or '') + + +def test_generate_timeout_returns_failure(monkeypatch): + monkeypatch.setenv('DEEPSEEK_DIRECT_ENABLED', 'true') + monkeypatch.setenv('DEEPSEEK_API_KEY', 'sk-test') + + import importlib + import services.deepseek_service as ds + importlib.reload(ds) + + import requests + with patch('services.deepseek_service.requests.post', + side_effect=requests.Timeout('60s')): + resp = ds.deepseek_service.generate('test') + + assert resp.success is False + assert 'timeout' in (resp.error or '').lower() + + +def test_check_connection_when_unavailable(): + from services.deepseek_service import DeepSeekService + + svc = DeepSeekService() + # 無 key 無 flag + assert 
svc.check_connection() is False diff --git a/tests/test_low_quality_response_v2.py b/tests/test_low_quality_response_v2.py new file mode 100644 index 0000000..1e654a5 --- /dev/null +++ b/tests/test_low_quality_response_v2.py @@ -0,0 +1,116 @@ +""" +tests/test_low_quality_response_v2.py +───────────────────────────────────────────────────────────────── +Operation Ollama-First v5.0 / Phase 17 — _is_low_quality_response 4 條新規則驗證 + +驗證面(規則 5-8 是 Phase 17 新增): + 規則 1-4:既有(test_openclaw_qa_routing 已驗) + 規則 5: 純英文回應(中文 < 30%) + 規則 6: thinking-mode 漏洞(<think>...</think>) + 規則 7: 重複迴圈(前 50 字 ≥ 3 次) + 規則 8: 佔位符未填充({{var}} / [TODO] / <待填>) +""" + + +def test_rule5_pure_english_response_rejected(): + """純英文長文應被拒(中文 < 30%)""" + from services.openclaw_strategist_service import _is_low_quality_response + + text = ( + "This is a long English response from the LLM model that does not " + "have any traditional Chinese characters in it. We expect this kind " + "of response to be rejected as low quality because the user is asking " + "in Traditional Chinese and expects an answer in Traditional Chinese." 
+ ) + assert _is_low_quality_response(text) is True + + +def test_rule5_mixed_chinese_english_acceptable(): + """中英混合(中文佔比 ≥ 30%)應通過""" + from services.openclaw_strategist_service import _is_low_quality_response + + # 中文密度高的 text(40%+ 中文字元) + text = ( + "本週業績分析報告:總營收較上週成長百分之十二,主要來自家電類別與生活雜貨。\n" + "競品動向監控:對手實施大規模補貼戰,預估壓縮我方百分之三毛利率。\n" + "建議行動:(一) 加碼家電促銷檔期 (二) 觀察補貼是否延續至下週。" + ) + assert _is_low_quality_response(text) is False + + +def test_rule6_thinking_block_leak_rejected(): + """reasoning model thinking 區塊洩漏應拒""" + from services.openclaw_strategist_service import _is_low_quality_response + + text_with_open_tag = ( + "讓我思考一下這個問題...\n" + "本週業績分析:總營收成長 12% YoY,主要來自家電類別的銷售提升。" + ) + assert _is_low_quality_response(text_with_open_tag) is True + + text_with_close_only = ( + "本週業績分析:總營收成長 12% YoY,主要來自家電類別的銷售提升。\n" + "" + ) + assert _is_low_quality_response(text_with_close_only) is True + + +def test_rule7_repetition_loop_rejected(): + """前 50 字出現 ≥ 3 次 → 卡迴圈""" + from services.openclaw_strategist_service import _is_low_quality_response + + # 重複 8 次保證 > 200 字(前 50 字出現 ≥ 3 次觸發規則 7) + base = "本週業績有顯著成長,主要驅動類別是家電與生活雜貨大類別。額外文字。" + repeated = base * 8 + assert len(repeated) > 200 + assert _is_low_quality_response(repeated) is True + + +def test_rule7_normal_long_text_acceptable(): + """正常長文(即使重複某些字)不該被誤判為迴圈""" + from services.openclaw_strategist_service import _is_low_quality_response + + normal_long = ( + "本週業績整體呈現上升趨勢,主要驅動類別為家電與生活雜貨大類別。\n" + "競品動向:PChome 在 3C 類發動大規模補貼戰,預估壓縮我方 3-5 個百分點毛利率。\n" + "蝦皮也在母嬰用品加碼免運券促銷,需密切觀察跟降節奏。\n" + "建議行動:(1) 加碼家電促銷檔期 (2) 觀察 PChome 補貼是否延續至下週 " + "(3) 對價差大於 5% 的 SKU 主動啟動 EA 流程。" + ) + # 雖然「主要」「促銷」等詞可能重複,但前 50 字的整體 substring 不會出現 ≥ 3 次 + assert _is_low_quality_response(normal_long) is False + + +def test_rule8_placeholder_markers_rejected(): + """偵測佔位符 / 未實作標記""" + from services.openclaw_strategist_service import _is_low_quality_response + + cases = [ + # {{var}} jinja unfilled + "本週業績:{{revenue}} 元,較上週成長 {{wow_pct}}%。建議行動 ...", + # [TODO] / [todo] + 
"本週銷售分析:[TODO] 補上具體數字後完成此段。", + "業績檢討:競品分析 [todo] 待補充。", + # <待填> + "本週概況:營收 NT$<待填>,毛利率 <待填>。", + # 尚未實作 + "策略建議:(尚未實作) 請統帥手動補充。", + ] + for text in cases: + # 補長度避免被規則 1 拒(< 50 字) + text = text + "(測試填充內容延長至大於 50 字)" + assert _is_low_quality_response(text) is True, f"應拒絕含佔位符: {text[:30]}" + + +def test_legitimate_chinese_business_text_passes(): + """合法繁中商業文字應通過所有 8 條規則""" + from services.openclaw_strategist_service import _is_low_quality_response + + text = ( + "本週業績分析(2026-05-04):\n" + "總營收 NT$ 4,230,000,較上週成長 12.3% WoW。\n" + "主要驅動類別:家電(+18%)、生活雜貨(+9%)。\n" + "競品動向:PChome 在 3C 類補貼戰預估壓縮我方 3-5pp 毛利率。\n" + "建議行動:加碼家電檔期,對價差 > 5% SKU 啟動 EA 流程。" + ) + assert _is_low_quality_response(text) is False diff --git a/tests/test_ppt_vision_service.py b/tests/test_ppt_vision_service.py new file mode 100644 index 0000000..bda4b27 --- /dev/null +++ b/tests/test_ppt_vision_service.py @@ -0,0 +1,135 @@ +""" +tests/test_ppt_vision_service.py +───────────────────────────────────────────────────────────────── +Operation Ollama-First v5.0 / Phase 14 — PPT vision (minicpm-v) 驗證 +""" + +import os +import tempfile +from unittest.mock import patch, MagicMock + +import pytest + + +@pytest.fixture(autouse=True) +def _reset_env(monkeypatch): + monkeypatch.delenv('PPT_VISION_ENABLED', raising=False) + yield + + +@pytest.fixture +def fake_image(): + """產生 1KB 假 png 檔給 test 用""" + f = tempfile.NamedTemporaryFile(suffix='.png', delete=False) + f.write(b'\x89PNG\r\n\x1a\n' + b'\x00' * 1000) # PNG magic + 1KB padding + f.close() + yield f.name + try: + os.unlink(f.name) + except Exception: + pass + + +def test_flag_off_returns_disabled_error(fake_image): + """flag OFF 時 check_image 直接回 success=False(不打 HTTP)""" + from services.ppt_vision_service import PPTVisionService + + svc = PPTVisionService() + with patch('services.ppt_vision_service.requests.post') as mock_post: + result = svc.check_image(fake_image) + + assert result.success is False + assert 'PPT_VISION_ENABLED=false' in (result.error or 
'') + mock_post.assert_not_called() + + +def test_missing_image_file(monkeypatch): + monkeypatch.setenv('PPT_VISION_ENABLED', 'true') + from services.ppt_vision_service import PPTVisionService + + svc = PPTVisionService() + result = svc.check_image('/tmp/this_file_does_not_exist_xyz.png') + + assert result.success is False + assert 'image not found' in (result.error or '') + + +def test_no_issues_response(fake_image, monkeypatch): + """minicpm-v 回「✅ 無視覺異常」→ issues_found 應為空 list""" + monkeypatch.setenv('PPT_VISION_ENABLED', 'true') + from services.ppt_vision_service import PPTVisionService + + fake_resp = MagicMock(status_code=200) + fake_resp.json.return_value = {'response': '✅ 無視覺異常'} + + with patch('services.ollama_service.resolve_ollama_host', + return_value='http://test:11434'), \ + patch('services.ppt_vision_service.requests.post', return_value=fake_resp): + svc = PPTVisionService() + result = svc.check_image(fake_image) + + assert result.success is True + assert result.issues_found == [] + assert result.confidence == 1.0 + + +def test_issues_detected(fake_image, monkeypatch): + """minicpm-v 回多個 ⚠️ marker → issues_found 應含解析的問題""" + monkeypatch.setenv('PPT_VISION_ENABLED', 'true') + from services.ppt_vision_service import PPTVisionService + + fake_resp = MagicMock(status_code=200) + fake_resp.json.return_value = { + 'response': '⚠️ 圖表被切掉:右側長條圖超出邊界\n' + '⚠️ 文字溢出:商品標題被遮擋\n' + '其他無問題' + } + + with patch('services.ollama_service.resolve_ollama_host', + return_value='http://test:11434'), \ + patch('services.ppt_vision_service.requests.post', return_value=fake_resp): + svc = PPTVisionService() + result = svc.check_image(fake_image) + + assert result.success is True + assert len(result.issues_found) == 2 + assert any('圖表被切掉' in i for i in result.issues_found) + assert any('文字溢出' in i for i in result.issues_found) + assert result.confidence > 0.5 + + +def test_http_500_marks_unhealthy(fake_image, monkeypatch): + """HTTP 500 → success=False + mark_unhealthy 被呼叫""" + 
monkeypatch.setenv('PPT_VISION_ENABLED', 'true') + from services.ppt_vision_service import PPTVisionService + + fake_resp = MagicMock(status_code=500) + fake_resp.text = 'oops' + + with patch('services.ollama_service.resolve_ollama_host', + return_value='http://test:11434'), \ + patch('services.ollama_service.mark_unhealthy') as mock_mark, \ + patch('services.ppt_vision_service.requests.post', return_value=fake_resp): + svc = PPTVisionService() + result = svc.check_image(fake_image) + + assert result.success is False + assert 'HTTP 500' in (result.error or '') + mock_mark.assert_called_once_with('http://test:11434') + + +def test_timeout_returns_failure(fake_image, monkeypatch): + monkeypatch.setenv('PPT_VISION_ENABLED', 'true') + from services.ppt_vision_service import PPTVisionService + + import requests + with patch('services.ollama_service.resolve_ollama_host', + return_value='http://test:11434'), \ + patch('services.ollama_service.mark_unhealthy'), \ + patch('services.ppt_vision_service.requests.post', + side_effect=requests.Timeout('60s')): + svc = PPTVisionService() + result = svc.check_image(fake_image) + + assert result.success is False + assert 'timeout' in (result.error or '').lower()