#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/test_openclaw_qa_routing.py

Unit tests for OpenClaw Q&A routing and the response quality gate
(Operation Ollama-First v5.0 — Phase 3, A7 fullstack-engineer).

Covers:
  - feature flag OPENCLAW_QA_OLLAMA_FIRST=false -> the legacy setting is still
    accepted, but Ollama-first is enforced anyway
  - flag=true + high-quality Ollama reply -> return the Ollama result directly,
    Gemini is not used
  - flag=true + low-quality Ollama reply -> escalate to Gemini and tag
    fallback_to=openclaw_qa_gemini_fallback
  - flag=true + Ollama call failure -> escalate to Gemini
  - _is_low_quality_response rules: empty string / too short / Simplified
    Chinese pollution / refusal / run-on text

Run:
    pytest tests/test_openclaw_qa_routing.py -v
"""

import os
import sys
import time
from types import ModuleType, SimpleNamespace
from typing import Any, Dict, Optional

import pytest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import services.openclaw_strategist_service as svc
import services.ai_call_logger as logger_mod
from services.ai_call_logger import _reset_kill_switch


# ─────────────────────────────────────────────────────────────────────────────
# Fixtures
# ─────────────────────────────────────────────────────────────────────────────

@pytest.fixture(autouse=True)
def reset_state(monkeypatch):
    """Reset the logger kill-switch and collect ai_calls records in memory.

    The logger's ``_write_to_db`` is replaced by a stub that appends a plain
    dict snapshot of each call state to a list; that list is what the fixture
    yields, so tests can assert on the recorded telemetry.
    """
    _reset_kill_switch()
    captured = []

    def fake_write(state):
        captured.append({
            'caller': state.caller,
            'provider': state.provider,
            'model': state.model,
            'status': state.status,
            'fallback_to': state.fallback_to,
            'error': state.error,
            # Copy so later mutation of state.meta cannot alter the snapshot.
            'meta': dict(state.meta),
            'request_id': state.request_id,
        })

    monkeypatch.setattr(logger_mod, '_write_to_db', fake_write)
    monkeypatch.setenv('AI_CALL_LOGGING_ENABLED', 'true')
    # Leave the flag unset so each test starts from the default (Ollama-first).
    monkeypatch.delenv('OPENCLAW_QA_OLLAMA_FIRST', raising=False)
    yield captured


def _wait_async(captured, n=1, timeout=2.0):
    """Poll until ``captured`` holds at least ``n`` records or time runs out.

    Args:
        captured: The list filled by the ``reset_state`` write stub.
        n: Minimum number of records to wait for.
        timeout: Maximum time to wait, in seconds.

    Returns:
        True if at least ``n`` records are present, False on timeout.
    """
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if len(captured) >= n:
            return True
        time.sleep(0.01)
    # One last look: a record may have landed during the final sleep.
    return len(captured) >= n


def _stub_ollama_generate(
    monkeypatch,
    *,
    success: bool = True,
    content: str = '本週 momo 業績成長 12%,建議加碼家電促銷。',
    error: str = 'ConnectionError: connection refused',
    host: str = 'http://34.87.90.216:11434',
    input_tokens: int = 150,
    output_tokens: int = 60,
):
    """Route OpenClaw QA tests through the OllamaService interface.

    Replaces ``services.ollama_service.OllamaService`` with a fake whose
    ``generate`` always returns one canned response, instead of hitting a
    single host directly.

    Args:
        monkeypatch: pytest's monkeypatch fixture.
        success: Whether the canned response reports success.
        content: Response text; forced to ``''`` when ``success`` is False.
        error: Error text; only reported when ``success`` is False.
        host: Host URL reported on the canned response.
        input_tokens: Reported prompt tokens; forced to 0 on failure.
        output_tokens: Reported completion tokens; forced to 0 on failure.

    Returns:
        A ``(FakeOllamaService, fake_resp)`` tuple. The class records every
        created instance in ``instances`` and each instance records its
        ``generate`` keyword arguments in ``generate_calls``.
    """
    import services.ollama_service as ollama_mod

    fake_resp = SimpleNamespace(
        success=success,
        content=content if success else '',
        model=svc.OPENCLAW_QA_OLLAMA_MODEL,
        error=None if success else error,
        total_duration=0.12,
        host=host,
        input_tokens=input_tokens if success else 0,
        output_tokens=output_tokens if success else 0,
    )

    class FakeOllamaService:
        # Fresh list per stub call, because the class itself is created per call.
        instances = []

        def __init__(self, *args, **kwargs):
            self.init_args = args
            self.init_kwargs = kwargs
            self.generate_calls = []
            FakeOllamaService.instances.append(self)

        def generate(self, **kwargs):
            self.generate_calls.append(kwargs)
            return fake_resp

    monkeypatch.setattr(ollama_mod, 'OllamaService', FakeOllamaService)
    return FakeOllamaService, fake_resp


def _stub_gemini(monkeypatch, *, text: str = "Gemini 備援回覆", raise_error: bool = False):
    """Install a test double for ``google.generativeai``.

    Keeps unit tests from calling the external API by placing fake ``google``
    and ``google.generativeai`` modules into ``sys.modules``.

    Args:
        monkeypatch: pytest's monkeypatch fixture.
        text: Text returned by the fake model's ``generate_content``.
        raise_error: When True, ``generate_content`` raises ``RuntimeError``.
    """
    google_module = ModuleType('google')
    genai_module = ModuleType('google.generativeai')

    class FakeGenerativeModel:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

        def generate_content(self, user_prompt, request_options=None):
            if raise_error:
                raise RuntimeError('gemini down')
            return SimpleNamespace(
                text=text,
                usage_metadata=SimpleNamespace(
                    prompt_token_count=42,
                    candidates_token_count=16,
                ),
            )

    genai_module.configure = lambda api_key=None: None
    genai_module.GenerativeModel = FakeGenerativeModel
    genai_module.types = SimpleNamespace(GenerationConfig=lambda **kwargs: kwargs)
    google_module.generativeai = genai_module
    monkeypatch.setitem(sys.modules, 'google', google_module)
    monkeypatch.setitem(sys.modules, 'google.generativeai', genai_module)


def _stub_nim(monkeypatch, *, text: str = "NIM 備援回覆"):
    """Install a test double for the NVIDIA NIM HTTP call.

    Replaces ``svc.requests.post`` with a callable that returns a fake
    response carrying ``text`` as the single choice's message content.
    """
    class FakeNimResponse:
        def raise_for_status(self):
            return None

        def json(self):
            return {
                "usage": {"prompt_tokens": 21, "completion_tokens": 9},
                "choices": [{"message": {"content": text}}],
            }

    monkeypatch.setattr(svc.requests, 'post', lambda *args, **kwargs: FakeNimResponse())


# ─────────────────────────────────────────────────────────────────────────────
# 1. _is_low_quality_response: pure-function rules
# ─────────────────────────────────────────────────────────────────────────────

class TestLowQualityRules:
    """Rule-by-rule checks of ``svc._is_low_quality_response``."""

    def test_empty_string_is_low_quality(self):
        assert svc._is_low_quality_response("") is True
        assert svc._is_low_quality_response(None) is True
        assert svc._is_low_quality_response(" \n ") is True

    def test_too_short_is_low_quality(self):
        # Length < 50 characters -> low quality.
        assert svc._is_low_quality_response("你好,我是 OpenClaw") is True

    def test_acceptable_response_passes(self):
        good = (
            "本週 momo 業績較上週成長 12%,主要受惠於家電與生活雜貨。\n"
            "建議:持續關注 PChome 競價動態,必要時調整定價策略。\n"
            "預估下週 momo 仍有 5-8% 成長空間。"
        )
        assert svc._is_low_quality_response(good) is False

    def test_simplified_pollution_detected(self):
        # A reply containing >= 3 Simplified Chinese hint characters is low
        # quality (the core check for Qwen's Traditional Chinese weakness).
        polluted = (
            "本周业绩比上周增长,您可以关注这个产品的价格变动趋势,"
            "我们建议处理掉滞销库存以提高资产效率"
        )
        assert svc._is_low_quality_response(polluted) is True

    def test_two_simplified_chars_still_acceptable(self):
        # Only 2 Simplified hint characters (below the threshold) plus good
        # structure -> still acceptable. This avoids over-sensitive rejection
        # of Traditional Chinese replies with a stray Simplified character.
        text = (
            "本週 momo 业绩成長明顯,建議持續關注競品動向。\n"
            "重點品類:家電、3C、生活雜貨。\n"
            "下週可加碼促銷檔期。"
        )
        assert svc._is_low_quality_response(text) is False

    def test_refusal_pattern_detected(self):
        for refusal in ['無法回答', '我不知道', '抱歉,我無法協助']:
            text = f"關於這個問題,{refusal},請改問其他內容以便我協助您。"
            assert svc._is_low_quality_response(text) is True, f"應被判定為拒答:{refusal}"

    def test_flowing_text_no_breaks_is_low_quality(self):
        # 200+ characters without a single line break -> run-on text.
        text = "本週業績整體呈現上升趨勢。" * 20  # 200+ characters
        assert "\n" not in text
        assert len(text) > 200
        assert svc._is_low_quality_response(text) is True

    def test_long_text_with_breaks_is_acceptable(self):
        # 200+ characters but with reasonable line breaks -> well structured.
        text = (
            "本週業績整體呈現上升趨勢,主要驅動類別為家電與生活雜貨大類別。\n"
            "競品動向:PChome 在 3C 類發動大規模補貼戰,預估壓縮我方 3 至 5 個百分點毛利率。\n"
            "蝦皮也在母嬰用品加碼免運券促銷,需密切觀察跟降節奏,避免市占下滑。\n"
            "建議行動:(1) 加碼家電促銷檔期,重點操作大尺寸電視與廚房家電,"
            "(2) 觀察 PChome 補貼是否延續至下週,準備二段反擊方案,"
            "(3) 對價差大於 5% 的 SKU 主動啟動 EA 流程,避免毛利持續流失。"
        )
        assert len(text) > 200
        assert svc._is_low_quality_response(text) is False


# ─────────────────────────────────────────────────────────────────────────────
# 2. Routing: even with feature flag = false, Gemini-first is not allowed
# ─────────────────────────────────────────────────────────────────────────────

class TestFlagOff:
    """Routing with the feature flag explicitly turned off."""

    def test_flag_false_still_routes_to_ollama_first(self, monkeypatch, reset_state):
        """flag=false -> the old emergency path is closed; Ollama still goes first."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'false')
        legacy_called = {'count': 0}
        ollama_called = {'count': 0}

        def fake_legacy(q, ctx, request_id=None):
            legacy_called['count'] += 1
            return "[legacy gemini reply]"

        def fake_ollama(q, ctx, rid):
            ollama_called['count'] += 1
            return "[ollama reply with enough length to pass quality gate。這是一段繁體中文策略回覆,避免被判定過短。]"

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)

        result = svc.generate_strategy_response("本週業績如何?")

        assert result.startswith("[ollama reply")
        assert legacy_called['count'] == 0
        assert ollama_called['count'] == 1


class TestOpenClawReportRouting:
    """Report-generation routing: Ollama first, Gemini only as a fallback."""

    def test_report_llm_uses_ollama_before_gemini(self, monkeypatch):
        calls = []

        def fake_ollama(system_prompt, user_prompt, **kwargs):
            calls.append(("ollama", kwargs["caller"]))
            return "Ollama 報告內容足夠完整"

        def fake_gemini(*args, **kwargs):
            raise AssertionError("Gemini must not run when Ollama succeeds")

        monkeypatch.setattr(svc, "_call_ollama_strategy", fake_ollama)
        monkeypatch.setattr(svc, "_call_gemini", fake_gemini)

        result = svc._call_openclaw_llm_ollama_first(
            "system",
            "user",
            temperature=0.3,
            caller="openclaw_weekly",
        )

        assert result == "Ollama 報告內容足夠完整"
        assert calls == [("ollama", "openclaw_weekly")]

    def test_report_llm_disables_111_for_long_strategy_reports(self, monkeypatch, reset_state):
        FakeOllamaService, _fake_resp = _stub_ollama_generate(
            monkeypatch,
            content="OpenClaw 報告內容足夠完整,並且只允許 GCP-A/GCP-B 承接長報告。",
        )

        result = svc._call_ollama_strategy(
            "system",
            "user",
            temperature=0.3,
            caller="openclaw_meta",
            num_predict=3072,
        )

        assert result.startswith("OpenClaw 報告內容")
        assert FakeOllamaService.instances
        call_kwargs = FakeOllamaService.instances[-1].generate_calls[-1]
        assert call_kwargs["allow_111_fallback"] is False

    def test_report_llm_gemini_is_suffix_fallback_only(self, monkeypatch):
        monkeypatch.setenv("GEMINI_API_HARD_DISABLED", "false")
        monkeypatch.setenv("GEMINI_FALLBACK_ENABLED", "true")
        monkeypatch.setenv("GEMINI_API_KEY", "test-key")
        calls = []

        # Ollama yields nothing, so the router has to fall through to Gemini.
        monkeypatch.setattr(svc, "_call_ollama_strategy", lambda *a, **kw: None)

        def fake_gemini(system_prompt, user_prompt, **kwargs):
            calls.append(("gemini", kwargs["caller"]))
            return "Gemini fallback content"

        monkeypatch.setattr(svc, "_call_gemini", fake_gemini)

        result = svc._call_openclaw_llm_ollama_first(
            "system",
            "user",
            temperature=0.3,
            caller="openclaw_monthly",
        )

        assert result == "Gemini fallback content"
        assert calls == [("gemini", "openclaw_monthly_gemini_fallback")]

    # NOTE(review): the two tests below exercise the QA feature flag rather
    # than report routing; they look like they belong with TestFlagOff.
    # Left in place because moving them would change their pytest node IDs —
    # confirm before relocating.
    def test_flag_unset_defaults_to_ollama_first(self, monkeypatch, reset_state):
        """Env var not set at all -> defaults to true -> Ollama goes first."""
        monkeypatch.delenv('OPENCLAW_QA_OLLAMA_FIRST', raising=False)
        legacy_called = {'count': 0}
        ollama_called = {'count': 0}

        def fake_legacy(q, ctx, request_id=None):
            legacy_called['count'] += 1
            return "[legacy reply]"

        def fake_ollama(q, ctx, rid):
            ollama_called['count'] += 1
            return (
                "Ollama 主路徑已接手競品分析。建議先檢查近七日價差、銷售跌幅、"
                "PChome 優勢品項與高毛利 SKU,再依 HIGH/MED/LOW 分層處理。"
                "若價差超過 15% 且銷售下滑超過 20%,應優先送人工覆核與 Telegram 告警。"
            )

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)

        result = svc.generate_strategy_response("競品分析")

        assert ollama_called['count'] == 1
        assert legacy_called['count'] == 0
        assert result.startswith("Ollama 主路徑已接手")

    def test_empty_query_short_circuits(self, monkeypatch, reset_state):
        """An empty query must not trigger any LLM call."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
        legacy_called = {'count': 0}
        ollama_called = {'count': 0}

        def fake_legacy(q, ctx, request_id=None):
            legacy_called['count'] += 1
            return ""

        def fake_ollama(q, ctx, rid):
            ollama_called['count'] += 1
            return ""

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)

        out = svc.generate_strategy_response("")

        assert "請輸入您的問題" in out
        assert legacy_called['count'] == 0
        assert ollama_called['count'] == 0


# ─────────────────────────────────────────────────────────────────────────────
# 3. Routing: feature flag = true + high/low quality Ollama replies
# ─────────────────────────────────────────────────────────────────────────────

class TestFlagOn:
    """Routing with the feature flag explicitly turned on."""

    def test_flag_true_high_quality_returns_ollama(self, monkeypatch, reset_state):
        """flag=true + high-quality Ollama reply -> returned as-is, Gemini unused."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
        legacy_called = {'count': 0}
        good_reply = (
            "本週 momo 業績成長 12%,主要驅動類別為家電。\n"
            "建議:持續關注 PChome 競價並加碼家電促銷檔期。\n"
            "下週預估仍有 5-8% 成長空間。"
        )
        monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: good_reply)

        def fake_legacy(q, ctx, request_id=None):
            legacy_called['count'] += 1
            return "[gemini fallback]"

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)

        out = svc.generate_strategy_response("本週業績如何?")

        assert out == good_reply
        assert legacy_called['count'] == 0  # Gemini was never called

    def test_flag_true_low_quality_falls_back_to_gemini(self, monkeypatch, reset_state):
        """flag=true + low-quality Ollama reply (Simplified pollution) -> Gemini fallback."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
        bad_reply = "本周业绩增长,您可以关注这个产品的价格变动,我们建议处理库存"
        legacy_called = {'count': 0}
        monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: bad_reply)

        def fake_legacy(q, ctx, request_id=None):
            legacy_called['count'] += 1
            return "[gemini high quality reply]"

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)

        out = svc.generate_strategy_response("本週業績如何?")

        assert out == "[gemini high quality reply]"
        assert legacy_called['count'] == 1

    def test_flag_true_ollama_returns_none_falls_back(self, monkeypatch, reset_state):
        """flag=true + Ollama call failed (returns None) -> Gemini fallback."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
        legacy_called = {'count': 0}
        monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: None)

        def fake_legacy(q, ctx, request_id=None):
            legacy_called['count'] += 1
            return "[gemini reply after ollama down]"

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)

        out = svc.generate_strategy_response("test")

        assert out == "[gemini reply after ollama down]"
        assert legacy_called['count'] == 1


# ─────────────────────────────────────────────────────────────────────────────
# 4. _call_qwen3_qa: ai_call_logger integration + fallback_to tagging
# ─────────────────────────────────────────────────────────────────────────────

class TestCallQwen3Telemetry:
    """Telemetry written by ``svc._call_qwen3_qa`` on success and failure."""

    def test_qwen3_logs_ok_status_on_success(self, monkeypatch, reset_state):
        """High-quality reply -> status=ok, caller=openclaw_qa, provider=gcp_ollama."""
        captured = reset_state
        fake_service, _fake_resp = _stub_ollama_generate(monkeypatch)

        result = svc._call_qwen3_qa("本週業績?", None, "qa-test123")

        assert result is not None
        assert "業績成長" in result
        assert fake_service.instances
        generate_kwargs = fake_service.instances[0].generate_calls[0]
        assert generate_kwargs['model'] == svc.OPENCLAW_QA_OLLAMA_MODEL
        assert generate_kwargs['system_prompt'] == svc.QWEN3_TC_SYSTEM_PROMPT

        assert _wait_async(captured, 1)
        assert len(captured) == 1
        rec = captured[0]
        assert rec['caller'] == 'openclaw_qa'
        assert rec['provider'] == 'gcp_ollama'
        assert rec['model'] == svc.OPENCLAW_QA_OLLAMA_MODEL
        assert rec['status'] == 'ok'
        assert rec['fallback_to'] is None
        assert rec['meta'].get('flag') == 'OPENCLAW_QA_OLLAMA_FIRST'
        assert rec['meta'].get('route') == 'ollama_first'
        assert rec['meta'].get('host') == 'http://34.87.90.216:11434'
        assert rec['meta'].get('host_label') == 'GCP-SSD'
        assert rec['request_id'] == "qa-test123"

    def test_qwen3_logs_actual_secondary_provider_after_retry(self, monkeypatch, reset_state):
        """If OllamaService lands on GCP-B, ai_calls.provider must be ollama_secondary."""
        captured = reset_state
        _stub_ollama_generate(
            monkeypatch,
            host='http://34.21.145.224:11434',
        )

        result = svc._call_qwen3_qa("本週業績?", None, "qa-secondary")

        assert result is not None
        assert _wait_async(captured, 1)
        rec = captured[0]
        assert rec['provider'] == 'ollama_secondary'
        assert rec['meta'].get('host_label') == 'GCP-SSD-2'

    def test_qwen3_logs_fallback_on_exception(self, monkeypatch, reset_state):
        """Ollama connection failure -> fallback_to=openclaw_qa_gemini_fallback, status=fallback."""
        captured = reset_state
        _stub_ollama_generate(monkeypatch, success=False)

        result = svc._call_qwen3_qa("test", None, "qa-fail123")

        assert result is None
        assert _wait_async(captured, 1)
        rec = captured[0]
        assert rec['status'] == 'fallback'
        assert rec['fallback_to'] == 'openclaw_qa_gemini_fallback'
        assert rec['error'] is not None
        assert 'ConnectionError' in rec['error']

    def test_qwen3_logs_fallback_on_empty_response(self, monkeypatch, reset_state):
        """Empty Ollama response -> treated as empty_response and tagged fallback."""
        captured = reset_state
        _stub_ollama_generate(
            monkeypatch,
            content='',
            input_tokens=100,
            output_tokens=0,
        )

        result = svc._call_qwen3_qa("test", None, "qa-empty")

        assert result is None
        assert _wait_async(captured, 1)
        rec = captured[0]
        assert rec['status'] == 'fallback'
        assert rec['fallback_to'] == 'openclaw_qa_gemini_fallback'
        assert rec['error'] == 'empty_response'


# ─────────────────────────────────────────────────────────────────────────────
# 5. Gemini / NIM fallback telemetry: Gemini must not be logged as the
#    openclaw_qa primary path
# ─────────────────────────────────────────────────────────────────────────────

class TestLegacyFallbackTelemetry:
    """Caller/provider labels recorded by ``svc._legacy_gemini_first_qa``."""

    def test_gemini_backup_uses_dedicated_caller(self, monkeypatch, reset_state):
        """Gemini after Ollama logs as openclaw_qa_gemini_fallback, not openclaw_qa."""
        captured = reset_state
        monkeypatch.setenv('GEMINI_API_HARD_DISABLED', 'false')
        monkeypatch.setenv('GEMINI_FALLBACK_ENABLED', 'true')
        monkeypatch.setenv('GEMINI_API_KEY', 'test-gemini-key')
        monkeypatch.setattr(svc, 'NVIDIA_API_KEY', '')
        _stub_gemini(monkeypatch, text="Gemini 備援:請先檢查近七日業績與競品價差。")

        out = svc._legacy_gemini_first_qa("本週業績如何?", None, request_id="qa-gemini")

        assert out.startswith("Gemini 備援")
        assert _wait_async(captured, 1)
        assert len(captured) == 1
        rec = captured[0]
        assert rec['caller'] == 'openclaw_qa_gemini_fallback'
        assert rec['provider'] == 'gemini'
        assert rec['status'] == 'ok'
        assert rec['fallback_to'] is None
        assert not any(
            r['caller'] == 'openclaw_qa' and r['provider'] == 'gemini'
            for r in captured
        )

    def test_gemini_backup_failure_falls_to_standard_nim_caller(self, monkeypatch, reset_state):
        """After Gemini fails, NIM logs as openclaw_qa_nim, not fallback_fallback_nim."""
        captured = reset_state
        monkeypatch.setenv('GEMINI_API_HARD_DISABLED', 'false')
        monkeypatch.setenv('GEMINI_FALLBACK_ENABLED', 'true')
        monkeypatch.setenv('GEMINI_API_KEY', 'test-gemini-key')
        monkeypatch.setattr(svc, 'NVIDIA_API_KEY', 'test-nim-key')
        _stub_gemini(monkeypatch, raise_error=True)
        _stub_nim(monkeypatch, text="NIM 備援:請改看 /daily 與 /threats。")

        out = svc._legacy_gemini_first_qa("本週業績如何?", None, request_id="qa-nim")

        assert out.startswith("NIM 備援")
        assert _wait_async(captured, 2)
        assert [r['caller'] for r in captured] == [
            'openclaw_qa_gemini_fallback',
            'openclaw_qa_nim',
        ]
        gemini_rec, nim_rec = captured
        assert gemini_rec['provider'] == 'gemini'
        assert gemini_rec['status'] == 'fallback'
        assert gemini_rec['fallback_to'] == 'openclaw_qa_nim'
        assert nim_rec['provider'] == 'nim'
        assert nim_rec['status'] == 'ok'


# ─────────────────────────────────────────────────────────────────────────────
# 6. Environment variable is read live (runtime toggle)
# ─────────────────────────────────────────────────────────────────────────────

class TestRuntimeToggle:
    """The flag helper must reflect env changes made between calls."""

    def test_flag_helper_reads_env_each_call(self, monkeypatch):
        """_qa_ollama_first_enabled() re-reads env on every call (runtime canary toggle)."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'false')
        assert svc._qa_ollama_first_enabled() is False

        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
        assert svc._qa_ollama_first_enabled() is True

        # Accepted truthy spellings.
        for v in ('TRUE', 'True', '1', 'yes', 'on'):
            monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', v)
            assert svc._qa_ollama_first_enabled() is True, f"應視為 true: {v!r}"

        for v in ('false', '0', 'no', 'off', '', 'foo'):
            monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', v)
            assert svc._qa_ollama_first_enabled() is False, f"應視為 false: {v!r}"