#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/test_openclaw_qa_routing.py

OpenClaw Q&A routing + quality-gate unit tests
(Operation Ollama-First v5.0 — Phase 3, A7 fullstack-engineer)

Covers:
- feature flag OPENCLAW_QA_OLLAMA_FIRST=false → Gemini-first path (regression test)
- flag=true + high-quality Ollama reply → Ollama result returned directly, Gemini not used
- flag=true + low-quality Ollama reply → escalate to Gemini, tagged
  fallback_to=openclaw_qa_gemini_fallback
- flag=true + Ollama call failure → escalate to Gemini
- _is_low_quality_response rules: empty string / too short / Simplified-Chinese
  pollution / refusal / unbroken wall of text

Run:
    pytest tests/test_openclaw_qa_routing.py -v
"""

import os
import sys
import time
from typing import Any, Dict, Optional

import pytest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import services.openclaw_strategist_service as svc
import services.ai_call_logger as logger_mod
from services.ai_call_logger import _reset_kill_switch


# ─────────────────────────────────────────────────────────────────────────────
# Fixtures
# ─────────────────────────────────────────────────────────────────────────────

@pytest.fixture(autouse=True)
def reset_state(monkeypatch):
    """Reset the logger kill-switch and collect ai_calls records per test.

    The logger's ``_write_to_db`` is replaced by a stub that appends a plain
    dict snapshot of each call state to a list; that list is what the fixture
    yields, so tests can inspect what would have been written.
    """
    _reset_kill_switch()
    captured = []

    def fake_write(state):
        captured.append({
            'caller': state.caller,
            'provider': state.provider,
            'model': state.model,
            'status': state.status,
            'fallback_to': state.fallback_to,
            'error': state.error,
            'meta': dict(state.meta),
            'request_id': state.request_id,
        })

    monkeypatch.setattr(logger_mod, '_write_to_db', fake_write)
    monkeypatch.setenv('AI_CALL_LOGGING_ENABLED', 'true')
    # Default: flag unset (pre-rollout behavior).
    monkeypatch.delenv('OPENCLAW_QA_OLLAMA_FIRST', raising=False)
    yield captured


def _wait_async(captured, n=1, timeout=2.0):
    """Poll until ``captured`` holds at least ``n`` records or ``timeout`` elapses.

    Args:
        captured: The list populated by the ``reset_state`` fixture's stub.
        n: Minimum number of records to wait for.
        timeout: Maximum time to wait, in seconds.

    Returns:
        True if at least ``n`` records are present, False on timeout.
    """
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if len(captured) >= n:
            return True
        time.sleep(0.01)
    # Final check: a record may have landed during the last sleep.
    return len(captured) >= n


# ─────────────────────────────────────────────────────────────────────────────
# 1. _is_low_quality_response pure-function rules
# ─────────────────────────────────────────────────────────────────────────────

class TestLowQualityRules:
    """Rule-by-rule checks of ``svc._is_low_quality_response``."""

    def test_empty_string_is_low_quality(self):
        assert svc._is_low_quality_response("") is True
        assert svc._is_low_quality_response(None) is True
        assert svc._is_low_quality_response(" \n ") is True

    def test_too_short_is_low_quality(self):
        # Fewer than 50 characters → low quality.
        assert svc._is_low_quality_response("你好,我是 OpenClaw") is True

    def test_acceptable_response_passes(self):
        good = (
            "本週 momo 業績較上週成長 12%,主要受惠於家電與生活雜貨。\n"
            "建議:持續關注 PChome 競價動態,必要時調整定價策略。\n"
            "預估下週 momo 仍有 5-8% 成長空間。"
        )
        assert svc._is_low_quality_response(good) is False

    def test_simplified_pollution_detected(self):
        # >= 3 Simplified-Chinese hint characters → low quality
        # (core check for Qwen's Traditional-Chinese weakness).
        polluted = (
            "本周业绩比上周增长,您可以关注这个产品的价格变动趋势,"
            "我们建议处理掉滞销库存以提高资产效率"
        )
        assert svc._is_low_quality_response(polluted) is True

    def test_two_simplified_chars_still_acceptable(self):
        # Only 2 Simplified hint characters (below the threshold) plus good
        # structure → still acceptable. Guards against over-sensitivity that
        # would reject normal Traditional-Chinese replies containing the odd
        # Simplified character.
        text = (
            "本週 momo 业绩成長明顯,建議持續關注競品動向。\n"
            "重點品類:家電、3C、生活雜貨。\n"
            "下週可加碼促銷檔期。"
        )
        assert svc._is_low_quality_response(text) is False

    def test_refusal_pattern_detected(self):
        for refusal in ['無法回答', '我不知道', '抱歉,我無法協助']:
            text = f"關於這個問題,{refusal},請改問其他內容以便我協助您。"
            assert svc._is_low_quality_response(text) is True, f"應被判定為拒答:{refusal}"

    def test_flowing_text_no_breaks_is_low_quality(self):
        # 200+ characters with no line break → wall of text.
        text = "本週業績整體呈現上升趨勢。" * 20  # ~200+ characters
        assert "\n" not in text
        assert len(text) > 200
        assert svc._is_low_quality_response(text) is True

    def test_long_text_with_breaks_is_acceptable(self):
        # 200+ characters but with reasonable line breaks → well structured.
        text = (
            "本週業績整體呈現上升趨勢,主要驅動類別為家電與生活雜貨大類別。\n"
            "競品動向:PChome 在 3C 類發動大規模補貼戰,預估壓縮我方 3 至 5 個百分點毛利率。\n"
            "蝦皮也在母嬰用品加碼免運券促銷,需密切觀察跟降節奏,避免市占下滑。\n"
            "建議行動:(1) 加碼家電促銷檔期,重點操作大尺寸電視與廚房家電,"
            "(2) 觀察 PChome 補貼是否延續至下週,準備二段反擊方案,"
            "(3) 對價差大於 5% 的 SKU 主動啟動 EA 流程,避免毛利持續流失。"
        )
        assert len(text) > 200
        assert svc._is_low_quality_response(text) is False


# ─────────────────────────────────────────────────────────────────────────────
# 2. Routing: feature flag = false keeps the Gemini-first path (regression)
# ─────────────────────────────────────────────────────────────────────────────

class TestFlagOff:
    """Routing when the Ollama-first flag is off, unset, or the query is empty."""

    def test_flag_false_routes_to_legacy(self, monkeypatch, reset_state):
        """flag=false → _call_qwen3_qa must not run; _legacy_gemini_first_qa handles it."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'false')

        legacy_called = {'count': 0}
        ollama_called = {'count': 0}

        def fake_legacy(q, ctx, request_id=None):
            legacy_called['count'] += 1
            return "[legacy gemini reply]"

        def fake_ollama(q, ctx, rid):
            ollama_called['count'] += 1
            return "[should not be called]"

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)

        result = svc.generate_strategy_response("本週業績如何?")

        assert result == "[legacy gemini reply]"
        assert legacy_called['count'] == 1
        assert ollama_called['count'] == 0

    def test_flag_unset_defaults_to_off(self, monkeypatch, reset_state):
        """Env var not set at all → defaults to false → legacy path."""
        monkeypatch.delenv('OPENCLAW_QA_OLLAMA_FIRST', raising=False)

        legacy_called = {'count': 0}
        ollama_called = {'count': 0}

        def fake_legacy(q, ctx, request_id=None):
            legacy_called['count'] += 1
            return "[legacy reply]"

        def fake_ollama(q, ctx, rid):
            ollama_called['count'] += 1
            return "[should not be called]"

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
        # Stub the Ollama call instead of relying on a real network failure:
        # a failed Ollama call falls back to the legacy path (see
        # TestFlagOn.test_flag_true_ollama_returns_none_falls_back), so without
        # this stub a wrongly-enabled default would still satisfy the
        # legacy-side assertions below and go unnoticed.
        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)

        result = svc.generate_strategy_response("競品分析")

        assert legacy_called['count'] == 1
        assert ollama_called['count'] == 0
        assert result == "[legacy reply]"

    def test_empty_query_short_circuits(self, monkeypatch, reset_state):
        """An empty query must not trigger any LLM call."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')

        legacy_called = {'count': 0}
        ollama_called = {'count': 0}

        def fake_legacy(q, ctx, request_id=None):
            legacy_called['count'] += 1
            return ""

        def fake_ollama(q, ctx, rid):
            ollama_called['count'] += 1
            return ""

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)

        out = svc.generate_strategy_response("")

        assert "請輸入您的問題" in out
        assert legacy_called['count'] == 0
        assert ollama_called['count'] == 0


# ─────────────────────────────────────────────────────────────────────────────
# 3. Routing: feature flag = true + high/low-quality Ollama replies
# ─────────────────────────────────────────────────────────────────────────────

class TestFlagOn:
    """Routing when the Ollama-first flag is on."""

    def test_flag_true_high_quality_returns_ollama(self, monkeypatch, reset_state):
        """flag=true + high-quality Ollama reply → returned directly, Gemini not used."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')

        legacy_called = {'count': 0}
        good_reply = (
            "本週 momo 業績成長 12%,主要驅動類別為家電。\n"
            "建議:持續關注 PChome 競價並加碼家電促銷檔期。\n"
            "下週預估仍有 5-8% 成長空間。"
        )

        monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: good_reply)

        def fake_legacy(q, ctx, request_id=None):
            legacy_called['count'] += 1
            return "[gemini fallback]"

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)

        out = svc.generate_strategy_response("本週業績如何?")

        assert out == good_reply
        assert legacy_called['count'] == 0  # Gemini was not called

    def test_flag_true_low_quality_falls_back_to_gemini(self, monkeypatch, reset_state):
        """flag=true + low-quality Ollama reply (Simplified pollution) → Gemini fallback."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')

        bad_reply = "本周业绩增长,您可以关注这个产品的价格变动,我们建议处理库存"
        legacy_called = {'count': 0}

        monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: bad_reply)

        def fake_legacy(q, ctx, request_id=None):
            legacy_called['count'] += 1
            return "[gemini high quality reply]"

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)

        out = svc.generate_strategy_response("本週業績如何?")

        assert out == "[gemini high quality reply]"
        assert legacy_called['count'] == 1

    def test_flag_true_ollama_returns_none_falls_back(self, monkeypatch, reset_state):
        """flag=true + Ollama call failed (returns None) → Gemini fallback."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')

        legacy_called = {'count': 0}

        monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: None)

        def fake_legacy(q, ctx, request_id=None):
            legacy_called['count'] += 1
            return "[gemini reply after ollama down]"

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)

        out = svc.generate_strategy_response("test")

        assert out == "[gemini reply after ollama down]"
        assert legacy_called['count'] == 1


# ─────────────────────────────────────────────────────────────────────────────
# 4. _call_qwen3_qa: ai_call_logger integration + fallback_to tagging
# ─────────────────────────────────────────────────────────────────────────────

class TestCallQwen3Telemetry:
    """Records emitted by ``svc._call_qwen3_qa`` with ``requests.post`` stubbed."""

    def test_qwen3_logs_ok_status_on_success(self, monkeypatch, reset_state):
        """Good reply → record has status=ok, caller=openclaw_qa, provider=gcp_ollama."""
        captured = reset_state

        class FakeResp:
            status_code = 200

            def raise_for_status(self):
                pass

            def json(self):
                return {
                    'response': '本週 momo 業績成長 12%,建議加碼家電促銷。',
                    'prompt_eval_count': 150,
                    'eval_count': 60,
                }

        monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: FakeResp())

        result = svc._call_qwen3_qa("本週業績?", None, "qa-test123")

        assert result is not None
        assert "業績成長" in result
        assert _wait_async(captured, 1)
        assert len(captured) == 1

        rec = captured[0]
        assert rec['caller'] == 'openclaw_qa'
        assert rec['provider'] == 'gcp_ollama'
        assert rec['model'] == svc.OPENCLAW_QA_OLLAMA_MODEL
        assert rec['status'] == 'ok'
        assert rec['fallback_to'] is None
        assert rec['meta'].get('flag') == 'OPENCLAW_QA_OLLAMA_FIRST'
        assert rec['request_id'] == "qa-test123"

    def test_qwen3_logs_fallback_on_exception(self, monkeypatch, reset_state):
        """Ollama connection failure → record has status=fallback and the Gemini fallback tag."""
        captured = reset_state

        def boom(*a, **kw):
            raise svc.requests.ConnectionError("connection refused")

        monkeypatch.setattr(svc.requests, 'post', boom)

        result = svc._call_qwen3_qa("test", None, "qa-fail123")

        assert result is None
        assert _wait_async(captured, 1)

        rec = captured[0]
        assert rec['status'] == 'fallback'
        assert rec['fallback_to'] == 'openclaw_qa_gemini_fallback'
        assert rec['error'] is not None
        assert 'ConnectionError' in rec['error']

    def test_qwen3_logs_fallback_on_empty_response(self, monkeypatch, reset_state):
        """Ollama returns an empty response → treated as empty_response, tagged fallback."""
        captured = reset_state

        class FakeResp:
            status_code = 200

            def raise_for_status(self):
                pass

            def json(self):
                return {'response': '', 'prompt_eval_count': 100, 'eval_count': 0}

        monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: FakeResp())

        result = svc._call_qwen3_qa("test", None, "qa-empty")

        assert result is None
        assert _wait_async(captured, 1)

        rec = captured[0]
        assert rec['status'] == 'fallback'
        assert rec['fallback_to'] == 'openclaw_qa_gemini_fallback'
        assert rec['error'] == 'empty_response'


# ─────────────────────────────────────────────────────────────────────────────
# 5. Env var is read live (runtime toggle)
# ─────────────────────────────────────────────────────────────────────────────

class TestRuntimeToggle:
    """``svc._qa_ollama_first_enabled`` must reflect the current environment."""

    def test_flag_helper_reads_env_each_call(self, monkeypatch):
        """_qa_ollama_first_enabled() re-reads the env on every call (runtime toggling)."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'false')
        assert svc._qa_ollama_first_enabled() is False

        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
        assert svc._qa_ollama_first_enabled() is True

        # Assorted truthy spellings.
        for v in ('TRUE', 'True', '1', 'yes', 'on'):
            monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', v)
            assert svc._qa_ollama_first_enabled() is True, f"應視為 true: {v!r}"

        for v in ('false', '0', 'no', 'off', '', 'foo'):
            monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', v)
            assert svc._qa_ollama_first_enabled() is False, f"應視為 false: {v!r}"