Files
ewoooc/tests/test_openclaw_qa_routing.py
2026-05-21 14:45:32 +08:00

569 lines
25 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/test_openclaw_qa_routing.py
OpenClaw Q&A routing + quality-gate unit tests
(Operation Ollama-First v5.0 — Phase 3, A7 fullstack-engineer)
Covers:
- feature flag OPENCLAW_QA_OLLAMA_FIRST=false → accepted for backward compatibility, but Ollama-first is still enforced
- flag=true + high-quality Ollama reply → return the Ollama result directly, without calling Gemini
- flag=true + low-quality Ollama reply → escalate to Gemini and tag fallback_to=openclaw_qa_gemini_fallback
- flag=true + Ollama call failure → escalate to Gemini
- _is_low_quality_response rules: empty string / too short / Simplified-Chinese contamination / refusal / unbroken run-on text
Run:
pytest tests/test_openclaw_qa_routing.py -v
"""
import os
import sys
import time
from types import ModuleType, SimpleNamespace
from typing import Any, Dict, Optional
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import services.openclaw_strategist_service as svc
import services.ai_call_logger as logger_mod
from services.ai_call_logger import _reset_kill_switch
# ─────────────────────────────────────────────────────────────────────────────
# Fixtures
# ─────────────────────────────────────────────────────────────────────────────
@pytest.fixture(autouse=True)
def reset_state(monkeypatch):
    """Give every test a clean logger state and an in-memory ai_calls sink.

    Calls ``_reset_kill_switch()``, replaces ``logger_mod._write_to_db`` with
    a recorder that appends one dict per logged call, enables
    ``AI_CALL_LOGGING_ENABLED`` and removes ``OPENCLAW_QA_OLLAMA_FIRST`` from
    the environment.

    Yields:
        The list the recorder appends to; each entry holds ``caller``,
        ``provider``, ``model``, ``status``, ``fallback_to``, ``error``,
        ``meta`` (copied into a fresh dict) and ``request_id``.
    """
    _reset_kill_switch()
    records = []
    plain_fields = ('caller', 'provider', 'model', 'status', 'fallback_to', 'error')

    def _record_instead_of_db(state):
        snapshot = {field: getattr(state, field) for field in plain_fields}
        # Copy meta so later mutation of the state does not alter the record.
        snapshot['meta'] = dict(state.meta)
        snapshot['request_id'] = state.request_id
        records.append(snapshot)

    monkeypatch.setattr(logger_mod, '_write_to_db', _record_instead_of_db)
    monkeypatch.setenv('AI_CALL_LOGGING_ENABLED', 'true')
    # Leave the flag unset so each test starts from the service's default routing.
    monkeypatch.delenv('OPENCLAW_QA_OLLAMA_FIRST', raising=False)
    yield records
def _wait_async(captured, n=1, timeout=2.0):
    """Poll until ``captured`` holds at least ``n`` items or ``timeout`` elapses.

    The condition is evaluated before the deadline is consulted, so records
    that are already present (or that arrive during the final sleep) are
    never reported as a timeout. A monotonic clock is used so wall-clock
    adjustments cannot shorten or extend the wait.

    Args:
        captured: Sized collection that is filled by the code under test
            (presumably from a background writer — hence the polling).
        n: Minimum number of items to wait for.
        timeout: Maximum time to wait, in seconds.

    Returns:
        True once ``len(captured) >= n``; False if the deadline passes first.
    """
    deadline = time.monotonic() + timeout
    while len(captured) < n:
        if time.monotonic() >= deadline:
            return False
        time.sleep(0.01)
    return True
def _stub_ollama_generate(
    monkeypatch,
    *,
    success: bool = True,
    content: str = '本週 momo 業績成長 12%,建議加碼家電促銷。',
    error: str = 'ConnectionError: connection refused',
    host: str = 'http://34.143.170.20:11434',
    input_tokens: int = 150,
    output_tokens: int = 60,
):
    """Replace ``services.ollama_service.OllamaService`` with a canned fake.

    The OpenClaw QA tests then go through the OllamaService interface rather
    than talking to a single host directly.

    Args:
        monkeypatch: pytest monkeypatch fixture used to install the fake.
        success: Value of the response's ``success`` attribute. When falsy the
            response carries empty content, zero token counts and ``error``.
        content: Response text used when ``success`` is truthy.
        error: Error string used when ``success`` is falsy.
        host: Value of the response's ``host`` attribute.
        input_tokens: Input token count reported on success.
        output_tokens: Output token count reported on success.

    Returns:
        ``(FakeOllamaService, fake_resp)``: the fake class (its ``instances``
        list records every constructed instance, and each instance records
        its ``generate`` keyword arguments) and the response object that
        every ``generate`` call returns.
    """
    import services.ollama_service as ollama_mod

    if success:
        outcome = {
            'content': content,
            'error': None,
            'input_tokens': input_tokens,
            'output_tokens': output_tokens,
        }
    else:
        outcome = {
            'content': '',
            'error': error,
            'input_tokens': 0,
            'output_tokens': 0,
        }
    fake_resp = SimpleNamespace(
        success=success,
        model=svc.OPENCLAW_QA_OLLAMA_MODEL,
        total_duration=0.12,
        host=host,
        **outcome,
    )

    class FakeOllamaService:
        # Every instance created while the patch is active, in creation order.
        instances = []

        def __init__(self, *args, **kwargs):
            self.init_args, self.init_kwargs = args, kwargs
            self.generate_calls = []
            FakeOllamaService.instances.append(self)

        def generate(self, **kwargs):
            # Remember the call so tests can inspect model / system_prompt.
            self.generate_calls.append(kwargs)
            return fake_resp

    monkeypatch.setattr(ollama_mod, 'OllamaService', FakeOllamaService)
    return FakeOllamaService, fake_resp
def _stub_gemini(monkeypatch, *, text: str = "Gemini 備援回覆", raise_error: bool = False):
    """Install a test double for ``google.generativeai`` in ``sys.modules``.

    Keeps unit tests from calling the external API.

    Args:
        monkeypatch: pytest monkeypatch fixture; its ``setitem`` is used to
            register the fake ``google`` and ``google.generativeai`` modules.
        text: Text carried by the fake ``generate_content`` response.
        raise_error: When true, ``generate_content`` raises
            ``RuntimeError('gemini down')`` instead of returning a response.
    """
    class FakeGenerativeModel:
        def __init__(self, **kwargs):
            self.kwargs = kwargs

        def generate_content(self, user_prompt, request_options=None):
            if raise_error:
                raise RuntimeError('gemini down')
            usage = SimpleNamespace(
                prompt_token_count=42,
                candidates_token_count=16,
            )
            return SimpleNamespace(text=text, usage_metadata=usage)

    def _configure(api_key=None):
        # Accepts the API key and does nothing with it.
        return None

    def _generation_config(**kwargs):
        # Stand-in for GenerationConfig: simply echoes its keyword arguments.
        return kwargs

    genai_module = ModuleType('google.generativeai')
    genai_module.configure = _configure
    genai_module.GenerativeModel = FakeGenerativeModel
    genai_module.types = SimpleNamespace(GenerationConfig=_generation_config)

    google_module = ModuleType('google')
    google_module.generativeai = genai_module

    replacements = (
        ('google', google_module),
        ('google.generativeai', genai_module),
    )
    for module_name, module in replacements:
        monkeypatch.setitem(sys.modules, module_name, module)
def _stub_nim(monkeypatch, *, text: str = "NIM 備援回覆"):
    """Install a test double for the NVIDIA NIM HTTP call.

    Replaces ``svc.requests.post`` with a function that ignores its arguments
    and returns a response whose ``json()`` carries ``text`` as the first
    choice's message content.

    Args:
        monkeypatch: pytest monkeypatch fixture used to patch ``post``.
        text: Message content placed in the fake response body.
    """
    class FakeNimResponse:
        def raise_for_status(self):
            # Always behaves like a successful HTTP response.
            return None

        def json(self):
            usage = {"prompt_tokens": 21, "completion_tokens": 9}
            first_choice = {"message": {"content": text}}
            return {"usage": usage, "choices": [first_choice]}

    def _fake_post(*args, **kwargs):
        return FakeNimResponse()

    monkeypatch.setattr(svc.requests, 'post', _fake_post)
# ─────────────────────────────────────────────────────────────────────────────
# 1. _is_low_quality_response: pure-function rules
# ─────────────────────────────────────────────────────────────────────────────
class TestLowQualityRules:
    """Rule-by-rule checks of ``svc._is_low_quality_response``."""

    def test_empty_string_is_low_quality(self):
        """Empty, None and whitespace-only inputs are all low quality."""
        for blank in ("", None, " \n "):
            assert svc._is_low_quality_response(blank) is True

    def test_too_short_is_low_quality(self):
        """A reply shorter than 50 characters is low quality."""
        greeting = "你好,我是 OpenClaw"
        assert svc._is_low_quality_response(greeting) is True

    def test_acceptable_response_passes(self):
        """A multi-line reply of reasonable length passes the gate."""
        good = "\n".join([
            "本週 momo 業績較上週成長 12%,主要受惠於家電與生活雜貨。",
            "建議:持續關注 PChome 競價動態,必要時調整定價策略。",
            "預估下週 momo 仍有 5-8% 成長空間。",
        ])
        assert svc._is_low_quality_response(good) is False

    def test_simplified_pollution_detected(self):
        """Three or more Simplified-Chinese hint characters → low quality."""
        # NOTE(review): this sample is about 45 characters long, below the
        # 50-character threshold mentioned in test_too_short_is_low_quality,
        # so the too-short rule alone may satisfy the assertion — confirm the
        # Simplified-Chinese rule is really what is being exercised.
        polluted = (
            "本周业绩比上周增长,您可以关注这个产品的价格变动趋势,"
            "我们建议处理掉滞销库存以提高资产效率"
        )
        assert svc._is_low_quality_response(polluted) is True

    def test_two_simplified_chars_still_acceptable(self):
        """Two hint characters in a well-structured reply stay acceptable.

        Guards against the rule being so sensitive that it rejects a normal
        Traditional-Chinese reply containing a stray Simplified character.
        """
        text = "\n".join([
            "本週 momo 业绩成長明顯,建議持續關注競品動向。",
            "重點品類家電、3C、生活雜貨。",
            "下週可加碼促銷檔期。",
        ])
        assert svc._is_low_quality_response(text) is False

    def test_refusal_pattern_detected(self):
        """Replies containing a refusal phrase are low quality."""
        # NOTE(review): these sentences are only about 26-30 characters long,
        # below the 50-character threshold mentioned in
        # test_too_short_is_low_quality, so the too-short rule alone may
        # satisfy the assertion — confirm the refusal rule is exercised.
        refusals = ['無法回答', '我不知道', '抱歉,我無法協助']
        for refusal in refusals:
            text = f"關於這個問題,{refusal},請改問其他內容以便我協助您。"
            verdict = svc._is_low_quality_response(text)
            assert verdict is True, f"應被判定為拒答:{refusal}"

    def test_flowing_text_no_breaks_is_low_quality(self):
        """More than 200 characters without any line break → low quality."""
        sentence = "本週業績整體呈現上升趨勢。"
        text = sentence * 20
        # Sanity-check the fixture itself before calling the rule.
        assert "\n" not in text
        assert len(text) > 200
        assert svc._is_low_quality_response(text) is True

    def test_long_text_with_breaks_is_acceptable(self):
        """More than 200 characters with line breaks is acceptable."""
        overview = "本週業績整體呈現上升趨勢,主要驅動類別為家電與生活雜貨大類別。\n"
        pchome = "競品動向PChome 在 3C 類發動大規模補貼戰,預估壓縮我方 3 至 5 個百分點毛利率。\n"
        shopee = "蝦皮也在母嬰用品加碼免運券促銷,需密切觀察跟降節奏,避免市占下滑。\n"
        actions = (
            "建議行動:(1) 加碼家電促銷檔期,重點操作大尺寸電視與廚房家電,"
            "(2) 觀察 PChome 補貼是否延續至下週,準備二段反擊方案,"
            "(3) 對價差大於 5% 的 SKU 主動啟動 EA 流程,避免毛利持續流失。"
        )
        text = overview + pchome + shopee + actions
        assert len(text) > 200
        assert svc._is_low_quality_response(text) is False
# ─────────────────────────────────────────────────────────────────────────────
# 2. Routing: Gemini-first is not allowed even when the feature flag is false
# ─────────────────────────────────────────────────────────────────────────────
class TestFlagOff:
    """Routing when ``OPENCLAW_QA_OLLAMA_FIRST`` is explicitly false."""

    def test_flag_false_still_routes_to_ollama_first(self, monkeypatch, reset_state):
        """flag=false: the old emergency path is closed, Ollama still goes first."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'false')
        legacy_hits = []
        ollama_hits = []

        def fake_legacy(q, ctx, request_id=None):
            legacy_hits.append(q)
            return "[legacy gemini reply]"

        def fake_ollama(q, ctx, rid):
            ollama_hits.append(q)
            return "[ollama reply with enough length to pass quality gate。這是一段繁體中文策略回覆避免被判定過短。]"

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)

        result = svc.generate_strategy_response("本週業績如何?")

        assert result.startswith("[ollama reply")
        assert len(legacy_hits) == 0
        assert len(ollama_hits) == 1
class TestOpenClawReportRouting:
    """Report-LLM routing via ``svc._call_openclaw_llm_ollama_first``.

    NOTE(review): the last two tests exercise ``generate_strategy_response``
    flag routing rather than report routing; they are kept in this class so
    their test IDs do not change — confirm whether they belong elsewhere.
    """

    def test_report_llm_uses_ollama_before_gemini(self, monkeypatch):
        """When Ollama returns content, Gemini must never be invoked."""
        seen = []

        def fake_ollama(system_prompt, user_prompt, **kwargs):
            seen.append(("ollama", kwargs["caller"]))
            return "Ollama 報告內容足夠完整"

        def fake_gemini(*args, **kwargs):
            raise AssertionError("Gemini must not run when Ollama succeeds")

        monkeypatch.setattr(svc, "_call_ollama_strategy", fake_ollama)
        monkeypatch.setattr(svc, "_call_gemini", fake_gemini)

        result = svc._call_openclaw_llm_ollama_first(
            "system", "user", temperature=0.3, caller="openclaw_weekly",
        )

        assert result == "Ollama 報告內容足夠完整"
        assert seen == [("ollama", "openclaw_weekly")]

    def test_report_llm_gemini_is_suffix_fallback_only(self, monkeypatch):
        """When Ollama yields None, Gemini runs with a ``_gemini_fallback`` caller."""
        gemini_env = (
            ("GEMINI_API_HARD_DISABLED", "false"),
            ("GEMINI_FALLBACK_ENABLED", "true"),
            ("GEMINI_API_KEY", "test-key"),
        )
        for name, value in gemini_env:
            monkeypatch.setenv(name, value)
        seen = []

        def fake_ollama(*a, **kw):
            return None

        def fake_gemini(system_prompt, user_prompt, **kwargs):
            seen.append(("gemini", kwargs["caller"]))
            return "Gemini fallback content"

        monkeypatch.setattr(svc, "_call_ollama_strategy", fake_ollama)
        monkeypatch.setattr(svc, "_call_gemini", fake_gemini)

        result = svc._call_openclaw_llm_ollama_first(
            "system", "user", temperature=0.3, caller="openclaw_monthly",
        )

        assert result == "Gemini fallback content"
        assert seen == [("gemini", "openclaw_monthly_gemini_fallback")]

    def test_flag_unset_defaults_to_ollama_first(self, monkeypatch, reset_state):
        """Env var completely unset → defaults to true → Ollama goes first."""
        monkeypatch.delenv('OPENCLAW_QA_OLLAMA_FIRST', raising=False)
        legacy_hits = []
        ollama_hits = []
        ollama_reply = (
            "Ollama 主路徑已接手競品分析。建議先檢查近七日價差、銷售跌幅、"
            "PChome 優勢品項與高毛利 SKU再依 HIGH/MED/LOW 分層處理。"
            "若價差超過 15% 且銷售下滑超過 20%,應優先送人工覆核與 Telegram 告警。"
        )

        def fake_legacy(q, ctx, request_id=None):
            legacy_hits.append(q)
            return "[legacy reply]"

        def fake_ollama(q, ctx, rid):
            ollama_hits.append(q)
            return ollama_reply

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)

        result = svc.generate_strategy_response("競品分析")

        assert len(ollama_hits) == 1
        assert len(legacy_hits) == 0
        assert result.startswith("Ollama 主路徑已接手")

    def test_empty_query_short_circuits(self, monkeypatch, reset_state):
        """An empty query must not trigger any LLM call."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
        legacy_hits = []
        ollama_hits = []

        def fake_legacy(q, ctx, request_id=None):
            legacy_hits.append(q)
            return ""

        def fake_ollama(q, ctx, rid):
            ollama_hits.append(q)
            return ""

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)

        out = svc.generate_strategy_response("")

        assert "請輸入您的問題" in out
        assert len(legacy_hits) == 0
        assert len(ollama_hits) == 0
# ─────────────────────────────────────────────────────────────────────────────
# 3. Routing: feature flag = true + high-/low-quality Ollama replies
# ─────────────────────────────────────────────────────────────────────────────
class TestFlagOn:
    """Routing when ``OPENCLAW_QA_OLLAMA_FIRST`` is true."""

    def test_flag_true_high_quality_returns_ollama(self, monkeypatch, reset_state):
        """High-quality Ollama reply is returned as-is; Gemini is not called."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
        gemini_hits = []
        good_reply = "\n".join([
            "本週 momo 業績成長 12%,主要驅動類別為家電。",
            "建議:持續關注 PChome 競價並加碼家電促銷檔期。",
            "下週預估仍有 5-8% 成長空間。",
        ])

        def fake_ollama(q, ctx, rid):
            return good_reply

        def fake_legacy(q, ctx, request_id=None):
            gemini_hits.append(q)
            return "[gemini fallback]"

        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)
        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)

        out = svc.generate_strategy_response("本週業績如何?")

        assert out == good_reply
        # Gemini was never invoked.
        assert len(gemini_hits) == 0

    def test_flag_true_low_quality_falls_back_to_gemini(self, monkeypatch, reset_state):
        """Low-quality (Simplified-Chinese) Ollama reply → Gemini fallback."""
        # NOTE(review): bad_reply is about 30 characters, below the
        # 50-character "too short" threshold mentioned elsewhere in this file,
        # so the fallback may be triggered by length rather than by the
        # Simplified-Chinese rule — confirm.
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
        bad_reply = "本周业绩增长,您可以关注这个产品的价格变动,我们建议处理库存"
        gemini_hits = []

        def fake_ollama(q, ctx, rid):
            return bad_reply

        def fake_legacy(q, ctx, request_id=None):
            gemini_hits.append(q)
            return "[gemini high quality reply]"

        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)
        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)

        out = svc.generate_strategy_response("本週業績如何?")

        assert out == "[gemini high quality reply]"
        assert len(gemini_hits) == 1

    def test_flag_true_ollama_returns_none_falls_back(self, monkeypatch, reset_state):
        """Ollama call failure (returns None) → Gemini fallback."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
        gemini_hits = []

        def fake_ollama(q, ctx, rid):
            return None

        def fake_legacy(q, ctx, request_id=None):
            gemini_hits.append(q)
            return "[gemini reply after ollama down]"

        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)
        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)

        out = svc.generate_strategy_response("test")

        assert out == "[gemini reply after ollama down]"
        assert len(gemini_hits) == 1
# ─────────────────────────────────────────────────────────────────────────────
# 4. _call_qwen3_qa: ai_call_logger integration + fallback_to tagging
# ─────────────────────────────────────────────────────────────────────────────
class TestCallQwen3Telemetry:
    """ai_calls records written by ``svc._call_qwen3_qa``."""

    def test_qwen3_logs_ok_status_on_success(self, monkeypatch, reset_state):
        """Successful reply → status=ok, caller=openclaw_qa, provider=gcp_ollama."""
        records = reset_state
        fake_service, _unused_resp = _stub_ollama_generate(monkeypatch)

        result = svc._call_qwen3_qa("本週業績?", None, "qa-test123")

        assert result is not None
        assert "業績成長" in result

        # The service must have been constructed and asked to generate once.
        assert fake_service.instances
        first_call = fake_service.instances[0].generate_calls[0]
        assert first_call['model'] == svc.OPENCLAW_QA_OLLAMA_MODEL
        assert first_call['system_prompt'] == svc.QWEN3_TC_SYSTEM_PROMPT

        assert _wait_async(records, 1)
        assert len(records) == 1
        rec = records[0]
        assert rec['caller'] == 'openclaw_qa'
        assert rec['provider'] == 'gcp_ollama'
        assert rec['model'] == svc.OPENCLAW_QA_OLLAMA_MODEL
        assert rec['status'] == 'ok'
        assert rec['fallback_to'] is None

        meta = rec['meta']
        assert meta.get('flag') == 'OPENCLAW_QA_OLLAMA_FIRST'
        assert meta.get('route') == 'ollama_first'
        assert meta.get('host') == 'http://34.143.170.20:11434'
        assert meta.get('host_label') == 'GCP-SSD'
        assert rec['request_id'] == "qa-test123"

    def test_qwen3_logs_actual_secondary_provider_after_retry(self, monkeypatch, reset_state):
        """A reply served by the second host must be logged as ollama_secondary."""
        records = reset_state
        secondary_host = 'http://34.21.145.224:11434'
        _stub_ollama_generate(monkeypatch, host=secondary_host)

        result = svc._call_qwen3_qa("本週業績?", None, "qa-secondary")

        assert result is not None
        assert _wait_async(records, 1)
        rec = records[0]
        assert rec['provider'] == 'ollama_secondary'
        assert rec['meta'].get('host_label') == 'GCP-SSD-2'

    def test_qwen3_logs_fallback_on_exception(self, monkeypatch, reset_state):
        """Failed Ollama call → status=fallback, fallback_to=openclaw_qa_gemini_fallback."""
        records = reset_state
        _stub_ollama_generate(monkeypatch, success=False)

        result = svc._call_qwen3_qa("test", None, "qa-fail123")

        assert result is None
        assert _wait_async(records, 1)
        rec = records[0]
        assert rec['status'] == 'fallback'
        assert rec['fallback_to'] == 'openclaw_qa_gemini_fallback'
        failure = rec['error']
        assert failure is not None
        assert 'ConnectionError' in failure

    def test_qwen3_logs_fallback_on_empty_response(self, monkeypatch, reset_state):
        """Empty Ollama content → error=empty_response and a fallback record."""
        records = reset_state
        _stub_ollama_generate(
            monkeypatch, content='', input_tokens=100, output_tokens=0,
        )

        result = svc._call_qwen3_qa("test", None, "qa-empty")

        assert result is None
        assert _wait_async(records, 1)
        rec = records[0]
        assert rec['status'] == 'fallback'
        assert rec['fallback_to'] == 'openclaw_qa_gemini_fallback'
        assert rec['error'] == 'empty_response'
# ─────────────────────────────────────────────────────────────────────────────
# 5. Gemini / NIM fallback telemetry: Gemini must not be logged as the openclaw_qa primary path
# ─────────────────────────────────────────────────────────────────────────────
class TestLegacyFallbackTelemetry:
    """ai_calls records written by ``svc._legacy_gemini_first_qa``."""

    def test_gemini_backup_uses_dedicated_caller(self, monkeypatch, reset_state):
        """Gemini backup is logged as openclaw_qa_gemini_fallback, not openclaw_qa."""
        records = reset_state
        gemini_env = (
            ('GEMINI_API_HARD_DISABLED', 'false'),
            ('GEMINI_FALLBACK_ENABLED', 'true'),
            ('GEMINI_API_KEY', 'test-gemini-key'),
        )
        for name, value in gemini_env:
            monkeypatch.setenv(name, value)
        # Empty NIM key for this scenario; only the Gemini record is expected.
        monkeypatch.setattr(svc, 'NVIDIA_API_KEY', '')
        _stub_gemini(monkeypatch, text="Gemini 備援:請先檢查近七日業績與競品價差。")

        out = svc._legacy_gemini_first_qa("本週業績如何?", None, request_id="qa-gemini")

        assert out.startswith("Gemini 備援")
        assert _wait_async(records, 1)
        assert len(records) == 1
        rec = records[0]
        assert rec['caller'] == 'openclaw_qa_gemini_fallback'
        assert rec['provider'] == 'gemini'
        assert rec['status'] == 'ok'
        assert rec['fallback_to'] is None
        # No record may attribute a Gemini call to the primary caller name.
        assert all(
            r['caller'] != 'openclaw_qa' or r['provider'] != 'gemini'
            for r in records
        )

    def test_gemini_backup_failure_falls_to_standard_nim_caller(self, monkeypatch, reset_state):
        """After Gemini fails, NIM is logged as openclaw_qa_nim (no doubled fallback prefix)."""
        records = reset_state
        gemini_env = (
            ('GEMINI_API_HARD_DISABLED', 'false'),
            ('GEMINI_FALLBACK_ENABLED', 'true'),
            ('GEMINI_API_KEY', 'test-gemini-key'),
        )
        for name, value in gemini_env:
            monkeypatch.setenv(name, value)
        monkeypatch.setattr(svc, 'NVIDIA_API_KEY', 'test-nim-key')
        _stub_gemini(monkeypatch, raise_error=True)
        _stub_nim(monkeypatch, text="NIM 備援:請改看 /daily 與 /threats。")

        out = svc._legacy_gemini_first_qa("本週業績如何?", None, request_id="qa-nim")

        assert out.startswith("NIM 備援")
        assert _wait_async(records, 2)
        expected_callers = ['openclaw_qa_gemini_fallback', 'openclaw_qa_nim']
        assert [r['caller'] for r in records] == expected_callers

        gemini_rec, nim_rec = records
        assert gemini_rec['provider'] == 'gemini'
        assert gemini_rec['status'] == 'fallback'
        assert gemini_rec['fallback_to'] == 'openclaw_qa_nim'
        assert nim_rec['provider'] == 'nim'
        assert nim_rec['status'] == 'ok'
# ─────────────────────────────────────────────────────────────────────────────
# 6. Environment variable is re-read on every call (runtime toggle)
# ─────────────────────────────────────────────────────────────────────────────
class TestRuntimeToggle:
    """``svc._qa_ollama_first_enabled`` must follow the live environment."""

    def test_flag_helper_reads_env_each_call(self, monkeypatch):
        """The helper re-reads the env var on each call, allowing runtime rollout switches."""
        flag_name = 'OPENCLAW_QA_OLLAMA_FIRST'

        monkeypatch.setenv(flag_name, 'false')
        assert svc._qa_ollama_first_enabled() is False
        monkeypatch.setenv(flag_name, 'true')
        assert svc._qa_ollama_first_enabled() is True

        # Spellings that must be treated as true.
        truthy_values = ('TRUE', 'True', '1', 'yes', 'on')
        for v in truthy_values:
            monkeypatch.setenv(flag_name, v)
            assert svc._qa_ollama_first_enabled() is True, f"應視為 true: {v!r}"

        # Everything else, including empty and unknown strings, is false.
        falsy_values = ('false', '0', 'no', 'off', '', 'foo')
        for v in falsy_values:
            monkeypatch.setenv(flag_name, v)
            assert svc._qa_ollama_first_enabled() is False, f"應視為 false: {v!r}"