Phase 1 A4 — 13 個呼叫點接 ai_call_logger(覆蓋率 11.8% → 預估 50%+)
- TOP-1 nemotron_dispatcher: nemotron_dispatch caller (NIM 配額追蹤)
- TOP-2 openclaw_strategist: 4 reports (daily/weekly/monthly/meta) + qa caller
- TOP-3 hermes_analyst: hermes_analyst + hermes_intent (順修 commit 00591c5 殘留 bug)
- TOP-4 code_review_pipeline: code_review_hermes/openclaw/elephant 三鏈 (request_id 串)
- TOP-5 openclaw_bot_routes: openclaw_bot_main/gemini/nim 三層 fallback
Phase 3 A7 — OpenClaw Q&A → qwen3:14b(feature flag OFF)
- OPENCLAW_QA_OLLAMA_FIRST 灰度開關
- 繁中強制 system prompt + Gemini fallback chain
- _is_low_quality_response 品質守門(簡體字檢測 + 拒答訊號 + 結構分數)
- 黃金集 A/B 對照測試框架(10 樣本去 PII)
Phase 3 A8 — OpenClaw 日報 → Hermes 模板(feature flag OFF)
- OPENCLAW_DAILY_HERMES_TEMPLATE 灰度開關
- _compute_daily_kpi 純 SQL + Hermes 規則引擎
- _compute_gemini_insight 精簡 200 字洞察 prompt
- templates/daily_report_v2.j2 + _SafeUndefined 缺欄位優雅降級
- scripts/compare_daily_report_versions.py 雙版本盲測
Phase 3 A9 — Nemotron NIM → qwen3:14b(feature flag OFF)
- NEMOTRON_OLLAMA_FIRST 灰度開關(A2 紅燈:deepseek-r1 假支援,改 qwen3)
- _call_qwen3_dispatch + 既有 NIM tool_calls 解析共用
- 保留 ADR-004「🟡 [降級模式]」Hermes 規則引擎兜底
H6 PII fix — chat_id 進 ai_calls.meta 改 SHA1[:8](4 處 Bot Q&A)
Code Review pipeline — N3 動態 provider tag(gcp/secondary/111)+ A4 logger 三鏈
37 unit tests 全綠(routing 15 + golden 5 + qwen3 8 + daily template 8 + nemotron 1)
Operation Ollama-First v5.0 / Phase 1 A4 + Phase 3 A7+A8+A9
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
359 lines
17 KiB
Python
359 lines
17 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
tests/test_openclaw_qa_routing.py
|
||
OpenClaw Q&A 路由 + 品質守門 unit tests
|
||
(Operation Ollama-First v5.0 — Phase 3, A7 fullstack-engineer)
|
||
|
||
涵蓋:
|
||
- feature flag OPENCLAW_QA_OLLAMA_FIRST=false → 走 Gemini-first(regression test)
|
||
- flag=true + 高品質 Ollama 回應 → 直接回 Ollama 結果,不走 Gemini
|
||
- flag=true + 低品質 Ollama 回應 → 升級至 Gemini,並標 fallback_to=openclaw_qa_gemini_fallback
|
||
- flag=true + Ollama 呼叫失敗 → 升級至 Gemini
|
||
- _is_low_quality_response 各規則:空字串 / 長度過短 / 簡體污染 / 拒答 / 流水帳
|
||
|
||
執行:
|
||
pytest tests/test_openclaw_qa_routing.py -v
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import time
|
||
from typing import Any, Dict, Optional
|
||
|
||
import pytest
|
||
|
||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||
|
||
import services.openclaw_strategist_service as svc
|
||
import services.ai_call_logger as logger_mod
|
||
from services.ai_call_logger import _reset_kill_switch
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Fixtures
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
@pytest.fixture(autouse=True)
def reset_state(monkeypatch):
    """Reset the logger kill-switch and collect ai_calls records in memory.

    For every test this fixture:
      * calls ``_reset_kill_switch()``;
      * replaces ``ai_call_logger._write_to_db`` with a stub that appends a
        dict snapshot of the state object to a list instead of writing to a DB;
      * sets ``AI_CALL_LOGGING_ENABLED=true``;
      * removes ``OPENCLAW_QA_OLLAMA_FIRST`` so the flag starts unset.

    Yields:
        The list that accumulates one dict per intercepted ``_write_to_db`` call.
    """
    _reset_kill_switch()
    records = []

    def fake_write(state):
        # Copy only the fields the assertions inspect; ``meta`` is copied so
        # later mutation of the state object cannot alter the record.
        snapshot = dict(
            caller=state.caller,
            provider=state.provider,
            model=state.model,
            status=state.status,
            fallback_to=state.fallback_to,
            error=state.error,
            meta=dict(state.meta),
            request_id=state.request_id,
        )
        records.append(snapshot)

    monkeypatch.setattr(logger_mod, '_write_to_db', fake_write)
    monkeypatch.setenv('AI_CALL_LOGGING_ENABLED', 'true')
    # Default: flag not set (pre-rollout behaviour).
    monkeypatch.delenv('OPENCLAW_QA_OLLAMA_FIRST', raising=False)
    yield records
|
||
|
||
|
||
def _wait_async(captured, n=1, timeout=2.0):
|
||
deadline = time.time() + timeout
|
||
while time.time() < deadline:
|
||
if len(captured) >= n:
|
||
return True
|
||
time.sleep(0.01)
|
||
return False
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# 1. _is_low_quality_response 純函式規則
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
class TestLowQualityRules:
    """Rule-by-rule checks for ``svc._is_low_quality_response``.

    Inputs for the rule-specific tests are kept at or above the 50-character
    minimum exercised by ``test_too_short_is_low_quality`` so that a positive
    verdict cannot be explained by the length rule alone.
    """

    def test_empty_string_is_low_quality(self):
        """Empty, None and whitespace-only inputs are all low quality."""
        assert svc._is_low_quality_response("") is True
        assert svc._is_low_quality_response(None) is True
        assert svc._is_low_quality_response(" \n ") is True

    def test_too_short_is_low_quality(self):
        """A reply shorter than 50 characters is low quality."""
        assert svc._is_low_quality_response("你好,我是 OpenClaw") is True

    def test_acceptable_response_passes(self):
        """A multi-line Traditional Chinese reply of adequate length passes."""
        good = (
            "本週 momo 業績較上週成長 12%,主要受惠於家電與生活雜貨。\n"
            "建議:持續關注 PChome 競價動態,必要時調整定價策略。\n"
            "預估下週 momo 仍有 5-8% 成長空間。"
        )
        assert svc._is_low_quality_response(good) is False

    def test_simplified_pollution_detected(self):
        """Three or more Simplified Chinese hint characters mark low quality.

        This is the core check for Qwen's weakness in Traditional Chinese.
        """
        polluted = (
            "本周业绩比上周增长,您可以关注这个产品的价格变动趋势,"
            "我们建议处理掉滞销库存以提高资产效率。\n"
            "同时请继续观察竞争对手的促销节奏与补贴力度。"
        )
        # Keep the sample above the too-short threshold and line-broken so
        # only the Simplified-character rule can flag it.
        assert len(polluted) >= 50
        assert "\n" in polluted
        assert svc._is_low_quality_response(polluted) is True

    def test_two_simplified_chars_still_acceptable(self):
        """Two Simplified hint characters (below the limit) are tolerated.

        Avoids over-sensitivity: an otherwise well-structured Traditional
        Chinese reply with a couple of stray Simplified characters must not
        be rejected.
        """
        text = (
            "本週 momo 业绩成長明顯,建議持續關注競品動向。\n"
            "重點品類:家電、3C、生活雜貨。\n"
            "下週可加碼促銷檔期。"
        )
        assert svc._is_low_quality_response(text) is False

    def test_refusal_pattern_detected(self):
        """Replies containing a refusal phrase are low quality."""
        for refusal in ['無法回答', '我不知道', '抱歉,我無法協助']:
            text = (
                f"關於這個問題,{refusal},請改問其他內容以便我協助您。\n"
                "若需要業績、競品或定價相關的分析,歡迎提供更明確的品類與時間範圍。"
            )
            # Padded past the too-short threshold so the refusal phrase is
            # the only reason this reply can be flagged.
            assert len(text) >= 50
            assert svc._is_low_quality_response(text) is True, f"應被判定為拒答:{refusal}"

    def test_flowing_text_no_breaks_is_low_quality(self):
        """More than 200 characters without any line break is low quality."""
        text = "本週業績整體呈現上升趨勢。" * 20  # well over 200 characters
        assert "\n" not in text
        assert len(text) > 200
        assert svc._is_low_quality_response(text) is True

    def test_long_text_with_breaks_is_acceptable(self):
        """More than 200 characters with reasonable line breaks passes."""
        text = (
            "本週業績整體呈現上升趨勢,主要驅動類別為家電與生活雜貨大類別。\n"
            "競品動向:PChome 在 3C 類發動大規模補貼戰,預估壓縮我方 3 至 5 個百分點毛利率。\n"
            "蝦皮也在母嬰用品加碼免運券促銷,需密切觀察跟降節奏,避免市占下滑。\n"
            "建議行動:(1) 加碼家電促銷檔期,重點操作大尺寸電視與廚房家電,"
            "(2) 觀察 PChome 補貼是否延續至下週,準備二段反擊方案,"
            "(3) 對價差大於 5% 的 SKU 主動啟動 EA 流程,避免毛利持續流失。"
        )
        assert len(text) > 200
        assert svc._is_low_quality_response(text) is False
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# 2. Routing:feature flag = false 時維持 Gemini-first 路徑(regression)
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
class TestFlagOff:
    """Regression tests: with the flag off, routing stays on the legacy path.

    Every test stubs both ``_legacy_gemini_first_qa`` and ``_call_qwen3_qa``
    so no test can reach a real backend.
    """

    def test_flag_false_routes_to_legacy(self, monkeypatch, reset_state):
        """flag=false -> ``_call_qwen3_qa`` is not called; legacy handles it."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'false')
        legacy_calls = []
        ollama_calls = []

        def fake_legacy(q, ctx, request_id=None):
            legacy_calls.append(q)
            return "[legacy gemini reply]"

        def fake_ollama(q, ctx, rid):
            ollama_calls.append(q)
            return "[should not be called]"

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)

        result = svc.generate_strategy_response("本週業績如何?")
        assert result == "[legacy gemini reply]"
        assert len(legacy_calls) == 1
        assert len(ollama_calls) == 0

    def test_flag_unset_defaults_to_off(self, monkeypatch, reset_state):
        """Env var entirely unset -> defaults to false -> legacy path."""
        monkeypatch.delenv('OPENCLAW_QA_OLLAMA_FIRST', raising=False)
        legacy_calls = []
        ollama_calls = []

        def fake_legacy(q, ctx, request_id=None):
            legacy_calls.append(q)
            return "[legacy reply]"

        def fake_ollama(q, ctx, rid):
            ollama_calls.append(q)
            return None

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
        # Stub the Ollama path and count calls instead of relying on a real
        # network request failing: that keeps the test hermetic and catches
        # an unexpected call even if the service swallows the error.
        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)

        result = svc.generate_strategy_response("競品分析")
        assert len(legacy_calls) == 1
        assert len(ollama_calls) == 0
        assert result == "[legacy reply]"

    def test_empty_query_short_circuits(self, monkeypatch, reset_state):
        """An empty query must not trigger any LLM call, even with flag=true."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
        legacy_calls = []
        ollama_calls = []

        def fake_legacy(q, ctx, request_id=None):
            legacy_calls.append(q)
            return ""

        def fake_ollama(q, ctx, rid):
            ollama_calls.append(q)
            return ""

        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)

        out = svc.generate_strategy_response("")
        assert "請輸入您的問題" in out
        assert len(legacy_calls) == 0
        assert len(ollama_calls) == 0
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# 3. Routing:feature flag = true + Ollama 高/低品質
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
class TestFlagOn:
    """Routing with OPENCLAW_QA_OLLAMA_FIRST=true: Ollama first, Gemini fallback."""

    def test_flag_true_high_quality_returns_ollama(self, monkeypatch, reset_state):
        """flag=true + high-quality Ollama reply -> returned as-is, no Gemini."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
        gemini_hits = []
        good_reply = (
            "本週 momo 業績成長 12%,主要驅動類別為家電。\n"
            "建議:持續關注 PChome 競價並加碼家電促銷檔期。\n"
            "下週預估仍有 5-8% 成長空間。"
        )

        def fake_ollama(q, ctx, rid):
            return good_reply

        def fake_legacy(q, ctx, request_id=None):
            gemini_hits.append(1)
            return "[gemini fallback]"

        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)
        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)

        out = svc.generate_strategy_response("本週業績如何?")
        assert out == good_reply
        assert sum(gemini_hits) == 0  # Gemini was never invoked

    def test_flag_true_low_quality_falls_back_to_gemini(self, monkeypatch, reset_state):
        """flag=true + low-quality (Simplified-polluted) Ollama reply -> Gemini."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
        bad_reply = "本周业绩增长,您可以关注这个产品的价格变动,我们建议处理库存"
        gemini_hits = []

        def fake_ollama(q, ctx, rid):
            return bad_reply

        def fake_legacy(q, ctx, request_id=None):
            gemini_hits.append(1)
            return "[gemini high quality reply]"

        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)
        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)

        out = svc.generate_strategy_response("本週業績如何?")
        assert out == "[gemini high quality reply]"
        assert sum(gemini_hits) == 1

    def test_flag_true_ollama_returns_none_falls_back(self, monkeypatch, reset_state):
        """flag=true + Ollama call failed (returned None) -> Gemini fallback."""
        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
        gemini_hits = []

        def fake_ollama(q, ctx, rid):
            return None

        def fake_legacy(q, ctx, request_id=None):
            gemini_hits.append(1)
            return "[gemini reply after ollama down]"

        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)
        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)

        out = svc.generate_strategy_response("test")
        assert out == "[gemini reply after ollama down]"
        assert sum(gemini_hits) == 1
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# 4. _call_qwen3_qa: ai_call_logger 整合 + fallback_to 標記
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
class TestCallQwen3Telemetry:
    """``_call_qwen3_qa`` integration with ai_call_logger and fallback tagging.

    ``svc.requests.post`` is replaced in every test; the records asserted on
    are the ones collected by the ``reset_state`` fixture.
    """

    def test_qwen3_logs_ok_status_on_success(self, monkeypatch, reset_state):
        """Successful reply -> status=ok, caller=openclaw_qa, provider=gcp_ollama."""
        captured = reset_state
        payload = {
            'response': '本週 momo 業績成長 12%,建議加碼家電促銷。',
            'prompt_eval_count': 150,
            'eval_count': 60,
        }

        class FakeResp:
            status_code = 200

            def raise_for_status(self):
                pass

            def json(self):
                # Hand out a fresh copy on every call.
                return dict(payload)

        monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: FakeResp())

        result = svc._call_qwen3_qa("本週業績?", None, "qa-test123")
        assert result is not None
        assert "業績成長" in result

        assert _wait_async(captured, 1)
        assert len(captured) == 1
        rec = captured[0]
        assert rec['caller'] == 'openclaw_qa'
        assert rec['provider'] == 'gcp_ollama'
        assert rec['model'] == svc.OPENCLAW_QA_OLLAMA_MODEL
        assert rec['status'] == 'ok'
        assert rec['fallback_to'] is None
        assert rec['meta'].get('flag') == 'OPENCLAW_QA_OLLAMA_FIRST'
        assert rec['request_id'] == "qa-test123"

    def test_qwen3_logs_fallback_on_exception(self, monkeypatch, reset_state):
        """Connection failure -> status=fallback, fallback_to=openclaw_qa_gemini_fallback."""
        captured = reset_state

        def boom(*a, **kw):
            raise svc.requests.ConnectionError("connection refused")

        monkeypatch.setattr(svc.requests, 'post', boom)

        result = svc._call_qwen3_qa("test", None, "qa-fail123")
        assert result is None

        assert _wait_async(captured, 1)
        rec = captured[0]
        assert rec['status'] == 'fallback'
        assert rec['fallback_to'] == 'openclaw_qa_gemini_fallback'
        assert rec['error'] is not None
        assert 'ConnectionError' in rec['error']

    def test_qwen3_logs_fallback_on_empty_response(self, monkeypatch, reset_state):
        """Empty ``response`` field -> error=empty_response, tagged as fallback."""
        captured = reset_state
        payload = {'response': '', 'prompt_eval_count': 100, 'eval_count': 0}

        class FakeResp:
            status_code = 200

            def raise_for_status(self):
                pass

            def json(self):
                # Hand out a fresh copy on every call.
                return dict(payload)

        monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: FakeResp())

        result = svc._call_qwen3_qa("test", None, "qa-empty")
        assert result is None

        assert _wait_async(captured, 1)
        rec = captured[0]
        assert rec['status'] == 'fallback'
        assert rec['fallback_to'] == 'openclaw_qa_gemini_fallback'
        assert rec['error'] == 'empty_response'
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# 5. 環境變數讀取即時性(runtime toggle)
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
class TestRuntimeToggle:
    """The feature flag must be re-read from the environment on every call."""

    def test_flag_helper_reads_env_each_call(self, monkeypatch):
        """``_qa_ollama_first_enabled()`` reflects env changes made at runtime."""
        env_key = 'OPENCLAW_QA_OLLAMA_FIRST'

        # Flip the flag within a single test: off, then on.
        monkeypatch.setenv(env_key, 'false')
        assert svc._qa_ollama_first_enabled() is False

        monkeypatch.setenv(env_key, 'true')
        assert svc._qa_ollama_first_enabled() is True

        # Accepted truthy spellings.
        truthy_values = ('TRUE', 'True', '1', 'yes', 'on')
        for v in truthy_values:
            monkeypatch.setenv(env_key, v)
            assert svc._qa_ollama_first_enabled() is True, f"應視為 true: {v!r}"

        # Everything else, including empty and unknown strings, is false.
        falsy_values = ('false', '0', 'no', 'off', '', 'foo')
        for v in falsy_values:
            monkeypatch.setenv(env_key, v)
            assert svc._qa_ollama_first_enabled() is False, f"應視為 false: {v!r}"
|