Operation Ollama-First v5.0 / Phase 19 — 補完戰役紀律 tests/test_caller_registry.py (7 tests) - registry 含 30+ 核心 caller (ADR-028 對齊) - is_known_caller / assert_known_caller strict=False/True 行為 - ai_call_logger 整合:未知 caller log warning 不阻擋 - frozenset 不可變動 tests/test_deepseek_service.py (6 tests) - is_available() 需 KEY + flag 雙條件 - generate flag OFF / 200 success / 500 / timeout - usage tokens 解析(prompt_tokens / completion_tokens) tests/test_ppt_vision_service.py (6 tests) - flag OFF 不打 HTTP / 檔不存在 - ✅ 無視覺異常 / ⚠️ marker 解析 - HTTP 500 觸發 mark_unhealthy / timeout fail-safe tests/test_low_quality_response_v2.py (8 tests) - 規則 5 純英文回應 (中文 < 30%) - 規則 6 thinking-mode 漏洞 <think>...</think> - 規則 7 重複迴圈 (前 50 字 ≥ 3 次) - 規則 8 佔位符 ({{var}} / [TODO] / <待填>) - 合法繁中商業文字應通過 8 條規則 regression: 全戰役 unit test 累計 241 tests - Phase 1: 52 (logger + report) - Phase 2: 14 (ollama_resolve) - Phase 3: 36 (qa/golden/nemotron/daily) - Phase 7: 23 (anthropic + code_review) - Phase 11: 70 (rag + learning + promotion) - Phase 10.5: 8 (mcp_router) - Phase 13: 10 (retry chain) - Phase 19: 27 (caller_registry + deepseek + ppt_vision + lq_v2) ⭐ 新 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
117 lines
5.3 KiB
Python
117 lines
5.3 KiB
Python
"""
tests/test_low_quality_response_v2.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 17 — _is_low_quality_response 4 條新規則驗證

驗證面(規則 5-8 是 Phase 17 新增):
  規則 1-4:既有(test_openclaw_qa_routing 已驗)
  規則 5: 純英文回應(中文 < 30%)
  規則 6: thinking-mode 漏洞(<think>...</think>)
  規則 7: 重複迴圈(前 50 字 ≥ 3 次)
  規則 8: 佔位符未填充({{var}} / [TODO] / <待填>)
"""
|
||
|
||
|
||
def test_rule5_pure_english_response_rejected():
    """Rule 5: a long English-only reply must be rejected (Chinese share < 30%)."""
    from services.openclaw_strategist_service import (
        _is_low_quality_response as is_low_quality,
    )

    # Fixture contains no Chinese characters at all.
    english_only_reply = (
        "This is a long English response from the LLM model that does not "
        "have any traditional Chinese characters in it. We expect this kind "
        "of response to be rejected as low quality because the user is asking "
        "in Traditional Chinese and expects an answer in Traditional Chinese."
    )

    verdict = is_low_quality(english_only_reply)
    assert verdict is True
|
||
|
||
|
||
def test_rule5_mixed_chinese_english_acceptable():
    """Rule 5: text whose Chinese share is at least 30% must be accepted."""
    from services.openclaw_strategist_service import (
        _is_low_quality_response as is_low_quality,
    )

    # Chinese-dense fixture (the original author puts it at 40%+ Chinese characters).
    chinese_dense_reply = (
        "本週業績分析報告:總營收較上週成長百分之十二,主要來自家電類別與生活雜貨。\n"
        "競品動向監控:對手實施大規模補貼戰,預估壓縮我方百分之三毛利率。\n"
        "建議行動:(一) 加碼家電促銷檔期 (二) 觀察補貼是否延續至下週。"
    )

    verdict = is_low_quality(chinese_dense_reply)
    assert verdict is False
|
||
|
||
|
||
def test_rule6_thinking_block_leak_rejected():
    """Rule 6: leaked reasoning-model thinking markup must be rejected.

    Two shapes are checked: a complete ``<think>...</think>`` block placed
    before the answer, and a lone closing ``</think>`` tag after the answer.
    """
    from services.openclaw_strategist_service import (
        _is_low_quality_response as is_low_quality,
    )

    # Full thinking block followed by an otherwise ordinary answer.
    full_block_leak = (
        "<think>讓我思考一下這個問題...</think>\n"
        "本週業績分析:總營收成長 12% YoY,主要來自家電類別的銷售提升。"
    )
    # Ordinary answer followed by a dangling closing tag only.
    dangling_close_tag = (
        "本週業績分析:總營收成長 12% YoY,主要來自家電類別的銷售提升。\n"
        "</think>"
    )

    full_block_verdict = is_low_quality(full_block_leak)
    assert full_block_verdict is True

    dangling_verdict = is_low_quality(dangling_close_tag)
    assert dangling_verdict is True
|
||
|
||
|
||
def test_rule7_repetition_loop_rejected():
    """Rule 7: the opening 50 characters recurring 3+ times means a stuck loop."""
    from services.openclaw_strategist_service import (
        _is_low_quality_response as is_low_quality,
    )

    # Eight copies guarantee more than 200 characters, and make the opening
    # 50 characters recur at least three times so rule 7 is triggered.
    repeat_count = 8
    sentence = "本週業績有顯著成長,主要驅動類別是家電與生活雜貨大類別。額外文字。"
    looped_reply = sentence * repeat_count

    assert len(looped_reply) > 200
    verdict = is_low_quality(looped_reply)
    assert verdict is True
|
||
|
||
|
||
def test_rule7_normal_long_text_acceptable():
    """Rule 7: ordinary long text that reuses some words is not a loop."""
    from services.openclaw_strategist_service import (
        _is_low_quality_response as is_low_quality,
    )

    ordinary_report = (
        "本週業績整體呈現上升趨勢,主要驅動類別為家電與生活雜貨大類別。\n"
        "競品動向:PChome 在 3C 類發動大規模補貼戰,預估壓縮我方 3-5 個百分點毛利率。\n"
        "蝦皮也在母嬰用品加碼免運券促銷,需密切觀察跟降節奏。\n"
        "建議行動:(1) 加碼家電促銷檔期 (2) 觀察 PChome 補貼是否延續至下週 "
        "(3) 對價差大於 5% 的 SKU 主動啟動 EA 流程。"
    )

    # Individual words may repeat, but the opening 50 characters as one
    # substring do not occur three or more times.
    verdict = is_low_quality(ordinary_report)
    assert verdict is False
|
||
|
||
|
||
def test_rule8_placeholder_markers_rejected():
    """Rule 8: responses with unfilled placeholders or not-implemented markers are rejected.

    Each fixture carries exactly one kind of marker and is padded with filler
    text before being checked. The original comment says rule 1 rejects
    responses under 50 characters, and the previous 18-character filler left
    four of the five fixtures below that length. Those cases could therefore
    pass through rule 1 without exercising rule 8. The filler is now long
    enough on its own, and a length precondition guards against regressions.
    """
    from services.openclaw_strategist_service import _is_low_quality_response

    placeholder_cases = [
        # Unfilled Jinja-style {{var}} template variables
        "本週業績:{{revenue}} 元,較上週成長 {{wow_pct}}%。建議行動 ...",
        # [TODO] / [todo]
        "本週銷售分析:[TODO] 補上具體數字後完成此段。",
        "業績檢討:競品分析 [todo] 待補充。",
        # <待填> ("to be filled in") marker
        "本週概況:營收 NT$<待填>,毛利率 <待填>。",
        # 尚未實作 ("not yet implemented") marker
        "策略建議:(尚未實作) 請統帥手動補充。",
    ]

    # Marker-free filler that is longer than 50 characters by itself, so even
    # the shortest fixture clears the reported rule-1 length threshold.
    filler = (
        "(以下為測試填充內容:本週整體營運狀況穩定,各品類銷售表現符合預期,"
        "後續將持續追蹤市場變化並適時調整策略。)"
    )

    for case in placeholder_cases:
        padded = case + filler
        # Precondition: the fixture must not be short enough for the length rule.
        assert len(padded) > 50, f"fixture too short to isolate rule 8: {len(padded)} chars"
        assert _is_low_quality_response(padded) is True, f"應拒絕含佔位符: {case[:30]}"
|
||
|
||
|
||
def test_legitimate_chinese_business_text_passes():
    """Legitimate Traditional Chinese business text must pass all 8 rules."""
    from services.openclaw_strategist_service import (
        _is_low_quality_response as is_low_quality,
    )

    # Realistic weekly report mixing Chinese prose with figures and
    # Latin-script tokens.
    weekly_report = (
        "本週業績分析(2026-05-04):\n"
        "總營收 NT$ 4,230,000,較上週成長 12.3% WoW。\n"
        "主要驅動類別:家電(+18%)、生活雜貨(+9%)。\n"
        "競品動向:PChome 在 3C 類補貼戰預估壓縮我方 3-5pp 毛利率。\n"
        "建議行動:加碼家電檔期,對價差 > 5% SKU 啟動 EA 流程。"
    )

    verdict = is_low_quality(weekly_report)
    assert verdict is False
|