Files
ewoooc/tests/test_llm_model_router.py
OoO 390c32b05d
All checks were successful
CD Pipeline / deploy (push) Successful in 2m45s
feat(p21): Caller × Context 動態 Model Router + ADR-034
Operation Ollama-First v5.0 / Phase 21 — 動態路由治理

services/llm_model_router.py (160+ 行)
- 純規則引擎,零 LLM 成本(Python lambda predicate)
- 6 caller × 12 條路由規則:
  • sales_copy: 短文 < 100 字 → gemma3:4b / 長文 → llama3.1:8b
  • hermes_analyst: gap > 20% 或銷量 < -50% → qwen3:14b / 預設 hermes3
  • aider_heal: diff > 200 行 → qwen2.5-coder:32b / 預設 7b
  • openclaw_qa: query > 200 字或 multi_turn → qwen3:14b / 預設 qwen2.5:7b-instruct
  • ppt_vision: minicpm 不健康 → llava / 預設 minicpm-v
  • ea_engine: require_chain_of_thought → deepseek-r1:14b / 預設 Gemini
- feature flag MODEL_ROUTER_ENABLED 預設 OFF(向下相容)
- 失敗安全:predicate 例外 skip 到下一條

tests/test_llm_model_router.py (18 tests 全綠)
- T1 flag OFF 不路由
- T2 sales_copy 短/長文路由
- T3 hermes 簡單/複雜 SKU
- T4 aider_heal 簡單/重構
- T5 ppt_vision 主備援
- T6 ea_engine CoT 路由
- T7 predicate 例外容錯
- T8 utility 函數

ADR-034 — Caller × Context 動態 Model Router
- 6 caller 路由規則對應表
- 5 段否決方案(LLM-based / hardcode / 配置檔 / 統一升級)
- Phase 21.2-21.6 戰略性遷移計畫
- V1-V3 驗收 SQL(caller 整合後 model 分布觀察)

關聯:Primary + Secondary 兩台 GCP 已備齊 10 模型(67GB 對稱)支援所有
路由規則;caller 整合可分階段進行(Phase 21.2-21.5)。

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 10:54:12 +08:00

255 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
tests/test_llm_model_router.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 21 — Caller × Context 動態路由驗證
"""
import pytest
@pytest.fixture(autouse=True)
def _reset_env(monkeypatch):
    """Remove the router feature flag from the environment before each test.

    Declared autouse, so every test in this module starts without
    MODEL_ROUTER_ENABLED unless the test sets the variable itself.
    """
    flag_name = 'MODEL_ROUTER_ENABLED'
    # raising=False: it is fine if the variable was never set.
    monkeypatch.delenv(flag_name, raising=False)
    yield
# ═══════════════════════════════════════════════════════════════════════════
# T1: feature flag OFF 時不路由(向下相容)
# ═══════════════════════════════════════════════════════════════════════════
def test_flag_off_returns_default():
    """With the flag unset, select_model is expected to return the given default."""
    from services.llm_model_router import select_model

    # Flag OFF: the default should come straight back without any rule
    # being evaluated, even though a context is supplied.
    fallback = 'llama3.1:8b'
    chosen = select_model(
        caller='sales_copy',
        context={'expected_length': 50},
        default=fallback,
    )
    assert chosen == fallback
def test_flag_off_unknown_caller_returns_default():
    """With the flag unset, an unregistered caller name should yield the default."""
    from services.llm_model_router import select_model

    fallback = 'hermes3:latest'
    assert select_model(caller='nonexistent', default=fallback) == fallback
# ═══════════════════════════════════════════════════════════════════════════
# T2: sales_copy 路由(短文 vs 長文)
# ═══════════════════════════════════════════════════════════════════════════
def test_sales_copy_short_text_routes_to_gemma3(monkeypatch):
    """Router enabled: sales_copy with expected_length 50 should pick gemma3:4b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    # Short copy (length 50) is expected to go to the lightweight gemma3:4b.
    request_context = {'expected_length': 50}
    chosen = select_model(
        caller='sales_copy',
        context=request_context,
        default='llama3.1:8b',
    )
    assert chosen == 'gemma3:4b'
def test_sales_copy_long_text_routes_to_llama(monkeypatch):
    """Router enabled: sales_copy with expected_length 200 should pick llama3.1:8b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    long_model = 'llama3.1:8b'
    chosen = select_model(
        caller='sales_copy',
        context={'expected_length': 200},
        default=long_model,
    )
    assert chosen == long_model
def test_sales_copy_no_length_falls_back_to_default(monkeypatch):
    """Router enabled: sales_copy with an empty context should resolve to llama3.1:8b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    # Without expected_length the first rule is not expected to fire; the
    # second rule is described as always true and yields llama3.1:8b.
    empty_context = {}
    chosen = select_model(
        caller='sales_copy',
        context=empty_context,
        default='llama3.1:8b',
    )
    assert chosen == 'llama3.1:8b'
# ═══════════════════════════════════════════════════════════════════════════
# T3: Hermes 競價(簡單 vs 複雜 SKU)
# ═══════════════════════════════════════════════════════════════════════════
def test_hermes_simple_routes_to_hermes3(monkeypatch):
    """Router enabled: a small gap and mild sales change should stay on hermes3:latest."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    sku_metrics = {'max_gap_pct': 5.2, 'min_sales_delta': -10.0}
    base_model = 'hermes3:latest'
    chosen = select_model(
        caller='hermes_analyst',
        context=sku_metrics,
        default=base_model,
    )
    assert chosen == base_model
def test_hermes_high_gap_routes_to_qwen3(monkeypatch):
    """Router enabled: a max_gap_pct of 25.0 should escalate hermes_analyst to qwen3:14b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    # A gap above 20% is expected to upgrade the model to qwen3:14b.
    sku_metrics = {'max_gap_pct': 25.0, 'min_sales_delta': -5.0}
    chosen = select_model(
        caller='hermes_analyst',
        context=sku_metrics,
        default='hermes3:latest',
    )
    assert chosen == 'qwen3:14b'
def test_hermes_sales_crash_routes_to_qwen3(monkeypatch):
    """Router enabled: a min_sales_delta of -60.0 should escalate hermes_analyst to qwen3:14b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    # A sales drop below -50% is expected to upgrade the model to qwen3:14b.
    sku_metrics = {'max_gap_pct': 5.0, 'min_sales_delta': -60.0}
    chosen = select_model(
        caller='hermes_analyst',
        context=sku_metrics,
        default='hermes3:latest',
    )
    assert chosen == 'qwen3:14b'
# ═══════════════════════════════════════════════════════════════════════════
# T4: AiderHeal(簡單 vs 重構)
# ═══════════════════════════════════════════════════════════════════════════
def test_aider_heal_small_diff_routes_to_7b(monkeypatch):
    """Router enabled: a 50-line diff should keep aider_heal on qwen2.5-coder:7b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    small_model = 'qwen2.5-coder:7b'
    chosen = select_model(
        caller='aider_heal',
        context={'diff_lines': 50},
        default=small_model,
    )
    assert chosen == small_model
def test_aider_heal_large_refactor_routes_to_32b(monkeypatch):
    """Router enabled: a 350-line diff should escalate aider_heal to qwen2.5-coder:32b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    # A diff above 200 lines is expected to use the refactor-grade 32b model.
    patch_info = {'diff_lines': 350}
    chosen = select_model(
        caller='aider_heal',
        context=patch_info,
        default='qwen2.5-coder:7b',
    )
    assert chosen == 'qwen2.5-coder:32b'
# ═══════════════════════════════════════════════════════════════════════════
# T5: PPT vision(主備援)
# ═══════════════════════════════════════════════════════════════════════════
def test_ppt_vision_normal_routes_to_minicpm(monkeypatch):
    """Router enabled: ppt_vision with an empty context should stay on minicpm-v:latest."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    primary_model = 'minicpm-v:latest'
    chosen = select_model(caller='ppt_vision', context={}, default=primary_model)
    assert chosen == primary_model
def test_ppt_vision_minicpm_unhealthy_routes_to_llava(monkeypatch):
    """Router enabled: flagging minicpm as unhealthy should switch ppt_vision to llava:latest."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    health_context = {'minicpm_unhealthy': True}
    chosen = select_model(
        caller='ppt_vision',
        context=health_context,
        default='minicpm-v:latest',
    )
    assert chosen == 'llava:latest'
# ═══════════════════════════════════════════════════════════════════════════
# T6: EA engine(推理需求 → deepseek-r1)
# ═══════════════════════════════════════════════════════════════════════════
def test_ea_engine_no_cot_returns_default(monkeypatch):
    """Router enabled: ea_engine without chain-of-thought should return the default.

    Per the original note, a rule matches here but carries model_name=None,
    so the default is returned and the caller keeps using its existing
    Gemini model.
    """
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    gemini_default = 'gemini-2.0-flash'
    chosen = select_model(
        caller='ea_engine',
        context={'require_chain_of_thought': False},
        default=gemini_default,
    )
    assert chosen == gemini_default
def test_ea_engine_cot_routes_to_deepseek_r1(monkeypatch):
    """Router enabled: ea_engine requiring chain-of-thought should pick deepseek-r1:14b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    reasoning_context = {'require_chain_of_thought': True}
    chosen = select_model(
        caller='ea_engine',
        context=reasoning_context,
        default='gemini-2.0-flash',
    )
    assert chosen == 'deepseek-r1:14b'
# ═══════════════════════════════════════════════════════════════════════════
# T7: 規則例外不阻擋(容錯)
# ═══════════════════════════════════════════════════════════════════════════
def test_predicate_exception_skipped_to_next_rule(monkeypatch):
    """A predicate that raises should be skipped, not propagated to the caller."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    # The first sales_copy rule expects expected_length to be convertible
    # with int(); a non-numeric value is passed on purpose so that it raises.
    # The router is expected to catch that and move on to the second rule,
    # which is described as always true and yields llama3.1:8b.
    broken_context = {'expected_length': 'abc'}  # deliberately invalid value
    chosen = select_model(
        caller='sales_copy',
        context=broken_context,
        default='llama3.1:8b',
    )
    # Expected path: rule 1 fails (int('abc') raises) -> skipped -> rule 2 matches.
    assert chosen == 'llama3.1:8b'
# ═══════════════════════════════════════════════════════════════════════════
# T8: utility 函數
# ═══════════════════════════════════════════════════════════════════════════
def test_list_routes_for_known_caller():
    """list_routes_for_caller('sales_copy') should include both sales_copy models."""
    from services.llm_model_router import list_routes_for_caller

    routes = list_routes_for_caller('sales_copy')
    # Checked in the same order as before: gemma3:4b first, then llama3.1:8b.
    for model_name in ('gemma3:4b', 'llama3.1:8b'):
        assert model_name in routes
def test_list_routes_for_unknown_caller():
    """list_routes_for_caller should return an empty list for an unregistered caller."""
    from services.llm_model_router import list_routes_for_caller

    routes = list_routes_for_caller('nonexistent')
    assert routes == []
def test_all_callers_with_routes():
    """all_callers_with_routes should report at least the six known callers."""
    from services.llm_model_router import all_callers_with_routes

    required = {
        'sales_copy',
        'hermes_analyst',
        'aider_heal',
        'openclaw_qa',
        'ppt_vision',
        'ea_engine',
    }
    registered = set(all_callers_with_routes())
    # Subset check only: extra registered callers are allowed.
    assert required <= registered