All checks were successful
CD Pipeline / deploy (push) Successful in 2m45s
Operation Ollama-First v5.0 / Phase 21 — 動態路由治理 services/llm_model_router.py (160+ 行) - 純規則引擎,零 LLM 成本(Python lambda predicate) - 6 caller × 12 條路由規則: • sales_copy: 短文 < 100 字 → gemma3:4b / 長文 → llama3.1:8b • hermes_analyst: gap > 20% 或銷量 < -50% → qwen3:14b / 預設 hermes3 • aider_heal: diff > 200 行 → qwen2.5-coder:32b / 預設 7b • openclaw_qa: query > 200 字或 multi_turn → qwen3:14b / 預設 qwen2.5:7b-instruct • ppt_vision: minicpm 不健康 → llava / 預設 minicpm-v • ea_engine: require_chain_of_thought → deepseek-r1:14b / 預設 Gemini - feature flag MODEL_ROUTER_ENABLED 預設 OFF(向下相容) - 失敗安全:predicate 例外 skip 到下一條 tests/test_llm_model_router.py (18 tests 全綠) - T1 flag OFF 不路由 - T2 sales_copy 短/長文路由 - T3 hermes 簡單/複雜 SKU - T4 aider_heal 簡單/重構 - T5 ppt_vision 主備援 - T6 ea_engine CoT 路由 - T7 predicate 例外容錯 - T8 utility 函數 ADR-034 — Caller × Context 動態 Model Router - 6 caller 路由規則對應表 - 5 段否決方案(LLM-based / hardcode / 配置檔 / 統一升級) - Phase 21.2-21.6 戰略性遷移計畫 - V1-V3 驗收 SQL(caller 整合後 model 分布觀察) 關聯:Primary + Secondary 兩台 GCP 已備齊 10 模型(67GB 對稱)支援所有 路由規則;caller 整合可分階段進行(Phase 21.2-21.5)。 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
255 lines
11 KiB
Python
255 lines
11 KiB
Python
"""
|
||
tests/test_llm_model_router.py
|
||
─────────────────────────────────────────────────────────────────
|
||
Operation Ollama-First v5.0 / Phase 21 — Caller × Context 動態路由驗證
|
||
"""
|
||
|
||
import pytest
|
||
|
||
|
||
@pytest.fixture(autouse=True)
def _reset_env(monkeypatch):
    """Remove MODEL_ROUTER_ENABLED from the environment before every test.

    Runs automatically for each test (autouse), so a test only sees the
    flag when it sets the variable itself through ``monkeypatch``.
    """
    # raising=False: do not fail when the variable is already absent.
    monkeypatch.delenv('MODEL_ROUTER_ENABLED', raising=False)
    yield
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# T1: no routing while the feature flag is OFF (backward compatible)
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_flag_off_returns_default():
    """With the flag unset, select_model must return the supplied default."""
    from services.llm_model_router import select_model

    # Flag OFF: the default comes straight back (rules are not evaluated).
    result = select_model(
        caller='sales_copy',
        context={'expected_length': 50},
        default='llama3.1:8b',
    )
    assert result == 'llama3.1:8b'
|
||
|
||
|
||
def test_flag_off_unknown_caller_returns_default():
    """With the flag unset, an unknown caller also gets the default back."""
    from services.llm_model_router import select_model

    result = select_model(caller='nonexistent', default='hermes3:latest')
    assert result == 'hermes3:latest'
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# T2: sales_copy routing (short copy vs. long copy)
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_sales_copy_short_text_routes_to_gemma3(monkeypatch):
    """Flag ON: sales_copy with expected_length=50 must route to gemma3:4b."""
    # Set the flag before importing the router.
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    # 50-character short copy → lightweight gemma3:4b
    result = select_model(
        caller='sales_copy',
        context={'expected_length': 50},
        default='llama3.1:8b',
    )
    assert result == 'gemma3:4b'
|
||
|
||
|
||
def test_sales_copy_long_text_routes_to_llama(monkeypatch):
    """Flag ON: sales_copy with expected_length=200 must yield llama3.1:8b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    result = select_model(
        caller='sales_copy',
        context={'expected_length': 200},
        default='llama3.1:8b',
    )
    assert result == 'llama3.1:8b'
|
||
|
||
|
||
def test_sales_copy_no_length_falls_back_to_default(monkeypatch):
    """Flag ON: sales_copy with an empty context must yield llama3.1:8b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    # No expected_length given → rule 1 does not fire → rule 2 is always
    # True → returns llama3.1:8b
    result = select_model(
        caller='sales_copy',
        context={},
        default='llama3.1:8b',
    )
    assert result == 'llama3.1:8b'
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# T3: Hermes price competition (simple vs. complex SKU)
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_hermes_simple_routes_to_hermes3(monkeypatch):
    """Flag ON: a small gap and mild sales drop must yield hermes3:latest."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    result = select_model(
        caller='hermes_analyst',
        context={'max_gap_pct': 5.2, 'min_sales_delta': -10.0},
        default='hermes3:latest',
    )
    assert result == 'hermes3:latest'
|
||
|
||
|
||
def test_hermes_high_gap_routes_to_qwen3(monkeypatch):
    """Flag ON: hermes_analyst with max_gap_pct=25.0 must route to qwen3:14b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    # gap > 20% → upgrade to qwen3:14b
    result = select_model(
        caller='hermes_analyst',
        context={'max_gap_pct': 25.0, 'min_sales_delta': -5.0},
        default='hermes3:latest',
    )
    assert result == 'qwen3:14b'
|
||
|
||
|
||
def test_hermes_sales_crash_routes_to_qwen3(monkeypatch):
    """Flag ON: min_sales_delta=-60.0 must route hermes_analyst to qwen3:14b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    # sales change < -50% → upgrade to qwen3:14b
    result = select_model(
        caller='hermes_analyst',
        context={'max_gap_pct': 5.0, 'min_sales_delta': -60.0},
        default='hermes3:latest',
    )
    assert result == 'qwen3:14b'
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# T4: AiderHeal (simple fix vs. refactor)
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_aider_heal_small_diff_routes_to_7b(monkeypatch):
    """Flag ON: aider_heal with diff_lines=50 must yield qwen2.5-coder:7b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    result = select_model(
        caller='aider_heal',
        context={'diff_lines': 50},
        default='qwen2.5-coder:7b',
    )
    assert result == 'qwen2.5-coder:7b'
|
||
|
||
|
||
def test_aider_heal_large_refactor_routes_to_32b(monkeypatch):
    """Flag ON: aider_heal with diff_lines=350 must route to qwen2.5-coder:32b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    # diff > 200 lines → refactor-grade 32b model
    result = select_model(
        caller='aider_heal',
        context={'diff_lines': 350},
        default='qwen2.5-coder:7b',
    )
    assert result == 'qwen2.5-coder:32b'
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# T5: PPT vision (primary / fallback)
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_ppt_vision_normal_routes_to_minicpm(monkeypatch):
    """Flag ON: ppt_vision with an empty context must yield minicpm-v:latest."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    result = select_model(
        caller='ppt_vision',
        context={},
        default='minicpm-v:latest',
    )
    assert result == 'minicpm-v:latest'
|
||
|
||
|
||
def test_ppt_vision_minicpm_unhealthy_routes_to_llava(monkeypatch):
    """Flag ON: minicpm_unhealthy=True must route ppt_vision to llava:latest."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    result = select_model(
        caller='ppt_vision',
        context={'minicpm_unhealthy': True},
        default='minicpm-v:latest',
    )
    assert result == 'llava:latest'
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# T6: EA engine (reasoning required → deepseek-r1)
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_ea_engine_no_cot_returns_default(monkeypatch):
    """Rule matches but model_name=None → default is returned (caller keeps its existing Gemini)."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    result = select_model(
        caller='ea_engine',
        context={'require_chain_of_thought': False},
        default='gemini-2.0-flash',
    )
    assert result == 'gemini-2.0-flash'
|
||
|
||
|
||
def test_ea_engine_cot_routes_to_deepseek_r1(monkeypatch):
    """Flag ON: require_chain_of_thought=True must route to deepseek-r1:14b."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    result = select_model(
        caller='ea_engine',
        context={'require_chain_of_thought': True},
        default='gemini-2.0-flash',
    )
    assert result == 'deepseek-r1:14b'
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# T7: a rule exception must not block routing (fault tolerance)
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_predicate_exception_skipped_to_next_rule(monkeypatch):
    """A predicate that raises should be skipped in favour of the next rule (never raised to the caller)."""
    monkeypatch.setenv('MODEL_ROUTER_ENABLED', 'true')
    from services.llm_model_router import select_model

    # A non-numeric context value makes int() raise.
    # Rule 1 expects expected_length to be int-convertible; 'abc' blows up.
    # The rule should be caught and skipped, falling to rule 2
    # (always True → llama3.1:8b).
    result = select_model(
        caller='sales_copy',
        context={'expected_length': 'abc'},  # deliberately invalid value
        default='llama3.1:8b',
    )
    # Outcome: rule 1 fails (int('abc') raises) → skip → rule 2 matches
    # → 'llama3.1:8b'
    assert result == 'llama3.1:8b'
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# T8: utility functions
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_list_routes_for_known_caller():
    """list_routes_for_caller('sales_copy') must include both sales_copy models."""
    from services.llm_model_router import list_routes_for_caller

    sales_routes = list_routes_for_caller('sales_copy')
    assert 'gemma3:4b' in sales_routes
    assert 'llama3.1:8b' in sales_routes
|
||
|
||
|
||
def test_list_routes_for_unknown_caller():
    """list_routes_for_caller must return an empty list for an unknown caller."""
    from services.llm_model_router import list_routes_for_caller

    assert list_routes_for_caller('nonexistent') == []
|
||
|
||
|
||
def test_all_callers_with_routes():
    """all_callers_with_routes must report at least the six expected callers."""
    from services.llm_model_router import all_callers_with_routes

    callers = all_callers_with_routes()
    expected = {'sales_copy', 'hermes_analyst', 'aider_heal',
                'openclaw_qa', 'ppt_vision', 'ea_engine'}
    # Subset check: callers beyond these six are allowed.
    assert expected.issubset(set(callers))
|