171 lines
6.1 KiB
Python
171 lines
6.1 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
services/llm_model_router.py
|
||
Operation Ollama-First v5.0 / Phase 21 — Caller × Context 動態 Model Router
|
||
|
||
設計原則:
|
||
- 不同 caller 在不同 context 下動態選擇最佳 model(同 provider)
|
||
例:sales_copy 短文 → gemma3:4b / 長文 → llama3.1:8b / Hermes 複雜 SKU → qwen3:14b
|
||
- 純規則引擎,零 LLM 成本
|
||
- caller 透過 select_model(caller, context) 取 model name
|
||
- feature flag MODEL_ROUTER_ENABLED 預設 OFF(不影響既有預設值)
|
||
- 失敗 fallback:規則沒命中 → 回 caller 預設 model(向下相容)
|
||
|
||
對應 ADR-028 caller 白名單 + ADR-034 動態路由(待寫)。
|
||
GCP Primary + Secondary 已備齊 10 模型支援所有路由規則。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
import os
|
||
import logging
|
||
from typing import Dict, Any, Optional, Callable
|
||
|
||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||
|
||
|
||
def is_model_router_enabled() -> bool:
    """Report whether the MODEL_ROUTER_ENABLED feature flag is switched on.

    The environment is consulted on every call instead of being captured at
    import time, so a changed variable is picked up without re-importing.

    Returns:
        True when the variable, after trimming whitespace and lowercasing,
        equals 'true', '1', 'yes' or 'on'; False otherwise, including when
        the variable is unset.
    """
    raw_flag = os.getenv('MODEL_ROUTER_ENABLED', 'false')
    normalized_flag = raw_flag.strip().lower()
    truthy_spellings = ('true', '1', 'yes', 'on')
    return normalized_flag in truthy_spellings
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
# Routing rules (ADR-034 specification)
# ─────────────────────────────────────────────────────────────────────────────
# Structure: caller → list of (predicate(context), model_name) tuples.
# The model paired with the first predicate that returns True is used; when no
# predicate matches, select_model returns the caller-supplied default (None
# unless one was passed), so the caller keeps using its own default.
# ─────────────────────────────────────────────────────────────────────────────
|
||
|
||
def _ctx_int(ctx, key):
    """Return ``ctx[key]`` passed through int(); missing or falsy values count as 0."""
    return int(ctx.get(key, 0) or 0)


def _ctx_float(ctx, key):
    """Return ``ctx[key]`` passed through float(); missing or falsy values count as 0."""
    return float(ctx.get(key, 0) or 0)


def _ctx_flag(ctx, key):
    """Return the truthiness of ``ctx[key]``; a missing key counts as False."""
    return bool(ctx.get(key, False))


def _always(ctx):
    """Catch-all predicate used as the last rule of every caller."""
    return True


# Maps a caller name to an ordered list of (predicate, model_name) pairs.
# Each predicate receives the context dict; rules are listed in priority order
# and every list ends with a catch-all entry.
ROUTING_RULES: Dict[str, list] = {
    # Sales copy: short text goes to gemma3:4b (lightweight and fast),
    # long text goes to llama3.1:8b.
    'sales_copy': [
        (lambda ctx: 0 < _ctx_int(ctx, 'expected_length') < 100, 'gemma3:4b'),
        (_always, 'llama3.1:8b'),  # default
    ],

    # Hermes price analysis: simple comparisons use hermes3; complex analysis
    # (gap > 20% or a sharp sales drop) is upgraded to qwen3:14b.
    'hermes_analyst': [
        (
            lambda ctx: (
                _ctx_float(ctx, 'max_gap_pct') > 20
                or _ctx_float(ctx, 'min_sales_delta') < -50
            ),
            'qwen3:14b',
        ),
        (_always, 'hermes3:latest'),
    ],

    # AiderHeal: simple syntax fixes use qwen2.5-coder:7b; refactor-sized
    # changes (diff > 200 lines) are upgraded to the 32b model.
    'aider_heal': [
        (lambda ctx: _ctx_int(ctx, 'diff_lines') > 200, 'qwen2.5-coder:32b'),
        (_always, 'qwen2.5-coder:7b'),
    ],

    # OpenClaw Q&A: simple questions use qwen2.5:7b-instruct, complex ones
    # (long query or multi-turn) use qwen3:14b.
    'openclaw_qa': [
        (
            lambda ctx: (
                _ctx_int(ctx, 'query_length') > 200
                or _ctx_flag(ctx, 'multi_turn')
            ),
            'qwen3:14b',
        ),
        (_always, 'qwen2.5:7b-instruct'),
    ],

    # PPT vision: minicpm-v is primary; switch to llava when the context
    # marks minicpm as unhealthy.
    'ppt_vision': [
        (lambda ctx: _ctx_flag(ctx, 'minicpm_unhealthy'), 'llava:latest'),
        (_always, 'minicpm-v:latest'),
    ],

    # Reasoning-heavy scenario (EA HITL strategic decisions; Gemini must not
    # be used as the default model).
    'ea_engine': [
        (lambda ctx: _ctx_flag(ctx, 'require_chain_of_thought'), 'deepseek-r1:14b'),
        (_always, 'hermes3:latest'),
    ],
}
|
||
|
||
# Replacement model per caller, used when that caller passes a Gemini default.
_CALLER_SAFE_DEFAULT_MODELS = {'ea_engine': 'hermes3:latest'}


def _sanitize_default_model(caller: str, default: Optional[str]) -> Optional[str]:
    """Swap a Gemini default for the caller's safe replacement, if one exists.

    Args:
        caller: Caller name used to look up _CALLER_SAFE_DEFAULT_MODELS.
        default: Default model name proposed by the caller; may be None or
            empty.

    Returns:
        The caller's safe replacement when ``default`` (trimmed, lowercased)
        starts with 'gemini' and the caller has an entry in
        _CALLER_SAFE_DEFAULT_MODELS. In every other case ``default`` is
        returned unchanged; this includes a Gemini default from a caller
        that has no safe replacement.
    """
    if not default:
        return default
    if not default.strip().lower().startswith('gemini'):
        return default

    replacement = _CALLER_SAFE_DEFAULT_MODELS.get(caller)
    if not replacement:
        # No safe replacement registered for this caller: pass through as-is.
        return default

    logger.warning(
        "[ModelRouter] %s default=%s rejected; using %s",
        caller,
        default,
        replacement,
    )
    return replacement
|
||
|
||
|
||
def select_model(
    caller: str,
    context: Optional[Dict[str, Any]] = None,
    default: Optional[str] = None,
) -> Optional[str]:
    """Main entry point: choose a model for ``caller`` given ``context``.

    The default is first passed through _sanitize_default_model, and this
    happens even when the feature flag is OFF. Rules are then evaluated only
    if the flag is ON and the caller has an entry in ROUTING_RULES.

    Args:
        caller: Routed only when it is a key of ROUTING_RULES; otherwise the
            (sanitized) default is returned directly.
        context: Values the rules inspect (e.g. expected_length, diff_lines,
            max_gap_pct). None or an empty value is treated as ``{}``.
        default: Returned when the flag is OFF, the caller has no rules, no
            rule matches, or the matching rule's model is None.

    Returns:
        A model name string, or None when no default was supplied and nothing
        was routed (meaning the caller should keep its existing default).
    """
    default = _sanitize_default_model(caller, default)

    # Flag OFF or unknown caller: no rule is evaluated (backward compatible).
    if not is_model_router_enabled():
        return default
    if caller not in ROUTING_RULES:
        return default

    ctx = context or {}
    for rule_matches, routed_model in ROUTING_RULES[caller]:
        try:
            matched = bool(rule_matches(ctx))
        except Exception as exc:
            # A failing rule is skipped so the remaining rules still get a turn.
            logger.warning("[ModelRouter] %s rule eval failed: %s", caller, exc)
            continue

        if not matched:
            continue
        if routed_model is None:
            # The rule matched but asks for the default model.
            return default
        logger.debug("[ModelRouter] %s ctx=%s → %s", caller, ctx, routed_model)
        return routed_model

    # Nothing matched: fall back to the default.
    return default
|
||
|
||
|
||
def list_routes_for_caller(caller: str) -> list:
    """Debug helper: list the model of every routing rule for ``caller``.

    Args:
        caller: Key to look up in ROUTING_RULES.

    Returns:
        The model names in rule order, or an empty list when the caller has
        no entry in ROUTING_RULES.
    """
    return [
        model_name
        for _predicate, model_name in ROUTING_RULES.get(caller, [])
    ]
|
||
|
||
|
||
def all_callers_with_routes() -> list:
    """Return the names of all callers that have dynamic routing rules.

    Returns:
        A new list holding the keys of ROUTING_RULES in their dict order.
    """
    return list(ROUTING_RULES)
|
||
|
||
|
||
# Names exported by ``from <this module> import *``.
__all__ = [
    'select_model', 'is_model_router_enabled',
    'list_routes_for_caller', 'all_callers_with_routes',
    'ROUTING_RULES',
]
|