# ewoooc/services/llm_model_router.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
services/llm_model_router.py
Operation Ollama-First v5.0 / Phase 21 — Caller × Context 動態 Model Router

設計原則：
- 不同 caller 在不同 context 下動態選擇最佳 model（同 provider）
  例：sales_copy 短文 → gemma3:4b / 長文 → llama3.1:8b / Hermes 複雜 SKU → qwen3:14b
- 純規則引擎，零 LLM 成本
- caller 透過 select_model(caller, context) 取 model name
- feature flag MODEL_ROUTER_ENABLED 預設 OFF（不影響既有預設值）
- 失敗 fallback：規則沒命中 → 回 caller 預設 model（向下相容）

對應 ADR-028 caller 白名單 + ADR-034 動態路由（待寫）。
GCP Primary + Secondary 已備齊 10 模型支援所有路由規則。
"""

from __future__ import annotations
import os
import logging
from typing import Dict, Any, Optional, Callable

logger = logging.getLogger(__name__)


def is_model_router_enabled() -> bool:
    """Return True when the MODEL_ROUTER_ENABLED feature flag is switched on.

    The environment variable is read on every call (not cached at import
    time), so toggling it takes effect without reloading the module. The
    value is trimmed and lower-cased before comparison; an unset variable is
    treated as 'false'.
    """
    raw_flag = os.environ.get('MODEL_ROUTER_ENABLED', 'false')
    normalized = raw_flag.strip().lower()
    return normalized in {'true', '1', 'yes', 'on'}


# ─────────────────────────────────────────────────────────────────────────────
# Routing 規則（ADR-034 規格）
# ─────────────────────────────────────────────────────────────────────────────
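# Structure: caller → ordered list of (predicate(context), model_name) tuples.
# The first rule whose predicate returns True wins; if no rule matches (or the
# matched model_name is None) the caller's own default is used.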
# ─────────────────────────────────────────────────────────────────────────────

# Per-caller routing table. Each value is an ordered list of
# (predicate, model_name) pairs; a predicate receives the context dict and
# returns a bool. Missing, None or zero context values are treated as 0/False.
ROUTING_RULES: Dict[str, list] = {
    # Sales copy: short text (0 < expected_length < 100) uses the lightweight
    # gemma3:4b; everything else falls through to llama3.1:8b.
    'sales_copy': [
        (
            lambda ctx: 0 < int(ctx.get('expected_length') or 0) < 100,
            'gemma3:4b',
        ),
        (lambda ctx: True, 'llama3.1:8b'),  # catch-all
    ],

    # Hermes price analysis: escalate to qwen3:14b when max_gap_pct exceeds 20
    # or min_sales_delta drops below -50; otherwise stay on hermes3.
    'hermes_analyst': [
        (
            lambda ctx: (
                float(ctx.get('max_gap_pct') or 0) > 20
                or float(ctx.get('min_sales_delta') or 0) < -50
            ),
            'qwen3:14b',
        ),
        (lambda ctx: True, 'hermes3:latest'),  # catch-all
    ],

    # AiderHeal: small fixes use qwen2.5-coder:7b; refactor-sized changes
    # (diff_lines > 200) are upgraded to the 32b model.
    'aider_heal': [
        (
            lambda ctx: int(ctx.get('diff_lines') or 0) > 200,
            'qwen2.5-coder:32b',
        ),
        (lambda ctx: True, 'qwen2.5-coder:7b'),  # catch-all
    ],

    # OpenClaw Q&A: long queries (query_length > 200) or multi-turn sessions
    # use qwen3:14b; simple questions use qwen2.5:7b-instruct.
    'openclaw_qa': [
        (
            lambda ctx: (
                int(ctx.get('query_length') or 0) > 200
                or bool(ctx.get('multi_turn'))
            ),
            'qwen3:14b',
        ),
        (lambda ctx: True, 'qwen2.5:7b-instruct'),  # catch-all
    ],

    # PPT vision: minicpm-v is the primary model; switch to llava when the
    # context flags minicpm as unhealthy.
    'ppt_vision': [
        (
            lambda ctx: bool(ctx.get('minicpm_unhealthy')),
            'llava:latest',
        ),
        (lambda ctx: True, 'minicpm-v:latest'),  # catch-all
    ],

    # Reasoning-heavy scenarios (EA HITL strategic decisions). Per the original
    # author's note this route is reserved and not yet enabled.
    'ea_engine': [
        (
            lambda ctx: bool(ctx.get('require_chain_of_thought')),
            'deepseek-r1:14b',
        ),
        # A None model means "use the caller's own default"
        # (the original note names gemini-2.0-flash as that default).
        (lambda ctx: True, None),
    ],
}


def select_model(
    caller: str,
    context: Optional[Dict[str, Any]] = None,
    default: Optional[str] = None,
) -> Optional[str]:
    """Pick a model name for ``caller`` based on ``context``.

    Routing is skipped entirely (``default`` is returned without evaluating
    any rule) when the feature flag is off or ``caller`` has no entry in
    ``ROUTING_RULES``. Otherwise the caller's rules are tried in order and the
    first predicate that returns a truthy value decides the outcome.

    Args:
        caller: Routing key; only keys present in ``ROUTING_RULES`` are routed.
        context: Values the predicates inspect (e.g. ``expected_length``,
            ``diff_lines``, ``max_gap_pct``). ``None`` or an empty mapping is
            replaced by ``{}``.
        default: Value returned whenever routing does not yield a model name.

    Returns:
        The matched rule's model name, or ``default`` when the flag is off,
        the caller is unknown, the matched rule's model is ``None``, or no
        rule matches.
    """
    if not is_model_router_enabled() or caller not in ROUTING_RULES:
        return default

    routing_ctx = context if context else {}
    for rule_matches, routed_model in ROUTING_RULES[caller]:
        try:
            if not rule_matches(routing_ctx):
                continue
            if routed_model is not None:
                logger.debug("[ModelRouter] %s ctx=%s → %s", caller, routing_ctx, routed_model)
                return routed_model
            # The matched rule explicitly defers to the caller's default.
            return default
        except Exception as exc:
            # Best effort: a rule that fails to evaluate is logged and
            # skipped so the remaining rules still get a chance to match.
            logger.warning("[ModelRouter] %s rule eval failed: %s", caller, exc)

    # Every rule was tried and none matched.
    return default


def list_routes_for_caller(caller: str) -> list:
    """Debug helper: return the model of every routing rule for ``caller``.

    Models are listed in rule order and may include ``None`` entries (rules
    that defer to the caller's default). An unknown caller yields ``[]``.
    """
    return [
        routed_model
        for _rule_matches, routed_model in ROUTING_RULES.get(caller, ())
    ]


def all_callers_with_routes() -> list:
    """Return the names of all callers that have dynamic routing rules."""
    return [*ROUTING_RULES]


# Public API of this module: the names exposed by a star import.
__all__ = [
    'select_model',
    'is_model_router_enabled',
    'list_routes_for_caller',
    'all_callers_with_routes',
    'ROUTING_RULES',
]