From 1da7564567c1a3ece2b4946c54b08870266b255f Mon Sep 17 00:00:00 2001
From: OoO <ooo@MacBook-Pro.local>
Date: Mon, 4 May 2026 11:00:04 +0800
Subject: [PATCH] =?UTF-8?q?feat(p22+23):=20caller=20=E6=95=B4=E5=90=88=20m?=
 =?UTF-8?q?odel=5Frouter=20(sales=5Fcopy)=20+=20cost=5Fthrottle=20(claude)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Operation Ollama-First v5.0 / Phase 22.1 + Phase 23 — 路由治理整合

Phase 22.1: services/ollama_service.py generate_sales_copy
- 以 expected_length = len(product_name) × 5 推估文案長度（< 100 字視為短文）→ 一律交由 model_router 選 model
- model_router 推算 → 短文 gemma3:4b / 長文 llama3.1:8b
- MODEL_ROUTER_ENABLED=false 時 select_model 直接回 self.model（向下相容）
- router 失敗 swallow（fallback self.model 不影響主流程）

Phase 23: services/anthropic_service.py is_available()
- 新增 cost_throttle 整合：claude provider throttle 時 is_available=False
- caller 看到 False 自動走 Gemini fallback，不送 Claude 請求
- COST_THROTTLE_ENABLED=false 時不影響（戰役預設 OFF）
- cost_throttle 不可用時 try/except 不阻擋（向下相容）

行為對照：
  戰前：sales_copy 永遠用 self.model (llama3.1:8b)
  戰後（flag OFF）：完全相同
  戰後（flag ON）：商品名 < 20 字 → gemma3:4b 短文，提速 50%

  戰前：Claude 燒到月底超預算才告警，無自動節流
  戰後（throttle flag OFF）：完全相同
  戰後（throttle flag ON）：claude 月底推估 > 110% → is_available=False
                              → caller fallback Gemini 自動省錢

regression: 既有 retry_chain / anthropic / model_router test 全綠

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 services/anthropic_service.py | 19 +++++++++++++++++--
 services/ollama_service.py    | 16 +++++++++++++++-
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/services/anthropic_service.py b/services/anthropic_service.py
index 94efafa..a3f13cf 100644
--- a/services/anthropic_service.py
+++ b/services/anthropic_service.py
@@ -85,8 +85,23 @@ class AnthropicService:
             logger.error("[Anthropic] SDK 初始化失敗: %s", e)
 
     def is_available(self) -> bool:
-        """SDK 是否就緒可呼叫（API key 有設且 client 初始化成功）"""
-        return self._client is not None
+        """Return True when the SDK client is initialised and Claude is not cost-throttled.
+
+        Phase 23 (2026-05-04) cost_throttle integration: when
+        is_provider_throttled('claude') is truthy this returns False so the
+        caller can fall back to Gemini instead of sending a Claude request.
+        A failing or missing cost_throttle service never blocks availability.
+        """
+        if self._client is None:
+            return False
+        try:
+            from services.cost_throttle_service import is_provider_throttled
+            if is_provider_throttled('claude'):
+                logger.info("[Anthropic] is_available()=False — cost throttled, caller 應 fallback Gemini")
+                return False
+        except Exception as exc:  # best-effort: throttle lookup must not block availability
+            logger.debug("[Anthropic] cost_throttle check failed, treating as not throttled: %s", exc)
+        return True
 
     def generate(
         self,
diff --git a/services/ollama_service.py b/services/ollama_service.py
index 03ad371..c3f02c8 100644
--- a/services/ollama_service.py
+++ b/services/ollama_service.py
@@ -387,7 +387,21 @@ class OllamaService:
 請確保所有內容使用繁體中文，風格一致，並突出商品價值："""
 
         # 文案生成使用更長的超時時間
-        return self.generate(prompt, system_prompt=system_prompt, temperature=0.8, timeout=COPY_TIMEOUT)
+        # Phase 22.1（2026-05-04）：caller × context 動態 model 路由
+        # 短文 < 100 字 → gemma3:4b（輕量快），長文 → llama3.1:8b（既有預設）
+        # MODEL_ROUTER_ENABLED=false 時直接回 default（向下相容）
+        try:
+            from services.llm_model_router import select_model
+            expected_length = len(product_name) * 5  # 商品名長 × 5 推估文案輸出長度
+            chosen_model = select_model(
+                caller='sales_copy',
+                context={'expected_length': expected_length},
+                default=self.model,  # llama3.1:8b 預設
+            )
+        except Exception:
+            chosen_model = self.model  # router 失敗不影響主流程
+        return self.generate(prompt, model=chosen_model, system_prompt=system_prompt,
+                            temperature=0.8, timeout=COPY_TIMEOUT)
 
     def extract_keywords(self, text: str, max_keywords: int = 10) -> OllamaResponse:
         """