From 1da7564567c1a3ece2b4946c54b08870266b255f Mon Sep 17 00:00:00 2001 From: OoO Date: Mon, 4 May 2026 11:00:04 +0800 Subject: [PATCH] =?UTF-8?q?feat(p22+23):=20caller=20=E6=95=B4=E5=90=88=20m?= =?UTF-8?q?odel=5Frouter=20(sales=5Fcopy)=20+=20cost=5Fthrottle=20(claude)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operation Ollama-First v5.0 / Phase 22.1 + Phase 23 — 路由治理整合 Phase 22.1: services/ollama_service.py generate_sales_copy - 短文 < 100 字(expected_length = len(product_name) × 5)→ 走 model_router - model_router 推算 → 短文 gemma3:4b / 長文 llama3.1:8b - MODEL_ROUTER_ENABLED=false 時 select_model 直接回 self.model(向下相容) - router 失敗 swallow(fallback self.model 不影響主流程) Phase 23: services/anthropic_service.py is_available() - 新增 cost_throttle 整合:claude provider throttle 時 is_available=False - caller 看到 False 自動走 Gemini fallback,不送 Claude 請求 - COST_THROTTLE_ENABLED=false 時不影響(戰役預設 OFF) - cost_throttle 不可用時 try/except 不阻擋(向下相容) 行為對照: 戰前:sales_copy 永遠用 self.model (llama3.1:8b) 戰後(flag OFF):完全相同 戰後(flag ON):商品名 < 20 字 → gemma3:4b 短文,提速 50% 戰前:Claude 燒到月底超預算才告警,無自動節流 戰後(throttle flag OFF):完全相同 戰後(throttle flag ON):claude 月底推估 > 110% → is_available=False → caller fallback Gemini 自動省錢 regression: 既有 retry_chain / anthropic / model_router test 全綠 Co-Authored-By: Claude Opus 4.7 (1M context) --- services/anthropic_service.py | 19 +++++++++++++++++-- services/ollama_service.py | 16 +++++++++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/services/anthropic_service.py b/services/anthropic_service.py index 94efafa..a3f13cf 100644 --- a/services/anthropic_service.py +++ b/services/anthropic_service.py @@ -85,8 +85,23 @@ class AnthropicService: logger.error("[Anthropic] SDK 初始化失敗: %s", e) def is_available(self) -> bool: - """SDK 是否就緒可呼叫(API key 有設且 client 初始化成功)""" - return self._client is not None + """SDK 是否就緒可呼叫(API key 有設且 client 初始化成功) + + Phase 23(2026-05-04)整合 cost_throttle: + 若 'claude' provider 被 throttle(月底推估 > 
110%),is_available 回 False + 讓 caller 自動走 Gemini fallback,不送 Claude 請求。 + COST_THROTTLE_ENABLED=false 時不影響行為(戰役預設)。 + """ + if self._client is None: + return False + try: + from services.cost_throttle_service import is_provider_throttled + if is_provider_throttled('claude'): + logger.info("[Anthropic] is_available()=False — cost throttled, caller 應 fallback Gemini") + return False + except Exception: + pass # cost_throttle 不可用不阻擋 + return True def generate( self, diff --git a/services/ollama_service.py b/services/ollama_service.py index 03ad371..c3f02c8 100644 --- a/services/ollama_service.py +++ b/services/ollama_service.py @@ -387,7 +387,21 @@ class OllamaService: 請確保所有內容使用繁體中文,風格一致,並突出商品價值:""" # 文案生成使用更長的超時時間 - return self.generate(prompt, system_prompt=system_prompt, temperature=0.8, timeout=COPY_TIMEOUT) + # Phase 22.1(2026-05-04):caller × context 動態 model 路由 + # 短文 < 100 字 → gemma3:4b(輕量快),長文 → llama3.1:8b(既有預設) + # MODEL_ROUTER_ENABLED=false 時直接回 default(向下相容) + try: + from services.llm_model_router import select_model + expected_length = len(product_name) * 5 # 商品名長 × 5 推估文案輸出長度 + chosen_model = select_model( + caller='sales_copy', + context={'expected_length': expected_length}, + default=self.model, # llama3.1:8b 預設 + ) + except Exception: + chosen_model = self.model # router 失敗不影響主流程 + return self.generate(prompt, model=chosen_model, system_prompt=system_prompt, + temperature=0.8, timeout=COPY_TIMEOUT) def extract_keywords(self, text: str, max_keywords: int = 10) -> OllamaResponse: """