Files
ewoooc/services/anthropic_service.py
OoO 0a75d11a28
All checks were successful
CD Pipeline / deploy (push) Successful in 59s
記錄 Claude 成本節流檢查失敗
2026-05-13 10:03:13 +08:00

244 lines
9.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
services/anthropic_service.py
Operation Ollama-First v5.0 / Phase 7 — Anthropic Claude API 包裝
模型對照2026-05 市場):
- claude-opus-4-7: 程式碼 #1Arena Elo 1548200K context$15/$75 per 1M tokens
- claude-sonnet-4-6: agentic 平衡型200K context$3/$15 per 1M tokens
- claude-haiku-4-5: 輕量快速200K context$0.8/$4 per 1M tokens
設計原則:
1. SDK 包裝層;介面與 services/gemini_service.py 對齊generate / check_connection
2. 自動 prompt cache5 分鐘 ephemeral TTL重複 system_prompt 省 ~90% 成本)
3. usage 完整回傳input_tokens / output_tokens / cache_creation_input_tokens / cache_read_input_tokens
4. 主流程不爆:失敗回 ClaudeResponse(success=False),由呼叫端決定 fallback 路徑
5. feature flag 控制由呼叫端負責(如 code_review 的 CODE_REVIEW_USE_CLAUDE本模組純 SDK 包裝
"""
from __future__ import annotations
import logging
import os
import time
from dataclasses import dataclass
from typing import Optional
logger = logging.getLogger(__name__)


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int, or *default*.

    A missing, blank, or non-numeric value yields *default* instead of
    raising, so a typo in deployment configuration cannot make this module
    fail at import time.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset, blank, or not an int.

    Returns:
        The parsed integer, or *default*.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        logger.warning("[Anthropic] invalid %s=%r; falling back to %d", name, raw, default)
        return default


# Environment-driven settings.
# NOTE(review): these are captured once, when the module is imported; changing
# the environment afterwards has no effect until the process is restarted or
# the module is reloaded.
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY', '')
DEFAULT_MODEL = os.getenv('CLAUDE_MODEL', 'claude-opus-4-7')
# Guarded parse: a malformed CLAUDE_TIMEOUT falls back to 120 rather than raising.
TIMEOUT = _env_int('CLAUDE_TIMEOUT', 120)
@dataclass
class ClaudeResponse:
    """Outcome of one Claude API call.

    The original notes say the layout is meant to mirror GeminiResponse
    (defined elsewhere; not verified here).
    """

    success: bool  # whether the call completed without error
    content: str  # generated text; empty on failure
    model: str  # model name associated with the call
    input_tokens: int = 0
    output_tokens: int = 0
    cache_creation_tokens: int = 0
    cache_read_tokens: int = 0
    duration_ms: int = 0  # elapsed time in milliseconds
    error: Optional[str] = None  # failure description, None on success

    @property
    def cache_hit(self) -> bool:
        """Tell whether the prompt cache was hit, i.e. any cache-read tokens were counted."""
        return 0 < self.cache_read_tokens

    @property
    def total_tokens(self) -> int:
        """Sum of input and output tokens (the cache counters are not included)."""
        consumed = self.input_tokens
        produced = self.output_tokens
        return consumed + produced
class AnthropicService:
    """Claude API wrapper with prompt-cache support and structured usage reporting."""

    def __init__(self, model: str = None, api_key: str = None):
        """Store the configuration and try to build the SDK client.

        Args:
            model: Default model name; falls back to DEFAULT_MODEL when omitted or empty.
            api_key: API key; falls back to ANTHROPIC_API_KEY when omitted or empty.
        """
        self.api_key = api_key or ANTHROPIC_API_KEY
        self.model = model or DEFAULT_MODEL
        self._client = None
        self._init_client()

    def _init_client(self) -> None:
        """Create the SDK client, degrading quietly when that is not possible.

        With no API key, a missing SDK, or a constructor failure, the problem
        is logged and ``self._client`` stays ``None`` (so ``is_available()``
        returns False).
        """
        if self.api_key:
            try:
                # Imported lazily so this module still loads without the SDK installed.
                import anthropic as sdk
                self._client = sdk.Anthropic(api_key=self.api_key)
                logger.info("[Anthropic] SDK 初始化成功,預設模型=%s", self.model)
            except ImportError:
                logger.error("[Anthropic] SDK 未安裝pip install anthropic>=0.40.0")
            except Exception as exc:
                logger.error("[Anthropic] SDK 初始化失敗: %s", exc)
        else:
            logger.info("[Anthropic] ANTHROPIC_API_KEY 未設定service 不可用")

    def is_available(self) -> bool:
        """Report whether the service is ready to take a request.

        Returns False when no client was initialised, or when
        ``is_provider_throttled('claude')`` from
        ``services.cost_throttle_service`` is truthy. If the throttle check
        itself fails (import or call), a warning is logged and the service is
        still treated as available.

        Per the original notes (Phase 23, 2026-05-04): throttling is meant to
        trigger when the month-end cost projection exceeds 110%, so callers
        fall back to Gemini, and COST_THROTTLE_ENABLED=false leaves behaviour
        unchanged. Those details live in cost_throttle_service and are not
        verified here.
        """
        if self._client is None:
            return False

        try:
            from services.cost_throttle_service import is_provider_throttled
            throttled = bool(is_provider_throttled('claude'))
            if throttled:
                logger.info("[Anthropic] is_available()=False — cost throttled, caller 應 fallback Gemini")
        except Exception:
            # Best effort: a broken throttle check must not disable the provider.
            logger.warning("[Anthropic] cost_throttle check failed; continuing as available", exc_info=True)
            throttled = False
        return not throttled

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        model: Optional[str] = None,
        max_tokens: int = 4096,
        temperature: float = 0.3,
        cache_system: bool = True,
        timeout: Optional[int] = None,
    ) -> ClaudeResponse:
        """Send one user message to Claude and return the outcome.

        Args:
            prompt: User prompt, sent as a single ``user`` message.
            system_prompt: Optional system instruction; omitted from the
                request when empty.
            model: Model name; defaults to ``self.model``.
            max_tokens: Output token limit.
            temperature: Sampling temperature (the original notes suggest
                0.2 for code review and 0.7 for copywriting).
            cache_system: When True, the system prompt is sent as a text block
                tagged with ephemeral ``cache_control``; otherwise it is sent
                as a plain string.
            timeout: Per-request timeout; defaults to the module-level TIMEOUT.

        Returns:
            Always a ClaudeResponse. Exceptions are caught and reported as
            ``success=False`` with ``error`` set; nothing is raised.
        """
        chosen_model = model or self.model
        effective_timeout = TIMEOUT if timeout is None else timeout
        started = time.monotonic()

        def elapsed_ms() -> int:
            # Milliseconds since the call started.
            return int((time.monotonic() - started) * 1000)

        if not self._client:
            return ClaudeResponse(
                success=False,
                content="",
                model=chosen_model,
                error="anthropic client not initialized (check ANTHROPIC_API_KEY)",
            )

        try:
            request = {
                "model": chosen_model,
                "max_tokens": max_tokens,
                "temperature": temperature,
                "messages": [{"role": "user", "content": prompt}],
                "timeout": effective_timeout,
            }
            if system_prompt:
                # The cached form wraps the prompt in a text block marked ephemeral.
                request["system"] = (
                    [
                        {
                            "type": "text",
                            "text": system_prompt,
                            "cache_control": {"type": "ephemeral"},
                        }
                    ]
                    if cache_system
                    else system_prompt
                )

            reply = self._client.messages.create(**request)

            # Join the text of every content block that carries non-empty text.
            texts = (getattr(part, 'text', None) for part in (reply.content or []))
            body = "\n".join(piece for piece in texts if piece)

            usage = getattr(reply, 'usage', None)

            def usage_count(field: str) -> int:
                # A missing usage object, missing field, or None all count as 0.
                return int(getattr(usage, field, 0) or 0) if usage else 0

            tokens_in = usage_count('input_tokens')
            tokens_out = usage_count('output_tokens')
            cache_written = usage_count('cache_creation_input_tokens')
            cache_served = usage_count('cache_read_input_tokens')
            took = elapsed_ms()

            logger.info(
                "[Anthropic] generate ok model=%s tokens=%d/%d cache=%d/%d duration=%dms",
                chosen_model, tokens_in, tokens_out, cache_written, cache_served, took,
            )
            return ClaudeResponse(
                success=True,
                content=body,
                # Prefer the model name echoed by the API when it is present.
                model=getattr(reply, 'model', chosen_model) or chosen_model,
                input_tokens=tokens_in,
                output_tokens=tokens_out,
                cache_creation_tokens=cache_written,
                cache_read_tokens=cache_served,
                duration_ms=took,
            )
        except Exception as exc:
            took = elapsed_ms()
            # Exception text is capped at 300 characters.
            failure = f"{type(exc).__name__}: {str(exc)[:300]}"
            logger.warning(
                "[Anthropic] generate failed model=%s duration=%dms err=%s",
                chosen_model, took, failure,
            )
            return ClaudeResponse(
                success=False,
                content="",
                model=chosen_model,
                duration_ms=took,
                error=failure,
            )

    def check_connection(self) -> bool:
        """Lightweight connectivity probe.

        Sends a tiny "ping" request (max_tokens=10, temperature 0, no system
        caching, 15-second timeout) and returns whether it succeeded. Returns
        False without raising when the client is missing or the probe fails.
        """
        if not self._client:
            return False
        try:
            probe = self.generate(
                prompt="ping",
                max_tokens=10,
                temperature=0.0,
                cache_system=False,
                timeout=15,
            )
        except Exception as exc:
            logger.warning("[Anthropic] check_connection failed: %s", exc)
            return False
        return probe.success
# Module-level singleton (per the original note, this mirrors the gemini_service pattern).
# Constructing it at import time runs AnthropicService.__init__, which attempts SDK setup.
anthropic_service = AnthropicService()
if __name__ == "__main__":
# 手動煙霧測試(需設 ANTHROPIC_API_KEY
logging.basicConfig(level=logging.INFO)
svc = AnthropicService()
print(f"is_available: {svc.is_available()}")
if svc.is_available():
r = svc.generate(
prompt="用一句話介紹 Python。",
system_prompt="你是繁體中文助手。",
max_tokens=100,
)
print(f"success={r.success} tokens={r.input_tokens}/{r.output_tokens} "
f"cache={r.cache_creation_tokens}/{r.cache_read_tokens} duration={r.duration_ms}ms")
print(r.content if r.success else r.error)