244 lines
9.4 KiB
Python
244 lines
9.4 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
services/anthropic_service.py
|
||
Operation Ollama-First v5.0 / Phase 7 — Anthropic Claude API 包裝
|
||
|
||
模型對照(2026-05 市場):
|
||
- claude-opus-4-7: 程式碼 #1(Arena Elo 1548),200K context,$15/$75 per 1M tokens
|
||
- claude-sonnet-4-6: agentic 平衡型,200K context,$3/$15 per 1M tokens
|
||
- claude-haiku-4-5: 輕量快速,200K context,$0.8/$4 per 1M tokens
|
||
|
||
設計原則:
|
||
1. SDK 包裝層;介面與 services/gemini_service.py 對齊(generate / check_connection)
|
||
2. 自動 prompt cache(5 分鐘 ephemeral TTL,重複 system_prompt 省 ~90% 成本)
|
||
3. usage 完整回傳:input_tokens / output_tokens / cache_creation_input_tokens / cache_read_input_tokens
|
||
4. 主流程不爆:失敗回 ClaudeResponse(success=False),由呼叫端決定 fallback 路徑
|
||
5. feature flag 控制由呼叫端負責(如 code_review 的 CODE_REVIEW_USE_CLAUDE);本模組純 SDK 包裝
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
import os
|
||
import time
|
||
from dataclasses import dataclass
|
||
from typing import Optional
|
||
|
||
logger = logging.getLogger(__name__)

# Fallbacks used when the corresponding environment variable is unset/blank.
_FALLBACK_MODEL = 'claude-opus-4-7'
_FALLBACK_TIMEOUT_SECONDS = 120


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int.

    Falls back to *default* when the variable is unset, blank, or not a valid
    integer, so that a bad deployment value cannot make importing this module
    raise ``ValueError``.
    """
    raw = os.getenv(name, '').strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError:
        logger.warning(
            "[Anthropic] %s=%r is not an integer; using default %d",
            name, raw, default,
        )
        return default


# Environment configuration.
# NOTE: these values are read once, when this module is imported; changing the
# environment afterwards does not affect them.
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY', '')
# A blank CLAUDE_MODEL is treated like an unset one.
DEFAULT_MODEL = os.getenv('CLAUDE_MODEL') or _FALLBACK_MODEL
TIMEOUT = _env_int('CLAUDE_TIMEOUT', _FALLBACK_TIMEOUT_SECONDS)
|
||
|
||
|
||
@dataclass
class ClaudeResponse:
    """Result of a single Claude API call (styled after GeminiResponse).

    Only ``success``, ``content`` and ``model`` are required; every counter
    defaults to ``0`` and ``error`` defaults to ``None``.
    """

    # Outcome and payload.
    success: bool
    content: str
    model: str

    # Token accounting.
    input_tokens: int = 0
    output_tokens: int = 0
    cache_creation_tokens: int = 0
    cache_read_tokens: int = 0

    # Elapsed time in milliseconds, and the failure message if any.
    duration_ms: int = 0
    error: Optional[str] = None

    @property
    def cache_hit(self) -> bool:
        """True when the prompt cache was hit, i.e. ``cache_read_tokens`` is positive."""
        return 0 < self.cache_read_tokens

    @property
    def total_tokens(self) -> int:
        """Sum of ``input_tokens`` and ``output_tokens``; cache counters are not included."""
        prompt_side = self.input_tokens
        completion_side = self.output_tokens
        return prompt_side + completion_side
|
||
|
||
|
||
class AnthropicService:
    """Wrapper around the Anthropic SDK client with prompt caching and structured usage reporting.

    ``generate`` never raises: failures come back as
    ``ClaudeResponse(success=False, error=...)`` so the caller chooses the
    fallback path.
    """

    def __init__(self, model: Optional[str] = None, api_key: Optional[str] = None):
        """Store the configuration and try to build the SDK client.

        Args:
            model: Default model name; falls back to the module-level ``DEFAULT_MODEL``.
            api_key: API key; falls back to the module-level ``ANTHROPIC_API_KEY``.
        """
        self.model = model or DEFAULT_MODEL
        self.api_key = api_key or ANTHROPIC_API_KEY
        self._client = None
        self._init_client()

    def _init_client(self) -> None:
        """Create the SDK client, degrading quietly when that is not possible.

        With no API key, a missing ``anthropic`` package, or a constructor
        failure, ``self._client`` stays ``None`` and ``is_available()``
        returns ``False``; nothing is raised.
        """
        if not self.api_key:
            logger.info("[Anthropic] ANTHROPIC_API_KEY 未設定,service 不可用")
            return
        try:
            # Imported lazily so the module still loads without the SDK installed.
            import anthropic
            self._client = anthropic.Anthropic(api_key=self.api_key)
            logger.info("[Anthropic] SDK 初始化成功,預設模型=%s", self.model)
        except ImportError:
            logger.error("[Anthropic] SDK 未安裝(pip install anthropic>=0.40.0)")
        except Exception as e:
            logger.error("[Anthropic] SDK 初始化失敗: %s", e)

    def is_available(self) -> bool:
        """Return whether the service should be called right now.

        Returns ``False`` when the client was never initialised, or when
        ``services.cost_throttle_service.is_provider_throttled('claude')``
        reports the provider as throttled, so the caller can fall back to
        another provider without sending a request. If the throttle check
        itself fails (import error or any other exception) the failure is
        logged and the service is treated as available.

        The throttle policy lives in ``cost_throttle_service`` and is not
        visible here; the original note says it trips when the month-end
        cost projection exceeds 110% and is inactive when
        ``COST_THROTTLE_ENABLED=false``.
        """
        if self._client is None:
            return False
        try:
            from services.cost_throttle_service import is_provider_throttled
            if is_provider_throttled('claude'):
                logger.info("[Anthropic] is_available()=False — cost throttled, caller 應 fallback Gemini")
                return False
        except Exception:
            # Best effort: a broken throttle check must not disable the service.
            logger.warning("[Anthropic] cost_throttle check failed; continuing as available", exc_info=True)
        return True

    @staticmethod
    def _build_request(
        model_name: str,
        prompt: str,
        system_prompt: Optional[str],
        max_tokens: int,
        temperature: float,
        cache_system: bool,
        request_timeout: int,
    ) -> dict:
        """Assemble the keyword arguments passed to ``client.messages.create``."""
        kwargs = {
            "model": model_name,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "messages": [{"role": "user", "content": prompt}],
            "timeout": request_timeout,
        }
        if system_prompt:
            if cache_system:
                # Block form is required to attach cache_control; the
                # ephemeral cache lets repeated system prompts be reused.
                kwargs["system"] = [
                    {
                        "type": "text",
                        "text": system_prompt,
                        "cache_control": {"type": "ephemeral"},
                    }
                ]
            else:
                kwargs["system"] = system_prompt
        return kwargs

    @staticmethod
    def _extract_text(blocks) -> str:
        """Join the non-empty ``text`` of each content block with newlines.

        Blocks without a ``text`` attribute are skipped; ``None`` or an empty
        sequence yields ``""``.
        """
        texts = (getattr(block, 'text', None) for block in (blocks or []))
        return "\n".join(text for text in texts if text)

    @staticmethod
    def _usage_count(usage, field: str) -> int:
        """Read an integer counter from *usage*, treating missing/None values as 0."""
        if not usage:
            return 0
        return int(getattr(usage, field, 0) or 0)

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        model: Optional[str] = None,
        max_tokens: int = 4096,
        temperature: float = 0.3,
        cache_system: bool = True,
        timeout: Optional[int] = None,
    ) -> ClaudeResponse:
        """Call the Claude Messages API and return the generated text.

        Args:
            prompt: User prompt, sent as a single user message.
            system_prompt: Optional system instruction. Keeping it stable
                across calls lets ``cache_system=True`` reuse the cache.
            model: Model name; defaults to ``self.model``.
            max_tokens: Upper bound on output tokens.
            temperature: Sampling temperature (original guidance: about 0.2
                for code review, 0.7 for copywriting).
            cache_system: When true, the system prompt is sent as a text block
                tagged with ephemeral ``cache_control`` (5-minute TTL per the
                original note).
            timeout: Request timeout in seconds; defaults to the module-level
                ``TIMEOUT``.
                NOTE(review): the SDK may retry failed requests on its own, so
                total wall-clock time could exceed this — confirm against the
                SDK documentation.

        Returns:
            ClaudeResponse: always a dataclass instance. On failure
            ``success`` is ``False`` and ``error`` holds
            ``"<ExceptionType>: <message truncated to 300 chars>"``; nothing
            is raised.
        """
        model_name = model or self.model
        request_timeout = timeout if timeout is not None else TIMEOUT
        start = time.monotonic()

        if not self._client:
            return ClaudeResponse(
                success=False, content="", model=model_name,
                error="anthropic client not initialized (check ANTHROPIC_API_KEY)",
            )

        try:
            kwargs = self._build_request(
                model_name, prompt, system_prompt,
                max_tokens, temperature, cache_system, request_timeout,
            )
            resp = self._client.messages.create(**kwargs)

            content = self._extract_text(resp.content)

            usage = getattr(resp, 'usage', None)
            input_tokens = self._usage_count(usage, 'input_tokens')
            output_tokens = self._usage_count(usage, 'output_tokens')
            cache_creation = self._usage_count(usage, 'cache_creation_input_tokens')
            cache_read = self._usage_count(usage, 'cache_read_input_tokens')

            duration_ms = int((time.monotonic() - start) * 1000)
            logger.info(
                "[Anthropic] generate ok model=%s tokens=%d/%d cache=%d/%d duration=%dms",
                model_name, input_tokens, output_tokens, cache_creation, cache_read, duration_ms,
            )

            return ClaudeResponse(
                success=True,
                content=content,
                # Prefer the model name echoed by the API when present.
                model=getattr(resp, 'model', model_name) or model_name,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cache_creation_tokens=cache_creation,
                cache_read_tokens=cache_read,
                duration_ms=duration_ms,
            )

        except Exception as e:
            # Deliberately broad: the contract is to report, never raise.
            duration_ms = int((time.monotonic() - start) * 1000)
            err_msg = f"{type(e).__name__}: {str(e)[:300]}"
            logger.warning("[Anthropic] generate failed model=%s duration=%dms err=%s",
                           model_name, duration_ms, err_msg)
            return ClaudeResponse(
                success=False, content="", model=model_name,
                duration_ms=duration_ms, error=err_msg,
            )

    def check_connection(self) -> bool:
        """Lightweight connectivity probe.

        Sends a ``"ping"`` prompt capped at 10 output tokens with a 15-second
        timeout and returns whether the call succeeded. Returns ``False``
        (never raises) when the client is unavailable or the probe fails.
        """
        if not self._client:
            return False
        try:
            r = self.generate(
                prompt="ping",
                max_tokens=10,
                temperature=0.0,
                cache_system=False,
                timeout=15,
            )
            return r.success
        except Exception as e:
            logger.warning("[Anthropic] check_connection failed: %s", e)
            return False
|
||
|
||
|
||
# Module-level singleton (mirrors the gemini_service pattern).
|
||
anthropic_service = AnthropicService()
|
||
|
||
|
||
if __name__ == "__main__":
    # Manual smoke test; ANTHROPIC_API_KEY must be set for the call to run.
    logging.basicConfig(level=logging.INFO)
    svc = AnthropicService()
    print(f"is_available: {svc.is_available()}")
    if svc.is_available():
        smoke_args = {
            "prompt": "用一句話介紹 Python。",
            "system_prompt": "你是繁體中文助手。",
            "max_tokens": 100,
        }
        r = svc.generate(**smoke_args)
        # One-line summary: outcome, token counts, cache counters, latency.
        summary = (
            f"success={r.success} tokens={r.input_tokens}/{r.output_tokens} "
            f"cache={r.cache_creation_tokens}/{r.cache_read_tokens} duration={r.duration_ms}ms"
        )
        print(summary)
        if r.success:
            print(r.content)
        else:
            print(r.error)
|