Files
ewoooc/services/anthropic_service.py
OoO 943de8466c
Some checks failed
CD Pipeline / deploy (push) Has been cancelled
feat(p7): Anthropic SDK + Claude Opus 4.7 接 Code Review (feature flag OFF)
Operation Ollama-First v5.0 / Phase 7 Frontier 升級

services/anthropic_service.py (新檔, 226 行)
- AnthropicService 包裝 + ClaudeResponse dataclass
- Ephemeral prompt cache 5 分鐘 TTL(重複 system_prompt 省 90% 成本)
- usage 解析 input/output/cache_creation/cache_read 四欄位
- ANTHROPIC_API_KEY 未設或 SDK 缺失時 is_available()=False 靜默退化

code_review_pipeline_service.py — _openclaw_assess 加 L1 Claude 分支
- CODE_REVIEW_USE_CLAUDE flag (預設 OFF,等 ANTHROPIC_API_KEY 設定後翻 ON)
- 路由:Claude Opus 4.7 (Arena code Elo 1548) → Gemini → ElephantAlpha 三層
- request_id 串鏈不變

ai_call_logger.py COST_TABLE 補 3 個 Claude 模型:
- claude-opus-4-7:    $15/$75 per M tokens (程式碼 #1)
- claude-sonnet-4-6:  $3/$15  per M tokens (agentic 平衡)
- claude-haiku-4-5:   $0.8/$4 per M tokens (輕量快速)

requirements.txt: 加 anthropic>=0.40.0
.env.example: 加 ANTHROPIC_API_KEY / CODE_REVIEW_USE_CLAUDE / CLAUDE_MODEL

52 unit tests 全綠(22 logger + 18 anthropic + 5 routing + 7 security)

啟用步驟(待統帥手動):
  1. .env 加 ANTHROPIC_API_KEY=sk-ant-...
  2. CODE_REVIEW_USE_CLAUDE=true + restart momo-app
  3. 觀察 ai_calls.cache_read_tokens > 0 確認 cache 生效

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 23:31:30 +08:00

229 lines
8.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
services/anthropic_service.py
Operation Ollama-First v5.0 / Phase 7 — Anthropic Claude API 包裝
模型對照(2026-05 市場):
- claude-opus-4-7: 程式碼 #1(Arena Elo 1548),200K context,$15/$75 per 1M tokens
- claude-sonnet-4-6: agentic 平衡型,200K context,$3/$15 per 1M tokens
- claude-haiku-4-5: 輕量快速,200K context,$0.8/$4 per 1M tokens
設計原則:
1. SDK 包裝層;介面與 services/gemini_service.py 對齊(generate / check_connection)
2. 自動 prompt cache(5 分鐘 ephemeral TTL,重複 system_prompt 省 ~90% 成本)
3. usage 完整回傳(input_tokens / output_tokens / cache_creation_input_tokens / cache_read_input_tokens)
4. 主流程不爆:失敗回 ClaudeResponse(success=False),由呼叫端決定 fallback 路徑
5. feature flag 控制由呼叫端負責(如 code_review 的 CODE_REVIEW_USE_CLAUDE),本模組純 SDK 包裝
"""
from __future__ import annotations
import logging
import os
import time
from dataclasses import dataclass
from typing import Optional
logger = logging.getLogger(__name__)


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int.

    Falls back to *default* when the variable is unset, blank, or not a valid
    integer, so a typo in the deployment environment cannot break the import
    of this module.
    """
    raw = os.getenv(name, '')
    if not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        logger.warning(
            "[Anthropic] invalid %s=%r, falling back to %d", name, raw, default,
        )
        return default


# Environment configuration. These values are read once, when the module is
# imported; changing the environment afterwards requires a process restart
# (or passing explicit arguments to AnthropicService / generate()).
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY', '')
DEFAULT_MODEL = os.getenv('CLAUDE_MODEL', 'claude-opus-4-7')
TIMEOUT = _env_int('CLAUDE_TIMEOUT', 120)
@dataclass
class ClaudeResponse:
    """Result of one Claude API call (styled after ``GeminiResponse``).

    The same structure is used for successful and failed calls; a failed
    call has ``success=False``, empty ``content`` and a message in ``error``.
    """

    # Whether the call completed and produced a response.
    success: bool
    # Response text; empty string on failure.
    content: str
    # Model name associated with this response.
    model: str
    # Token counters taken from the API usage report (0 when not reported).
    input_tokens: int = 0
    output_tokens: int = 0
    cache_creation_tokens: int = 0
    cache_read_tokens: int = 0
    # Elapsed time of the call in milliseconds.
    duration_ms: int = 0
    # Error description for failed calls, otherwise None.
    error: Optional[str] = None

    @property
    def cache_hit(self) -> bool:
        """True when at least one token was read from the prompt cache."""
        return 0 < self.cache_read_tokens

    @property
    def total_tokens(self) -> int:
        """Input plus output tokens; the cache counters are not included."""
        return sum((self.input_tokens, self.output_tokens))
class AnthropicService:
    """Wrapper around the Anthropic SDK client with prompt caching and structured usage.

    The SDK client is created eagerly in ``__init__``. When no API key is
    configured, or the ``anthropic`` package cannot be imported, the instance
    is still constructed but ``is_available()`` returns ``False`` and
    ``generate()`` returns a failed ``ClaudeResponse`` instead of raising.
    """

    def __init__(self, model: Optional[str] = None, api_key: Optional[str] = None):
        """Store the configuration and try to create the SDK client.

        Args:
            model: Default model name; falls back to the module-level
                ``DEFAULT_MODEL`` when omitted or empty.
            api_key: API key; falls back to the module-level
                ``ANTHROPIC_API_KEY`` when omitted or empty.
        """
        self.model = model or DEFAULT_MODEL
        self.api_key = api_key or ANTHROPIC_API_KEY
        self._client = None
        self._init_client()

    def _init_client(self) -> None:
        """Create the SDK client, leaving ``self._client`` as None on any failure.

        A missing API key, a missing ``anthropic`` package, or an error raised
        by the client constructor is logged and swallowed so that building the
        service never raises.
        """
        if not self.api_key:
            logger.info("[Anthropic] ANTHROPIC_API_KEY 未設定service 不可用")
            return
        try:
            # Imported lazily so the module can be loaded without the SDK installed.
            import anthropic
            self._client = anthropic.Anthropic(api_key=self.api_key)
            logger.info("[Anthropic] SDK 初始化成功,預設模型=%s", self.model)
        except ImportError:
            logger.error("[Anthropic] SDK 未安裝pip install anthropic>=0.40.0")
        except Exception as e:
            logger.error("[Anthropic] SDK 初始化失敗: %s", e)

    def is_available(self) -> bool:
        """Return True when the SDK client was created successfully."""
        return self._client is not None

    @staticmethod
    def _system_param(system_prompt: str, cache_system: bool):
        """Return the ``system`` request value: a cache-tagged text block list, or the raw string."""
        if not cache_system:
            return system_prompt
        # Tagging the block with ephemeral cache_control lets a repeated,
        # unchanged system prompt be served from the prompt cache.
        return [
            {
                "type": "text",
                "text": system_prompt,
                "cache_control": {"type": "ephemeral"},
            }
        ]

    @staticmethod
    def _join_text(blocks) -> str:
        """Join the non-empty ``text`` attributes of *blocks* with newlines, skipping blocks without text."""
        texts = (getattr(block, 'text', None) for block in (blocks or []))
        return "\n".join(text for text in texts if text)

    @staticmethod
    def _usage_count(usage, field: str) -> int:
        """Return ``usage.<field>`` as an int, or 0 when usage or the field is missing or falsy."""
        if not usage:
            return 0
        return int(getattr(usage, field, 0) or 0)

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        model: Optional[str] = None,
        max_tokens: int = 4096,
        temperature: float = 0.3,
        cache_system: bool = True,
        timeout: Optional[int] = None,
    ) -> "ClaudeResponse":
        """Send one user message to the Claude API and return the result.

        Args:
            prompt: User message text.
            system_prompt: Optional system instruction. Keeping it stable
                across calls lets ``cache_system`` reuse the cached prompt.
            model: Model name for this call; defaults to ``self.model``.
            max_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature forwarded to the API.
                NOTE(review): confirm the configured model accepts a
                non-default ``temperature`` - TODO verify against the
                Messages API documentation for that model.
            cache_system: When True and ``system_prompt`` is non-empty, the
                system prompt is sent as a text block tagged with ephemeral
                ``cache_control``; otherwise it is sent as a plain string.
            timeout: Per-request timeout passed to the SDK; defaults to the
                module-level ``TIMEOUT`` when None.

        Returns:
            A ``ClaudeResponse``. This method does not raise for API or
            parsing errors: any ``Exception`` is logged and reported as
            ``success=False`` with ``error`` set to
            ``"<ExceptionType>: <message truncated to 300 characters>"``.
        """
        model_name = model or self.model
        request_timeout = timeout if timeout is not None else TIMEOUT
        start = time.monotonic()
        if not self._client:
            return ClaudeResponse(
                success=False, content="", model=model_name,
                error="anthropic client not initialized (check ANTHROPIC_API_KEY)",
            )
        try:
            request = {
                "model": model_name,
                "max_tokens": max_tokens,
                "temperature": temperature,
                "messages": [{"role": "user", "content": prompt}],
                "timeout": request_timeout,
            }
            if system_prompt:
                request["system"] = self._system_param(system_prompt, cache_system)
            resp = self._client.messages.create(**request)

            # A response may carry several content blocks; keep only their text.
            content = self._join_text(resp.content)
            usage = getattr(resp, 'usage', None)
            input_tokens = self._usage_count(usage, 'input_tokens')
            output_tokens = self._usage_count(usage, 'output_tokens')
            cache_creation = self._usage_count(usage, 'cache_creation_input_tokens')
            cache_read = self._usage_count(usage, 'cache_read_input_tokens')
            duration_ms = int((time.monotonic() - start) * 1000)
            logger.info(
                "[Anthropic] generate ok model=%s tokens=%d/%d cache=%d/%d duration=%dms",
                model_name, input_tokens, output_tokens, cache_creation, cache_read, duration_ms,
            )
            return ClaudeResponse(
                success=True,
                content=content,
                # Prefer the model name echoed by the API when it is present.
                model=getattr(resp, 'model', model_name) or model_name,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cache_creation_tokens=cache_creation,
                cache_read_tokens=cache_read,
                duration_ms=duration_ms,
            )
        except Exception as e:
            # Deliberate catch-all: callers decide the fallback path based on
            # the returned ClaudeResponse rather than on exceptions.
            duration_ms = int((time.monotonic() - start) * 1000)
            err_msg = f"{type(e).__name__}: {str(e)[:300]}"
            logger.warning("[Anthropic] generate failed model=%s duration=%dms err=%s",
                           model_name, duration_ms, err_msg)
            return ClaudeResponse(
                success=False, content="", model=model_name,
                duration_ms=duration_ms, error=err_msg,
            )

    def check_connection(self) -> bool:
        """Probe the API with a small request and report whether it succeeded.

        Sends the prompt ``"ping"`` with ``max_tokens=10``, temperature 0, no
        system prompt and a timeout of 15. Returns False without raising when
        the client is unavailable or the probe fails.
        """
        if not self._client:
            return False
        try:
            r = self.generate(
                prompt="ping",
                max_tokens=10,
                temperature=0.0,
                cache_system=False,
                timeout=15,
            )
            return r.success
        except Exception as e:
            logger.warning("[Anthropic] check_connection failed: %s", e)
            return False
# Module-level singleton (same pattern as gemini_service).
anthropic_service = AnthropicService()

if __name__ == "__main__":
    # Manual smoke test; the API call only runs when ANTHROPIC_API_KEY is set.
    logging.basicConfig(level=logging.INFO)
    service = AnthropicService()
    available = service.is_available()
    print(f"is_available: {available}")
    if available:
        result = service.generate(
            prompt="用一句話介紹 Python。",
            system_prompt="你是繁體中文助手。",
            max_tokens=100,
        )
        status_part = (
            f"success={result.success} "
            f"tokens={result.input_tokens}/{result.output_tokens} "
        )
        cache_part = (
            f"cache={result.cache_creation_tokens}/{result.cache_read_tokens} "
            f"duration={result.duration_ms}ms"
        )
        print(status_part + cache_part)
        # Show the generated text on success, otherwise the error description.
        if result.success:
            print(result.content)
        else:
            print(result.error)