feat(p7): Anthropic SDK + Claude Opus 4.7 接 Code Review (feature flag OFF)
Some checks failed
CD Pipeline / deploy (push) Has been cancelled

Operation Ollama-First v5.0 / Phase 7 Frontier 升級

services/anthropic_service.py (新檔, 226 行)
- AnthropicService 包裝 + ClaudeResponse dataclass
- Ephemeral prompt cache 5 分鐘 TTL(重複 system_prompt 省 90% 成本)
- usage 解析 input/output/cache_creation/cache_read 四欄位
- ANTHROPIC_API_KEY 未設或 SDK 缺失時 is_available()=False 靜默退化

code_review_pipeline_service.py — _openclaw_assess 加 L1 Claude 分支
- CODE_REVIEW_USE_CLAUDE flag (預設 OFF,等 ANTHROPIC_API_KEY 設定後翻 ON)
- 路由:Claude Opus 4.7 (Arena code Elo 1548) → Gemini → ElephantAlpha 三層
- request_id 串鏈不變

ai_call_logger.py COST_TABLE 補 3 個 Claude 模型:
- claude-opus-4-7:    $15/$75 per M tokens (程式碼 #1)
- claude-sonnet-4-6:  $3/$15  per M tokens (agentic 平衡)
- claude-haiku-4-5:   $0.8/$4 per M tokens (輕量快速)

requirements.txt: 加 anthropic>=0.40.0
.env.example: 加 ANTHROPIC_API_KEY / CODE_REVIEW_USE_CLAUDE / CLAUDE_MODEL

52 unit tests 全綠(22 logger + 18 anthropic + 5 routing + 7 security)

啟用步驟(待統帥手動):
  1. .env 加 ANTHROPIC_API_KEY=sk-ant-...
  2. CODE_REVIEW_USE_CLAUDE=true + restart momo-app
  3. 觀察 ai_calls.cache_read_tokens > 0 確認 cache 生效

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
OoO
2026-05-03 23:31:30 +08:00
parent bc4332d53f
commit 943de8466c
7 changed files with 855 additions and 8 deletions

View File

@@ -161,6 +161,16 @@ GEMINI_API_KEY=<change-me>
GEMINI_MODEL=gemini-1.5-flash
OPENCLAW_MODEL=gemini-2.5-flash-preview-05-20
# ── Anthropic Claude API(Phase 7 Frontier 升級)───────────────────────────
# 用途:Code Review 升級到 Claude Opus 4.7(程式碼能力 #1,Arena Elo 1548)
# 取得方式:https://console.anthropic.com/settings/keys
# feature flag CODE_REVIEW_USE_CLAUDE 預設 false → 行為與 Phase 6 完全相同
# 啟用步驟:(1) 設 ANTHROPIC_API_KEY (2) CODE_REVIEW_USE_CLAUDE=true
ANTHROPIC_API_KEY=
CODE_REVIEW_USE_CLAUDE=false
CLAUDE_MODEL=claude-opus-4-7
CLAUDE_TIMEOUT=120
# Debug and Monitoring
ELEPHANT_ALPHA_DEBUG_MODE=false
ELEPHANT_ALPHA_METRICS_ENABLED=true

View File

@@ -17,6 +17,7 @@ google-auth-oauthlib
google-auth-httplib2
google-api-python-client
google-generativeai
anthropic>=0.40.0 # Phase 7 Frontier 升級:Claude Opus 4.7 Code Review(feature flag CODE_REVIEW_USE_CLAUDE 預設 OFF)
feedparser
beautifulsoup4
lxml

View File

@@ -51,9 +51,10 @@ COST_TABLE: Dict[str, Dict[str, float]] = {
'meta/llama-3.3-70b-instruct': {'in': 0.0, 'out': 0.0},
'nvidia/llama-3.3-nemotron-super-49b-v1.5': {'in': 0.0, 'out': 0.0},
'deepseek-ai/deepseek-v3.2': {'in': 0.0, 'out': 0.0},
# Claude
'claude-opus-4-7': {'in': 15.0, 'out': 75.0},
'claude-sonnet-4-6': {'in': 3.0, 'out': 15.0},
# Claude (Anthropic) — 2026-05 市場價USD per 1M tokens
'claude-opus-4-7': {'in': 15.0, 'out': 75.0}, # 程式碼 #1Arena Elo 1548
'claude-sonnet-4-6': {'in': 3.0, 'out': 15.0}, # agentic 平衡
'claude-haiku-4-5': {'in': 0.8, 'out': 4.0}, # 輕量快速
# Ollama 自架 (全 0)
'hermes3:latest': {'in': 0.0, 'out': 0.0},
'qwen2.5-coder:7b': {'in': 0.0, 'out': 0.0},

View File

@@ -0,0 +1,228 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
services/anthropic_service.py
Operation Ollama-First v5.0 / Phase 7 — wrapper around the Anthropic Claude API.

Model reference (market data as of 2026-05):
- claude-opus-4-7:   #1 for code (Arena Elo 1548), 200K context, $15/$75 per 1M tokens
- claude-sonnet-4-6: balanced agentic model, 200K context, $3/$15 per 1M tokens
- claude-haiku-4-5:  lightweight and fast, 200K context, $0.8/$4 per 1M tokens

Design principles:
1. SDK wrapper layer; the interface is aligned with services/gemini_service.py
   (generate / check_connection).
2. Automatic prompt cache (ephemeral, 5-minute TTL; a repeated system_prompt
   saves ~90% of its cost).
3. Full usage reporting: input_tokens / output_tokens /
   cache_creation_input_tokens / cache_read_input_tokens.
4. Never break the main flow: failures return ClaudeResponse(success=False) and
   the caller decides the fallback path.
5. Feature flags are the caller's responsibility (e.g. CODE_REVIEW_USE_CLAUDE
   for code review); this module is a pure SDK wrapper.
"""
from __future__ import annotations
import logging
import os
import time
from dataclasses import dataclass
from typing import Optional
logger = logging.getLogger(__name__)


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* as a positive int, or *default*.

    A missing, blank, non-numeric or non-positive value falls back to
    *default* (with a warning when a bad value was supplied), so a typo in
    the deployment environment cannot make this module fail at import time.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        value = int(raw)
    except ValueError:
        logger.warning("[Anthropic] invalid %s=%r, using default %d", name, raw, default)
        return default
    if value <= 0:
        logger.warning("[Anthropic] non-positive %s=%r, using default %d", name, raw, default)
        return default
    return value


# Environment configuration. These values are read ONCE, when the module is
# imported; a later change to the environment only takes effect after a
# process restart (or importlib.reload of this module).
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY', '')
DEFAULT_MODEL = os.getenv('CLAUDE_MODEL', 'claude-opus-4-7')
TIMEOUT = _env_int('CLAUDE_TIMEOUT', 120)
@dataclass
class ClaudeResponse:
    """Result of one Claude API call (laid out in the style of GeminiResponse).

    ``success``, ``content`` and ``model`` are required; every counter
    defaults to 0 and ``error`` to ``None``.
    """

    success: bool            # whether the call produced a response
    content: str             # response text
    model: str               # model name associated with the call
    input_tokens: int = 0
    output_tokens: int = 0
    cache_creation_tokens: int = 0
    cache_read_tokens: int = 0
    duration_ms: int = 0     # call duration in milliseconds
    error: Optional[str] = None

    @property
    def total_tokens(self) -> int:
        """Return ``input_tokens + output_tokens``.

        The two cache counters are not part of this sum.
        NOTE(review): Anthropic reports cached tokens separately from
        ``input_tokens`` — confirm whether callers expect them included.
        """
        return sum((self.input_tokens, self.output_tokens))

    @property
    def cache_hit(self) -> bool:
        """Return True when any tokens were read from the prompt cache."""
        return not self.cache_read_tokens <= 0
class AnthropicService:
    """Wrapper around the Anthropic SDK with prompt caching and structured usage.

    Construction never raises: when no API key is configured, the
    ``anthropic`` package cannot be imported, or the SDK constructor fails,
    the instance is left without a client and ``is_available()`` returns
    ``False``.
    """

    def __init__(self, model: Optional[str] = None, api_key: Optional[str] = None):
        """Store the configuration and try to build the SDK client.

        Args:
            model: Default model name; falls back to the module-level
                ``DEFAULT_MODEL`` when omitted or empty.
            api_key: Anthropic API key; falls back to the module-level
                ``ANTHROPIC_API_KEY`` when omitted or empty.
        """
        self.model = model or DEFAULT_MODEL
        self.api_key = api_key or ANTHROPIC_API_KEY
        self._client = None
        self._init_client()

    def _init_client(self) -> None:
        """Create the SDK client, degrading silently on any failure.

        Without an API key, without the SDK installed, or when the SDK
        constructor raises, ``self._client`` stays ``None``.
        """
        if not self.api_key:
            logger.info("[Anthropic] ANTHROPIC_API_KEY 未設定service 不可用")
            return
        try:
            # Imported lazily so the module still loads when the SDK is absent.
            import anthropic
            self._client = anthropic.Anthropic(api_key=self.api_key)
            logger.info("[Anthropic] SDK 初始化成功,預設模型=%s", self.model)
        except ImportError:
            logger.error("[Anthropic] SDK 未安裝pip install anthropic>=0.40.0")
        except Exception as e:
            logger.error("[Anthropic] SDK 初始化失敗: %s", e)

    def is_available(self) -> bool:
        """Return True when the SDK client was created successfully."""
        return self._client is not None

    @staticmethod
    def _build_request(
        prompt: str,
        system_prompt: Optional[str],
        model_name: str,
        max_tokens: int,
        temperature: float,
        cache_system: bool,
        request_timeout: int,
    ) -> dict:
        """Assemble the keyword arguments for ``messages.create``."""
        request = {
            "model": model_name,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "messages": [{"role": "user", "content": prompt}],
            "timeout": request_timeout,
        }
        if system_prompt:
            if cache_system:
                # A list of text blocks is required to attach cache_control;
                # "ephemeral" marks the system prompt as cacheable.
                request["system"] = [
                    {
                        "type": "text",
                        "text": system_prompt,
                        "cache_control": {"type": "ephemeral"},
                    }
                ]
            else:
                request["system"] = system_prompt
        return request

    @staticmethod
    def _extract_text(resp) -> str:
        """Join the non-empty ``text`` of every content block with newlines."""
        texts = (getattr(block, 'text', None) for block in (resp.content or []))
        return "\n".join(text for text in texts if text)

    @staticmethod
    def _usage_count(usage, field: str) -> int:
        """Read an int counter from the usage object; absent or None yields 0."""
        if not usage:
            return 0
        return int(getattr(usage, field, 0) or 0)

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        model: Optional[str] = None,
        max_tokens: int = 4096,
        temperature: float = 0.3,
        cache_system: bool = True,
        timeout: Optional[int] = None,
    ) -> ClaudeResponse:
        """Send one user message to Claude and return the result.

        Args:
            prompt: User prompt.
            system_prompt: System instruction; keep it stable and combine it
                with ``cache_system=True`` so it can be served from the cache.
            model: Model name; defaults to ``self.model``.
            max_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature.
            cache_system: When True, the system prompt is sent as a text block
                carrying an ephemeral ``cache_control`` marker.
            timeout: Per-request timeout; defaults to the module-level
                ``TIMEOUT``.

        Returns:
            Always a ``ClaudeResponse``. On any failure ``success`` is False
            and ``error`` holds the message; nothing is raised.
        """
        model_name = model or self.model
        request_timeout = timeout if timeout is not None else TIMEOUT
        start = time.monotonic()

        if not self._client:
            return ClaudeResponse(
                success=False, content="", model=model_name,
                error="anthropic client not initialized (check ANTHROPIC_API_KEY)",
            )

        try:
            request = self._build_request(
                prompt, system_prompt, model_name, max_tokens,
                temperature, cache_system, request_timeout,
            )
            resp = self._client.messages.create(**request)

            content = self._extract_text(resp)
            usage = getattr(resp, 'usage', None)
            input_tokens = self._usage_count(usage, 'input_tokens')
            output_tokens = self._usage_count(usage, 'output_tokens')
            cache_creation = self._usage_count(usage, 'cache_creation_input_tokens')
            cache_read = self._usage_count(usage, 'cache_read_input_tokens')
            duration_ms = int((time.monotonic() - start) * 1000)

            logger.info(
                "[Anthropic] generate ok model=%s tokens=%d/%d cache=%d/%d duration=%dms",
                model_name, input_tokens, output_tokens, cache_creation, cache_read, duration_ms,
            )
            return ClaudeResponse(
                success=True,
                content=content,
                # Prefer the model name echoed back by the API when present.
                model=getattr(resp, 'model', model_name) or model_name,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cache_creation_tokens=cache_creation,
                cache_read_tokens=cache_read,
                duration_ms=duration_ms,
            )
        except Exception as e:
            # Deliberate catch-all: callers rely on a failure response rather
            # than an exception to choose their fallback path.
            duration_ms = int((time.monotonic() - start) * 1000)
            err_msg = f"{type(e).__name__}: {str(e)[:300]}"
            logger.warning("[Anthropic] generate failed model=%s duration=%dms err=%s",
                           model_name, duration_ms, err_msg)
            return ClaudeResponse(
                success=False, content="", model=model_name,
                duration_ms=duration_ms, error=err_msg,
            )

    def check_connection(self) -> bool:
        """Probe the API with a tiny "ping" request.

        Sends a single message with ``max_tokens=10`` and a 15-second
        timeout, without prompt caching. Returns False instead of raising
        when the client is missing or the call fails.
        """
        if not self._client:
            return False
        try:
            r = self.generate(
                prompt="ping",
                max_tokens=10,
                temperature=0.0,
                cache_system=False,
                timeout=15,
            )
            return r.success
        except Exception as e:
            logger.warning("[Anthropic] check_connection failed: %s", e)
            return False
# Module-level singleton (mirrors the gemini_service pattern).
# It is constructed at import time, so the client setup in __init__ runs on first import.
anthropic_service = AnthropicService()
if __name__ == "__main__":
    # Manual smoke test; a real request is only sent when the service is
    # available (i.e. ANTHROPIC_API_KEY is set and the SDK is installed).
    logging.basicConfig(level=logging.INFO)
    svc = AnthropicService()
    print(f"is_available: {svc.is_available()}")
    if svc.is_available():
        r = svc.generate(
            system_prompt="你是繁體中文助手。",
            prompt="用一句話介紹 Python。",
            max_tokens=100,
        )
        status_part = f"success={r.success} tokens={r.input_tokens}/{r.output_tokens} "
        cache_part = f"cache={r.cache_creation_tokens}/{r.cache_read_tokens} duration={r.duration_ms}ms"
        print(status_part + cache_part)
        if r.success:
            print(r.content)
        else:
            print(r.error)

View File

@@ -44,13 +44,17 @@ _pipeline_lock = threading.Lock()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
# LOCKED-GEMINI: Code Review 全 repo diff 可達 100K+ tokens超過 Ollama 32K context
# 未來可升 Claude Opus 4.7 (200K context, Arena code Elo 1548) — Phase 7 任務
# ADR-028 鎖定場景 #5
# Phase 7 升級CODE_REVIEW_USE_CLAUDE=true 時改走 Claude Opus 4.7200K context, Arena code Elo 1548
# 預設 OFF行為與 Phase 6 完全相同;ADR-028 鎖定場景 #5
REVIEW_MODEL = os.getenv("OPENCLAW_MODEL", "gemini-2.5-flash")
INTERNAL_TOKEN = os.getenv("INTERNAL_WEBHOOK_TOKEN", "")
AUTO_FIX_ENABLED = os.getenv("CODE_REVIEW_AUTO_FIX_ENABLED", "true").lower() == "true"
ALLOW_INSECURE_WEBHOOK = os.getenv("MOMO_ALLOW_INSECURE_INTERNAL_WEBHOOK_FOR_DEV", "").lower() == "true"
# Phase 7 Frontier 升級 feature flag — 預設 OFF啟用後 _openclaw_assess 改走 Claude Opus 4.7
CODE_REVIEW_USE_CLAUDE = os.getenv("CODE_REVIEW_USE_CLAUDE", "false").lower() == "true"
CLAUDE_REVIEW_MODEL = os.getenv("CLAUDE_MODEL", "claude-opus-4-7")
# ═══════════════════════════════════════════════════════════════════════════════
# Pipeline Class
@@ -276,8 +280,11 @@ class CodeReviewPipeline:
def _openclaw_assess(self, files: Dict[str, str], findings: List[Dict]) -> str:
"""
優先用 GeminiGEMINI_API_KEY降級用 ElephantAlpha via OpenRouter
(容器內 OPENROUTER_API_KEY 一定存在)
路由優先序:
L1 (Phase 7, flag CODE_REVIEW_USE_CLAUDE=true) → Claude Opus 4.7 (Arena code #1)
L2 (預設) → GeminiGEMINI_API_KEY
L3 (降級) → ElephantAlpha via OpenRouter
feature flag 預設 OFF行為與 Phase 6 完全相同。
"""
sev = self.state["severity_summary"]
findings_json = json.dumps(findings[:8], ensure_ascii=False, indent=2)
@@ -301,7 +308,54 @@ class CodeReviewPipeline:
<b>💡 架構優化方向</b>1條長期建議
<b>✅ 本次部署亮點</b>"""
# 優先 Gemini — Phase 1 v5.0 logger 追蹤
# ── L1Phase 7 Frontier — Claude Opus 4.7(程式碼能力 #1────────────
# feature flag 預設 OFFON 時優先走,失敗 fallback 到 L2 Gemini
if CODE_REVIEW_USE_CLAUDE:
try:
from services.anthropic_service import anthropic_service
except Exception as e:
logger.warning("[CodeReview] Claude service import 失敗,退回 Gemini: %s", e)
anthropic_service = None # type: ignore
if anthropic_service is not None and anthropic_service.is_available():
with log_ai_call(
caller='code_review_openclaw',
provider='claude',
model=CLAUDE_REVIEW_MODEL,
request_id=f"cr-{self.commit_sha[:8]}",
meta={
'commit': self.commit_sha[:8],
'branch': self.branch,
'flag': 'CODE_REVIEW_USE_CLAUDE',
},
) as _ctx:
resp = anthropic_service.generate(
prompt=user_prompt,
system_prompt=system, # ephemeral cache5 分鐘 TTL省 ~90% 成本)
model=CLAUDE_REVIEW_MODEL,
max_tokens=2048,
temperature=0.2, # code review 要精確
cache_system=True,
timeout=120,
)
if resp.success:
_ctx.set_tokens(input=resp.input_tokens, output=resp.output_tokens)
_ctx.set_cache_hit(resp.cache_hit)
_ctx.add_meta('cache_creation_tokens', resp.cache_creation_tokens)
_ctx.add_meta('cache_read_tokens', resp.cache_read_tokens)
return resp.content or ""
# Claude 失敗 → fallback 到 GeminiL2
_ctx.set_error(resp.error or 'claude generate failed')
_ctx.fallback_to_caller('code_review_openclaw_gemini')
logger.warning(
"[CodeReview] Claude 失敗,降級 Gemini: %s", resp.error,
)
else:
logger.info(
"[CodeReview] CODE_REVIEW_USE_CLAUDE=true 但 Claude 不可用(缺 API key 或 SDK退回 Gemini",
)
# ── L2Gemini — Phase 1 v5.0 logger 追蹤 ────────────────────────────
if GEMINI_API_KEY:
with log_ai_call(
caller='code_review_openclaw',

View File

@@ -0,0 +1,309 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/test_anthropic_service.py
Unit tests for services/anthropic_service.py — Operation Ollama-First v5.0 Phase 7.

Coverage required by the Phase 7 spec:
- generate happy path: cache_creation / cache_read usage is parsed correctly
- generate prompt cache: cache_system=True attaches cache_control to the system block
- is_available() is False when ANTHROPIC_API_KEY is not set
- an SDK ImportError does not propagate (logged as an error; is_available() is False)
- an SDK exception makes generate return success=False instead of raising
- cache_hit property: cache_read_tokens > 0 -> True
- check_connection success and failure
"""
from __future__ import annotations
import os
import sys
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# ─────────────────────────────────────────────────────────────────────────────
# Helpers仿造 anthropic SDK Response 結構
# ─────────────────────────────────────────────────────────────────────────────
def _fake_anthropic_response(
    text: str = "ok",
    input_tokens: int = 100,
    output_tokens: int = 50,
    cache_creation_input_tokens: int = 0,
    cache_read_input_tokens: int = 0,
    model: str = "claude-opus-4-7",
):
    """Build a lightweight stand-in for ``anthropic.types.Message``.

    The result exposes ``content`` (a single text block), ``usage`` (the four
    token counters) and ``model``.
    """
    usage_counters = {
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "cache_creation_input_tokens": cache_creation_input_tokens,
        "cache_read_input_tokens": cache_read_input_tokens,
    }
    return SimpleNamespace(
        content=[SimpleNamespace(text=text, type="text")],
        usage=SimpleNamespace(**usage_counters),
        model=model,
    )
@pytest.fixture
def mock_sdk(monkeypatch):
    """Install a fake ``anthropic`` module in ``sys.modules``.

    This lets ``AnthropicService._init_client`` take the successful
    ``import anthropic`` path. Returns ``(fake_module, fake_client)``, where
    ``fake_client`` is what ``fake_module.Anthropic(...)`` returns.
    """
    sdk_client = MagicMock()
    sdk_module = MagicMock()
    sdk_module.Anthropic.return_value = sdk_client
    monkeypatch.setitem(sys.modules, 'anthropic', sdk_module)
    return sdk_module, sdk_client
# ─────────────────────────────────────────────────────────────────────────────
# is_available() 測試
# ─────────────────────────────────────────────────────────────────────────────
def test_is_available_false_when_no_api_key(monkeypatch):
    """Without ANTHROPIC_API_KEY, is_available() must be False."""
    monkeypatch.delenv('ANTHROPIC_API_KEY', raising=False)

    import importlib
    import services.anthropic_service as svc_mod

    # Reload so the module-level ANTHROPIC_API_KEY constant is read again
    # from the (now key-less) environment.
    importlib.reload(svc_mod)

    service = svc_mod.AnthropicService(api_key='')
    assert service.is_available() is False
def test_is_available_true_when_sdk_ready(mock_sdk):
    """With an API key and an importable SDK, is_available() must be True."""
    from services.anthropic_service import AnthropicService

    service = AnthropicService(api_key='sk-ant-test')
    assert service.is_available() is True
def test_is_available_false_on_import_error(monkeypatch):
    """SDK not installed (ImportError): is_available() is False, nothing raises."""
    # A None entry in sys.modules makes `import anthropic` fail.
    monkeypatch.setitem(sys.modules, 'anthropic', None)

    from services.anthropic_service import AnthropicService

    service = AnthropicService(api_key='sk-ant-test')
    assert service.is_available() is False
def test_is_available_false_on_init_exception(monkeypatch):
    """SDK constructor raises: is_available() is False, nothing propagates."""
    broken_sdk = MagicMock()
    broken_sdk.Anthropic.side_effect = RuntimeError("auth failed")
    monkeypatch.setitem(sys.modules, 'anthropic', broken_sdk)

    from services.anthropic_service import AnthropicService

    service = AnthropicService(api_key='sk-ant-test')
    assert service.is_available() is False
# ─────────────────────────────────────────────────────────────────────────────
# generate() 正常路徑
# ─────────────────────────────────────────────────────────────────────────────
def test_generate_success_basic(mock_sdk):
    """Happy path: content and token counters are parsed from the response."""
    _, client = mock_sdk
    client.messages.create.return_value = _fake_anthropic_response(
        text="hello world",
        input_tokens=120,
        output_tokens=40,
    )

    from services.anthropic_service import AnthropicService

    service = AnthropicService(api_key='sk-ant-test')
    result = service.generate(prompt="say hi", max_tokens=100)

    assert result.success is True
    assert result.error is None
    assert result.content == "hello world"
    # Token accounting
    assert result.input_tokens == 120
    assert result.output_tokens == 40
    # No cache activity was reported by the fake response
    assert result.cache_creation_tokens == 0
    assert result.cache_read_tokens == 0
    assert result.cache_hit is False
    assert result.duration_ms >= 0
def test_generate_with_cache_creation_and_read(mock_sdk):
    """cache_creation_input_tokens / cache_read_input_tokens reach the response."""
    _, client = mock_sdk
    client.messages.create.return_value = _fake_anthropic_response(
        cache_creation_input_tokens=500,
        cache_read_input_tokens=2000,
    )

    from services.anthropic_service import AnthropicService

    service = AnthropicService(api_key='sk-ant-test')
    result = service.generate(prompt="reuse", system_prompt="stable system")

    assert result.success is True
    assert result.cache_creation_tokens == 500
    assert result.cache_read_tokens == 2000
    assert result.cache_hit is True
def test_generate_with_cache_system_adds_cache_control(mock_sdk):
    """cache_system=True sends the system prompt as a block with ephemeral cache_control."""
    _, client = mock_sdk
    client.messages.create.return_value = _fake_anthropic_response()

    from services.anthropic_service import AnthropicService

    service = AnthropicService(api_key='sk-ant-test')
    service.generate(prompt="user", system_prompt="my-system", cache_system=True)

    sent = client.messages.create.call_args[1]
    system_blocks = sent['system']
    assert isinstance(system_blocks, list)
    first_block = system_blocks[0]
    assert first_block['type'] == 'text'
    assert first_block['text'] == 'my-system'
    assert first_block['cache_control'] == {"type": "ephemeral"}
def test_generate_without_cache_system_uses_string(mock_sdk):
    """cache_system=False sends the system prompt as a plain string."""
    _, client = mock_sdk
    client.messages.create.return_value = _fake_anthropic_response()

    from services.anthropic_service import AnthropicService

    service = AnthropicService(api_key='sk-ant-test')
    service.generate(prompt="user", system_prompt="my-system", cache_system=False)

    sent = client.messages.create.call_args[1]
    assert sent['system'] == 'my-system'
def test_generate_without_system_prompt(mock_sdk):
    """Without a system_prompt, no ``system`` key is sent to the SDK."""
    _, client = mock_sdk
    client.messages.create.return_value = _fake_anthropic_response()

    from services.anthropic_service import AnthropicService

    service = AnthropicService(api_key='sk-ant-test')
    service.generate(prompt="user")

    sent = client.messages.create.call_args[1]
    assert 'system' not in sent
def test_generate_passes_temperature_and_max_tokens(mock_sdk):
    """model / max_tokens / temperature / messages are forwarded unchanged."""
    _, client = mock_sdk
    client.messages.create.return_value = _fake_anthropic_response()

    from services.anthropic_service import AnthropicService

    service = AnthropicService(api_key='sk-ant-test')
    service.generate(
        prompt="x",
        model="claude-sonnet-4-6",
        max_tokens=2048,
        temperature=0.5,
    )

    sent = client.messages.create.call_args[1]
    assert sent['model'] == 'claude-sonnet-4-6'
    assert sent['max_tokens'] == 2048
    assert sent['temperature'] == 0.5
    assert sent['messages'] == [{"role": "user", "content": "x"}]
# ─────────────────────────────────────────────────────────────────────────────
# generate() 失敗路徑
# ─────────────────────────────────────────────────────────────────────────────
def test_generate_returns_failure_when_no_client(monkeypatch):
    """No API key: generate returns success=False and does not raise."""
    import services.anthropic_service as svc_mod

    monkeypatch.delenv('ANTHROPIC_API_KEY', raising=False)
    # The service falls back to the module-level ANTHROPIC_API_KEY constant,
    # which was read when the module was first imported. Clearing the env
    # var alone does not reset it, so blank the constant as well; otherwise
    # a key present at import time would give this test a live client.
    monkeypatch.setattr(svc_mod, 'ANTHROPIC_API_KEY', '')

    svc = svc_mod.AnthropicService(api_key='')
    resp = svc.generate(prompt="x")

    assert resp.success is False
    assert resp.content == ""
    assert "not initialized" in (resp.error or "")
def test_generate_handles_sdk_exception(mock_sdk):
    """An SDK exception becomes success=False with the type and message in error."""
    _, client = mock_sdk
    client.messages.create.side_effect = RuntimeError("rate limit")

    from services.anthropic_service import AnthropicService

    service = AnthropicService(api_key='sk-ant-test')
    result = service.generate(prompt="x")

    assert result.success is False
    error_text = result.error or ""
    assert "RuntimeError" in error_text
    assert "rate limit" in error_text
    assert result.duration_ms >= 0
# ─────────────────────────────────────────────────────────────────────────────
# ClaudeResponse cache_hit property
# ─────────────────────────────────────────────────────────────────────────────
def test_cache_hit_property():
    """cache_hit is True exactly when cache_read_tokens is greater than zero."""
    from services.anthropic_service import ClaudeResponse

    expectations = ((0, False), (1, True), (10000, True))
    for cache_read, expected in expectations:
        response = ClaudeResponse(success=True, content="x", model="m",
                                  cache_read_tokens=cache_read)
        assert response.cache_hit is expected
def test_total_tokens_property():
    """total_tokens is the sum of input_tokens and output_tokens."""
    from services.anthropic_service import ClaudeResponse

    response = ClaudeResponse(
        success=True,
        content="x",
        model="m",
        input_tokens=100,
        output_tokens=50,
    )
    assert response.total_tokens == 150
# ─────────────────────────────────────────────────────────────────────────────
# check_connection
# ─────────────────────────────────────────────────────────────────────────────
def test_check_connection_success(mock_sdk):
    """check_connection returns True when the probe request succeeds."""
    _, client = mock_sdk
    client.messages.create.return_value = _fake_anthropic_response(text="pong")

    from services.anthropic_service import AnthropicService

    service = AnthropicService(api_key='sk-ant-test')
    assert service.check_connection() is True
def test_check_connection_fail_no_client(monkeypatch):
    """No API key: check_connection returns False without calling the SDK."""
    import services.anthropic_service as svc_mod

    monkeypatch.delenv('ANTHROPIC_API_KEY', raising=False)
    # The module-level ANTHROPIC_API_KEY constant is read at import time and
    # used as the fallback key, so it must be blanked too; clearing only the
    # env var would leave a key captured at import time in effect.
    monkeypatch.setattr(svc_mod, 'ANTHROPIC_API_KEY', '')

    svc = svc_mod.AnthropicService(api_key='')
    assert svc.check_connection() is False
def test_check_connection_fail_on_sdk_error(mock_sdk):
    """check_connection returns False when the SDK call raises."""
    _, client = mock_sdk
    client.messages.create.side_effect = RuntimeError("boom")

    from services.anthropic_service import AnthropicService

    service = AnthropicService(api_key='sk-ant-test')
    assert service.check_connection() is False
# ─────────────────────────────────────────────────────────────────────────────
# COST_TABLE 整合(確認 ai_call_logger 認得 claude 模型)
# ─────────────────────────────────────────────────────────────────────────────
def test_cost_table_has_claude_models():
    """ai_call_logger knows the three Claude models and prices them as expected."""
    from services.ai_call_logger import COST_TABLE, _calc_cost

    for model_name in ('claude-opus-4-7', 'claude-sonnet-4-6', 'claude-haiku-4-5'):
        assert model_name in COST_TABLE

    million = 1_000_000
    # Opus: 1M in + 1M out should cost 15 + 75 = 90 USD
    opus_cost = _calc_cost('claude-opus-4-7', million, million)
    assert abs(opus_cost - 90.0) < 1e-6
    # Haiku: 1M in + 1M out should cost 0.8 + 4.0 = 4.8 USD
    haiku_cost = _calc_cost('claude-haiku-4-5', million, million)
    assert abs(haiku_cost - 4.8) < 1e-6

View File

@@ -0,0 +1,244 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/test_code_review_claude_routing.py
Phase 7 Frontier upgrade: Claude routing tests for CodeReviewPipeline._openclaw_assess.

Acceptance criteria:
- flag=false -> the Claude path is never triggered (regression guard)
- flag=true + Claude available -> Claude is used and its content is returned
- flag=true + Claude fails -> falls back to the Gemini/Elephant path
- flag=true + Claude unavailable (is_available=False) -> Claude is skipped
"""
from __future__ import annotations
import importlib
import os
import sys
import types
from unittest.mock import MagicMock
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# ─────────────────────────────────────────────────────────────────────────────
# 共用工具
# ─────────────────────────────────────────────────────────────────────────────
def _reload_pipeline():
    """Re-import the pipeline module and return it.

    The module reads CODE_REVIEW_USE_CLAUDE at module level, so a reload is
    needed for an environment change made by a test to take effect.
    """
    import services.code_review_pipeline_service as pipeline_module

    importlib.reload(pipeline_module)
    return pipeline_module
def _make_pipeline(svc_mod, commit="abc12345"):
    """Create a CodeReviewPipeline from *svc_mod* with fixed test arguments."""
    pipeline_args = {
        "commit_sha": commit,
        "changed_files": ["services/foo.py"],
        "branch": "main",
        "deploy_type": "sync",
    }
    return svc_mod.CodeReviewPipeline(**pipeline_args)
def _stub_anthropic(monkeypatch, svc_mod, *, available: bool, success: bool = True,
                    content: str = "CLAUDE-RESULT", error: str = None):
    """Replace ``services.anthropic_service`` with a fake module.

    The pipeline does ``from services.anthropic_service import
    anthropic_service`` inside the function, so swapping the entry in
    ``sys.modules`` is enough for it to pick up the fake singleton.

    Returns the fake service so tests can inspect its calls.
    """
    canned_response = MagicMock(
        success=success,
        content=content if success else "",
        input_tokens=200,
        output_tokens=100,
        cache_creation_tokens=50,
        cache_read_tokens=150,
        cache_hit=True,
        error=error,
    )

    stub_service = MagicMock()
    stub_service.is_available.return_value = available
    stub_service.generate.return_value = canned_response

    # Build a throwaway module and register it over any real import result.
    stub_module = types.ModuleType('services.anthropic_service')
    stub_module.anthropic_service = stub_service
    monkeypatch.setitem(sys.modules, 'services.anthropic_service', stub_module)
    return stub_service
def _stub_logger(monkeypatch):
    """Keep log_ai_call from writing to the real database."""
    import services.ai_call_logger as ai_logger

    monkeypatch.setattr(ai_logger, '_write_to_db', lambda state: None)
    monkeypatch.setenv('AI_CALL_LOGGING_ENABLED', 'false')
def _stub_gemini_and_elephant(monkeypatch, *,
                              gemini_text: str = "GEMINI-RESULT",
                              elephant_text: str = "ELEPHANT-RESULT",
                              gemini_works: bool = True):
    """Stub ``google.generativeai`` and ``services.elephant_service``.

    The pipeline imports both lazily inside ``_openclaw_assess``, so placing
    fakes in ``sys.modules`` is sufficient. With ``gemini_works=False`` the
    fake ``GenerativeModel`` raises ``RuntimeError("gemini broken")``.

    Returns ``(fake_genai_module, fake_elephant_service)``.
    """
    # 1) Fake google.generativeai
    genai_stub = types.ModuleType('google.generativeai')
    genai_stub.types = types.SimpleNamespace(GenerationConfig=lambda **kw: MagicMock())
    genai_stub.configure = lambda **kw: None
    if not gemini_works:
        genai_stub.GenerativeModel = MagicMock(side_effect=RuntimeError("gemini broken"))
    else:
        gemini_reply = MagicMock()
        gemini_reply.text = gemini_text
        gemini_reply.usage_metadata = MagicMock(prompt_token_count=10, candidates_token_count=5)
        gemini_model = MagicMock()
        gemini_model.generate_content.return_value = gemini_reply
        genai_stub.GenerativeModel = MagicMock(return_value=gemini_model)

    # google.generativeai is a submodule: register it and its parent package.
    google_stub = types.ModuleType('google')
    google_stub.generativeai = genai_stub
    monkeypatch.setitem(sys.modules, 'google', google_stub)
    monkeypatch.setitem(sys.modules, 'google.generativeai', genai_stub)

    # 2) Fake elephant_service
    elephant_reply = MagicMock(success=True, content=elephant_text,
                               input_tokens=20, output_tokens=10, error=None)
    elephant_stub = MagicMock()
    elephant_stub.generate.return_value = elephant_reply
    elephant_module = types.ModuleType('services.elephant_service')
    elephant_module.elephant_service = elephant_stub
    monkeypatch.setitem(sys.modules, 'services.elephant_service', elephant_module)

    return genai_stub, elephant_stub
# ─────────────────────────────────────────────────────────────────────────────
# Tests
# ─────────────────────────────────────────────────────────────────────────────
def test_flag_false_uses_gemini(monkeypatch):
    """CODE_REVIEW_USE_CLAUDE=false (the default): the Claude path is never used."""
    monkeypatch.setenv('CODE_REVIEW_USE_CLAUDE', 'false')
    monkeypatch.setenv('GEMINI_API_KEY', 'test-key')
    _stub_logger(monkeypatch)

    pipeline_mod = _reload_pipeline()
    claude_stub = _stub_anthropic(monkeypatch, pipeline_mod, available=True)
    _stub_gemini_and_elephant(monkeypatch)

    assessment = _make_pipeline(pipeline_mod)._openclaw_assess(
        files={"services/foo.py": "def x(): pass"},
        findings=[],
    )

    assert assessment == "GEMINI-RESULT"
    claude_stub.generate.assert_not_called()
def test_flag_true_uses_claude(monkeypatch):
    """flag=true and Claude available: Claude answers, Gemini/Elephant stay idle."""
    monkeypatch.setenv('CODE_REVIEW_USE_CLAUDE', 'true')
    monkeypatch.setenv('GEMINI_API_KEY', 'test-key')
    _stub_logger(monkeypatch)

    pipeline_mod = _reload_pipeline()
    claude_stub = _stub_anthropic(monkeypatch, pipeline_mod, available=True,
                                  success=True, content="CLAUDE-RESULT")
    genai_stub, elephant_stub = _stub_gemini_and_elephant(monkeypatch)

    high_finding = {
        "severity": "HIGH",
        "file": "services/foo.py",
        "description": "x",
        "type": "bug",
    }
    assessment = _make_pipeline(pipeline_mod)._openclaw_assess(
        files={"services/foo.py": "def x(): pass"},
        findings=[high_finding],
    )

    assert assessment == "CLAUDE-RESULT"
    claude_stub.generate.assert_called_once()
    # A successful Claude call short-circuits the lower tiers.
    genai_stub.GenerativeModel.assert_not_called()
    elephant_stub.generate.assert_not_called()

    # Verify the arguments handed to Claude.
    sent = claude_stub.generate.call_args.kwargs
    assert sent['cache_system'] is True        # ephemeral cache enabled
    assert sent['temperature'] == 0.2          # precise setting for code review
    assert sent['model'] == 'claude-opus-4-7'  # default is Opus 4.7
    assert sent['system_prompt']               # a system prompt was passed
    sha = 'abc12345'
    prompt_text = sent['prompt']
    assert f"commit {sha[:8]}" in prompt_text.lower() or sha in prompt_text
def test_flag_true_claude_fails_falls_back_to_gemini(monkeypatch):
    """Claude returns success=False: Gemini takes over."""
    monkeypatch.setenv('CODE_REVIEW_USE_CLAUDE', 'true')
    monkeypatch.setenv('GEMINI_API_KEY', 'test-key')
    _stub_logger(monkeypatch)

    pipeline_mod = _reload_pipeline()
    claude_stub = _stub_anthropic(monkeypatch, pipeline_mod, available=True,
                                  success=False, error="RateLimitError: too many")
    genai_stub, _elephant_stub = _stub_gemini_and_elephant(monkeypatch)

    assessment = _make_pipeline(pipeline_mod)._openclaw_assess(
        files={"services/foo.py": "def x(): pass"},
        findings=[],
    )

    assert assessment == "GEMINI-RESULT"
    claude_stub.generate.assert_called_once()
    genai_stub.GenerativeModel.assert_called_once()
def test_flag_true_claude_unavailable_uses_gemini(monkeypatch):
    """flag=true but is_available() is False: L1 is skipped and L2 Gemini runs."""
    monkeypatch.setenv('CODE_REVIEW_USE_CLAUDE', 'true')
    monkeypatch.setenv('GEMINI_API_KEY', 'test-key')
    _stub_logger(monkeypatch)

    pipeline_mod = _reload_pipeline()
    claude_stub = _stub_anthropic(monkeypatch, pipeline_mod, available=False)
    genai_stub, _elephant_stub = _stub_gemini_and_elephant(monkeypatch)

    assessment = _make_pipeline(pipeline_mod)._openclaw_assess(
        files={"services/foo.py": "def x(): pass"},
        findings=[],
    )

    assert assessment == "GEMINI-RESULT"
    claude_stub.generate.assert_not_called()
    genai_stub.GenerativeModel.assert_called_once()
def test_flag_true_full_fallback_chain(monkeypatch):
    """Claude fails and Gemini fails too: Elephant produces the final answer."""
    monkeypatch.setenv('CODE_REVIEW_USE_CLAUDE', 'true')
    monkeypatch.setenv('GEMINI_API_KEY', 'test-key')
    _stub_logger(monkeypatch)

    pipeline_mod = _reload_pipeline()
    claude_stub = _stub_anthropic(monkeypatch, pipeline_mod, available=True,
                                  success=False, error="claude down")
    _genai_stub, elephant_stub = _stub_gemini_and_elephant(
        monkeypatch, gemini_works=False,
    )

    assessment = _make_pipeline(pipeline_mod)._openclaw_assess(
        files={"services/foo.py": "def x(): pass"},
        findings=[],
    )

    assert assessment == "ELEPHANT-RESULT"
    claude_stub.generate.assert_called_once()
    elephant_stub.generate.assert_called_once()