From 943de8466c6318170783e2ca4e00f7715549b0b0 Mon Sep 17 00:00:00 2001 From: OoO Date: Sun, 3 May 2026 23:31:30 +0800 Subject: [PATCH] =?UTF-8?q?feat(p7):=20Anthropic=20SDK=20+=20Claude=20Opus?= =?UTF-8?q?=204.7=20=E6=8E=A5=20Code=20Review=20(feature=20flag=20OFF)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operation Ollama-First v5.0 / Phase 7 Frontier 升級 services/anthropic_service.py (新檔, 226 行) - AnthropicService 包裝 + ClaudeResponse dataclass - Ephemeral prompt cache 5 分鐘 TTL(重複 system_prompt 省 90% 成本) - usage 解析 input/output/cache_creation/cache_read 四欄位 - ANTHROPIC_API_KEY 未設或 SDK 缺失時 is_available()=False 靜默退化 code_review_pipeline_service.py — _openclaw_assess 加 L1 Claude 分支 - CODE_REVIEW_USE_CLAUDE flag (預設 OFF,等 ANTHROPIC_API_KEY 設定後翻 ON) - 路由:Claude Opus 4.7 (Arena code Elo 1548) → Gemini → ElephantAlpha 三層 - request_id 串鏈不變 ai_call_logger.py COST_TABLE 補 3 個 Claude 模型: - claude-opus-4-7: $15/$75 per M tokens (程式碼 #1) - claude-sonnet-4-6: $3/$15 per M tokens (agentic 平衡) - claude-haiku-4-5: $0.8/$4 per M tokens (輕量快速) requirements.txt: 加 anthropic>=0.40.0 .env.example: 加 ANTHROPIC_API_KEY / CODE_REVIEW_USE_CLAUDE / CLAUDE_MODEL 52 unit tests 全綠(22 logger + 18 anthropic + 5 routing + 7 security) 啟用步驟(待統帥手動): 1. .env 加 ANTHROPIC_API_KEY=sk-ant-... 2. CODE_REVIEW_USE_CLAUDE=true + restart momo-app 3. 
觀察 ai_calls.cache_read_tokens > 0 確認 cache 生效 Co-Authored-By: Claude Opus 4.7 (1M context) --- .env.example | 10 + requirements.txt | 1 + services/ai_call_logger.py | 7 +- services/anthropic_service.py | 228 +++++++++++++++++ services/code_review_pipeline_service.py | 64 ++++- tests/test_anthropic_service.py | 309 +++++++++++++++++++++++ tests/test_code_review_claude_routing.py | 244 ++++++++++++++++++ 7 files changed, 855 insertions(+), 8 deletions(-) create mode 100644 services/anthropic_service.py create mode 100644 tests/test_anthropic_service.py create mode 100644 tests/test_code_review_claude_routing.py diff --git a/.env.example b/.env.example index cae6476..a9bec7e 100644 --- a/.env.example +++ b/.env.example @@ -161,6 +161,16 @@ GEMINI_API_KEY= GEMINI_MODEL=gemini-1.5-flash OPENCLAW_MODEL=gemini-2.5-flash-preview-05-20 +# ── Anthropic Claude API(Phase 7 Frontier 升級)─────────────────────────── +# 用途:Code Review 升級到 Claude Opus 4.7(程式碼能力 #1,Arena Elo 1548) +# 取得方式:https://console.anthropic.com/settings/keys +# feature flag CODE_REVIEW_USE_CLAUDE 預設 false → 行為與 Phase 6 完全相同 +# 啟用步驟:(1) 設 ANTHROPIC_API_KEY (2) CODE_REVIEW_USE_CLAUDE=true +ANTHROPIC_API_KEY= +CODE_REVIEW_USE_CLAUDE=false +CLAUDE_MODEL=claude-opus-4-7 +CLAUDE_TIMEOUT=120 + # Debug and Monitoring ELEPHANT_ALPHA_DEBUG_MODE=false ELEPHANT_ALPHA_METRICS_ENABLED=true diff --git a/requirements.txt b/requirements.txt index 25276ad..f95a9a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,6 +17,7 @@ google-auth-oauthlib google-auth-httplib2 google-api-python-client google-generativeai +anthropic>=0.40.0 # Phase 7 Frontier 升級:Claude Opus 4.7 Code Review(feature flag CODE_REVIEW_USE_CLAUDE 預設 OFF) feedparser beautifulsoup4 lxml diff --git a/services/ai_call_logger.py b/services/ai_call_logger.py index ad5676e..0f862a9 100644 --- a/services/ai_call_logger.py +++ b/services/ai_call_logger.py @@ -51,9 +51,10 @@ COST_TABLE: Dict[str, Dict[str, float]] = { 'meta/llama-3.3-70b-instruct': {'in': 0.0, 
'out': 0.0}, 'nvidia/llama-3.3-nemotron-super-49b-v1.5': {'in': 0.0, 'out': 0.0}, 'deepseek-ai/deepseek-v3.2': {'in': 0.0, 'out': 0.0}, - # Claude - 'claude-opus-4-7': {'in': 15.0, 'out': 75.0}, - 'claude-sonnet-4-6': {'in': 3.0, 'out': 15.0}, + # Claude (Anthropic) — 2026-05 市場價(USD per 1M tokens) + 'claude-opus-4-7': {'in': 15.0, 'out': 75.0}, # 程式碼 #1,Arena Elo 1548 + 'claude-sonnet-4-6': {'in': 3.0, 'out': 15.0}, # agentic 平衡 + 'claude-haiku-4-5': {'in': 0.8, 'out': 4.0}, # 輕量快速 # Ollama 自架 (全 0) 'hermes3:latest': {'in': 0.0, 'out': 0.0}, 'qwen2.5-coder:7b': {'in': 0.0, 'out': 0.0}, diff --git a/services/anthropic_service.py b/services/anthropic_service.py new file mode 100644 index 0000000..94efafa --- /dev/null +++ b/services/anthropic_service.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +services/anthropic_service.py +Operation Ollama-First v5.0 / Phase 7 — Anthropic Claude API 包裝 + +模型對照(2026-05 市場): +- claude-opus-4-7: 程式碼 #1(Arena Elo 1548),200K context,$15/$75 per 1M tokens +- claude-sonnet-4-6: agentic 平衡型,200K context,$3/$15 per 1M tokens +- claude-haiku-4-5: 輕量快速,200K context,$0.8/$4 per 1M tokens + +設計原則: +1. SDK 包裝層;介面與 services/gemini_service.py 對齊(generate / check_connection) +2. 自動 prompt cache(5 分鐘 ephemeral TTL,重複 system_prompt 省 ~90% 成本) +3. usage 完整回傳:input_tokens / output_tokens / cache_creation_input_tokens / cache_read_input_tokens +4. 主流程不爆:失敗回 ClaudeResponse(success=False),由呼叫端決定 fallback 路徑 +5. 
# NOTE: feature-flag gating (for example the code-review pipeline's
# CODE_REVIEW_USE_CLAUDE) is the caller's responsibility; this module is a
# pure SDK wrapper around the Anthropic Messages API.

import logging
import os
import time
from dataclasses import dataclass
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)

_FALLBACK_MODEL = 'claude-opus-4-7'
_FALLBACK_TIMEOUT = 120  # seconds


def _env_timeout(default: int = _FALLBACK_TIMEOUT) -> int:
    """Return CLAUDE_TIMEOUT as a positive int, or *default* when unset/invalid.

    A blank or non-numeric value (easy to produce from an ``.env`` file) must
    not crash the import of this module, so it is logged and ignored.
    """
    raw = os.getenv('CLAUDE_TIMEOUT', '').strip()
    if not raw:
        return default
    try:
        seconds = int(raw)
    except ValueError:
        logger.warning("[Anthropic] invalid CLAUDE_TIMEOUT=%r, using %ds", raw, default)
        return default
    return seconds if seconds > 0 else default


# Import-time snapshots, kept for backward compatibility with code that reads
# these names. AnthropicService re-reads the environment when it is
# constructed / called, so a deployment can switch settings without these
# snapshots getting in the way; the snapshots are only the fallback.
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY', '')
DEFAULT_MODEL = os.getenv('CLAUDE_MODEL', '') or _FALLBACK_MODEL
TIMEOUT = _env_timeout()


@dataclass
class ClaudeResponse:
    """Result of one Claude call; returned on success and on failure alike."""
    success: bool
    content: str
    model: str
    input_tokens: int = 0            # usage.input_tokens
    output_tokens: int = 0           # usage.output_tokens
    cache_creation_tokens: int = 0   # usage.cache_creation_input_tokens
    cache_read_tokens: int = 0       # usage.cache_read_input_tokens
    duration_ms: int = 0
    error: Optional[str] = None

    @property
    def cache_hit(self) -> bool:
        """True when at least one input token was served from the prompt cache."""
        return self.cache_read_tokens > 0

    @property
    def total_tokens(self) -> int:
        """input_tokens + output_tokens (cache counters are NOT included)."""
        return self.input_tokens + self.output_tokens

    @property
    def total_input_tokens(self) -> int:
        """All prompt tokens: uncached input + cache writes + cache reads.

        With prompt caching enabled the API reports cached tokens in the two
        cache counters rather than in ``input_tokens``, so this is the figure
        to use when sizing prompts or estimating cost.
        """
        return self.input_tokens + self.cache_creation_tokens + self.cache_read_tokens


class AnthropicService:
    """Claude API wrapper with optional prompt caching and structured usage."""

    def __init__(self, model: Optional[str] = None, api_key: Optional[str] = None):
        """
        Args:
            model: default model name. When omitted, CLAUDE_MODEL is read from
                the current environment, then the import-time default is used.
            api_key: API key. ``None`` means "look it up" (current environment
                first, then the import-time snapshot). An explicit empty
                string means "no key": the service stays unavailable.
        """
        self.model = model or os.getenv('CLAUDE_MODEL', '') or DEFAULT_MODEL
        if api_key is None:
            api_key = os.getenv('ANTHROPIC_API_KEY', '') or ANTHROPIC_API_KEY
        self.api_key = api_key
        self._client = None
        self._init_client()

    def _init_client(self) -> None:
        """Create the SDK client; degrade silently (client stays None) when the
        key is missing, the SDK is not installed, or construction fails."""
        if not self.api_key:
            logger.info("[Anthropic] ANTHROPIC_API_KEY 未設定,service 不可用")
            return
        try:
            # Imported lazily so the package stays optional.
            import anthropic
            self._client = anthropic.Anthropic(api_key=self.api_key)
            logger.info("[Anthropic] SDK 初始化成功,預設模型=%s", self.model)
        except ImportError:
            logger.error("[Anthropic] SDK 未安裝(pip install anthropic>=0.40.0)")
        except Exception as e:
            # Boundary catch: a broken SDK must not take the host app down.
            logger.error("[Anthropic] SDK 初始化失敗: %s", e)

    def is_available(self) -> bool:
        """Return True when a client was created successfully."""
        return self._client is not None

    @staticmethod
    def _usage_count(usage: Any, field: str) -> int:
        """Read one integer counter from a usage object; missing/None -> 0."""
        if usage is None:
            return 0
        return int(getattr(usage, field, 0) or 0)

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        model: Optional[str] = None,
        max_tokens: int = 4096,
        temperature: float = 0.3,
        cache_system: bool = True,
        timeout: Optional[int] = None,
    ) -> ClaudeResponse:
        """
        Send one user message to Claude and return the text reply.

        Args:
            prompt: user message text.
            system_prompt: optional system instruction. Keep it stable across
                calls if you want cache_system to pay off.
            model: model name; defaults to self.model.
            max_tokens: output token limit passed to the API.
            temperature: sampling temperature passed to the API.
            cache_system: when True and system_prompt is given, the system
                prompt is sent as a text block tagged with
                ``cache_control={"type": "ephemeral"}``.
            timeout: per-request timeout in seconds; defaults to CLAUDE_TIMEOUT
                (read at call time, falling back to the module default).

        Returns:
            ClaudeResponse. Never raises for API/SDK errors: failures come back
            as ``success=False`` with ``error`` set.
        """
        model_name = model or self.model
        request_timeout = timeout if timeout is not None else _env_timeout(TIMEOUT)
        start = time.monotonic()

        if self._client is None:
            return ClaudeResponse(
                success=False, content="", model=model_name,
                error="anthropic client not initialized (check ANTHROPIC_API_KEY)",
            )

        # A blank prompt can only produce an API error; fail fast and skip the
        # network round-trip.
        if isinstance(prompt, str) and not prompt.strip():
            return ClaudeResponse(
                success=False, content="", model=model_name,
                error="empty prompt",
            )

        try:
            kwargs: Dict[str, Any] = {
                "model": model_name,
                "max_tokens": max_tokens,
                "temperature": temperature,
                "messages": [{"role": "user", "content": prompt}],
                "timeout": request_timeout,
            }

            if system_prompt:
                if cache_system:
                    kwargs["system"] = [
                        {
                            "type": "text",
                            "text": system_prompt,
                            "cache_control": {"type": "ephemeral"},
                        }
                    ]
                else:
                    kwargs["system"] = system_prompt

            resp = self._client.messages.create(**kwargs)

            # Keep only blocks that carry text; other block kinds are skipped.
            texts = [
                text
                for text in (getattr(block, 'text', None) for block in (resp.content or []))
                if text
            ]
            content = "\n".join(texts)

            usage = getattr(resp, 'usage', None)
            input_tokens = self._usage_count(usage, 'input_tokens')
            output_tokens = self._usage_count(usage, 'output_tokens')
            cache_creation = self._usage_count(usage, 'cache_creation_input_tokens')
            cache_read = self._usage_count(usage, 'cache_read_input_tokens')

            duration_ms = int((time.monotonic() - start) * 1000)
            logger.info(
                "[Anthropic] generate ok model=%s tokens=%d/%d cache=%d/%d duration=%dms",
                model_name, input_tokens, output_tokens, cache_creation, cache_read, duration_ms,
            )

            return ClaudeResponse(
                success=True,
                content=content,
                # Prefer the model name echoed by the API when present.
                model=getattr(resp, 'model', model_name) or model_name,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cache_creation_tokens=cache_creation,
                cache_read_tokens=cache_read,
                duration_ms=duration_ms,
            )

        except Exception as e:
            # Boundary catch by design: callers choose the fallback path based
            # on success=False instead of handling SDK exception types.
            duration_ms = int((time.monotonic() - start) * 1000)
            err_msg = f"{type(e).__name__}: {str(e)[:300]}"
            logger.warning("[Anthropic] generate failed model=%s duration=%dms err=%s",
                           model_name, duration_ms, err_msg)
            return ClaudeResponse(
                success=False, content="", model=model_name,
                duration_ms=duration_ms, error=err_msg,
            )

    def check_connection(self) -> bool:
        """Cheap liveness probe: send "ping" with max_tokens=10 and a 15 s
        timeout. Returns False (never raises) when the service is unavailable
        or the call fails."""
        if self._client is None:
            return False
        try:
            r = self.generate(
                prompt="ping",
                max_tokens=10,
                temperature=0.0,
                cache_system=False,
                timeout=15,
            )
            return r.success
        except Exception as e:
            logger.warning("[Anthropic] check_connection failed: %s", e)
            return False


# Module-level singleton (created at import; unavailable when no key is set).
anthropic_service = AnthropicService()


if __name__ == "__main__":
    # Manual smoke test (requires ANTHROPIC_API_KEY).
    logging.basicConfig(level=logging.INFO)
    svc = AnthropicService()
    print(f"is_available: {svc.is_available()}")
    if svc.is_available():
        r = svc.generate(
            prompt="用一句話介紹 Python。",
            system_prompt="你是繁體中文助手。",
            max_tokens=100,
        )
        print(f"success={r.success} tokens={r.input_tokens}/{r.output_tokens} "
              f"cache={r.cache_creation_tokens}/{r.cache_read_tokens} duration={r.duration_ms}ms")
        print(r.content if r.success else r.error)
r.error) diff --git a/services/code_review_pipeline_service.py b/services/code_review_pipeline_service.py index a6e96be..3b57a28 100644 --- a/services/code_review_pipeline_service.py +++ b/services/code_review_pipeline_service.py @@ -44,13 +44,17 @@ _pipeline_lock = threading.Lock() GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "") # LOCKED-GEMINI: Code Review 全 repo diff 可達 100K+ tokens,超過 Ollama 32K context -# 未來可升 Claude Opus 4.7 (200K context, Arena code Elo 1548) — Phase 7 任務 -# ADR-028 鎖定場景 #5 +# Phase 7 升級:CODE_REVIEW_USE_CLAUDE=true 時改走 Claude Opus 4.7(200K context, Arena code Elo 1548) +# 預設 OFF,行為與 Phase 6 完全相同;ADR-028 鎖定場景 #5 REVIEW_MODEL = os.getenv("OPENCLAW_MODEL", "gemini-2.5-flash") INTERNAL_TOKEN = os.getenv("INTERNAL_WEBHOOK_TOKEN", "") AUTO_FIX_ENABLED = os.getenv("CODE_REVIEW_AUTO_FIX_ENABLED", "true").lower() == "true" ALLOW_INSECURE_WEBHOOK = os.getenv("MOMO_ALLOW_INSECURE_INTERNAL_WEBHOOK_FOR_DEV", "").lower() == "true" +# Phase 7 Frontier 升級 feature flag — 預設 OFF,啟用後 _openclaw_assess 改走 Claude Opus 4.7 +CODE_REVIEW_USE_CLAUDE = os.getenv("CODE_REVIEW_USE_CLAUDE", "false").lower() == "true" +CLAUDE_REVIEW_MODEL = os.getenv("CLAUDE_MODEL", "claude-opus-4-7") + # ═══════════════════════════════════════════════════════════════════════════════ # Pipeline Class @@ -276,8 +280,11 @@ class CodeReviewPipeline: def _openclaw_assess(self, files: Dict[str, str], findings: List[Dict]) -> str: """ - 優先用 Gemini(GEMINI_API_KEY),降級用 ElephantAlpha via OpenRouter - (容器內 OPENROUTER_API_KEY 一定存在) + 路由優先序: + L1 (Phase 7, flag CODE_REVIEW_USE_CLAUDE=true) → Claude Opus 4.7 (Arena code #1) + L2 (預設) → Gemini(GEMINI_API_KEY) + L3 (降級) → ElephantAlpha via OpenRouter + feature flag 預設 OFF,行為與 Phase 6 完全相同。 """ sev = self.state["severity_summary"] findings_json = json.dumps(findings[:8], ensure_ascii=False, indent=2) @@ -301,7 +308,54 @@ class CodeReviewPipeline: 💡 架構優化方向(1條長期建議) ✅ 本次部署亮點""" - # 優先 Gemini — Phase 1 v5.0 logger 追蹤 + # ── L1:Phase 7 Frontier — Claude Opus 
4.7(程式碼能力 #1)──────────── + # feature flag 預設 OFF;ON 時優先走,失敗 fallback 到 L2 Gemini + if CODE_REVIEW_USE_CLAUDE: + try: + from services.anthropic_service import anthropic_service + except Exception as e: + logger.warning("[CodeReview] Claude service import 失敗,退回 Gemini: %s", e) + anthropic_service = None # type: ignore + + if anthropic_service is not None and anthropic_service.is_available(): + with log_ai_call( + caller='code_review_openclaw', + provider='claude', + model=CLAUDE_REVIEW_MODEL, + request_id=f"cr-{self.commit_sha[:8]}", + meta={ + 'commit': self.commit_sha[:8], + 'branch': self.branch, + 'flag': 'CODE_REVIEW_USE_CLAUDE', + }, + ) as _ctx: + resp = anthropic_service.generate( + prompt=user_prompt, + system_prompt=system, # ephemeral cache(5 分鐘 TTL,省 ~90% 成本) + model=CLAUDE_REVIEW_MODEL, + max_tokens=2048, + temperature=0.2, # code review 要精確 + cache_system=True, + timeout=120, + ) + if resp.success: + _ctx.set_tokens(input=resp.input_tokens, output=resp.output_tokens) + _ctx.set_cache_hit(resp.cache_hit) + _ctx.add_meta('cache_creation_tokens', resp.cache_creation_tokens) + _ctx.add_meta('cache_read_tokens', resp.cache_read_tokens) + return resp.content or "" + # Claude 失敗 → fallback 到 Gemini(L2) + _ctx.set_error(resp.error or 'claude generate failed') + _ctx.fallback_to_caller('code_review_openclaw_gemini') + logger.warning( + "[CodeReview] Claude 失敗,降級 Gemini: %s", resp.error, + ) + else: + logger.info( + "[CodeReview] CODE_REVIEW_USE_CLAUDE=true 但 Claude 不可用(缺 API key 或 SDK),退回 Gemini", + ) + + # ── L2:Gemini — Phase 1 v5.0 logger 追蹤 ──────────────────────────── if GEMINI_API_KEY: with log_ai_call( caller='code_review_openclaw', diff --git a/tests/test_anthropic_service.py b/tests/test_anthropic_service.py new file mode 100644 index 0000000..c705d1b --- /dev/null +++ b/tests/test_anthropic_service.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +tests/test_anthropic_service.py +services/anthropic_service.py 單元測試 — Operation 
Ollama-First v5.0 Phase 7 + +測試紀律(Phase 7 spec): + - generate 正常路徑:cache_creation + cache_read 解析正確 + - generate prompt cache:cache_system=True 時 system 加 cache_control + - ANTHROPIC_API_KEY 未設定時 is_available() == False + - SDK ImportError 時不爆(log.error,is_available()=False) + - SDK 例外時 generate 回 success=False 不 raise + - cache_hit property 邏輯(cache_read_tokens > 0 → True) + - check_connection 正常與失敗 +""" + +from __future__ import annotations + +import os +import sys +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +# ───────────────────────────────────────────────────────────────────────────── +# Helpers:仿造 anthropic SDK Response 結構 +# ───────────────────────────────────────────────────────────────────────────── + +def _fake_anthropic_response( + text: str = "ok", + input_tokens: int = 100, + output_tokens: int = 50, + cache_creation_input_tokens: int = 0, + cache_read_input_tokens: int = 0, + model: str = "claude-opus-4-7", +): + """模擬 anthropic.types.Message""" + block = SimpleNamespace(text=text, type="text") + usage = SimpleNamespace( + input_tokens=input_tokens, + output_tokens=output_tokens, + cache_creation_input_tokens=cache_creation_input_tokens, + cache_read_input_tokens=cache_read_input_tokens, + ) + return SimpleNamespace(content=[block], usage=usage, model=model) + + +@pytest.fixture +def mock_sdk(monkeypatch): + """模擬 anthropic SDK:注入 fake module 到 sys.modules, + 讓 AnthropicService._init_client 走 import anthropic 成功路徑。""" + fake_anthropic = MagicMock() + fake_client = MagicMock() + fake_anthropic.Anthropic.return_value = fake_client + monkeypatch.setitem(sys.modules, 'anthropic', fake_anthropic) + return fake_anthropic, fake_client + + +# ───────────────────────────────────────────────────────────────────────────── +# is_available() 測試 +# ───────────────────────────────────────────────────────────────────────────── + 
+def test_is_available_false_when_no_api_key(monkeypatch): + """ANTHROPIC_API_KEY 未設 → is_available() False""" + monkeypatch.delenv('ANTHROPIC_API_KEY', raising=False) + # reload 確保模組層 ANTHROPIC_API_KEY 重新讀取 + import importlib + import services.anthropic_service as svc_mod + importlib.reload(svc_mod) + svc = svc_mod.AnthropicService(api_key='') + assert svc.is_available() is False + + +def test_is_available_true_when_sdk_ready(mock_sdk): + """API key + SDK 都有 → is_available() True""" + from services.anthropic_service import AnthropicService + svc = AnthropicService(api_key='sk-ant-test') + assert svc.is_available() is True + + +def test_is_available_false_on_import_error(monkeypatch): + """SDK 未安裝(ImportError)→ is_available() False,不 raise""" + # 移除 anthropic 模組讓 import 失敗 + monkeypatch.setitem(sys.modules, 'anthropic', None) + from services.anthropic_service import AnthropicService + svc = AnthropicService(api_key='sk-ant-test') + assert svc.is_available() is False + + +def test_is_available_false_on_init_exception(monkeypatch): + """SDK 初始化拋例外 → is_available() False,不 raise""" + fake_anthropic = MagicMock() + fake_anthropic.Anthropic.side_effect = RuntimeError("auth failed") + monkeypatch.setitem(sys.modules, 'anthropic', fake_anthropic) + + from services.anthropic_service import AnthropicService + svc = AnthropicService(api_key='sk-ant-test') + assert svc.is_available() is False + + +# ───────────────────────────────────────────────────────────────────────────── +# generate() 正常路徑 +# ───────────────────────────────────────────────────────────────────────────── + +def test_generate_success_basic(mock_sdk): + """generate 正常路徑:tokens 正確解析""" + fake_anthropic, fake_client = mock_sdk + fake_client.messages.create.return_value = _fake_anthropic_response( + text="hello world", + input_tokens=120, + output_tokens=40, + ) + + from services.anthropic_service import AnthropicService + svc = AnthropicService(api_key='sk-ant-test') + + resp = svc.generate(prompt="say hi", 
max_tokens=100) + assert resp.success is True + assert resp.content == "hello world" + assert resp.input_tokens == 120 + assert resp.output_tokens == 40 + assert resp.cache_creation_tokens == 0 + assert resp.cache_read_tokens == 0 + assert resp.cache_hit is False + assert resp.duration_ms >= 0 + assert resp.error is None + + +def test_generate_with_cache_creation_and_read(mock_sdk): + """generate 解析 cache_creation_input_tokens / cache_read_input_tokens""" + fake_anthropic, fake_client = mock_sdk + fake_client.messages.create.return_value = _fake_anthropic_response( + cache_creation_input_tokens=500, + cache_read_input_tokens=2000, + ) + + from services.anthropic_service import AnthropicService + svc = AnthropicService(api_key='sk-ant-test') + + resp = svc.generate(prompt="reuse", system_prompt="stable system") + assert resp.success is True + assert resp.cache_creation_tokens == 500 + assert resp.cache_read_tokens == 2000 + assert resp.cache_hit is True + + +def test_generate_with_cache_system_adds_cache_control(mock_sdk): + """cache_system=True 時 system_prompt 帶 ephemeral cache_control""" + fake_anthropic, fake_client = mock_sdk + fake_client.messages.create.return_value = _fake_anthropic_response() + + from services.anthropic_service import AnthropicService + svc = AnthropicService(api_key='sk-ant-test') + + svc.generate( + prompt="user", + system_prompt="my-system", + cache_system=True, + ) + _, kwargs = fake_client.messages.create.call_args + assert isinstance(kwargs['system'], list) + assert kwargs['system'][0]['type'] == 'text' + assert kwargs['system'][0]['text'] == 'my-system' + assert kwargs['system'][0]['cache_control'] == {"type": "ephemeral"} + + +def test_generate_without_cache_system_uses_string(mock_sdk): + """cache_system=False 時 system 為純字串""" + fake_anthropic, fake_client = mock_sdk + fake_client.messages.create.return_value = _fake_anthropic_response() + + from services.anthropic_service import AnthropicService + svc = 
AnthropicService(api_key='sk-ant-test') + + svc.generate(prompt="user", system_prompt="my-system", cache_system=False) + _, kwargs = fake_client.messages.create.call_args + assert kwargs['system'] == 'my-system' + + +def test_generate_without_system_prompt(mock_sdk): + """無 system_prompt 時 kwargs 不含 system""" + fake_anthropic, fake_client = mock_sdk + fake_client.messages.create.return_value = _fake_anthropic_response() + + from services.anthropic_service import AnthropicService + svc = AnthropicService(api_key='sk-ant-test') + + svc.generate(prompt="user") + _, kwargs = fake_client.messages.create.call_args + assert 'system' not in kwargs + + +def test_generate_passes_temperature_and_max_tokens(mock_sdk): + """temperature / max_tokens / model 正確傳給 SDK""" + fake_anthropic, fake_client = mock_sdk + fake_client.messages.create.return_value = _fake_anthropic_response() + + from services.anthropic_service import AnthropicService + svc = AnthropicService(api_key='sk-ant-test') + + svc.generate( + prompt="x", model="claude-sonnet-4-6", + max_tokens=2048, temperature=0.5, + ) + _, kwargs = fake_client.messages.create.call_args + assert kwargs['model'] == 'claude-sonnet-4-6' + assert kwargs['max_tokens'] == 2048 + assert kwargs['temperature'] == 0.5 + assert kwargs['messages'] == [{"role": "user", "content": "x"}] + + +# ───────────────────────────────────────────────────────────────────────────── +# generate() 失敗路徑 +# ───────────────────────────────────────────────────────────────────────────── + +def test_generate_returns_failure_when_no_client(monkeypatch): + """無 API key → generate 回 success=False 不 raise""" + monkeypatch.delenv('ANTHROPIC_API_KEY', raising=False) + from services.anthropic_service import AnthropicService + svc = AnthropicService(api_key='') + + resp = svc.generate(prompt="x") + assert resp.success is False + assert resp.content == "" + assert "not initialized" in (resp.error or "") + + +def test_generate_handles_sdk_exception(mock_sdk): + """SDK 拋例外 → 
generate 回 success=False,error 含 type+msg""" + fake_anthropic, fake_client = mock_sdk + fake_client.messages.create.side_effect = RuntimeError("rate limit") + + from services.anthropic_service import AnthropicService + svc = AnthropicService(api_key='sk-ant-test') + + resp = svc.generate(prompt="x") + assert resp.success is False + assert "RuntimeError" in (resp.error or "") + assert "rate limit" in (resp.error or "") + assert resp.duration_ms >= 0 + + +# ───────────────────────────────────────────────────────────────────────────── +# ClaudeResponse cache_hit property +# ───────────────────────────────────────────────────────────────────────────── + +def test_cache_hit_property(): + from services.anthropic_service import ClaudeResponse + assert ClaudeResponse(success=True, content="x", model="m", + cache_read_tokens=0).cache_hit is False + assert ClaudeResponse(success=True, content="x", model="m", + cache_read_tokens=1).cache_hit is True + assert ClaudeResponse(success=True, content="x", model="m", + cache_read_tokens=10000).cache_hit is True + + +def test_total_tokens_property(): + from services.anthropic_service import ClaudeResponse + r = ClaudeResponse(success=True, content="x", model="m", + input_tokens=100, output_tokens=50) + assert r.total_tokens == 150 + + +# ───────────────────────────────────────────────────────────────────────────── +# check_connection +# ───────────────────────────────────────────────────────────────────────────── + +def test_check_connection_success(mock_sdk): + fake_anthropic, fake_client = mock_sdk + fake_client.messages.create.return_value = _fake_anthropic_response(text="pong") + + from services.anthropic_service import AnthropicService + svc = AnthropicService(api_key='sk-ant-test') + assert svc.check_connection() is True + + +def test_check_connection_fail_no_client(monkeypatch): + monkeypatch.delenv('ANTHROPIC_API_KEY', raising=False) + from services.anthropic_service import AnthropicService + svc = 
AnthropicService(api_key='') + assert svc.check_connection() is False + + +def test_check_connection_fail_on_sdk_error(mock_sdk): + fake_anthropic, fake_client = mock_sdk + fake_client.messages.create.side_effect = RuntimeError("boom") + + from services.anthropic_service import AnthropicService + svc = AnthropicService(api_key='sk-ant-test') + assert svc.check_connection() is False + + +# ───────────────────────────────────────────────────────────────────────────── +# COST_TABLE 整合(確認 ai_call_logger 認得 claude 模型) +# ───────────────────────────────────────────────────────────────────────────── + +def test_cost_table_has_claude_models(): + from services.ai_call_logger import COST_TABLE, _calc_cost + assert 'claude-opus-4-7' in COST_TABLE + assert 'claude-sonnet-4-6' in COST_TABLE + assert 'claude-haiku-4-5' in COST_TABLE + # opus 1M in/1M out 應為 15 + 75 = 90 USD + assert abs(_calc_cost('claude-opus-4-7', 1_000_000, 1_000_000) - 90.0) < 1e-6 + # haiku 1M in/1M out 應為 0.8 + 4.0 = 4.8 USD + assert abs(_calc_cost('claude-haiku-4-5', 1_000_000, 1_000_000) - 4.8) < 1e-6 diff --git a/tests/test_code_review_claude_routing.py b/tests/test_code_review_claude_routing.py new file mode 100644 index 0000000..cc074ef --- /dev/null +++ b/tests/test_code_review_claude_routing.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +tests/test_code_review_claude_routing.py +Phase 7 Frontier 升級:CodeReviewPipeline._openclaw_assess Claude 路由測試 + +驗收項目: + - flag=false → Claude 路徑零觸發(regression 防護) + - flag=true + Claude 可用 → 走 Claude,回傳 Claude content + - flag=true + Claude 失敗 → fallback 進入 Gemini/Elephant 路徑 + - flag=true + Claude 不可用(is_available=False)→ 跳過 Claude +""" + +from __future__ import annotations + +import importlib +import os +import sys +import types +from unittest.mock import MagicMock + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +# 
───────────────────────────────────────────────────────────────────────────── +# 共用工具 +# ───────────────────────────────────────────────────────────────────────────── + +def _reload_pipeline(): + """重新載入 pipeline 模組(讓 module-level CODE_REVIEW_USE_CLAUDE flag 即時生效)""" + import services.code_review_pipeline_service as svc_mod + importlib.reload(svc_mod) + return svc_mod + + +def _make_pipeline(svc_mod, commit="abc12345"): + return svc_mod.CodeReviewPipeline( + commit_sha=commit, + changed_files=["services/foo.py"], + branch="main", + deploy_type="sync", + ) + + +def _stub_anthropic(monkeypatch, svc_mod, *, available: bool, success: bool = True, + content: str = "CLAUDE-RESULT", error: str = None): + """注入假的 anthropic_service 全域單例到 services.anthropic_service。 + 目標:避免 monkeypatch sys.modules 失敗(pipeline 內是 from services.anthropic_service import anthropic_service) + """ + fake_resp = MagicMock() + fake_resp.success = success + fake_resp.content = content if success else "" + fake_resp.input_tokens = 200 + fake_resp.output_tokens = 100 + fake_resp.cache_creation_tokens = 50 + fake_resp.cache_read_tokens = 150 + fake_resp.cache_hit = True + fake_resp.error = error + + fake_svc = MagicMock() + fake_svc.is_available.return_value = available + fake_svc.generate.return_value = fake_resp + + # 動態造一個假 module 並寫入 sys.modules(覆蓋既有 import 結果) + fake_module = types.ModuleType('services.anthropic_service') + fake_module.anthropic_service = fake_svc + monkeypatch.setitem(sys.modules, 'services.anthropic_service', fake_module) + return fake_svc + + +def _stub_logger(monkeypatch): + """避免 log_ai_call 真寫 DB""" + import services.ai_call_logger as logger_mod + monkeypatch.setattr(logger_mod, '_write_to_db', lambda state: None) + monkeypatch.setenv('AI_CALL_LOGGING_ENABLED', 'false') + + +def _stub_gemini_and_elephant(monkeypatch, *, + gemini_text: str = "GEMINI-RESULT", + elephant_text: str = "ELEPHANT-RESULT", + gemini_works: bool = True): + """攔截 _openclaw_assess 內的 import 
google.generativeai / elephant_service。 + + pipeline 內是 lazy import,所以注入到 sys.modules 即可生效。 + """ + # 1) Fake google.generativeai + fake_genai = types.ModuleType('google.generativeai') + fake_types = types.SimpleNamespace(GenerationConfig=lambda **kw: MagicMock()) + fake_genai.types = fake_types + fake_genai.configure = lambda **kw: None + + if gemini_works: + fake_resp = MagicMock() + fake_resp.text = gemini_text + fake_resp.usage_metadata = MagicMock(prompt_token_count=10, candidates_token_count=5) + fake_model = MagicMock() + fake_model.generate_content.return_value = fake_resp + fake_genai.GenerativeModel = MagicMock(return_value=fake_model) + else: + fake_genai.GenerativeModel = MagicMock(side_effect=RuntimeError("gemini broken")) + + # google.generativeai 是子模組;注入它和父模組 + fake_google = types.ModuleType('google') + fake_google.generativeai = fake_genai + monkeypatch.setitem(sys.modules, 'google', fake_google) + monkeypatch.setitem(sys.modules, 'google.generativeai', fake_genai) + + # 2) Fake elephant_service + fake_eresp = MagicMock(success=True, content=elephant_text, + input_tokens=20, output_tokens=10, error=None) + fake_elephant = MagicMock() + fake_elephant.generate.return_value = fake_eresp + fake_eservice_mod = types.ModuleType('services.elephant_service') + fake_eservice_mod.elephant_service = fake_elephant + monkeypatch.setitem(sys.modules, 'services.elephant_service', fake_eservice_mod) + + return fake_genai, fake_elephant + + +# ───────────────────────────────────────────────────────────────────────────── +# Tests +# ───────────────────────────────────────────────────────────────────────────── + +def test_flag_false_uses_gemini(monkeypatch): + """CODE_REVIEW_USE_CLAUDE=false(預設)→ Claude 路徑完全不觸發""" + monkeypatch.setenv('CODE_REVIEW_USE_CLAUDE', 'false') + monkeypatch.setenv('GEMINI_API_KEY', 'test-key') + _stub_logger(monkeypatch) + + svc_mod = _reload_pipeline() + fake_claude = _stub_anthropic(monkeypatch, svc_mod, available=True) + 
_stub_gemini_and_elephant(monkeypatch) + + pipeline = _make_pipeline(svc_mod) + result = pipeline._openclaw_assess( + files={"services/foo.py": "def x(): pass"}, + findings=[], + ) + + assert result == "GEMINI-RESULT" + fake_claude.generate.assert_not_called() + + +def test_flag_true_uses_claude(monkeypatch): + """flag=true + Claude 可用 → 走 Claude,Gemini 不被呼叫""" + monkeypatch.setenv('CODE_REVIEW_USE_CLAUDE', 'true') + monkeypatch.setenv('GEMINI_API_KEY', 'test-key') + _stub_logger(monkeypatch) + + svc_mod = _reload_pipeline() + fake_claude = _stub_anthropic(monkeypatch, svc_mod, available=True, + success=True, content="CLAUDE-RESULT") + fake_genai, fake_elephant = _stub_gemini_and_elephant(monkeypatch) + + pipeline = _make_pipeline(svc_mod) + result = pipeline._openclaw_assess( + files={"services/foo.py": "def x(): pass"}, + findings=[{"severity": "HIGH", "file": "services/foo.py", + "description": "x", "type": "bug"}], + ) + + assert result == "CLAUDE-RESULT" + fake_claude.generate.assert_called_once() + # Gemini 不應被觸發(Claude 成功時短路) + fake_genai.GenerativeModel.assert_not_called() + fake_elephant.generate.assert_not_called() + + # 確認 Claude 呼叫參數正確 + call_kwargs = fake_claude.generate.call_args.kwargs + assert call_kwargs['cache_system'] is True # 啟用 ephemeral cache + assert call_kwargs['temperature'] == 0.2 # code review 精確設定 + assert call_kwargs['model'] == 'claude-opus-4-7' # 預設 Opus 4.7 + assert call_kwargs['system_prompt'] # system prompt 有傳 + assert 'commit ' + 'abc12345'[:8] in call_kwargs['prompt'].lower() or \ + 'abc12345' in call_kwargs['prompt'] + + +def test_flag_true_claude_fails_falls_back_to_gemini(monkeypatch): + """Claude 回 success=False → fallback Gemini 接手""" + monkeypatch.setenv('CODE_REVIEW_USE_CLAUDE', 'true') + monkeypatch.setenv('GEMINI_API_KEY', 'test-key') + _stub_logger(monkeypatch) + + svc_mod = _reload_pipeline() + fake_claude = _stub_anthropic(monkeypatch, svc_mod, available=True, + success=False, error="RateLimitError: too many") + 
fake_genai, fake_elephant = _stub_gemini_and_elephant(monkeypatch) + + pipeline = _make_pipeline(svc_mod) + result = pipeline._openclaw_assess( + files={"services/foo.py": "def x(): pass"}, + findings=[], + ) + + assert result == "GEMINI-RESULT" + fake_claude.generate.assert_called_once() + fake_genai.GenerativeModel.assert_called_once() + + +def test_flag_true_claude_unavailable_uses_gemini(monkeypatch): + """flag=true 但 is_available()=False → 跳 L1 直走 L2""" + monkeypatch.setenv('CODE_REVIEW_USE_CLAUDE', 'true') + monkeypatch.setenv('GEMINI_API_KEY', 'test-key') + _stub_logger(monkeypatch) + + svc_mod = _reload_pipeline() + fake_claude = _stub_anthropic(monkeypatch, svc_mod, available=False) + fake_genai, fake_elephant = _stub_gemini_and_elephant(monkeypatch) + + pipeline = _make_pipeline(svc_mod) + result = pipeline._openclaw_assess( + files={"services/foo.py": "def x(): pass"}, + findings=[], + ) + + assert result == "GEMINI-RESULT" + fake_claude.generate.assert_not_called() + fake_genai.GenerativeModel.assert_called_once() + + +def test_flag_true_full_fallback_chain(monkeypatch): + """Claude 失敗 + Gemini 也失敗 → 最終 Elephant 接手""" + monkeypatch.setenv('CODE_REVIEW_USE_CLAUDE', 'true') + monkeypatch.setenv('GEMINI_API_KEY', 'test-key') + _stub_logger(monkeypatch) + + svc_mod = _reload_pipeline() + fake_claude = _stub_anthropic(monkeypatch, svc_mod, available=True, + success=False, error="claude down") + fake_genai, fake_elephant = _stub_gemini_and_elephant( + monkeypatch, gemini_works=False, + ) + + pipeline = _make_pipeline(svc_mod) + result = pipeline._openclaw_assess( + files={"services/foo.py": "def x(): pass"}, + findings=[], + ) + + assert result == "ELEPHANT-RESULT" + fake_claude.generate.assert_called_once() + fake_elephant.generate.assert_called_once()