Files
ewoooc/tests/test_code_review_claude_routing.py
OoO 943de8466c
Some checks failed
CD Pipeline / deploy (push) Has been cancelled
feat(p7): Anthropic SDK + Claude Opus 4.7 接 Code Review (feature flag OFF)
Operation Ollama-First v5.0 / Phase 7 Frontier 升級

services/anthropic_service.py (新檔, 226 行)
- AnthropicService 包裝 + ClaudeResponse dataclass
- Ephemeral prompt cache 5 分鐘 TTL(重複 system_prompt 省 90% 成本)
- usage 解析 input/output/cache_creation/cache_read 四欄位
- ANTHROPIC_API_KEY 未設或 SDK 缺失時 is_available()=False 靜默退化

code_review_pipeline_service.py — _openclaw_assess 加 L1 Claude 分支
- CODE_REVIEW_USE_CLAUDE flag (預設 OFF,等 ANTHROPIC_API_KEY 設定後翻 ON)
- 路由:Claude Opus 4.7 (Arena code Elo 1548) → Gemini → ElephantAlpha 三層
- request_id 串鏈不變

ai_call_logger.py COST_TABLE 補 3 個 Claude 模型:
- claude-opus-4-7:    $15/$75 per M tokens (程式碼 #1)
- claude-sonnet-4-6:  $3/$15  per M tokens (agentic 平衡)
- claude-haiku-4-5:   $0.8/$4 per M tokens (輕量快速)

requirements.txt: 加 anthropic>=0.40.0
.env.example: 加 ANTHROPIC_API_KEY / CODE_REVIEW_USE_CLAUDE / CLAUDE_MODEL

52 unit tests 全綠(22 logger + 18 anthropic + 5 routing + 7 security)

啟用步驟(待統帥手動):
  1. .env 加 ANTHROPIC_API_KEY=sk-ant-...
  2. CODE_REVIEW_USE_CLAUDE=true + restart momo-app
  3. 觀察 ai_calls.cache_read_tokens > 0 確認 cache 生效

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 23:31:30 +08:00

245 lines
9.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/test_code_review_claude_routing.py
Phase 7 Frontier 升級CodeReviewPipeline._openclaw_assess Claude 路由測試
驗收項目:
- flag=false → Claude 路徑零觸發regression 防護)
- flag=true + Claude 可用 → 走 Claude回傳 Claude content
- flag=true + Claude 失敗 → fallback 進入 Gemini/Elephant 路徑
- flag=true + Claude 不可用is_available=False→ 跳過 Claude
"""
from __future__ import annotations
import importlib
import os
import sys
import types
from unittest.mock import MagicMock
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# ─────────────────────────────────────────────────────────────────────────────
# 共用工具
# ─────────────────────────────────────────────────────────────────────────────
def _reload_pipeline():
    """Re-import the pipeline module and return it.

    Per the original author's note, the reload is what lets the module-level
    CODE_REVIEW_USE_CLAUDE flag pick up the environment set by the test.
    """
    import services.code_review_pipeline_service as pipeline_module

    importlib.reload(pipeline_module)
    return pipeline_module
def _make_pipeline(svc_mod, commit="abc12345"):
    """Build a ``CodeReviewPipeline`` from ``svc_mod`` with fixed test inputs.

    Args:
        svc_mod: Module object exposing ``CodeReviewPipeline``.
        commit: Value passed as ``commit_sha``.

    Returns:
        Whatever ``svc_mod.CodeReviewPipeline(...)`` returns.
    """
    pipeline_kwargs = {
        "commit_sha": commit,
        "changed_files": ["services/foo.py"],
        "branch": "main",
        "deploy_type": "sync",
    }
    return svc_mod.CodeReviewPipeline(**pipeline_kwargs)
def _stub_anthropic(monkeypatch, svc_mod, *, available: bool, success: bool = True,
                    content: str = "CLAUDE-RESULT", error: str = None):
    """Install a fake ``services.anthropic_service`` module in ``sys.modules``.

    The fake module exposes an ``anthropic_service`` attribute (a MagicMock)
    whose ``is_available()`` returns ``available`` and whose ``generate()``
    returns a canned response object. Per the original author's note, the
    pipeline does ``from services.anthropic_service import anthropic_service``,
    so replacing the ``sys.modules`` entry is what that import resolves to.

    Args:
        monkeypatch: pytest ``monkeypatch`` fixture; used for ``setitem`` so the
            ``sys.modules`` change is undone after the test.
        svc_mod: Accepted for call-site symmetry; not used by this helper.
        available: Return value of the fake ``is_available()``.
        success: ``success`` attribute of the canned response.
        content: ``content`` of the canned response; forced to ``""`` when
            ``success`` is false.
        error: ``error`` attribute of the canned response.

    Returns:
        The fake service MagicMock, so tests can assert on its calls.
    """
    canned_response = MagicMock(
        success=success,
        content=content if success else "",
        input_tokens=200,
        output_tokens=100,
        cache_creation_tokens=50,
        cache_read_tokens=150,
        cache_hit=True,
        error=error,
    )
    service_stub = MagicMock(**{
        "is_available.return_value": available,
        "generate.return_value": canned_response,
    })

    # Build a throwaway module object and register it under the real module's
    # dotted name, shadowing any previously imported module.
    module_name = 'services.anthropic_service'
    stub_module = types.ModuleType(module_name)
    stub_module.anthropic_service = service_stub
    monkeypatch.setitem(sys.modules, module_name, stub_module)
    return service_stub
def _stub_logger(monkeypatch):
    """Neutralise AI-call logging for the duration of a test.

    Replaces ``services.ai_call_logger._write_to_db`` with a no-op and sets
    ``AI_CALL_LOGGING_ENABLED=false``; per the original note, the intent is to
    keep ``log_ai_call`` from really writing to the database.
    """
    import services.ai_call_logger as ai_logger

    def _discard(state):
        # Swallow the record instead of persisting it.
        return None

    monkeypatch.setattr(ai_logger, '_write_to_db', _discard)
    monkeypatch.setenv('AI_CALL_LOGGING_ENABLED', 'false')
def _stub_gemini_and_elephant(monkeypatch, *,
                              gemini_text: str = "GEMINI-RESULT",
                              elephant_text: str = "ELEPHANT-RESULT",
                              gemini_works: bool = True):
    """Register fake ``google.generativeai`` and ``services.elephant_service`` modules.

    Per the original author's note, ``_openclaw_assess`` imports these lazily,
    so placing fakes in ``sys.modules`` is enough for the pipeline to use them.

    Args:
        monkeypatch: pytest ``monkeypatch`` fixture (``setitem`` is used so the
            ``sys.modules`` entries are restored afterwards).
        gemini_text: ``text`` of the fake Gemini response.
        elephant_text: ``content`` of the fake Elephant response.
        gemini_works: When false, constructing ``GenerativeModel`` raises
            ``RuntimeError("gemini broken")``.

    Returns:
        ``(fake_genai_module, fake_elephant_service)``.
    """
    # --- Tier: Gemini -------------------------------------------------------
    genai_stub = types.ModuleType('google.generativeai')
    genai_stub.types = types.SimpleNamespace(GenerationConfig=lambda **kw: MagicMock())
    genai_stub.configure = lambda **kw: None
    if not gemini_works:
        genai_stub.GenerativeModel = MagicMock(side_effect=RuntimeError("gemini broken"))
    else:
        usage = MagicMock(prompt_token_count=10, candidates_token_count=5)
        gemini_reply = MagicMock(text=gemini_text, usage_metadata=usage)
        model_stub = MagicMock()
        model_stub.generate_content.return_value = gemini_reply
        genai_stub.GenerativeModel = MagicMock(return_value=model_stub)

    # google.generativeai is a submodule, so both it and its parent package
    # are registered.
    google_pkg = types.ModuleType('google')
    google_pkg.generativeai = genai_stub
    monkeypatch.setitem(sys.modules, 'google', google_pkg)
    monkeypatch.setitem(sys.modules, 'google.generativeai', genai_stub)

    # --- Tier: Elephant -----------------------------------------------------
    elephant_reply = MagicMock()
    elephant_reply.success = True
    elephant_reply.content = elephant_text
    elephant_reply.input_tokens = 20
    elephant_reply.output_tokens = 10
    elephant_reply.error = None
    elephant_stub = MagicMock()
    elephant_stub.generate.return_value = elephant_reply
    elephant_module = types.ModuleType('services.elephant_service')
    elephant_module.elephant_service = elephant_stub
    monkeypatch.setitem(sys.modules, 'services.elephant_service', elephant_module)
    return genai_stub, elephant_stub
# ─────────────────────────────────────────────────────────────────────────────
# Tests
# ─────────────────────────────────────────────────────────────────────────────
def test_flag_false_uses_gemini(monkeypatch):
    """CODE_REVIEW_USE_CLAUDE=false (the default): the Claude path is never triggered."""
    for env_name, env_value in (('CODE_REVIEW_USE_CLAUDE', 'false'),
                                ('GEMINI_API_KEY', 'test-key')):
        monkeypatch.setenv(env_name, env_value)
    _stub_logger(monkeypatch)
    pipeline_mod = _reload_pipeline()
    # Claude is reported as available on purpose: only the flag should keep it idle.
    claude_stub = _stub_anthropic(monkeypatch, pipeline_mod, available=True)
    _stub_gemini_and_elephant(monkeypatch)

    source_files = {"services/foo.py": "def x(): pass"}
    verdict = _make_pipeline(pipeline_mod)._openclaw_assess(
        files=source_files,
        findings=[],
    )

    assert verdict == "GEMINI-RESULT"
    claude_stub.generate.assert_not_called()
def test_flag_true_uses_claude(monkeypatch):
    """flag=true + Claude available: Claude answers and Gemini/Elephant stay idle.

    Also checks the keyword arguments handed to ``anthropic_service.generate``:
    prompt caching on, temperature 0.2, the default model, a non-empty system
    prompt, and a prompt that mentions the commit SHA.
    """
    commit = "abc12345"
    monkeypatch.setenv('CODE_REVIEW_USE_CLAUDE', 'true')
    monkeypatch.setenv('GEMINI_API_KEY', 'test-key')
    # CLAUDE_MODEL is a documented override (presumably read from the
    # environment by the pipeline — confirm). Clear it before the reload so the
    # default-model assertion below does not depend on the developer's/CI env.
    monkeypatch.delenv('CLAUDE_MODEL', raising=False)
    _stub_logger(monkeypatch)
    svc_mod = _reload_pipeline()
    fake_claude = _stub_anthropic(monkeypatch, svc_mod, available=True,
                                  success=True, content="CLAUDE-RESULT")
    fake_genai, fake_elephant = _stub_gemini_and_elephant(monkeypatch)
    pipeline = _make_pipeline(svc_mod, commit=commit)

    result = pipeline._openclaw_assess(
        files={"services/foo.py": "def x(): pass"},
        findings=[{"severity": "HIGH", "file": "services/foo.py",
                   "description": "x", "type": "bug"}],
    )

    assert result == "CLAUDE-RESULT"
    fake_claude.generate.assert_called_once()
    # A successful Claude call must short-circuit the lower tiers.
    fake_genai.GenerativeModel.assert_not_called()
    fake_elephant.generate.assert_not_called()

    # Verify the arguments of the Claude call.
    call_kwargs = fake_claude.generate.call_args.kwargs
    assert call_kwargs['cache_system'] is True          # ephemeral prompt cache enabled
    assert call_kwargs['temperature'] == 0.2            # precise setting for code review
    assert call_kwargs['model'] == 'claude-opus-4-7'    # default model: Opus 4.7
    assert call_kwargs['system_prompt']                 # a system prompt was passed
    # The prompt must reference the commit under review (case-insensitive).
    assert commit in call_kwargs['prompt'].lower()
def test_flag_true_claude_fails_falls_back_to_gemini(monkeypatch):
    """Claude returns success=False: Gemini takes over as the fallback."""
    for env_name, env_value in (('CODE_REVIEW_USE_CLAUDE', 'true'),
                                ('GEMINI_API_KEY', 'test-key')):
        monkeypatch.setenv(env_name, env_value)
    _stub_logger(monkeypatch)
    pipeline_mod = _reload_pipeline()
    claude_stub = _stub_anthropic(
        monkeypatch, pipeline_mod,
        available=True, success=False, error="RateLimitError: too many",
    )
    genai_stub, _ = _stub_gemini_and_elephant(monkeypatch)

    source_files = {"services/foo.py": "def x(): pass"}
    verdict = _make_pipeline(pipeline_mod)._openclaw_assess(
        files=source_files,
        findings=[],
    )

    assert verdict == "GEMINI-RESULT"
    # Claude was tried exactly once before the Gemini model was constructed.
    claude_stub.generate.assert_called_once()
    genai_stub.GenerativeModel.assert_called_once()
def test_flag_true_claude_unavailable_uses_gemini(monkeypatch):
    """flag=true but is_available()=False: skip the Claude tier and go straight to Gemini."""
    for env_name, env_value in (('CODE_REVIEW_USE_CLAUDE', 'true'),
                                ('GEMINI_API_KEY', 'test-key')):
        monkeypatch.setenv(env_name, env_value)
    _stub_logger(monkeypatch)
    pipeline_mod = _reload_pipeline()
    claude_stub = _stub_anthropic(monkeypatch, pipeline_mod, available=False)
    genai_stub, _ = _stub_gemini_and_elephant(monkeypatch)

    source_files = {"services/foo.py": "def x(): pass"}
    verdict = _make_pipeline(pipeline_mod)._openclaw_assess(
        files=source_files,
        findings=[],
    )

    assert verdict == "GEMINI-RESULT"
    # An unavailable Claude must never be asked to generate.
    claude_stub.generate.assert_not_called()
    genai_stub.GenerativeModel.assert_called_once()
def test_flag_true_full_fallback_chain(monkeypatch):
    """Claude fails and Gemini fails too: Elephant finally answers.

    Each tier is asserted to have been attempted, so the test cannot pass by
    silently skipping the Gemini tier.
    """
    monkeypatch.setenv('CODE_REVIEW_USE_CLAUDE', 'true')
    monkeypatch.setenv('GEMINI_API_KEY', 'test-key')
    _stub_logger(monkeypatch)
    svc_mod = _reload_pipeline()
    fake_claude = _stub_anthropic(monkeypatch, svc_mod, available=True,
                                  success=False, error="claude down")
    # gemini_works=False makes GenerativeModel(...) raise RuntimeError.
    fake_genai, fake_elephant = _stub_gemini_and_elephant(
        monkeypatch, gemini_works=False,
    )
    pipeline = _make_pipeline(svc_mod)

    result = pipeline._openclaw_assess(
        files={"services/foo.py": "def x(): pass"},
        findings=[],
    )

    assert result == "ELEPHANT-RESULT"
    fake_claude.generate.assert_called_once()
    # The mock records the call even though its side_effect raises; use
    # assert_called() (not _once) so a pipeline-side retry would not break this.
    fake_genai.GenerativeModel.assert_called()
    fake_elephant.generate.assert_called_once()