From f8222006b847f85aba946c593f8251925e414590 Mon Sep 17 00:00:00 2001 From: OoO Date: Tue, 19 May 2026 22:15:48 +0800 Subject: [PATCH] =?UTF-8?q?[V10.281]=20=E5=BC=B7=E5=8C=96=20Code=20Review?= =?UTF-8?q?=20Ollama=20=E6=9C=AC=E5=9C=B0=E5=82=99=E6=8F=B4=E7=9F=A9?= =?UTF-8?q?=E9=99=A3=20|=20code=5Freview=5Fpipeline=5Fservice.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.example | 6 + TODO_NEXT_STEPS.txt | 1 + config.py | 2 +- docs/AI_INTELLIGENCE_MODULE_SOT.md | 3 +- services/code_review_pipeline_service.py | 134 ++++++++++++++++------- tests/test_code_review_claude_routing.py | 80 +++++++++++++- 6 files changed, 185 insertions(+), 41 deletions(-) diff --git a/.env.example b/.env.example index 45f5b6f..6240096 100644 --- a/.env.example +++ b/.env.example @@ -215,6 +215,12 @@ BOT_API_TOKEN=your_bot_api_token_here # 僅在需要短期關閉自動修復鏈時設為 false CODE_REVIEW_OLLAMA_MODEL=qwen2.5-coder:7b CODE_REVIEW_OLLAMA_TIMEOUT=45 +CODE_REVIEW_OLLAMA_SECONDARY_MODEL=gemma3:4b +CODE_REVIEW_OLLAMA_SECONDARY_TIMEOUT=25 +CODE_REVIEW_OLLAMA_FALLBACK_MODEL=hermes3:latest +CODE_REVIEW_OLLAMA_FALLBACK_TIMEOUT=20 +CODE_REVIEW_OLLAMA_NUM_PREDICT=384 +CODE_REVIEW_OLLAMA_KEEP_ALIVE=24h CODE_REVIEW_HERMES_TIMEOUT=35 CODE_REVIEW_HERMES_MAX_FILES=3 CODE_REVIEW_HERMES_MAX_CHARS=2500 diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index 05c56f7..e0c7446 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.281 強化 Code Review OpenClaw 本地備援矩陣:主機順序仍為 GCP-A → GCP-B → 111,但改成 GCP-A `qwen2.5-coder:7b`、GCP-B `gemma3:4b`、111 `hermes3:latest`,三段本地 Ollama 全失敗後才允許 Claude/Gemini 備援。 - V10.279 收斂 Code Review Ollama-first 路徑:OpenClaw assessment 預設改 `qwen2.5-coder:7b` + 45s/host timeout,Hermes scan 改 compact snippet + 35s/host timeout,避免三主機各卡 120s 後被迫觸發 Gemini 備援。 - V10.278 補 PChome 競價摘要 30 分鐘共享快取與 feeder/backfill 主動清除,並新增市場情報 `candidate_queue_review_ai_summary_preflight` 預覽 gate;API 只檢查未來摘要輸入與 Ollama-first/Gemini-backup-only policy,不呼叫 LLM、不派 Telegram、不寫 DB、不掛 scheduler。 - V10.276 修正 ElephantAlpha 價格類 Hermes prefetch timeout:`price_drop` / `market_opportunity` trigger 直接把 SQL 命中的 MOMO / PChome 價差實證轉成 HITL action lines,完整 Hermes LLM prefetch 預設關閉;無 DB 實證仍只記 suppressed telemetry / cooldown,不寫 `human_review`、不發空 Telegram。 diff --git a/config.py b/config.py index f99a605..21d361e 100644 --- a/config.py +++ b/config.py @@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.280" +SYSTEM_VERSION = "V10.281" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index aa8a213..3aebb48 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -2,7 +2,7 @@ > **最後更新**: 2026-05-19 (台北時間) > **狀態**: 🟢 四 AI Agent 自動化閉環已落地;LLM 路由紅線升級為 Ollama-first 三主機級聯,Gemini 僅備援 / 鎖定場景 -> **適用版本**: V10.279 +> **適用版本**: V10.281 --- @@ -18,6 +18,7 @@ - PPT vision、PPT 文案 final fallback、MCP 離線 final fallback 等特殊 Ollama 路徑也不得只打單一 host;如需 `/api/generate`,一律透過 `OllamaService.generate()`。 - Code Review pipeline 也必須 Ollama-first:Hermes scan 與 OpenClaw assessment 都走 `OllamaService` 三主機 retry;Gemini telemetry 只能以 `code_review_openclaw_gemini` 出現,表示 Ollama/可選 Claude 備援都失敗後才啟用。 - Code Review 的 OpenClaw assessment 預設使用 `qwen2.5-coder:7b` 與 45s/host timeout;Hermes scan 只送 compact snippet(預設 3 檔、每檔 2500 字)並使用 35s/host timeout,避免三主機各卡 120s 後把正常 code review 推進 Gemini 備援。 +- Code Review OpenClaw assessment 保持主機順序 GCP-A → GCP-B → 111,但可使用主機適配本地模型:GCP-A `qwen2.5-coder:7b`、GCP-B `gemma3:4b`、111 `hermes3:latest`;三段本地 Ollama 全失敗後才允許雲端備援。 - OpenClaw Telegram Q&A 主路徑也不得綁單一 host:`_call_qwen3_qa()` 必須透過 `OllamaService` 跑 GCP-A → GCP-B → 111,並把實際落點寫入 `ai_calls.provider`。 - OpenClaw Telegram 圖片商品辨識也必須 Ollama-first:`_identify_product_name_with_ollama_vision()` 透過 `OllamaService` 嘗試 GCP-A → GCP-B → 111;Gemini 只允許以 `openclaw_bot_image_gemini` caller 作為失敗後備援。 diff --git a/services/code_review_pipeline_service.py b/services/code_review_pipeline_service.py index 7ab0b6b..bdfcf4d 100644 --- a/services/code_review_pipeline_service.py +++ b/services/code_review_pipeline_service.py @@ -48,6 +48,22 @@ CODE_REVIEW_OLLAMA_MODEL = os.getenv( os.getenv("OPENCLAW_OLLAMA_MODEL", "qwen2.5-coder:7b"), ) CODE_REVIEW_OLLAMA_TIMEOUT = int(os.getenv("CODE_REVIEW_OLLAMA_TIMEOUT", "45")) +CODE_REVIEW_OLLAMA_SECONDARY_MODEL = os.getenv( + "CODE_REVIEW_OLLAMA_SECONDARY_MODEL", + "gemma3:4b", +) +CODE_REVIEW_OLLAMA_SECONDARY_TIMEOUT = int( + os.getenv("CODE_REVIEW_OLLAMA_SECONDARY_TIMEOUT", "25") +) +CODE_REVIEW_OLLAMA_FALLBACK_MODEL = os.getenv( + "CODE_REVIEW_OLLAMA_FALLBACK_MODEL", + _HERMES_MODEL, +) +CODE_REVIEW_OLLAMA_FALLBACK_TIMEOUT = int( + os.getenv("CODE_REVIEW_OLLAMA_FALLBACK_TIMEOUT", "20") +) +CODE_REVIEW_OLLAMA_NUM_PREDICT = int(os.getenv("CODE_REVIEW_OLLAMA_NUM_PREDICT", "384")) +CODE_REVIEW_OLLAMA_KEEP_ALIVE = os.getenv("CODE_REVIEW_OLLAMA_KEEP_ALIVE", "24h") CODE_REVIEW_HERMES_TIMEOUT = int(os.getenv("CODE_REVIEW_HERMES_TIMEOUT", "35")) CODE_REVIEW_HERMES_MAX_FILES = int(os.getenv("CODE_REVIEW_HERMES_MAX_FILES", "3")) CODE_REVIEW_HERMES_MAX_CHARS = int(os.getenv("CODE_REVIEW_HERMES_MAX_CHARS", "2500")) @@ -316,45 +332,87 @@ class CodeReviewPipeline: ✅ 本次部署亮點""" # ── L1:Ollama-first — GCP-A → GCP-B → 111 ────────────────────────── - from services.ollama_service import OllamaService, get_host_label, get_provider_tag + from services.ollama_service import ( + OLLAMA_HOST_FALLBACK, + OLLAMA_HOST_PRIMARY, + OLLAMA_HOST_SECONDARY, + OllamaService, + get_host_label, + get_provider_tag, + ) - with log_ai_call( - caller='code_review_openclaw', - provider='gcp_ollama', - model=CODE_REVIEW_OLLAMA_MODEL, - request_id=f"cr-{self.commit_sha[:8]}", - meta={ - 'commit': self.commit_sha[:8], - 'branch': self.branch, - 'route': 'ollama_first', - }, - ) as _ctx: - ollama = OllamaService(model=CODE_REVIEW_OLLAMA_MODEL) - resp = ollama.generate( - prompt=user_prompt, - system_prompt=system, - model=CODE_REVIEW_OLLAMA_MODEL, - temperature=0.2, - timeout=CODE_REVIEW_OLLAMA_TIMEOUT, - ) - actual_provider = get_provider_tag(resp.host or '') - _ctx.set_provider(actual_provider) - _ctx.set_tokens(input=resp.input_tokens, output=resp.output_tokens) - _ctx.add_meta('host', resp.host) - _ctx.add_meta('host_label', get_host_label(resp.host or '')) - if resp.success and (resp.content or '').strip(): - return resp.content or "" - _ctx.set_error(resp.error or 'ollama generate failed') - fallback_caller = ( - 'code_review_openclaw' - if CODE_REVIEW_USE_CLAUDE - else 'code_review_openclaw_gemini' - ) - _ctx.fallback_to_caller(fallback_caller) - logger.warning( - "[CodeReview] OpenClaw Ollama 三主機皆失敗,才啟用雲端備援: %s", - resp.error, - ) + fallback_caller = ( + 'code_review_openclaw' + if CODE_REVIEW_USE_CLAUDE + else 'code_review_openclaw_gemini' + ) + ollama_attempts = [ + ( + "primary_code", + OLLAMA_HOST_PRIMARY, + CODE_REVIEW_OLLAMA_MODEL, + CODE_REVIEW_OLLAMA_TIMEOUT, + ), + ( + "secondary_fast", + OLLAMA_HOST_SECONDARY, + CODE_REVIEW_OLLAMA_SECONDARY_MODEL, + CODE_REVIEW_OLLAMA_SECONDARY_TIMEOUT, + ), + ( + "lan_111_hermes", + OLLAMA_HOST_FALLBACK, + CODE_REVIEW_OLLAMA_FALLBACK_MODEL, + CODE_REVIEW_OLLAMA_FALLBACK_TIMEOUT, + ), + ] + last_ollama_error = None + + for attempt_index, (attempt_key, host, model_name, timeout_s) in enumerate( + ollama_attempts, + start=1, + ): + with log_ai_call( + caller='code_review_openclaw', + provider=get_provider_tag(host), + model=model_name, + request_id=f"cr-{self.commit_sha[:8]}", + meta={ + 'commit': self.commit_sha[:8], + 'branch': self.branch, + 'route': 'ollama_first', + 'attempt': attempt_index, + 'attempt_key': attempt_key, + 'timeout_s': timeout_s, + }, + ) as _ctx: + ollama = OllamaService(host=host, model=model_name) + resp = ollama.generate( + prompt=user_prompt, + system_prompt=system, + model=model_name, + temperature=0.2, + timeout=timeout_s, + keep_alive=CODE_REVIEW_OLLAMA_KEEP_ALIVE, + options={"num_predict": CODE_REVIEW_OLLAMA_NUM_PREDICT}, + ) + actual_host = resp.host or host + _ctx.set_provider(get_provider_tag(actual_host)) + _ctx.set_tokens(input=resp.input_tokens, output=resp.output_tokens) + _ctx.add_meta('host', actual_host) + _ctx.add_meta('host_label', get_host_label(actual_host)) + _ctx.add_meta('model', model_name) + if resp.success and (resp.content or '').strip(): + return resp.content or "" + last_ollama_error = resp.error or 'ollama generate failed' + _ctx.set_error(last_ollama_error) + if attempt_index == len(ollama_attempts): + _ctx.fallback_to_caller(fallback_caller) + + logger.warning( + "[CodeReview] OpenClaw 本地 Ollama 鏈全部失敗,才啟用雲端備援: %s", + last_ollama_error, + ) # ── L1:Phase 7 Frontier — Claude Opus 4.7(程式碼能力 #1)──────────── # feature flag 預設 OFF;ON 時只作 Ollama 失敗後的雲端備援。 diff --git a/tests/test_code_review_claude_routing.py b/tests/test_code_review_claude_routing.py index 75d3c0b..da0dcda 100644 --- a/tests/test_code_review_claude_routing.py +++ b/tests/test_code_review_claude_routing.py @@ -195,6 +195,12 @@ def test_code_review_ollama_defaults_use_fast_local_model(monkeypatch): "CODE_REVIEW_OLLAMA_MODEL", "OPENCLAW_OLLAMA_MODEL", "CODE_REVIEW_OLLAMA_TIMEOUT", + "CODE_REVIEW_OLLAMA_SECONDARY_MODEL", + "CODE_REVIEW_OLLAMA_SECONDARY_TIMEOUT", + "CODE_REVIEW_OLLAMA_FALLBACK_MODEL", + "CODE_REVIEW_OLLAMA_FALLBACK_TIMEOUT", + "CODE_REVIEW_OLLAMA_NUM_PREDICT", + "CODE_REVIEW_OLLAMA_KEEP_ALIVE", "CODE_REVIEW_HERMES_TIMEOUT", "CODE_REVIEW_HERMES_MAX_FILES", "CODE_REVIEW_HERMES_MAX_CHARS", @@ -205,11 +211,83 @@ def test_code_review_ollama_defaults_use_fast_local_model(monkeypatch): assert svc_mod.CODE_REVIEW_OLLAMA_MODEL == "qwen2.5-coder:7b" assert svc_mod.CODE_REVIEW_OLLAMA_TIMEOUT == 45 + assert svc_mod.CODE_REVIEW_OLLAMA_SECONDARY_MODEL == "gemma3:4b" + assert svc_mod.CODE_REVIEW_OLLAMA_SECONDARY_TIMEOUT == 25 + assert svc_mod.CODE_REVIEW_OLLAMA_FALLBACK_MODEL == "hermes3:latest" + assert svc_mod.CODE_REVIEW_OLLAMA_FALLBACK_TIMEOUT == 20 + assert svc_mod.CODE_REVIEW_OLLAMA_NUM_PREDICT == 384 + assert svc_mod.CODE_REVIEW_OLLAMA_KEEP_ALIVE == "24h" assert svc_mod.CODE_REVIEW_HERMES_TIMEOUT == 35 assert svc_mod.CODE_REVIEW_HERMES_MAX_FILES == 3 assert svc_mod.CODE_REVIEW_HERMES_MAX_CHARS == 2500 +def test_openclaw_uses_secondary_local_model_before_gemini(monkeypatch): + """GCP-A 失敗時先試 GCP-B 本地模型,成功就不得呼叫 Gemini。""" + monkeypatch.setenv('CODE_REVIEW_USE_CLAUDE', 'false') + monkeypatch.setenv('GEMINI_API_KEY', 'test-key') + _stub_logger(monkeypatch) + + svc_mod = _reload_pipeline() + import services.ollama_service as ollama_mod + + calls = [] + + class FakeResp: + def __init__(self, *, success, content, host, model, error=None): + self.success = success + self.content = content + self.host = host + self.model = model + self.error = error + self.input_tokens = 20 if success else 0 + self.output_tokens = 8 if success else 0 + + class FakeOllama: + def __init__(self, host=None, model=None): + self.host = host + self.model = model + + def generate(self, **kwargs): + calls.append({ + "host": self.host, + "model": kwargs["model"], + "timeout": kwargs["timeout"], + }) + if len(calls) == 1: + return FakeResp( + success=False, + content="", + host=self.host, + model=kwargs["model"], + error="primary timeout", + ) + return FakeResp( + success=True, + content="SECONDARY-LOCAL", + host=self.host, + model=kwargs["model"], + ) + + monkeypatch.setattr(ollama_mod, "OllamaService", FakeOllama) + fake_claude = _stub_anthropic(monkeypatch, svc_mod, available=True) + fake_genai, fake_elephant = _stub_gemini_and_elephant(monkeypatch) + + pipeline = _make_pipeline(svc_mod) + result = pipeline._openclaw_assess( + files={"services/foo.py": "def x(): pass"}, + findings=[], + ) + + assert result == "SECONDARY-LOCAL" + assert [call["model"] for call in calls] == ["qwen2.5-coder:7b", "gemma3:4b"] + assert calls[0]["timeout"] == 45 + assert calls[1]["timeout"] == 25 + fake_claude.generate.assert_not_called() + fake_genai.GenerativeModel.assert_not_called() + fake_elephant.generate.assert_not_called() + + def test_hermes_scan_uses_compact_prompt_and_short_timeout(monkeypatch): """Hermes scan 只送 compact snippet,避免大檔讓三主機各卡 120 秒。""" monkeypatch.setenv("CODE_REVIEW_HERMES_TIMEOUT", "7") @@ -336,7 +414,7 @@ def test_gemini_backup_uses_dedicated_caller_in_telemetry(monkeypatch): fake_elephant.generate.assert_not_called() assert any( state["caller"] == "code_review_openclaw" - and state["provider"] == "gcp_ollama" + and state["provider"] in {"gcp_ollama", "ollama_secondary", "ollama_111"} and state["status"] == "fallback" and state["fallback_to"] == "code_review_openclaw_gemini" for state in captured