From 350022f2ea9544f2aa9fe1d368bb9233ee815502 Mon Sep 17 00:00:00 2001 From: OoO Date: Tue, 19 May 2026 22:26:09 +0800 Subject: [PATCH] =?UTF-8?q?[V10.282]=20=E8=A3=9C=E9=BD=8A=20Code=20Review?= =?UTF-8?q?=20Hermes=20=E6=9C=AC=E5=9C=B0=E6=A8=A1=E5=9E=8B=E7=9F=A9?= =?UTF-8?q?=E9=99=A3=20|=20code=5Freview=5Fpipeline=5Fservice.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.example | 7 ++ TODO_NEXT_STEPS.txt | 1 + config.py | 2 +- docs/AI_INTELLIGENCE_MODULE_SOT.md | 3 +- services/code_review_pipeline_service.py | 138 +++++++++++++++++------ tests/test_code_review_claude_routing.py | 16 +++ 6 files changed, 130 insertions(+), 37 deletions(-) diff --git a/.env.example b/.env.example index 6240096..7a4490c 100644 --- a/.env.example +++ b/.env.example @@ -222,6 +222,13 @@ CODE_REVIEW_OLLAMA_FALLBACK_TIMEOUT=20 CODE_REVIEW_OLLAMA_NUM_PREDICT=384 CODE_REVIEW_OLLAMA_KEEP_ALIVE=24h CODE_REVIEW_HERMES_TIMEOUT=35 +CODE_REVIEW_HERMES_PRIMARY_MODEL=qwen2.5-coder:7b +CODE_REVIEW_HERMES_PRIMARY_TIMEOUT=35 +CODE_REVIEW_HERMES_SECONDARY_MODEL=gemma3:4b +CODE_REVIEW_HERMES_SECONDARY_TIMEOUT=25 +CODE_REVIEW_HERMES_FALLBACK_MODEL=hermes3:latest +CODE_REVIEW_HERMES_FALLBACK_TIMEOUT=20 +CODE_REVIEW_HERMES_NUM_PREDICT=768 CODE_REVIEW_HERMES_MAX_FILES=3 CODE_REVIEW_HERMES_MAX_CHARS=2500 CODE_REVIEW_AUTO_FIX_ENABLED=true diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index e0c7446..383d07e 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.282 補齊 Code Review Hermes scan 本地模型矩陣:掃描階段也走 GCP-A `qwen2.5-coder:7b` → GCP-B `gemma3:4b` → 111 `hermes3:latest`,避免 `hermes3` 在三主機各卡 35s 後只留下 error;Hermes scan 不會啟用 Gemini。 - V10.281 強化 Code Review OpenClaw 本地備援矩陣:主機順序仍為 GCP-A → GCP-B → 111,但改成 GCP-A `qwen2.5-coder:7b`、GCP-B `gemma3:4b`、111 `hermes3:latest`,三段本地 Ollama 全失敗後才允許 Claude/Gemini 備援。 - V10.279 收斂 Code Review Ollama-first 路徑:OpenClaw assessment 預設改 `qwen2.5-coder:7b` + 45s/host timeout,Hermes scan 改 compact snippet + 35s/host timeout,避免三主機各卡 120s 後被迫觸發 Gemini 備援。 - V10.278 補 PChome 競價摘要 30 分鐘共享快取與 feeder/backfill 主動清除,並新增市場情報 `candidate_queue_review_ai_summary_preflight` 預覽 gate;API 只檢查未來摘要輸入與 Ollama-first/Gemini-backup-only policy,不呼叫 LLM、不派 Telegram、不寫 DB、不掛 scheduler。 diff --git a/config.py b/config.py index 21d361e..c25acaf 100644 --- a/config.py +++ b/config.py @@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.281" +SYSTEM_VERSION = "V10.282" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index 7f092c5..ad6a3e1 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -2,7 +2,7 @@ > **最後更新**: 2026-05-19 (台北時間) > **狀態**: 🟢 四 AI Agent 自動化閉環已落地;LLM 路由紅線升級為 Ollama-first 三主機級聯,Gemini 僅備援 / 鎖定場景 -> **適用版本**: V10.281 +> **適用版本**: V10.282 --- @@ -18,6 +18,7 @@ - PPT vision、PPT 文案 final fallback、MCP 離線 final fallback 等特殊 Ollama 路徑也不得只打單一 host;如需 `/api/generate`,一律透過 `OllamaService.generate()`。 - Code Review pipeline 也必須 Ollama-first:Hermes scan 與 OpenClaw assessment 都走 `OllamaService` 三主機 retry;Gemini telemetry 只能以 `code_review_openclaw_gemini` 出現,表示 Ollama/可選 Claude 備援都失敗後才啟用。 - Code Review 的 OpenClaw assessment 預設使用 `qwen2.5-coder:7b` 與 45s/host timeout;Hermes scan 只送 compact snippet(預設 3 檔、每檔 2500 字)並使用 35s/host timeout,避免三主機各卡 120s 後把正常 code review 推進 Gemini 備援。 +- Code Review Hermes scan 也使用同一條本地模型矩陣:GCP-A `qwen2.5-coder:7b`、GCP-B `gemma3:4b`、111 `hermes3:latest`;不啟用 Gemini 備援,三段本地掃描失敗時只回空 findings 並交由 OpenClaw 本地矩陣續跑。 - Code Review OpenClaw assessment 保持主機順序 GCP-A → GCP-B → 111,但可使用主機適配本地模型:GCP-A `qwen2.5-coder:7b`、GCP-B `gemma3:4b`、111 `hermes3:latest`;三段本地 Ollama 全失敗後才允許雲端備援。 - OpenClaw Telegram Q&A 主路徑也不得綁單一 host:`_call_qwen3_qa()` 必須透過 `OllamaService` 跑 GCP-A → GCP-B → 111,並把實際落點寫入 `ai_calls.provider`。 - OpenClaw Telegram 圖片商品辨識也必須 Ollama-first:`_identify_product_name_with_ollama_vision()` 透過 `OllamaService` 嘗試 GCP-A → GCP-B → 111;Gemini 只允許以 `openclaw_bot_image_gemini` caller 作為失敗後備援。 diff --git a/services/code_review_pipeline_service.py b/services/code_review_pipeline_service.py index bdfcf4d..b8982c7 100644 --- a/services/code_review_pipeline_service.py +++ b/services/code_review_pipeline_service.py @@ -65,6 +65,24 @@ CODE_REVIEW_OLLAMA_FALLBACK_TIMEOUT = int( CODE_REVIEW_OLLAMA_NUM_PREDICT = int(os.getenv("CODE_REVIEW_OLLAMA_NUM_PREDICT", "384")) CODE_REVIEW_OLLAMA_KEEP_ALIVE = os.getenv("CODE_REVIEW_OLLAMA_KEEP_ALIVE", "24h") CODE_REVIEW_HERMES_TIMEOUT = int(os.getenv("CODE_REVIEW_HERMES_TIMEOUT", "35")) +CODE_REVIEW_HERMES_PRIMARY_MODEL = os.getenv( + "CODE_REVIEW_HERMES_PRIMARY_MODEL", + CODE_REVIEW_OLLAMA_MODEL, +) +CODE_REVIEW_HERMES_SECONDARY_MODEL = os.getenv( + "CODE_REVIEW_HERMES_SECONDARY_MODEL", + CODE_REVIEW_OLLAMA_SECONDARY_MODEL, +) +CODE_REVIEW_HERMES_FALLBACK_MODEL = os.getenv( + "CODE_REVIEW_HERMES_FALLBACK_MODEL", + CODE_REVIEW_OLLAMA_FALLBACK_MODEL, +) +CODE_REVIEW_HERMES_PRIMARY_TIMEOUT = int( + os.getenv("CODE_REVIEW_HERMES_PRIMARY_TIMEOUT", os.getenv("CODE_REVIEW_HERMES_TIMEOUT", "35")) +) +CODE_REVIEW_HERMES_SECONDARY_TIMEOUT = int(os.getenv("CODE_REVIEW_HERMES_SECONDARY_TIMEOUT", "25")) +CODE_REVIEW_HERMES_FALLBACK_TIMEOUT = int(os.getenv("CODE_REVIEW_HERMES_FALLBACK_TIMEOUT", "20")) +CODE_REVIEW_HERMES_NUM_PREDICT = int(os.getenv("CODE_REVIEW_HERMES_NUM_PREDICT", "768")) CODE_REVIEW_HERMES_MAX_FILES = int(os.getenv("CODE_REVIEW_HERMES_MAX_FILES", "3")) CODE_REVIEW_HERMES_MAX_CHARS = int(os.getenv("CODE_REVIEW_HERMES_MAX_CHARS", "2500")) INTERNAL_TOKEN = os.getenv("INTERNAL_WEBHOOK_TOKEN", "") @@ -248,44 +266,94 @@ class CodeReviewPipeline: 只輸出 JSON 陣列,不含其他文字。無問題時輸出 []""" - from services.ollama_service import OllamaService, get_host_label, get_provider_tag + from services.ollama_service import ( + OLLAMA_HOST_FALLBACK, + OLLAMA_HOST_PRIMARY, + OLLAMA_HOST_SECONDARY, + OllamaService, + get_host_label, + get_provider_tag, + ) - with log_ai_call( - caller='code_review_hermes', - provider='gcp_ollama', - model=_HERMES_MODEL, - request_id=f"cr-{self.commit_sha[:8]}", - meta={'commit': self.commit_sha[:8], 'branch': self.branch, - 'files': len(files), 'route': 'ollama_first', - 'max_files': CODE_REVIEW_HERMES_MAX_FILES, - 'max_chars': CODE_REVIEW_HERMES_MAX_CHARS}, - ) as _ctx: - ollama = OllamaService(model=_HERMES_MODEL) - resp = ollama.generate( - prompt=prompt, - model=_HERMES_MODEL, - temperature=0.1, - timeout=CODE_REVIEW_HERMES_TIMEOUT, - ) - actual_provider = get_provider_tag(resp.host or '') - _ctx.set_provider(actual_provider) - _ctx.set_tokens( - input=resp.input_tokens, - output=resp.output_tokens, - ) - _ctx.add_meta('host', resp.host) - _ctx.add_meta('host_label', get_host_label(resp.host or '')) - if not resp.success: - _ctx.set_error(resp.error or 'ollama generate failed') - logger.warning("[CodeReview] Hermes Ollama 掃描失敗: %s", resp.error) - return [] - raw = (resp.content or "").strip() + hermes_attempts = [ + ( + "primary_code_scan", + OLLAMA_HOST_PRIMARY, + CODE_REVIEW_HERMES_PRIMARY_MODEL, + CODE_REVIEW_HERMES_PRIMARY_TIMEOUT, + ), + ( + "secondary_fast_scan", + OLLAMA_HOST_SECONDARY, + CODE_REVIEW_HERMES_SECONDARY_MODEL, + CODE_REVIEW_HERMES_SECONDARY_TIMEOUT, + ), + ( + "lan_111_hermes_scan", + OLLAMA_HOST_FALLBACK, + CODE_REVIEW_HERMES_FALLBACK_MODEL, + CODE_REVIEW_HERMES_FALLBACK_TIMEOUT, + ), + ] + findings = None + last_error = None - match = re.search(r"\[.*\]", raw, re.DOTALL) - if not match: - logger.warning("[CodeReview] Hermes 回應無 JSON: %s", raw[:200]) + for attempt_index, (attempt_key, host, model_name, timeout_s) in enumerate( + hermes_attempts, + start=1, + ): + with log_ai_call( + caller='code_review_hermes', + provider=get_provider_tag(host), + model=model_name, + request_id=f"cr-{self.commit_sha[:8]}", + meta={'commit': self.commit_sha[:8], 'branch': self.branch, + 'files': len(files), 'route': 'ollama_first', + 'attempt': attempt_index, + 'attempt_key': attempt_key, + 'max_files': CODE_REVIEW_HERMES_MAX_FILES, + 'max_chars': CODE_REVIEW_HERMES_MAX_CHARS, + 'timeout_s': timeout_s}, + ) as _ctx: + ollama = OllamaService(host=host, model=model_name) + resp = ollama.generate( + prompt=prompt, + model=model_name, + temperature=0.1, + timeout=timeout_s, + keep_alive=CODE_REVIEW_OLLAMA_KEEP_ALIVE, + options={"num_predict": CODE_REVIEW_HERMES_NUM_PREDICT}, + ) + actual_host = resp.host or host + _ctx.set_provider(get_provider_tag(actual_host)) + _ctx.set_tokens( + input=resp.input_tokens, + output=resp.output_tokens, + ) + _ctx.add_meta('host', actual_host) + _ctx.add_meta('host_label', get_host_label(actual_host)) + if not resp.success: + last_error = resp.error or 'ollama generate failed' + _ctx.set_error(last_error) + continue + raw = (resp.content or "").strip() + match = re.search(r"\[.*\]", raw, re.DOTALL) + if not match: + last_error = f"missing JSON array: {raw[:120]}" + _ctx.set_error(last_error) + logger.warning("[CodeReview] Hermes 回應無 JSON: %s", raw[:200]) + continue + try: + findings = json.loads(match.group()) + except Exception as exc: + last_error = f"json parse failed: {type(exc).__name__}: {exc}" + _ctx.set_error(last_error) + continue + break + + if findings is None: + logger.warning("[CodeReview] Hermes 本地掃描全部失敗: %s", last_error) return [] - findings = json.loads(match.group()) for f in findings: sev = f.get("severity", "LOW").lower() diff --git a/tests/test_code_review_claude_routing.py b/tests/test_code_review_claude_routing.py index da0dcda..cc15751 100644 --- a/tests/test_code_review_claude_routing.py +++ b/tests/test_code_review_claude_routing.py @@ -202,6 +202,13 @@ def test_code_review_ollama_defaults_use_fast_local_model(monkeypatch): "CODE_REVIEW_OLLAMA_NUM_PREDICT", "CODE_REVIEW_OLLAMA_KEEP_ALIVE", "CODE_REVIEW_HERMES_TIMEOUT", + "CODE_REVIEW_HERMES_PRIMARY_MODEL", + "CODE_REVIEW_HERMES_PRIMARY_TIMEOUT", + "CODE_REVIEW_HERMES_SECONDARY_MODEL", + "CODE_REVIEW_HERMES_SECONDARY_TIMEOUT", + "CODE_REVIEW_HERMES_FALLBACK_MODEL", + "CODE_REVIEW_HERMES_FALLBACK_TIMEOUT", + "CODE_REVIEW_HERMES_NUM_PREDICT", "CODE_REVIEW_HERMES_MAX_FILES", "CODE_REVIEW_HERMES_MAX_CHARS", ): @@ -218,6 +225,13 @@ def test_code_review_ollama_defaults_use_fast_local_model(monkeypatch): assert svc_mod.CODE_REVIEW_OLLAMA_NUM_PREDICT == 384 assert svc_mod.CODE_REVIEW_OLLAMA_KEEP_ALIVE == "24h" assert svc_mod.CODE_REVIEW_HERMES_TIMEOUT == 35 + assert svc_mod.CODE_REVIEW_HERMES_PRIMARY_MODEL == "qwen2.5-coder:7b" + assert svc_mod.CODE_REVIEW_HERMES_PRIMARY_TIMEOUT == 35 + assert svc_mod.CODE_REVIEW_HERMES_SECONDARY_MODEL == "gemma3:4b" + assert svc_mod.CODE_REVIEW_HERMES_SECONDARY_TIMEOUT == 25 + assert svc_mod.CODE_REVIEW_HERMES_FALLBACK_MODEL == "hermes3:latest" + assert svc_mod.CODE_REVIEW_HERMES_FALLBACK_TIMEOUT == 20 + assert svc_mod.CODE_REVIEW_HERMES_NUM_PREDICT == 768 assert svc_mod.CODE_REVIEW_HERMES_MAX_FILES == 3 assert svc_mod.CODE_REVIEW_HERMES_MAX_CHARS == 2500 @@ -307,7 +321,9 @@ def test_hermes_scan_uses_compact_prompt_and_short_timeout(monkeypatch): assert result == [] kwargs = fake_ollama.generate.call_args.kwargs + assert kwargs["model"] == "qwen2.5-coder:7b" assert kwargs["timeout"] == 7 + assert kwargs["options"] == {"num_predict": 768} prompt = kwargs["prompt"] assert "services/a.py" in prompt assert "services/b.py" in prompt