From 65d38d46325a28bd611552a53e9b93c51cf865a9 Mon Sep 17 00:00:00 2001 From: OoO Date: Sun, 24 May 2026 13:32:26 +0800 Subject: [PATCH] V10.407 shorten ai runner residency --- .env.example | 3 +++ config.py | 2 +- docs/AI_INTELLIGENCE_MODULE_SOT.md | 4 ++-- docs/memory/history_logs.md | 2 ++ services/hermes_analyst_service.py | 3 ++- services/marketplace_product_matcher.py | 8 ++++++++ services/openclaw_strategist_service.py | 3 ++- tests/test_hermes_ollama_cascade.py | 4 ++++ tests/test_marketplace_product_matcher.py | 23 +++++++++++++++++++++++ tests/test_qwen3_runtime_usage.py | 3 +++ 10 files changed, 50 insertions(+), 5 deletions(-) diff --git a/.env.example b/.env.example index 9463535..3ee9f96 100644 --- a/.env.example +++ b/.env.example @@ -133,6 +133,8 @@ HERMES_URL= # [預設 120] Hermes 推理 timeout(秒);批量 300 筆預估 ~90s HERMES_TIMEOUT=120 +# [預設 5m] Hermes runner 熱駐留;禁止回到 24h,避免 GCP-B/111 長駐高負載 +HERMES_KEEP_ALIVE=5m # [選填] Embedding 服務主機;留空時自動走同一條 Ollama 三主機級聯 EMBEDDING_HOST= @@ -385,6 +387,7 @@ NEMOTRON_OLLAMA_TIMEOUT=180 OPENCLAW_STRATEGY_OLLAMA_MODEL=qwen3:14b OPENCLAW_STRATEGY_OLLAMA_TIMEOUT=90 OPENCLAW_STRATEGY_OLLAMA_NUM_PREDICT=2048 +OPENCLAW_STRATEGY_OLLAMA_KEEP_ALIVE=5m # [預設 OFF] MCP Router;需先部署 docker-compose.mcp.yml 並完成健康檢查再開 MCP_ROUTER_ENABLED=false diff --git a/config.py b/config.py index e55bc4e..8993933 100644 --- a/config.py +++ b/config.py @@ -325,7 +325,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.406" +SYSTEM_VERSION = "V10.408" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index 109c2fa..06fe3dd 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -22,13 +22,13 @@ - Code Review OpenClaw assessment 保持主機順序 GCP-A → GCP-B → 111,但可使用主機適配本地模型:GCP-A `qwen2.5-coder:7b`、GCP-B `gemma3:4b`;落到 111 時由 `OllamaService` 降級到 `llama3.2:latest`。Code Review 的 Ollama `keep_alive` 預設為 `5m`,不得再用 `24h` 長駐 runner 壓住 GCP-B/111。三段本地 Ollama 全失敗後才允許雲端備援。 - OpenClaw Telegram Q&A 主路徑也不得綁單一 host:`_call_qwen3_qa()` 必須透過 `OllamaService` 跑 GCP-A → GCP-B → 111,並把實際落點寫入 `ai_calls.provider`。 - OpenClaw Telegram 圖片商品辨識也必須 Ollama-first:`_identify_product_name_with_ollama_vision()` 透過 `OllamaService` 嘗試 GCP-A → GCP-B → 111;Gemini 只允許以 `openclaw_bot_image_gemini` caller 作為失敗後備援。 -- OpenClaw 週報、月報、Meta analysis、日報洞察、Telegram PPT 分析與 MCP fallback 也必須 Ollama-first;Gemini caller 只能帶 `_gemini_fallback` 或明確 fallback caller 語意,且不得先於 Ollama/NIM 被呼叫。 +- OpenClaw 週報、月報、Meta analysis、日報洞察、Telegram PPT 分析與 MCP fallback 也必須 Ollama-first;Gemini caller 只能帶 `_gemini_fallback` 或明確 fallback caller 語意,且不得先於 Ollama/NIM 被呼叫。OpenClaw strategy 的 Ollama `keep_alive` 預設為 `5m`,避免報告型任務把 GCP-B/111 runner 長駐 24h。 - OpenClaw 週報、月報、Meta analysis、日報洞察與每日報告的 Gemini/NIM 備援 caller 必須登錄在 caller registry、AI 觀測台 agent group 與 Telegram 狀態統計,避免 fallback 用量被歸類為未知或漏算。 - Gemini API 出站有第二道 kill switch:`GEMINI_FALLBACK_ENABLED` 預設為 `false`。即使 `GEMINI_API_KEY` 存在,通用 AI fallback、OpenClaw 報告/QA/PPT/圖片、MCP Grounding 與 Code Review L3 都不得呼叫 Gemini;只有操作員明確設為 `true` 時,Gemini 才能作緊急備援。 - `docker-compose.yml` 的 `momo-app`、`scheduler`、`telegram-bot` 必須明確設定 `GEMINI_API_HARD_DISABLED=${GEMINI_API_HARD_DISABLED:-true}` 與 `GEMINI_FALLBACK_ENABLED=${GEMINI_FALLBACK_ENABLED:-false}`;`.env` 可保留 `GEMINI_API_KEY`,但不得因 key 存在就讓核心容器產生 Gemini 付費出站。 - Gemini 不可被任何狀態面板或 router 推薦為主提供者:`AIProviderService._get_recommended_provider()` 不得回傳 `gemini`,只能顯示為 fallback 狀態;`llm_model_router` 的 `ea_engine` 若收到 `gemini-*` default 必須改回 `hermes3:latest`,需要深推理時才升本地 `deepseek-r1:14b`。 - ElephantAlpha prompt / agent registry 不得再把 OpenClaw 描述為 Gemini 主模型;OpenClaw 是 `qwen2.5-coder:7b` / `qwen3:14b` Ollama-first 策略師,Gemini 僅能在 guard 顯式解鎖後作 emergency fallback。 -- 111 `192.168.0.111` 只是最後一道 Mac fallback,不承接 7B+、vision、long-context 模型長駐;`OllamaService.generate()` 落到 111 時會將 `qwen3`、`deepseek-r1`、`hermes3`、`qwen2.5*`、`gemma3`、`llava`、`minicpm-v` 與 7B+ 模型依 `OLLAMA_111_MODEL_DOWNGRADE_PATTERNS` 降級到 `OLLAMA_111_MODEL_FALLBACK=llama3.2:latest`,並以 `OLLAMA_111_KEEP_ALIVE=5m`、`OLLAMA_111_MAX_TIMEOUT=20`、`OLLAMA_111_NUM_CTX=4096`、`OLLAMA_111_NUM_PREDICT=512` 封頂,避免 16GB RAM 主機被大 context runner、長輸出與 24h keep-alive 壓到 swap。 +- 111 `192.168.0.111` 只是最後一道 Mac fallback,不承接 7B+、vision、long-context 模型長駐;`OllamaService.generate()` 落到 111 時會將 `qwen3`、`deepseek-r1`、`hermes3`、`qwen2.5*`、`gemma3`、`llava`、`minicpm-v` 與 7B+ 模型依 `OLLAMA_111_MODEL_DOWNGRADE_PATTERNS` 降級到 `OLLAMA_111_MODEL_FALLBACK=llama3.2:latest`,並以 `OLLAMA_111_KEEP_ALIVE=5m`、`OLLAMA_111_MAX_TIMEOUT=20`、`OLLAMA_111_NUM_CTX=4096`、`OLLAMA_111_NUM_PREDICT=512` 封頂。Hermes / OpenClaw / Code Review 路徑的業務 keep-alive 也預設 `5m`,避免 16GB RAM 主機與 GCP-B 被長駐 runner、長輸出與 24h keep-alive 壓到高 load。 - ElephantAlpha 的 `price_drop_alert` / `market_opportunity` Telegram HITL 告警必須把同款證據獨立呈現,至少包含 `match_type`、`price_basis`、`alert_tier` 與 `match_score`;沒有高信心同款與總價可比證據時,不得把 PChome/MOMO 價差寫成可直接跟價建議。 ## 一、四 AI Agent 路由架構 diff --git a/docs/memory/history_logs.md b/docs/memory/history_logs.md index 33a5da5..e293746 100644 --- a/docs/memory/history_logs.md +++ b/docs/memory/history_logs.md @@ -13,6 +13,8 @@ ## 📅 詳細更新日誌 (考古存檔) ### 2026-05-24:PChome 近門檻身份回收第二輪 +- **V10.408 OPI 指甲油 catalog review-only 回收**: marketplace matcher 針對 OPI 類光繚指甲油加入同系列 catalog focused identity,只在「白日夢遊」或「驕傲果凍」系列名雙方一致時進 `identity_review`;12色/11色視為可選色號數差異,不當作販售件數,跨系列仍維持 suppress。下一輪回刷會搭配 KATE 怪獸級持色唇膏限量款既有規則候選,延續不放寬全域門檻的近門檻回收策略。 +- **V10.407 Hermes/OpenClaw runner 熱駐留收斂**: V10.406 後續觀測顯示 GCP-B 仍會被 Hermes/OpenClaw 的 `24h` keep-alive runner 壓高 load,導致 GCP-B generate timeout 後轉落 111。`HERMES_KEEP_ALIVE` 與 `OPENCLAW_STRATEGY_OLLAMA_KEEP_ALIVE` 改為 env 可控且預設 `5m`,並補測試禁止 OpenClaw strategy 再硬寫 `keep_alive="24h"`;GCP-A 目前從 188 仍逾時,需另行修復主機/防火牆。 - **V10.406 Code Review Ollama keep-alive 收斂**: production audit 顯示 Gemini 24h 已為 0,但 GCP-A `34.143.170.20:11434` 從 188 逾時、GCP-B 曾因多個 Ollama runner 長駐造成 generate timeout,導致部署後 Code Review 轉落 111。`CODE_REVIEW_OLLAMA_KEEP_ALIVE` 預設由 `24h` 改為 `5m`,讓 GCP-B/111 的 code review runner 不再長時間常駐;實測已重啟 GCP-B Ollama 並確認 `gemma3:4b` 可於約 6.2s 完成短生成。 - **V10.405 MCP manual fetch handoff gate**: 新增 `mcp_manual_fetch_handoff` read-only builder、GET/POST endpoint、UI handoff package 審核面板與 deployment readiness smoke target,讓 runtime promotion package 搭配操作員公開頁面、節流、無登入/反爬、無 DB、無 scheduler 確認後,只放行到人工 fetch gate operator review;API/UI 不保存 payload、不打 health、不開 DB、不抓外站、不掛 scheduler,也不會自動打開 manual fetch。 - **V10.404 Hermes 競價威脅漏斗只吃 direct alert**: `HermesAnalystService.fetch_candidates()` 的 `competitor_prices` JOIN 新增硬條件,只讓 `match_type=exact`、`price_basis=total_price`、`alert_tier=price_alert_exact` 的 identity_v2 配對進入 Hermes 競價威脅分析;`identity_review`、`unit_price_review` 與 `suppress` 仍保留在 dashboard / 人工覆核資料流,但不再消耗 Hermes token 或被上游視為直接價格威脅候選。production fresh 配對分布顯示直接告警約 497 筆、覆核型約 216 筆,本版將兩者在 AI 威脅入口切開。同版 matcher 追加 KATE 怪獸級持色唇膏、植村秀武士刀眉筆筆蕊、The Forest 焦糖楓葉擴香禮盒等近門檻 review-only 回收線,仍只進人工覆核,不直接價格告警。 diff --git a/services/hermes_analyst_service.py b/services/hermes_analyst_service.py index e3af272..7f9bcd8 100644 --- a/services/hermes_analyst_service.py +++ b/services/hermes_analyst_service.py @@ -15,6 +15,7 @@ Hermes 3 競價情報分析服務 (Module 2) import asyncio import json import logging +import os import re import time import uuid @@ -33,7 +34,7 @@ logger = logging.getLogger(__name__) from config import HERMES_TIMEOUT HERMES_MODEL = "hermes3:latest" -HERMES_KEEP_ALIVE = "24h" # ADR-012:保持模型熱駐留,避免被別模型擠下後冷啟動 30+s timeout +HERMES_KEEP_ALIVE = os.getenv("HERMES_KEEP_ALIVE", "5m") TOP_N = 20 # 輸出前 N 個威脅,控制 NemoTron 每次消耗配額 diff --git a/services/marketplace_product_matcher.py b/services/marketplace_product_matcher.py index e356654..b0480d0 100644 --- a/services/marketplace_product_matcher.py +++ b/services/marketplace_product_matcher.py @@ -470,6 +470,7 @@ FOCUSED_IDENTITY_REVIEW_ONLY_REASONS = { "im_meme_fixx_cool_setting_spray", "so_natural_fixx_setting_spray_catalog", "kate_monster_lipstick_catalog", + "opi_gel_polish_series_catalog", "shu_auto_hard_formula_refill_catalog", "the_forest_maple_diffuser_flower_brandless", } @@ -2951,6 +2952,13 @@ def _has_focused_low_score_exact_identity_line(left: ProductIdentity, right: Pro and "怪獸級持色唇膏" in right_text ): return "kate_monster_lipstick_catalog" + if ( + "opi" in (left.brand_tokens & right.brand_tokens) + and "類光繚指甲油" in left_text + and "類光繚指甲油" in right_text + and any(series in left_text and series in right_text for series in ("白日夢遊", "驕傲果凍")) + ): + return "opi_gel_polish_series_catalog" if ( {"shu uemura", "植村秀"} & (left.brand_tokens & right.brand_tokens) and "自動武士刀眉筆" in left_text diff --git a/services/openclaw_strategist_service.py b/services/openclaw_strategist_service.py index 35a93c1..ea79f04 100644 --- a/services/openclaw_strategist_service.py +++ b/services/openclaw_strategist_service.py @@ -81,6 +81,7 @@ OPENCLAW_STRATEGY_OLLAMA_MODEL = os.getenv( ) OPENCLAW_STRATEGY_OLLAMA_TIMEOUT = int(os.getenv('OPENCLAW_STRATEGY_OLLAMA_TIMEOUT', '120')) OPENCLAW_STRATEGY_OLLAMA_NUM_PREDICT = int(os.getenv('OPENCLAW_STRATEGY_OLLAMA_NUM_PREDICT', '2048')) +OPENCLAW_STRATEGY_OLLAMA_KEEP_ALIVE = os.getenv('OPENCLAW_STRATEGY_OLLAMA_KEEP_ALIVE', '5m') # 繁體中文強制 system prompt(A2 黃燈警訊「Qwen 繁中短板」緩解策略) QWEN3_TC_SYSTEM_PROMPT = """你是 momo 電商情報分析師「OpenClaw」。 @@ -1106,7 +1107,7 @@ def _call_ollama_strategy( system_prompt=system_prompt, temperature=temperature, timeout=timeout_s, - keep_alive="24h", + keep_alive=OPENCLAW_STRATEGY_OLLAMA_KEEP_ALIVE, options={"num_predict": predict}, ) ctx.set_provider(get_provider_tag(resp.host or "")) diff --git a/tests/test_hermes_ollama_cascade.py b/tests/test_hermes_ollama_cascade.py index de3b50a..f339ba2 100644 --- a/tests/test_hermes_ollama_cascade.py +++ b/tests/test_hermes_ollama_cascade.py @@ -154,3 +154,7 @@ def test_hermes_candidate_sql_only_joins_direct_price_alert_matches(): assert "match_type_exact" in sql_text assert "price_basis_total_price" in sql_text assert "alert_tier_price_alert_exact" in sql_text + + +def test_hermes_keep_alive_defaults_to_short_runner_residency(): + assert hermes_mod.HERMES_KEEP_ALIVE == "5m" diff --git a/tests/test_marketplace_product_matcher.py b/tests/test_marketplace_product_matcher.py index 4e064ce..2e8b9fe 100644 --- a/tests/test_marketplace_product_matcher.py +++ b/tests/test_marketplace_product_matcher.py @@ -887,6 +887,14 @@ def test_marketplace_matcher_keeps_catalog_variant_recoveries_in_identity_review "【The Forest 癒森林】焦糖楓葉香氛擴香花禮盒 含30ml品牌香氛油(居家香氛/香水精油/香氛擴香花/擴香禮物)", "焦糖楓葉香氛擴香花禮盒 含30ml品牌香氛油", ), + ( + "【O.P.I】類光繚指甲油 12色任選1瓶(小銀蓋/如膠似漆白日夢遊系列指彩/官方直營)", + "OPI 如膠似漆白日夢遊系列 類光繚指甲油12色任選", + ), + ( + "【O.P.I】類光繚指甲油 12色任選1瓶(小銀蓋/如膠似漆驕傲果凍系列指彩/官方直營)", + "OPI 如膠似漆驕傲果凍系列 類光繚指甲油11色任選", + ), ] for momo_name, competitor_name in cases: @@ -1199,6 +1207,16 @@ def test_marketplace_matcher_promotes_focused_low_score_exact_identity_lines(): "焦糖楓葉香氛擴香花禮盒 含30ml品牌香氛油", "focused_exact_identity_the_forest_maple_diffuser_flower_brandless", ), + ( + "【O.P.I】類光繚指甲油 12色任選1瓶(小銀蓋/如膠似漆白日夢遊系列指彩/官方直營)", + "OPI 如膠似漆白日夢遊系列 類光繚指甲油12色任選", + "focused_exact_identity_opi_gel_polish_series_catalog", + ), + ( + "【O.P.I】類光繚指甲油 12色任選1瓶(小銀蓋/如膠似漆驕傲果凍系列指彩/官方直營)", + "OPI 如膠似漆驕傲果凍系列 類光繚指甲油11色任選", + "focused_exact_identity_opi_gel_polish_series_catalog", + ), ( "【GATSBY】爆水擦澡濕巾24張入(涼感乾洗澡)", "GATSBY 爆水擦澡濕巾24張入(240g)", @@ -1252,6 +1270,10 @@ def test_marketplace_matcher_keeps_high_variant_low_score_lines_outside_focused_ "【YSL】官方直營 恆久完美持久柔霧蜜粉餅(任選1款/皮革蜜粉)", "YSL 恆久完美持久柔霧蜜粉餅(7.5g) [百貨公司專櫃貨]", ) + opi_series_gap = score_marketplace_match( + "【O.P.I】類光繚指甲油 12色任選1瓶(小銀蓋/如膠似漆白日夢遊系列指彩/官方直營)", + "OPI 如膠似漆驕傲果凍系列 類光繚指甲油11色任選", + ) for diagnostics in ( lush, @@ -1263,6 +1285,7 @@ def test_marketplace_matcher_keeps_high_variant_low_score_lines_outside_focused_ lancome_line_gap, mac_finish_gap, ysl_powder_variant_guard, + opi_series_gap, ): assert diagnostics.score < 0.76 assert not any(reason.startswith("focused_exact_identity_") for reason in diagnostics.reasons) diff --git a/tests/test_qwen3_runtime_usage.py b/tests/test_qwen3_runtime_usage.py index 3cecbae..77c9e5e 100644 --- a/tests/test_qwen3_runtime_usage.py +++ b/tests/test_qwen3_runtime_usage.py @@ -13,6 +13,9 @@ def test_qwen3_is_active_runtime_model_not_unused_ollama_weight(): assert "def _call_qwen3_qa(" in openclaw_source assert "OllamaService(model=OPENCLAW_QA_OLLAMA_MODEL)" in openclaw_source assert "OPENCLAW_QA_OLLAMA_HOST" not in openclaw_source + assert "OPENCLAW_STRATEGY_OLLAMA_KEEP_ALIVE = os.getenv('OPENCLAW_STRATEGY_OLLAMA_KEEP_ALIVE', '5m')" in openclaw_source + assert "keep_alive=OPENCLAW_STRATEGY_OLLAMA_KEEP_ALIVE" in openclaw_source + assert 'keep_alive="24h"' not in openclaw_source assert 'NEMOTRON_OLLAMA_MODEL = os.getenv("NEMOTRON_OLLAMA_MODEL", "qwen3:14b")' in nemotron_source assert "def _call_qwen3_dispatch(" in nemotron_source assert "for _attempt in range(3):" in nemotron_source