From 6788379d10a7b6d4bfbf991db278f50f1671151c Mon Sep 17 00:00:00 2001 From: OoO Date: Tue, 19 May 2026 13:04:01 +0800 Subject: [PATCH] =?UTF-8?q?=E6=8A=91=E5=88=B6=20EA=20=E7=84=A1=E5=AF=A6?= =?UTF-8?q?=E8=AD=89=E7=A9=BA=E5=91=8A=E8=AD=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TODO_NEXT_STEPS.txt | 1 + config.py | 2 +- docs/AI_INTELLIGENCE_MODULE_SOT.md | 5 +- ...R-021-ea-hitl-prefetch-and-alert-impact.md | 10 +- services/elephant_alpha_autonomous_engine.py | 109 ++++++++++++++---- tests/test_elephant_alpha_engine.py | 45 ++++++++ 6 files changed, 144 insertions(+), 28 deletions(-) diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index caf4991..55c9478 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.253 修正 Elephant Alpha L3 HITL 空告警:價格類與資源調配低信心事件若沒有 Hermes/實證資料,只記 suppressed telemetry 與 cooldown,不寫 pending human_review、不發 Telegram;`resource_optimization` 會保留 queue/load 原始指標供追查。 - V10.251 修正 OpenClaw Q&A 備援遙測:Ollama 主路徑仍為 GCP-A → GCP-B → 111,Gemini 只記為 `openclaw_qa_gemini_fallback`,NIM 只記為 `openclaw_qa_nim`;AI Calls 會把 legacy `openclaw_qa + gemini` 標成 Gemini 備援,避免再次誤判 Gemini-first。 - V10.251 穩定 `/growth_analysis` 正式站速度:成長分析快取從單 worker memory 擴充為 `data/growth_analysis_cache.pkl` 跨 worker 共享快取,避免 Gunicorn 冷 worker 偶發掃明細表造成 5 秒級 TTFB;補 `tests/test_cache_manager.py` 覆蓋 shared file roundtrip 與清除行為。 - V10.249 收斂 `/observability/ppt_audit_history` 手機與平板第一屏密度:將 4 個產線訊號從 hero 內移出成獨立狀態列,手機版維持 2 欄狀態卡並降低 hero 卡片間距;本機 10 個 AI 觀測台頁面 rendered visual contract 全數通過,PPT 頁 hero 高度 desktop/tablet/mobile 為 214/361/398px。 diff --git a/config.py b/config.py index b5e1550..f5d61d3 100644 --- a/config.py +++ b/config.py @@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.252" +SYSTEM_VERSION = "V10.253" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/docs/AI_INTELLIGENCE_MODULE_SOT.md b/docs/AI_INTELLIGENCE_MODULE_SOT.md index 68bb15d..f467bd2 100644 --- a/docs/AI_INTELLIGENCE_MODULE_SOT.md +++ b/docs/AI_INTELLIGENCE_MODULE_SOT.md @@ -1,8 +1,8 @@ # MOMO PRO — AI 競價情報模組 Single Source of Truth -> **最後更新**: 2026-05-18 (台北時間) +> **最後更新**: 2026-05-19 (台北時間) > **狀態**: 🟢 四 AI Agent 自動化閉環已落地;LLM 路由紅線升級為 Ollama-first 三主機級聯,Gemini 僅備援 / 鎖定場景 -> **適用版本**: V10.179 +> **適用版本**: V10.253 --- @@ -105,6 +105,7 @@ SQL漏斗(~300筆) - Gunicorn runtime 預設 `worker_class = gthread`、`GUNICORN_THREADS=4`、`preload_app = False`;此組合讓 HUP 熱重載可用,也避免 Dashboard 長查詢完全阻塞 `/health`。 - CD rebuild 模式必須先 build image 成功,再短暫 stop/rm/recreate 三應用容器,避免 no-cache build 造成長時間 502。 - ElephantAlpha 使用 NVIDIA NIM hosted API;production 預設模型為 `nvidia/llama-3.3-nemotron-super-49b-v1.5`,`ELEPHANT_ALPHA_FALLBACK_MODELS` 需保留至少一個可呼叫備援;403/404、408/409/425/429、5xx、timeout 與 connection error 必須嘗試下一個模型。 +- ElephantAlpha L3 HITL 只允許發送有實證、可審核、可行動的升級告警;價格類 trigger 無 Hermes 具體威脅、或 `resource_optimization` 低信心且沒有具體行動時,只記錄 suppressed escalation telemetry 與 cooldown,不寫 pending `human_review`,不發 Telegram 空告警。 - OpenClaw/Hermes embedding 優先呼叫 Ollama `/api/embed`,只在舊節點不支援時 fallback `/api/embeddings`;timeout 由 `EMBEDDING_TIMEOUT` / `OLLAMA_EMBED_TIMEOUT` 控制。 - PPT 自動產線由 `momo-scheduler` 依節奏執行 `run_ppt_auto_generation_task(schedule_kind)`:每日 20:30 產日報、週一 20:40 產週報/市場情報、每月 1 日 20:50 產月報與管理型簡報、季初 21:00 產季報、半年初 21:10 產半年報、年初 21:20 產年報,再交給 22:00 `ppt_vision_audit` 做視覺審核;每次嘗試會寫入 `ppt_generation_runs`,`/observability/ppt_audit_history` 以精準參數檢查目標版本是否已產生,並可用 `/observability/ppt_audit/generate_missing` 手動補齊缺漏,總開關為 `PPT_AUTO_GENERATION_ENABLED`。PPT vision 需 `PPT_VISION_ENABLED=true` 與容器內 LibreOffice;`/observability/ppt_audit_file/` 會把 PPTX 轉成 PDF 快取供站內線上預覽,原始 PPTX 仍保留下載。 diff --git a/docs/adr/ADR-021-ea-hitl-prefetch-and-alert-impact.md b/docs/adr/ADR-021-ea-hitl-prefetch-and-alert-impact.md index 1d2f902..f23fae3 100644 --- a/docs/adr/ADR-021-ea-hitl-prefetch-and-alert-impact.md +++ b/docs/adr/ADR-021-ea-hitl-prefetch-and-alert-impact.md @@ -45,9 +45,13 @@ 蓋掉原本的 plan 元流程文字。**強制配套限制**: - `asyncio.wait_for(timeout=5)` 短超時:Hermes 熱駐留 < 10s,但冷啟動會拖到 30s+,HITL 訊息延遲不可大於 10s -- Pre-fetch 失敗(timeout / 0 threats / 全部缺金額)→ fallback 回原 plan 文字,**不中斷 escalation 主流程** +- Pre-fetch 失敗(timeout / 0 threats / 全部缺金額)→ **不送 Telegram、不寫 pending human_review**,只記錄 suppressed escalation telemetry 與 cooldown,避免把無實證 plan 當成可審核告警 - 「全部行皆缺金額」也視同無料 fallback,避免「乾巴巴兩行 MOMO/PChome 比價」比 plan 文字更空泛 +### 規則 1.1 — 非價格類低信心 escalation 必須有可審核內容 + +2026-05-19 補充:`resource_optimization` 不是 SKU/價格事件,不能套用 Hermes/SKU fallback 模板。當 EA 對 `resource_optimization` 只產出低信心決策且沒有具體可審核行動時,系統只記錄 telemetry 與 cooldown,不發 Telegram,也不建立 `ai_insights.status='pending'` 的人工審核。這避免「資源調配優化」告警顯示「Hermes 即時威脅清單不可用」這類錯誤診斷。 + ### 規則 2 — NemoTron 告警必填金額影響量化 新增模組級 helper `_compute_business_impact(threat) -> {revenue_loss_7d, recommended_price}`: @@ -96,6 +100,7 @@ ### 正面 - EA 升級審核 Telegram 內容從元流程描述變為「具體 SKU + 價格 + 金額流失 + 建議調價」,HITL 真正可決策 +- 無實證的價格類與資源調配低信心 escalation 不再打擾人工,避免 Telegram 出現不可批准、不可駁回、不可操作的空告警 - NemoTron 既有告警再升級,每筆都帶可批准/駁回的金額判斷依據 - `momo:eig:` 按鈕首次有對應 handler,HITL 流程閉環完整 - pre-fetch 改用 5s 短超時 + fallback,最壞情況退回原 plan 文字,不破壞既有行為 @@ -103,7 +108,7 @@ ### 負面 / 風險 - 每次價格類 escalation 多花 ≤ 5s(Hermes 熱駐留實測 < 10s 但有 timeout),整體告警延遲略增 -- Hermes 在 5s 內若沒回應,告警內容降級回 plan 文字(仍維持原行為,無新增風險) +- Hermes 在 5s 內若沒回應,價格類 escalation 會被壓制並記錄 telemetry;若後續需要追查,需從 `ai_calls.meta.suppressed_escalation` 與 scheduler log 觀察 - `gap_pct ≤ 0` 案例的銷量下滑(非價格因素)將完全不顯示流失金額——若統帥需追蹤「非價格流失」需另開告警類型(待後續 ADR) ### 監控指標 @@ -118,6 +123,7 @@ - [x] `services/nemoton_dispatcher_service.py` `_compute_business_impact` helper + 三條 dispatch 路徑注入 - [x] `services/elephant_alpha_autonomous_engine.py` `_fetch_hermes_threats_summary` + 5s timeout + fallback - [x] `services/telegram_bot_service.py` `_handle_event_ignore_callback` + HTML escape + 空 id 拒絕 +- [x] 2026-05-19:無實證價格類 / `resource_optimization` 低信心 escalation 改為 suppressed telemetry,不再送空泛 Telegram - [x] Critic 審查通過(Critical-1 / High-1 / High-2 / Medium-2 / Medium-3 全修) - [x] Smoke test:`_compute_business_impact` 對 gap≤0 / gap=0 / 銷量回升 / bogus type 四案例驗證 - [x] `docs/adr/README.md` 索引加 ADR-021 diff --git a/services/elephant_alpha_autonomous_engine.py b/services/elephant_alpha_autonomous_engine.py index 880752b..ff74e38 100644 --- a/services/elephant_alpha_autonomous_engine.py +++ b/services/elephant_alpha_autonomous_engine.py @@ -123,6 +123,11 @@ _PRICE_RELATED_TRIGGERS = frozenset({ "threat_escalation", }) +# 這些低信心觸發若沒有具體實證,不應升級打擾人工。 +_NO_CONCRETE_ESCALATION_SUPPRESSED_TRIGGERS = frozenset({ + "resource_optimization", +}) + def _zh_trigger(trigger_type: str) -> str: return _TRIGGER_ZH.get(trigger_type, trigger_type) @@ -415,8 +420,16 @@ class ElephantAlphaAutonomousEngine: session.close() async def _check_resource_optimization_trigger(self, trigger: AutonomousTrigger) -> bool: - return (self._get_action_queue_size() > 10 - or self._get_system_load_percentage() > 80) + queue_size = self._get_action_queue_size() + system_load_pct = self._get_system_load_percentage() + trigger.conditions = dict(trigger.conditions or {}) + trigger.conditions["_resource_metrics"] = { + "action_queue_size": queue_size, + "system_load_pct": system_load_pct, + "queue_threshold": 10, + "load_threshold_pct": 80, + } + return queue_size > 10 or system_load_pct > 80 async def _check_code_exception_trigger(self, trigger: AutonomousTrigger) -> bool: containers = trigger.conditions.get("scan_containers", ["momo-pro-system", "momo-scheduler"]) @@ -885,8 +898,73 @@ class ElephantAlphaAutonomousEngine: except Exception as e: self._log.error("Telegram audit failed (non-blocking): %s", e) + @staticmethod + def _get_prefetched_concrete_actions(trigger: AutonomousTrigger) -> Optional[List[str]]: + actions = (trigger.conditions or {}).get("_prefetched_hermes_threats") + if not isinstance(actions, list): + return None + cleaned = [str(action).strip() for action in actions if str(action).strip()] + return cleaned[:5] or None + + @staticmethod + def _should_suppress_no_concrete_escalation(trigger: AutonomousTrigger) -> bool: + return ( + trigger.trigger_type in _PRICE_RELATED_TRIGGERS + or trigger.trigger_type in _NO_CONCRETE_ESCALATION_SUPPRESSED_TRIGGERS + ) + + def _record_suppressed_escalation( + self, + decision: StrategicDecision, + trigger: AutonomousTrigger, + reason: str, + ) -> None: + self._log.warning( + "EA escalation suppressed: trigger=%s reason=%s confidence=%.2f conditions=%s", + trigger.trigger_type, + reason, + decision.confidence, + trigger.conditions, + ) + try: + from services.ai_call_logger import log_ai_call + + with log_ai_call( + caller="ea_engine", + provider="gcp_ollama", + model="hermes3:latest", + meta={ + "suppressed_escalation": True, + "trigger": trigger.trigger_type, + "reason": reason, + "confidence": decision.confidence, + "conditions": trigger.conditions, + }, + ) as ctx: + ctx.set_tokens(input=0, output=0) + ctx.status = "cache_only" + except Exception: + self._log.warning( + "EA suppressed escalation telemetry failed; trigger=%s", + trigger.trigger_type, + exc_info=True, + ) + async def _escalate_to_human(self, decision: StrategicDecision, trigger: AutonomousTrigger) -> None: self._log.warning("Escalating to human: %s", trigger.trigger_type) + concrete_actions = self._get_prefetched_concrete_actions(trigger) + if not concrete_actions and trigger.trigger_type in _PRICE_RELATED_TRIGGERS: + try: + concrete_actions = await self._fetch_hermes_threats_summary(top_n=5) + except Exception as e: + self._log.warning("Pre-fetch threats raised (non-blocking): %s", e) + concrete_actions = None + + if not concrete_actions and self._should_suppress_no_concrete_escalation(trigger): + self._store_escalation(trigger.trigger_type) + self._record_suppressed_escalation(decision, trigger, "no_concrete_evidence") + return + session = get_session() try: row = session.execute( @@ -934,17 +1012,6 @@ class ElephantAlphaAutonomousEngine: if not dedup_ts or (datetime.now().timestamp() - dedup_ts) / 60 >= cooldown_min: self._store_escalation(trigger.trigger_type) - # A' 軌:價格類觸發前 pre-fetch Hermes 具體威脅清單, - # 取代「步驟 1:[OpenClaw] 生成策略」這類元流程文字。 - # — Claude Opus 4.7 (2026-05-02) - concrete_actions: Optional[List[str]] = None - if trigger.trigger_type in _PRICE_RELATED_TRIGGERS: - try: - concrete_actions = await self._fetch_hermes_threats_summary(top_n=5) - except Exception as e: - self._log.warning("Pre-fetch threats raised (non-blocking): %s", e) - concrete_actions = None - # ─── Operation Ollama-First v5.0 修補:消除空泛幻覺訊息 ─── # 統帥反饋(2026-05-03):fallback 路徑帶 OpenClaw Gemini plan 文字 + # decision.reasoning 全是「312 SKU / 23% / 14 項任務」幻覺數字,無 DB 鉤住, @@ -964,27 +1031,23 @@ class ElephantAlphaAutonomousEngine: f"參與模組:{', '.join(_AGENT_LABEL.get(a.lower(), a) for a in decision.agents_required)}" ) else: - # 無實證數據路徑:極簡訊息,明確標註無數據 + # 非價格類且允許升級的低信心事件,不能套 Hermes/SKU 模板。 self._log.warning( "EA escalation 落入 no-concrete-data fallback (trigger=%s);" - "送極簡訊息避免 LLM 幻覺數字誤導統帥", + "送非價格類診斷訊息,避免 LLM 幻覺數字誤導統帥", trigger.trigger_type ) ai_actions_payload = [ - "⚠️ Hermes 即時威脅清單不可用(5s timeout 或無 SKU 命中)", - "📋 建議:手動下 SQL 查詢過去 24h competitor_price_history 確認狀況", - "🔧 或:SSH 188 跑 docker exec momo-pro-system python -c " - "'from services.hermes_analyst_service import HermesAnalystService;" - " print(HermesAnalystService().run().threats[:5])'", + f"檢查觸發條件:{json.dumps(trigger.conditions, ensure_ascii=False)[:300]}", + "不執行自動動作;請先在觀測台確認對應資料來源與最近錯誤紀錄。", ] ai_summary_text = ( - f"⚠️ 本訊息為**無實證**告警:Hermes pre-fetch 失敗," - f"以下原始決策內容含 LLM 自由發揮數字(非 DB 數據),請審慎參考。" + f"低信心且缺少可格式化的具體行動;已隱藏 LLM plan 文字,避免把推測當成事實。" ) ai_cause_text = ( f"觸發類型:{_zh_trigger(trigger.trigger_type)} | " f"信心度:{decision.confidence:.2f} | " - f"⚠️ 無 Hermes SKU 數據(不顯示 LLM 幻覺 plan 文字)" + f"缺少可直接審核的實證資料" ) try: diff --git a/tests/test_elephant_alpha_engine.py b/tests/test_elephant_alpha_engine.py index 1736251..cba70bc 100644 --- a/tests/test_elephant_alpha_engine.py +++ b/tests/test_elephant_alpha_engine.py @@ -114,3 +114,48 @@ def test_execute_autonomous_decision_logs_short_circuit_telemetry_failure(monkey asyncio.run(engine._execute_autonomous_decision(trigger)) assert "EA short-circuit telemetry failed" in caplog.text + + +def test_escalate_resource_optimization_without_evidence_is_suppressed(monkeypatch): + import services.elephant_alpha_autonomous_engine as engine_module + from services.elephant_alpha_autonomous_engine import ( + AutonomousTrigger, + ElephantAlphaAutonomousEngine, + ) + from services.elephant_alpha_orchestrator import StrategicDecision + + engine = ElephantAlphaAutonomousEngine() + suppressed = [] + cooldown = [] + + def _raise_if_db_opened(): + raise AssertionError("no-concrete resource escalation should not write human_review") + + monkeypatch.setattr(engine_module, "get_session", _raise_if_db_opened) + monkeypatch.setattr(engine, "_store_escalation", lambda trigger_type: cooldown.append(trigger_type)) + monkeypatch.setattr( + engine, + "_record_suppressed_escalation", + lambda decision, trigger, reason: suppressed.append((trigger.trigger_type, reason)), + ) + + decision = StrategicDecision( + priority="medium", + agents_required=["openclaw"], + reasoning="資源調配建議信心不足", + expected_outcome="待人工確認", + confidence=0.60, + execution_plan=[], + resource_requirements={}, + ) + trigger = AutonomousTrigger( + trigger_type="resource_optimization", + conditions={"_resource_metrics": {"action_queue_size": 14, "system_load_pct": 52.0}}, + threshold=0.6, + enabled=True, + ) + + asyncio.run(engine._escalate_to_human(decision, trigger)) + + assert cooldown == ["resource_optimization"] + assert suppressed == [("resource_optimization", "no_concrete_evidence")]