diff --git a/apps/api/src/services/telegram_gateway.py b/apps/api/src/services/telegram_gateway.py
index afe2a34c..6f41054f 100644
--- a/apps/api/src/services/telegram_gateway.py
+++ b/apps/api/src/services/telegram_gateway.py
@@ -393,6 +393,48 @@ def normalize_alert_notification_payload(text: str, parse_mode: str) -> tuple[st
     return text, parse_mode
 
 
+def normalize_telegram_send_message_payload(method: str, payload: dict) -> dict:
+    """Apply alert redaction and event-card formatting to a ``sendMessage`` payload.
+
+    Every ``sendMessage`` exit is expected to pass through this function so
+    that alert text is always run through ``normalize_alert_notification_payload``.
+
+    Args:
+        method: Telegram Bot API method name; anything other than
+            ``"sendMessage"`` is returned untouched.
+        payload: Request payload. It is never mutated.
+
+    Returns:
+        ``payload`` itself when nothing changes, otherwise a shallow copy with
+        ``text`` (cut to 4096 characters) and ``parse_mode`` replaced.
+    """
+    if method != "sendMessage":
+        return payload
+
+    text = payload.get("text")
+    if not isinstance(text, str):
+        return payload
+
+    # "HTML" is only the fallback handed to the alert normalizer. Compare the
+    # result against that requested mode, not the raw payload value: otherwise
+    # an ordinary message sent without parse_mode always looks "changed" and
+    # gets parse_mode="HTML" injected, so Telegram would parse plain text as HTML.
+    requested_parse_mode = str(payload.get("parse_mode") or "HTML")
+    safe_text, effective_parse_mode = normalize_alert_notification_payload(
+        text,
+        requested_parse_mode,
+    )
+    if safe_text == text and effective_parse_mode == requested_parse_mode:
+        return payload
+
+    normalized_payload = dict(payload)
+    # NOTE(review): a plain slice could cut through an HTML tag or entity in
+    # the generated card; confirm the card formatter stays well below the cap.
+    normalized_payload["text"] = safe_text[:4096]
+    normalized_payload["parse_mode"] = effective_parse_mode
+    return normalized_payload
+
+
 def _top_gateway_bucket(
     buckets: list[dict[str, object]],
     field: str,
@@ -4039,6 +4081,7 @@ class TelegramGateway:
         if not self._http_client:
             raise TelegramGatewayError("HTTP client not initialized")
 
+        payload = normalize_telegram_send_message_payload(method, payload)
         source_envelope_extra = payload.pop(_AWOOOP_SOURCE_ENVELOPE_EXTRA_KEY, None)
         await self._attach_incident_thread_reply(method, payload)
 
diff --git a/apps/api/tests/test_telegram_message_templates.py b/apps/api/tests/test_telegram_message_templates.py
index 4ec9eb43..fab9ed9c 100644
--- a/apps/api/tests/test_telegram_message_templates.py
+++ b/apps/api/tests/test_telegram_message_templates.py
@@ -20,6 +20,7 @@ from src.services.telegram_gateway import (
     TelegramMessage,
     WeeklyReportMessage,
     format_host_resource_alert_card,
+    normalize_telegram_send_message_payload,
 )
 
 
@@ -250,6 +251,38 @@ async def test_send_text_normalizes_host_resource_alert(monkeypatch) -> None:
     assert "/workspace/wooo" not in payload["text"]
 
 
+def test_send_request_payload_normalizer_blocks_direct_host_raw_dump() -> None:
+    """Direct _send_request("sendMessage") calls must also become event cards at the final exit."""
+    host_dump = (
+        "WARN h110-gitea 🔴 CPU 警告: used=80.4% load=19.10\n"
+        "WARN h110-gitea ⚠️ 容器內 root Node.js 進程:\n"
+        "root 311 185 0.9 29242688 606596 ? Sl 07:11 0:33 "
+        "node /workspace/wooo/stockplatform-v2/nodemodules/.bin/next build\n"
+        "root 830 0.0 0.0 1126624 57588 ? Rsl 07:14 0:00 "
+        '/opt/hostedtoolcache/node/22.22.3/x64/bin/node /workspace/wooo/2026FIFAWorldCup/platform/web/nodemodules/prisma/build/child '
+        '{"product":"prisma","version":"5.20.0","endpoint":"https://checkpoint.prisma.io"}'
+    )
+    request = dict(
+        chat_id="chat",
+        text=host_dump,
+        parse_mode="MarkdownV2",
+        disable_web_page_preview=True,
+    )
+
+    card = normalize_telegram_send_message_payload("sendMessage", request)
+
+    assert card["parse_mode"] == "HTML"
+    required = ("P1 主機資源壓力|h110-gitea", "ai_automation_alert_card_v1", "runtime_write_gate=0")
+    for marker in required:
+        assert marker in card["text"]
+    forbidden = (
+        "root 311", "checkpoint.prisma.io", "/workspace/wooo",
+        "/opt/hostedtoolcache", '"product":"prisma"',
+    )
+    for fragment in forbidden:
+        assert fragment not in card["text"]
+
+
 def test_weekly_report_marks_all_zero_as_low_trust_anomaly() -> None:
     report = WeeklyReportMessage(
         week_range="2026-W24",
diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md
index 88779374..7f5fcdbf 100644
--- a/docs/LOGBOOK.md
+++ b/docs/LOGBOOK.md
@@ -1,3 +1,21 @@
+## 2026-06-18|Telegram 主機資源 raw dump 最後出口補洞
+
+**背景**:統帥貼出 `15:11:26` 的 Telegram raw CPU / root Node.js / Prisma JSON dump,指出這類訊息既難讀又會把內部路徑、套件命令與外部檢查 endpoint 暴露到戰情室。前一段 formatter 已覆蓋 `send_alert_notification()` 與 `send_text()`,但應用內仍可能有人直接呼叫 `TelegramGateway._send_request("sendMessage", payload)`,因此需要把 `_send_request` 也升級成最後出口防線。
+
+**完成內容**:
+- `TelegramGateway._send_request()` 在送出 `sendMessage` 前新增 
`normalize_telegram_send_message_payload()`,所有應用內 direct gateway send 都會套用 host resource formatter。 +- direct `_send_request("sendMessage", ...)` 即使帶 `MarkdownV2`、raw `CPU 警告`、`容器內 root Node.js 進程`、`/workspace/...`、`/opt/hostedtoolcache/...`、`node_modules`、Prisma checkpoint URL 或 JSON payload,也會被改成 HTML 版 `ai_automation_alert_card_v1`。 +- `docs/awooop/TELEGRAM-INCIDENT-NOTIFICATION-MODEL.md` 與 `docs/runbooks/HOST-RUNAWAY-PROCESS-AIOPS-PLAYBOOK.md` 同步補上最後出口規範。 + +**驗證**: +- `DATABASE_URL='postgresql+asyncpg://test:test@127.0.0.1:5432/awoooi_test' pytest tests/test_telegram_message_templates.py -q`:`64 passed`。 +- `python -m py_compile src/services/telegram_gateway.py`:通過。 + +**邊界 / 仍待控管**: +- 本輪沒有送 Telegram、沒有改 Bot token、沒有改 chat routing、沒有碰 Nginx / Docker / firewall / K8s runtime。 +- `.gitea/workflows` 與可能存在於主機上的 cron / 外部 bot direct Bot API 路徑仍是旁路風險;需要納入下一段「通知出口配置控管」,不可把 API 端修補誤解成全域已完全收斂。 +- IwoooS 整體仍維持 `64%`,active runtime gate 仍 `0`;runtime write / kill process / restart / reload / active scan 仍全部 `false`。 + ## 2026-06-18|AI Agent 週報全 0 改為資料缺口與下一步 **背景**:統帥指出 Telegram 週報顯示告警、AI 提案、執行、成本、部署全部為 `0`,這不是可用報表,而是資料鏈路可能斷掉卻被包裝成健康。本段先修最危險的誤導:週報資料源失效或 Git 活動讀取失敗時,不得再把 `0` 當成正常事實;Telegram 報表必須直接顯示資料缺口與下一步。 diff --git a/docs/awooop/TELEGRAM-INCIDENT-NOTIFICATION-MODEL.md b/docs/awooop/TELEGRAM-INCIDENT-NOTIFICATION-MODEL.md index 72115ee9..ca812474 100644 --- a/docs/awooop/TELEGRAM-INCIDENT-NOTIFICATION-MODEL.md +++ b/docs/awooop/TELEGRAM-INCIDENT-NOTIFICATION-MODEL.md @@ -63,6 +63,7 @@ IwoooS / AwoooP 是 AI 自動化產品,Telegram 告警不是終點,而是自 Host / runner 資源告警的第一版落地: - `TelegramGateway.send_alert_notification()` 與 `send_text()` 會在最後出口自動套用 host resource formatter;即使呼叫端傳入 Markdown,也必須強制改成脫敏 HTML 卡片。 +- `TelegramGateway._send_request("sendMessage", ...)` 也必須套用同一個 formatter,避免 approval execution、legacy helper 或未來 direct gateway 呼叫繞過 `send_alert_notification()` / `send_text()`。 - `CPU 警告`、`容器內 root Node.js 進程`、含 `ps aux` 且指向 build / package install / Prisma generate 程序的文字,會被轉成 
`ai_automation_alert_card_v1`。 - raw process dump 會被壓成 `PID + CPU + 精簡命令`,不保留 `/workspace/...`、`/opt/hostedtoolcache/...`、完整 `node_modules` 路徑、外部檢查 URL、JSON payload 或整段 `ps aux`。 - build 壓力會分流到 `runner_build_resource_pressure`;Prisma / package install 壓力會分流到 `runner_prisma_generate_resource_pressure`;預設都是 `candidate_only / runtime_write_gate=0`。 diff --git a/docs/runbooks/HOST-RUNAWAY-PROCESS-AIOPS-PLAYBOOK.md b/docs/runbooks/HOST-RUNAWAY-PROCESS-AIOPS-PLAYBOOK.md index c606ca5a..d6ec71b6 100644 --- a/docs/runbooks/HOST-RUNAWAY-PROCESS-AIOPS-PLAYBOOK.md +++ b/docs/runbooks/HOST-RUNAWAY-PROCESS-AIOPS-PLAYBOOK.md @@ -67,6 +67,8 @@ Telegram / AI event packet contract: Host / runner raw dump 進入 Telegram 前必須先被 `TelegramGateway` 壓成 `P1/P2/P3 主機資源壓力` 卡片。第一屏只允許顯示 CPU、load、root process count、AI lane、candidate gate、Top evidence 與禁止事項;完整命令列、套件 JSON、外部檢查 endpoint、內部 workspace path 與 raw `ps aux` 必須留在內部 evidence / timeline,不得外送。 +應用內所有 `sendMessage` 都必須經過 `TelegramGateway._send_request()` 的最後出口正規化;如果是 Gitea workflow、Alertmanager receiver、主機 cron 或外部 bot 直接呼叫 Telegram Bot API,必須另外納入配置控管與 formatter 收斂,不能宣稱已由 API 端完全治理。 + --- ## 3. AI Triager 必做判讀