diff --git a/apps/api/src/services/telegram_gateway.py b/apps/api/src/services/telegram_gateway.py index cc5d0e25..32500e15 100644 --- a/apps/api/src/services/telegram_gateway.py +++ b/apps/api/src/services/telegram_gateway.py @@ -23,6 +23,7 @@ SOUL.md 鐵律 (4.1 Telegram 訊息壓縮原則): """ import asyncio +import hashlib import html import os import re @@ -50,6 +51,8 @@ SNOOZE_KEY_PREFIX = "telegram_snooze:" # {approval_id} -> 稍後提醒 SILENCE_KEY_PREFIX = "telegram_silence:" # {resource_name} -> 靜默 SNOOZE_TTL_SECONDS = 30 * 60 # 30 分鐘 SILENCE_TTL_SECONDS = 60 * 60 # 1 小時 +INCIDENT_UPDATE_DEDUP_PREFIX = "awoooi:tg_update_dedup:" # {incident_id}:{status_hash} +INCIDENT_UPDATE_DEDUP_TTL_SECONDS = 5 * 60 # 5 分鐘內相同狀態不重複洗版 # 2026-04-01 Claude Code: Long Polling 分散式 Leader Election # 防止多 Pod 同時 getUpdates → 409 Conflict 互搶問題 @@ -261,6 +264,28 @@ class TelegramMessage: return "analysis_degraded" return "safe_gate_pending" + def _automation_status_summary(self) -> str: + """Telegram 首屏的人類可讀處置狀態。 + + 這行是值班判斷入口:先讓人知道這張卡是「AI 已有建議待審批」、 + 「AI 無法修復需人工」或「純觀察」,細節才放到後面的鏈路區塊。 + """ + mode = self._automation_mode() + action = (self.suggested_action or "").upper() + text = f"{self.root_cause} {self.suggested_action}".lower() + + if mode == "llm_timeout_manual_gate": + return "🔴 AI 分析超時,需人工排查" + if action in {"NO_ACTION", "待分析", ""} or "invalid_target" in text: + return "🟠 AI 無可安全執行動作,需人工判斷" + if self.confidence <= 0: + return "🟡 規則建議待審批" + if mode == "analysis_degraded": + return "🟠 AI 降級分析,需人工判斷" + if mode == "ai_proposal_ready": + return "🟡 AI 已提出修復建議,等待人工批准" + return "🟡 安全閘門待審批" + def _format_automation_block(self) -> str: """Visible AI automation chain for every ACTION REQUIRED card. 2026-05-04 ogt: 加入 Token 用量 + 具體 Ollama 伺服器顯示 @@ -344,6 +369,7 @@ class TelegramMessage: safe_root_cause = html.escape(self.root_cause) safe_action = html.escape(self.suggested_action) safe_downtime = html.escape(self.estimated_downtime) + safe_automation_summary = html.escape(self._automation_status_summary()) # 2026-03-29 ogt: AI Token/Cost 顯示 ai_cost_display = "" @@ -441,6 +467,7 @@ class TelegramMessage: f"📋 {html.escape(incident_id)}\n" f"🎯 資源:{safe_resource}\n" f"{category_line}" + f"🧭 處置狀態:{safe_automation_summary}\n" f"\n" f"{automation_block}" f"\n" @@ -4462,8 +4489,6 @@ class TelegramGateway: 2026-04-09 Claude Sonnet 4.6 Asia/Taipei (統帥要求: 狀態變更在原訊息延續) """ - from src.core.redis_client import get_redis - redis = get_redis() redis_key = f"tg_msg:{incident_id}" stored = await redis.get(redis_key) @@ -4481,6 +4506,31 @@ class TelegramGateway: logger.warning("append_incident_update_invalid_message_id", stored=stored) return False + # Telegram 只適合放決策摘要;同一 incident 的相同狀態 5 分鐘內不重複回覆, + # 詳細執行紀錄應進 timeline / AwoooP Run Monitor,避免群組被 auto-failure 洗版。 + status_hash = hashlib.sha1(status_line.encode("utf-8")).hexdigest()[:16] + dedup_key = f"{INCIDENT_UPDATE_DEDUP_PREFIX}{incident_id}:{status_hash}" + try: + was_set = await redis.set( + dedup_key, + "1", + ex=INCIDENT_UPDATE_DEDUP_TTL_SECONDS, + nx=True, + ) + if not was_set: + logger.info( + "append_incident_update_dedup_suppressed", + incident_id=incident_id, + dedup_key=dedup_key, + ) + return True + except Exception as exc: + logger.warning( + "append_incident_update_dedup_failed", + incident_id=incident_id, + error=str(exc), + ) + # Step 1: 取得原始訊息文字(Telegram Bot API 不提供讀取原文,只能在 editMessageText 裡重建) # 策略: 只追加 status_line,不讀取原文(Telegram edit 要傳完整新文字) # 所以先用 editMessageReplyMarkup 換按鈕,再 sendMessage 同 chat 以 reply 方式追加狀態 diff --git a/apps/api/tests/test_telegram_message_templates.py b/apps/api/tests/test_telegram_message_templates.py index 0c10404c..26cabf6f 100644 --- a/apps/api/tests/test_telegram_message_templates.py +++ b/apps/api/tests/test_telegram_message_templates.py @@ -7,6 +7,7 @@ test_telegram_message_templates.py - Telegram 訊息模板測試 import pytest +import src.services.telegram_gateway as telegram_gateway_module from src.services.telegram_gateway import ( DailySummaryMessage, DeploySuccessMessage, @@ -15,6 +16,7 @@ from src.services.telegram_gateway import ( ResourceWarnMessage, SentryErrorMessage, TelegramMessage, + TelegramGateway, ) @@ -38,12 +40,50 @@ class TestTelegramMessageFormat: assert "🚨" in result assert "嚴重" in result assert "test-pod-123" in result + assert "處置狀態" in result + assert "規則建議待審批" in result assert "AI 自動化鏈路" in result assert "OpenClaw" in result assert "NemoTron" in result assert "ElephantAlpha" in result assert len(result) <= 4096 # Telegram HTML message limit + def test_telegram_message_ai_proposal_marks_approval_wait(self): + """有 AI 信心分數的修復建議必須標示為 AI 待審批。""" + msg = TelegramMessage( + status_emoji="⚠️", + risk_level="MEDIUM", + resource_name="awoooi-api", + root_cause="CPU sustained high", + suggested_action="kubectl rollout restart deployment/awoooi-api", + estimated_downtime="~30s", + approval_id="INC-20260506-0000", + confidence=0.82, + ai_provider="ollama_gcp_a", + ) + + result = msg.format() + + assert "處置狀態" in result + assert "AI 已提出修復建議,等待人工批准" in result + + def test_telegram_message_no_action_marks_manual_judgement(self): + """NO_ACTION 卡片必須一眼看得出需要人工判斷。""" + msg = TelegramMessage( + status_emoji="ℹ️", + risk_level="LOW", + resource_name="node-exporter-110", + root_cause="規則命中但沒有安全可執行動作", + suggested_action="NO_ACTION", + estimated_downtime="unknown", + approval_id="INC-20260506-0001", + ) + + result = msg.format() + + assert "處置狀態" in result + assert "AI 無可安全執行動作,需人工判斷" in result + def test_telegram_message_with_token_cost(self): """測試含 Token/Cost 的訊息""" msg = TelegramMessage( @@ -63,6 +103,46 @@ class TestTelegramMessageFormat: assert "💰 Tokens: 1,500 / $0.0015" in result +@pytest.mark.asyncio +async def test_append_incident_update_deduplicates_same_status(monkeypatch): + """同一 Incident 的相同狀態更新 5 分鐘內不可重複洗版。""" + + class FakeRedis: + def __init__(self): + self.set_calls = 0 + + async def get(self, key): + assert key == "tg_msg:INC-DEDUP" + return "12345" + + async def set(self, *args, **kwargs): + self.set_calls += 1 + assert kwargs["nx"] is True + assert kwargs["ex"] > 0 + return self.set_calls == 1 + + fake_redis = FakeRedis() + sent_requests = [] + gateway = TelegramGateway() + + async def fake_send_request(method, payload): + sent_requests.append((method, payload)) + return {"ok": True} + + monkeypatch.setattr(telegram_gateway_module, "get_redis", lambda: fake_redis) + monkeypatch.setattr(gateway, "_send_request", fake_send_request) + + status_line = "🤖❌ [AUTO] AI 自動修復失敗,已升級人工介入" + + assert await gateway.append_incident_update("INC-DEDUP", status_line) is True + assert await gateway.append_incident_update("INC-DEDUP", status_line) is True + + assert [method for method, _ in sent_requests] == [ + "editMessageReplyMarkup", + "sendMessage", + ] + + class TestSentryErrorMessage: """測試 Sentry 錯誤訊息""" diff --git a/apps/web/messages/en.json b/apps/web/messages/en.json index efc8fa39..5f27ab30 100644 --- a/apps/web/messages/en.json +++ b/apps/web/messages/en.json @@ -1480,5 +1480,50 @@ "error": "Failed to load queue", "retry": "Retry" } + }, + "awooop": { + "home": { + "eyebrow": "AI Automation Control Plane", + "title": "AwoooP Governance Overview", + "subtitle": "Unifies tenants, contracts, runs, approvals, and channel state into one operator surface so the AI flywheel and governance plane do not drift apart.", + "refresh": "Refresh", + "snapshotStatus": "Snapshot Status", + "lastUpdated": "Last Updated", + "migrationMode": "Migration Mode", + "migrationValue": "mirror / shadow", + "ready": "In Sync", + "loading": "Loading", + "degraded": "Degraded", + "metrics": { + "tenants": "Tenants", + "tenantsDetail": "{active} active, {shadow} in shadow", + "runs": "Operator Runs", + "runsDetail": "Run state is the single view into async work", + "approvals": "Pending Approvals", + "approvalsDetail": "Every high-risk action must stop at the human gate", + "contracts": "Contracts", + "contractsDetail": "Project / Agent / Policy contract publish state" + }, + "lanes": { + "title": "Flywheel Lanes", + "live": "Live", + "mirror": "Mirror", + "providerName": "Provider Order", + "providerDetail": "GCP-A Ollama -> GCP-B Ollama -> 111 Ollama -> OpenClaw/Nemo -> Gemini", + "mcpName": "MCP Gateway", + "mcpDetail": "MCP Gateway stays in mirror / wrap mode before audit and redaction are proven as the only execution gate", + "channelName": "Channel Hub", + "channelDetail": "Telegram / LINE / Slack enter Channel Event first, then message ownership moves gradually", + "approvalName": "Approval Plane", + "approvalDetail": "Run state and Approval plane share one approval meaning" + }, + "next": { + "title": "Next Actions", + "item1": "Review run monitor and provider fallback", + "item2": "Handle pending high-risk approvals", + "item3": "Review contract lifecycle", + "item4": "Open the AwoooP work map" + } + } } } diff --git a/apps/web/messages/zh-TW.json b/apps/web/messages/zh-TW.json index 4e27c776..aad2a159 100644 --- a/apps/web/messages/zh-TW.json +++ b/apps/web/messages/zh-TW.json @@ -1481,5 +1481,50 @@ "error": "無法載入待辦佇列", "retry": "重試" } + }, + "awooop": { + "home": { + "eyebrow": "AI 自動化飛輪控制面", + "title": "AwoooP 治理總覽", + "subtitle": "把租戶、合約、Run、審批與通道狀態收斂到同一個操作面,避免 AI 自動化飛輪和治理面各自長出一套邏輯。", + "refresh": "重新整理", + "snapshotStatus": "快照狀態", + "lastUpdated": "最後更新", + "migrationMode": "遷移模式", + "migrationValue": "mirror / shadow", + "ready": "同步中", + "loading": "讀取中", + "degraded": "降級", + "metrics": { + "tenants": "租戶", + "tenantsDetail": "{active} 個啟用,{shadow} 個 shadow", + "runs": "Operator Runs", + "runsDetail": "Run state 是非同步任務的唯一觀測入口", + "approvals": "待審批", + "approvalsDetail": "所有高風險動作都必須停在人工閘門", + "contracts": "合約", + "contractsDetail": "Project / Agent / Policy contract 發布狀態" + }, + "lanes": { + "title": "飛輪鏈路", + "live": "已接線", + "mirror": "Mirror", + "providerName": "Provider 順序", + "providerDetail": "GCP-A Ollama -> GCP-B Ollama -> 111 Ollama -> OpenClaw/Nemo -> Gemini", + "mcpName": "MCP Gateway", + "mcpDetail": "MCP Gateway 先 mirror / wrap,確認 audit 與 redaction 後才切成唯一閘門", + "channelName": "Channel Hub", + "channelDetail": "Telegram / LINE / Slack 先進 Channel Event,再逐步切換發送責任", + "approvalName": "Approval Plane", + "approvalDetail": "Run state 與 Approval plane 共享同一條審批語義" + }, + "next": { + "title": "下一步操作", + "item1": "查看 Run 監控與 provider fallback", + "item2": "處理等待審批的高風險操作", + "item3": "審查 Contract lifecycle", + "item4": "查看 AwoooP 工作鏈路地圖" + } + } } } diff --git a/apps/web/src/app/[locale]/awooop/page.tsx b/apps/web/src/app/[locale]/awooop/page.tsx index 31321056..51994740 100644 --- a/apps/web/src/app/[locale]/awooop/page.tsx +++ b/apps/web/src/app/[locale]/awooop/page.tsx @@ -1,9 +1,360 @@ // ============================================================================= -// WOOO AIOps - AwoooP Console 入口頁 +// WOOO AIOps - AwoooP Operator Console 首頁 // ============================================================================= +// 將 AwoooP 定位為 AI 自動化飛輪的治理面、稽核面與人工操作面。 -import AwoooPWorkItemsPage from "./work-items/page"; +"use client"; + +import { useCallback, useEffect, useMemo, useState } from "react"; +import { useLocale, useTranslations } from "next-intl"; +import { + Activity, + ArrowRight, + BrainCircuit, + CheckCircle2, + FileText, + GitBranch, + RefreshCw, + ShieldCheck, + Waypoints, +} from "lucide-react"; +import { Link } from "@/i18n/routing"; +import { cn } from "@/lib/utils"; + +type Tenant = { + project_id: string; + display_name?: string; + migration_mode?: string; + is_active?: boolean; +}; + +type PlatformResponse = { + tenants?: Tenant[]; + total?: number; + runs?: unknown[]; + contracts?: unknown[]; + items?: unknown[]; +}; + +type Snapshot = { + tenants: number; + activeTenants: number; + shadowTenants: number; + runs: number; + approvals: number; + contracts: number; +}; + +type SnapshotStatus = "loading" | "ready" | "degraded"; + +const API_BASE = process.env.NEXT_PUBLIC_API_URL ?? ""; + +const emptySnapshot: Snapshot = { + tenants: 0, + activeTenants: 0, + shadowTenants: 0, + runs: 0, + approvals: 0, + contracts: 0, +}; + +function numberValue(value: unknown): number { + return typeof value === "number" && Number.isFinite(value) ? value : 0; +} + +function countRows(data: PlatformResponse, keys: Array): number { + if (typeof data.total === "number") return data.total; + for (const key of keys) { + const rows = data[key]; + if (Array.isArray(rows)) return rows.length; + } + return 0; +} + +function MetricCell({ + label, + value, + detail, + icon: Icon, + tone = "neutral", +}: { + label: string; + value: string | number; + detail: string; + icon: typeof Activity; + tone?: "neutral" | "good" | "warn"; +}) { + return ( +
+
+
+

{label}

+

+ {value} +

+
+ + +
+

{detail}

+
+ ); +} + +function LaneRow({ + name, + status, + detail, + tone, +}: { + name: string; + status: string; + detail: string; + tone: "good" | "warn" | "neutral"; +}) { + return ( +
+
{name}
+
+ + {status} + +
+
{detail}
+
+ ); +} export default function AwoooPPage() { - return ; + const t = useTranslations("awooop.home"); + const locale = useLocale(); + const [snapshot, setSnapshot] = useState(emptySnapshot); + const [status, setStatus] = useState("loading"); + const [lastUpdated, setLastUpdated] = useState(null); + + const fetchSnapshot = useCallback(async () => { + setStatus("loading"); + try { + const [tenantRes, runRes, approvalRes, contractRes] = await Promise.all([ + fetch(`${API_BASE}/api/v1/platform/tenants`), + fetch(`${API_BASE}/api/v1/platform/runs/list?per_page=1`), + fetch(`${API_BASE}/api/v1/platform/approvals`), + fetch(`${API_BASE}/api/v1/platform/contracts?per_page=1`), + ]); + + if (![tenantRes, runRes, approvalRes, contractRes].every((res) => res.ok)) { + throw new Error("platform snapshot fetch failed"); + } + + const [tenantData, runData, approvalData, contractData] = await Promise.all([ + tenantRes.json() as Promise, + runRes.json() as Promise, + approvalRes.json() as Promise, + contractRes.json() as Promise, + ]); + const tenants = Array.isArray(tenantData.tenants) ? tenantData.tenants : []; + + setSnapshot({ + tenants: countRows(tenantData, ["tenants", "items"]), + activeTenants: tenants.filter((tenant) => tenant.is_active !== false).length, + shadowTenants: tenants.filter((tenant) => tenant.migration_mode === "shadow").length, + runs: countRows(runData, ["runs", "items"]), + approvals: countRows(approvalData, ["items"]), + contracts: countRows(contractData, ["contracts", "items"]), + }); + setLastUpdated(new Date()); + setStatus("ready"); + } catch { + setStatus("degraded"); + setLastUpdated(new Date()); + } + }, []); + + useEffect(() => { + fetchSnapshot(); + }, [fetchSnapshot]); + + const formattedUpdated = useMemo(() => { + if (!lastUpdated) return "--"; + return lastUpdated.toLocaleTimeString(locale === "zh-TW" ? "zh-TW" : "en-US", { + hour: "2-digit", + minute: "2-digit", + second: "2-digit", + }); + }, [lastUpdated, locale]); + + const healthTone = status === "ready" ? "good" : status === "loading" ? "neutral" : "warn"; + + return ( +
+
+
+
+
+
+
+
+

+ {t("title")} +

+

+ {t("subtitle")} +

+
+ +
+
+ +
+
+
+ {t("snapshotStatus")} + + {status === "ready" ? t("ready") : status === "loading" ? t("loading") : t("degraded")} + +
+
+ {t("lastUpdated")} + {formattedUpdated} +
+
+ {t("migrationMode")} + {t("migrationValue")} +
+
+
+
+
+ +
+ + 0 ? "good" : "neutral"} + /> + 0 ? "warn" : "good"} + /> + 0 ? "good" : "warn"} + /> +
+ +
+
+
+
+
+
+ + + + +
+ +
+
+
+
+
+
+ {[ + [t("next.item1"), "/awooop/runs" as const], + [t("next.item2"), "/awooop/approvals" as const], + [t("next.item3"), "/awooop/contracts" as const], + [t("next.item4"), "/awooop/work-items" as const], + ].map(([label, href]) => ( + + {label} +
+
+
+
+ ); } diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 9bf713be..51bd7742 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,23 @@ +## 2026-05-06 | Telegram 事故通知語義收斂與 AwoooP 首頁總覽 + +**背景**:SRE 戰情室截圖顯示 ACTION REQUIRED、AI 自動修復失敗、Escalation、Code Review、Config Drift 等訊息混在同一條流中;值班者很難快速分辨哪些是 AI 已修復、哪些是 AI 無法修復需要人工、哪些只是報表或治理通知。 + +**本次修補**: +- `TelegramMessage` 主卡新增「處置狀態」,在第一屏明確標示 `AI 已提出修復建議,等待人工批准`、`AI 無可安全執行動作,需人工判斷`、`AI 分析超時,需人工排查` 或 `規則建議待審批`。 +- `append_incident_update()` 對同一 `incident_id` 的相同狀態回覆做 5 分鐘 Redis 去重,避免同樣的 `[AUTO] AI 自動修復失敗` 連續洗版。 +- 新增 `docs/awooop/TELEGRAM-INCIDENT-NOTIFICATION-MODEL.md`,定義 Telegram / AwoooP Run Monitor / Approval Queue / Incident Timeline / MCP Audit 的分工。 +- `/zh-TW/awooop` 首頁改為治理總覽,直接顯示租戶、Run、審批、合約與飛輪鏈路狀態;不再只是轉到 work-items 頁。 +- 新增 AwoooP 首頁 `zh-TW` / `en` i18n 字串。 + +**驗證**: +- `python -m py_compile apps/api/src/services/telegram_gateway.py apps/api/tests/test_telegram_message_templates.py` +- `pytest tests/test_telegram_message_templates.py tests/test_telegram_ai_automation_block.py -q` → 19 passed。 +- `pnpm --dir apps/web typecheck` 通過。 +- `NEXT_PUBLIC_API_URL=https://awoooi.wooo.work pnpm --dir apps/web build` 通過。 + +**注意**: +- `ruff check src/services/telegram_gateway.py ...` 仍會掃到 `telegram_gateway.py` 既有 import/order、bare except、單行 if 等歷史債;本輪沒有在 6000+ 行 gateway 巨檔做無關機械清理,避免混入額外行為風險。 + ## 2026-05-06 | AwoooP Run 監控頁 422 修正 **背景**:Playwright 驗證 `/zh-TW/awooop` 時未再看到 client-side exception,但 `/zh-TW/awooop/runs` 會顯示「無法載入 Run 資料 HTTP 422」。後端 log 顯示 `GET /api/v1/platform/runs/list?page=1&per_page=50` 被回 422。 diff --git a/docs/awooop/TELEGRAM-INCIDENT-NOTIFICATION-MODEL.md b/docs/awooop/TELEGRAM-INCIDENT-NOTIFICATION-MODEL.md new file mode 100644 index 00000000..52b4e16e --- /dev/null +++ b/docs/awooop/TELEGRAM-INCIDENT-NOTIFICATION-MODEL.md @@ -0,0 +1,47 @@ +# Telegram 事故通知模型 + +> 目的:讓 SRE 戰情室一眼分辨「AI 已修復」、「AI 可建議但需批准」、「AI 無法安全修復需人工」與「僅通知」,避免告警、執行 log、Code Review、Drift 與審批結果互相洗版。 + +## 核心判斷 + +Telegram 不應是完整執行日誌,也不應承載所有 AI 推理細節。Telegram 的職責是把需要人類注意力的決策摘要送到 SRE 戰情室;完整時間線、工具輸出、重試原因、provider fallback 與 audit 交給 AwoooP Run Monitor / Incident Timeline。 + +## 四種通知狀態 + +| 狀態 | 意義 | Telegram 行為 | 操作者動作 | +| --- | --- | --- | --- | +| AI 已自動修復 | 自動化已完成且驗證通過 | 更新原 incident 卡或回覆一次結果 | 檢查即可,不需批准 | +| AI 建議待審批 | AI / 規則已提出可執行建議,但被 Trust / Risk gate 擋下 | 發一張 ACTION REQUIRED 主卡 | 批准、拒絕、靜默或看詳情 | +| AI 無法安全修復 | NO_ACTION、INVALID_TARGET、LLM timeout、MCP 失敗或缺少安全動作 | 發人工接手摘要,不重複刷同一狀態 | 人工排查,或要求重診 | +| 僅通知 | 心跳、報表、Code Review 完成、低風險治理資訊 | 彙總卡或摘要頻道 | 通常不需即時動作 | + +## 專業化訊息規則 + +1. 同一個 `incident_id` 只應有一張主卡。後續狀態使用原卡回覆、編輯按鈕或 AwoooP timeline,不再每一步都新發卡。 +2. 主卡第一屏必須顯示「處置狀態」,先回答:AI 是否能修、是否已修、是否需要人工。 +3. 同一個 `incident_id` 的相同狀態更新,短時間內要去重。詳細重試與錯誤放到 timeline,不洗 Telegram。 +4. P0 / P1 escalation 可以另發升級卡,但內容必須是「目前影響、已嘗試、卡住原因、需要誰做什麼」,不可重貼所有底層 log。 +5. Code Review、Config Drift、報表、心跳不應和 incident 執行回覆混在同一種語義;它們可以在同一 SRE 群組,但必須以摘要卡與固定前綴區分。 + +## 與 AwoooP 的分工 + +| 介面 | 承載內容 | +| --- | --- | +| Telegram | 決策摘要、升級、人工批准入口 | +| AwoooP Run Monitor | 非同步 Run、provider fallback、tool call、retry、latency | +| Approval Queue | 所有等待批准的高風險動作 | +| Incident Timeline | 事件完整歷程、AI 嘗試、失敗原因、KM / Playbook 回寫 | +| MCP Audit | 工具執行、redaction、permission gate、credential 注入 | + +## 本輪落地 + +- `TelegramMessage` 主卡新增「處置狀態」。 +- `append_incident_update()` 對同一 incident 的相同狀態做 5 分鐘 Redis 去重。 +- 既有 `詳情 / 重診 / 歷史` 按鈕保留,讓 Telegram 保持輕量,細節回到控制台。 + +## 後續建議 + +1. 將 Telegram 群組升級為 Forum topics 或固定 topic lane:`P0/P1 事故`、`人工審批`、`治理/報表`、`CI/Code Review`。 +2. AwoooP Approval Queue 顯示與 Telegram 相同的「處置狀態」欄位,避免前後端語義分裂。 +3. 將 auto-repair failure 的完整 stdout/stderr 改寫入 Run Timeline,只在 Telegram 顯示最短摘要與詳情連結。 +4. 對 firing 告警做 fingerprint 聚合:同一 alertname + target + namespace 在窗口內只更新卡片,不新增卡片。