def _governance_operator_context(event_type: str, impact: dict[str, Any]) -> list[str]:
    """Return operator-facing guidance lines for a governance alert card.

    The governance loop stores machine-readable details in AwoooP. Telegram
    needs a shorter "what this means / what to do now" layer so operators do
    not have to infer the process stage from raw metric names.

    Args:
        event_type: Governance event identifier. Only
            ``"knowledge_degradation"`` produces guidance; every other value
            yields an empty list.
        impact: Impact metrics of the event. The keys ``stale_count``,
            ``total_count``, ``stale_days``, ``threshold`` and ``stale_ratio``
            are read; missing counts render as ``"?"``, a missing threshold
            defaults to ``0.2`` and a missing ratio to ``0``.

    Returns:
        A list of card lines (blank separators, section headings and section
        bodies) meant to be appended to the card's sections, or ``[]`` when
        the event type has no operator guidance.
    """
    if event_type != "knowledge_degradation":
        return []

    stale_count = impact.get("stale_count", "?")
    total_count = impact.get("total_count", "?")
    stale_days = impact.get("stale_days", "?")
    threshold = _format_metric_value("threshold", impact.get("threshold", 0.2))
    stale_ratio = _format_metric_value("stale_ratio", impact.get("stale_ratio", 0))

    plain_summary = (
        f"{stale_count} / {total_count} 筆 KM 超過 {stale_days} 天未更新,"
        "AI 做告警分類、規則匹配、PlayBook 推薦時可能引用舊資訊。"
    )
    policy_summary = (
        f"這是治理品質警報,不是服務故障;目標是把 stale ratio "
        f"{stale_ratio} 降到門檻 {threshold} 以下。"
    )

    process_stage_rows = [
        "階段:detected → queued_kb_healthcheck → waiting_owner_review",
        "AI 已做:統計 stale KM,產生補齊與審核方向",
        "AI 可做:反查 Incident / Sentry / SigNoz / PlayBook,產生 KM 更新草稿與任務",
        "需要人工:owner 審核高影響 KM 內容,避免 AI 自動寫入錯誤知識",
    ]
    next_actions = [
        "確認 run_kb_growth_healthcheck 是否已排程或已執行",
        "到 AwoooP Work Items / AI 治理篩選 knowledge_degradation",
        "優先審核最近被告警、Sentry、SigNoz、PlayBook 引用的 KM",
        # Reuse the formatted threshold so the closing criterion always agrees
        # with the policy summary above instead of a hard-coded "20%".
        f"不用重啟服務;等 stale_ratio 降到 {threshold} 以下再關閉治理警報",
    ]

    return [
        "",
        "💬 *白話說明*",
        _escape_md(plain_summary),
        _escape_md(policy_summary),
        "",
        "🧩 *AI 流程狀態*",
        _tree_lines(process_stage_rows),
        "",
        "✅ *現在要做*",
        _lines_from_list(next_actions),
    ]
"actionable"}, + keep={"status", "impact", "remediation", "actionable", *profiled_keys}, max_items=4, ) if fallback_items: diff --git a/apps/api/src/services/governance_agent.py b/apps/api/src/services/governance_agent.py index eeb4375f..fb4d7e75 100644 --- a/apps/api/src/services/governance_agent.py +++ b/apps/api/src/services/governance_agent.py @@ -227,7 +227,7 @@ class GovernanceAgent: "actionable": { "items": [ "每日檢查 ANTI_PATTERN 更新結果", - "安排至少 2 位 owner 對 stale條目做快速人工審核", + "安排至少 2 位 owner 對 stale 條目做快速人工審核", ], }, "stale_count": stale, diff --git a/apps/api/tests/test_failover_alerter.py b/apps/api/tests/test_failover_alerter.py index bcfed64e..8a269d09 100644 --- a/apps/api/tests/test_failover_alerter.py +++ b/apps/api/tests/test_failover_alerter.py @@ -280,11 +280,17 @@ def test_governance_alert_card_formats_knowledge_degradation() -> None: }, ) - assert "*AI 治理警報|知識庫劣化*" in card + assert "*AI 治理警報|KM 需要更新" in card + assert "💬 *白話說明*" in card + assert "🧩 *AI 流程狀態*" in card + assert "✅ *現在要做*" in card + assert "queued\\_kb\\_healthcheck" in card + assert "AwoooP Work Items" in card assert "🧭 *影響摘要*" in card assert "陳舊 KM:948" in card assert "陳舊比例:52\\.1%" in card assert "▶️ 下一步:run\\_kb\\_growth\\_healthcheck" in card + assert "📎 *補充欄位*" not in card assert "欄位快覽" not in card