fix(api): add manual handoff package for no-action alerts
This commit is contained in:
@@ -222,6 +222,85 @@ def _format_operator_outcome_lines(outcome: dict[str, object] | None) -> list[st
|
||||
]
|
||||
|
||||
|
||||
def _needs_manual_handoff_package(
    *,
    suggested_action: str | None = None,
    verdict: str | None = None,
) -> bool:
    """Decide whether a card must carry the concrete manual handoff package.

    The package is needed as soon as one of these holds:

    * ``is_no_action_approval_action`` accepts the suggested action;
    * the suggested action contains ``repair_candidate_missing`` (any case);
    * the lower-cased verdict starts with ``manual_required``;
    * the lower-cased verdict is exactly ``observed_not_executed``,
      ``received_only`` or ``approval_expired_manual_review``.

    A falsy ``suggested_action`` or ``verdict`` is treated as an empty string.
    """
    action = str(suggested_action or "")
    normalized_verdict = str(verdict or "").lower()

    no_action = is_no_action_approval_action(action)
    if no_action:
        # Return the helper's own truthy result unchanged.
        return no_action

    if "repair_candidate_missing" in action.lower():
        return True

    if normalized_verdict.startswith("manual_required"):
        return True

    handoff_verdicts = {
        "observed_not_executed",
        "received_only",
        "approval_expired_manual_review",
    }
    return normalized_verdict in handoff_verdicts
|
||||
|
||||
|
||||
def _manual_evidence_hint(resource_name: str, alert_category: str) -> str:
|
||||
"""Human-readable evidence target without prescribing a write action."""
|
||||
|
||||
resource = resource_name.lower()
|
||||
category = alert_category.lower()
|
||||
if "node-exporter" in resource:
|
||||
return "node_exporter target up、scrape error、host CPU/RAM/disk、service log 摘要"
|
||||
if category in {"host", "host_resource", "infrastructure"}:
|
||||
return "host metrics、service 狀態、journal 摘要、最近部署/維護紀錄"
|
||||
if category in {"k8s", "kubernetes", "k8s_workload"}:
|
||||
return "pod events、rollout 狀態、recent logs、readiness / liveness probe"
|
||||
if category in {"database", "db"}:
|
||||
return "連線數、慢查詢、lock、磁碟與 replication / backup 狀態"
|
||||
if category in {"backup", "backup_failure"}:
|
||||
return "最近 backup run、失敗 repo、exit code、offsite verifier 與 retry window"
|
||||
if category in {"external_site", "network"}:
|
||||
return "HTTP 狀態、DNS/TLS、blackbox probe、上游 / CDN / Nginx log 摘要"
|
||||
return "來源事件、fingerprint recurrence、metrics、logs、最近變更與相關 run"
|
||||
|
||||
|
||||
def _format_manual_handoff_package_lines(
    *,
    incident_id: str,
    resource_name: str,
    alert_category: str = "",
    suggested_action: str | None = None,
    verdict: str | None = None,
    compact: bool = False,
) -> list[str]:
    """Render the advisory manual handoff checklist as message lines.

    The checklist is purely informational: it grants no runtime
    authorization and names no destructive command. It spells out which
    evidence to gather and how to register a repair candidate, so that
    "manual review" becomes an actionable list.

    Args:
        incident_id: Incident reference shown in step 1; ``--`` is shown
            when it is empty.
        resource_name: Affected resource, used to pick the evidence hint.
        alert_category: Alert category, used to pick the evidence hint.
        suggested_action: Action text checked by
            ``_needs_manual_handoff_package``.
        verdict: Automation verdict checked by
            ``_needs_manual_handoff_package``.
        compact: When true, the trailing button legend line is omitted.

    Returns:
        An empty list when no handoff package is needed; otherwise the
        HTML-formatted lines, starting with a blank separator line.
    """
    package_needed = _needs_manual_handoff_package(
        suggested_action=suggested_action,
        verdict=verdict,
    )
    if not package_needed:
        return []

    # Both interpolated values are escaped because the lines are HTML markup.
    safe_hint = html.escape(_manual_evidence_hint(resource_name, alert_category))
    safe_incident = html.escape(incident_id or "--")

    package = [
        "",
        "🧰 <b>人工處置包</b>",
        "├ 狀態:AI 尚未產生安全可執行修復,不能直接批准執行",
        f"├ 1. 開 Runs / 真相鏈確認 <code>{safe_incident}</code> 仍在 firing 或 recurrence",
        f"├ 2. 補證據:{safe_hint}",
        "├ 3. 在 AwoooP 建立修復候選:命令、風險、rollback、verifier、owner",
        "└ 4. 修復後回寫:execution result、verifier、KM / PlayBook trust",
    ]
    if compact:
        return package

    # The full variant also explains what the buttons do.
    return [
        *package,
        "按鈕:<b>處置包</b> 看完整證據,<b>重診</b> 重新收集,<b>Runs</b> 追蹤狀態",
    ]
|
||||
|
||||
|
||||
def _format_remediation_history_lines(history: dict[str, object] | None) -> list[str]:
|
||||
if not history or int(history.get("total") or 0) <= 0:
|
||||
return []
|
||||
@@ -1900,6 +1979,10 @@ class TelegramMessage:
|
||||
|
||||
def _automation_mode(self) -> str:
|
||||
text = f"{self.root_cause} {self.suggested_action}".lower()
|
||||
if is_no_action_approval_action(self.suggested_action):
|
||||
if "repair_candidate_missing" in text:
|
||||
return "repair_candidate_missing_manual_handoff"
|
||||
return "manual_handoff_required"
|
||||
if "超時" in text or "timeout" in text:
|
||||
return "llm_timeout_manual_gate"
|
||||
if self.confidence > 0 and self.suggested_action and self.suggested_action != "待分析":
|
||||
@@ -1953,6 +2036,10 @@ class TelegramMessage:
|
||||
return "🟠 AI 補救試跑證據查詢失敗,需人工判斷"
|
||||
if verdict == "approval_required":
|
||||
return "🟡 需要審批後才會執行"
|
||||
if mode == "repair_candidate_missing_manual_handoff":
|
||||
return "🟠 缺少可執行修復候選,已產生人工處置包"
|
||||
if mode == "manual_handoff_required":
|
||||
return "🟠 未自動修復,已產生人工處置包"
|
||||
if verdict.startswith("manual_required"):
|
||||
return "🟠 未自動修復,需人工判斷"
|
||||
|
||||
@@ -2021,6 +2108,20 @@ class TelegramMessage:
|
||||
f"└ Flow:<code>{flow}</code>\n"
|
||||
)
|
||||
|
||||
def _format_manual_handoff_package_block(self) -> str:
    """Render the manual handoff package as one text block for this card.

    The verdict is taken from ``automation_quality["verdict"]`` when that
    is set and truthy; otherwise the result of ``_automation_mode()`` is
    used in its place. The incident reference falls back to the approval id
    when the incident id is empty.

    Returns:
        The package lines joined by newlines with a trailing newline, or an
        empty string when no package is needed.
    """
    automation_quality = self.automation_quality or {}
    reported_verdict = automation_quality.get("verdict")
    if not reported_verdict:
        # Only consult the automation mode when no verdict was reported.
        reported_verdict = self._automation_mode()

    package_lines = _format_manual_handoff_package_lines(
        incident_id=self.incident_id or self.approval_id,
        resource_name=self.resource_name,
        alert_category=self.alert_category,
        suggested_action=self.suggested_action,
        verdict=str(reported_verdict),
    )
    return "\n".join(package_lines) + "\n" if package_lines else ""
|
||||
|
||||
def _format_flow_progress_block(self) -> str:
|
||||
"""Operator-facing state of where the alert is in the automation loop."""
|
||||
quality = self.automation_quality or {}
|
||||
@@ -2029,7 +2130,8 @@ class TelegramMessage:
|
||||
|
||||
action_upper = (self.suggested_action or "").upper()
|
||||
is_noop = (
|
||||
"NO_ACTION" in action_upper
|
||||
is_no_action_approval_action(self.suggested_action)
|
||||
or "NO_ACTION" in action_upper
|
||||
or action_upper.startswith("OBSERVE")
|
||||
or action_upper.startswith("INVESTIGATE")
|
||||
or not action_upper.strip()
|
||||
@@ -2153,6 +2255,11 @@ class TelegramMessage:
|
||||
safe_action = html.escape(self.suggested_action)
|
||||
safe_downtime = html.escape(self.estimated_downtime)
|
||||
safe_automation_summary = html.escape(self._automation_status_summary())
|
||||
action_heading = (
|
||||
"🧭 <b>修復候選狀態</b>"
|
||||
if is_no_action_approval_action(self.suggested_action)
|
||||
else "⚡ <b>建議修復動作</b>"
|
||||
)
|
||||
|
||||
# 2026-03-29 ogt: AI Token/Cost 顯示
|
||||
ai_cost_display = ""
|
||||
@@ -2245,6 +2352,7 @@ class TelegramMessage:
|
||||
flow_progress_block = self._format_flow_progress_block()
|
||||
operator_outcome_block = self._format_operator_outcome_block()
|
||||
automation_block = self._format_automation_block()
|
||||
manual_handoff_block = self._format_manual_handoff_package_block()
|
||||
|
||||
# ADR-075 TYPE-3 格式組裝
|
||||
message = (
|
||||
@@ -2258,13 +2366,14 @@ class TelegramMessage:
|
||||
f"{flow_progress_block}\n"
|
||||
f"{operator_outcome_block}"
|
||||
f"{automation_block}"
|
||||
f"{manual_handoff_block}"
|
||||
f"\n"
|
||||
f"🧠 <b>AI 深度診斷</b>\n"
|
||||
f"├─ 分析:{safe_root_cause}\n"
|
||||
f"├─ 責任:{resp_display}\n"
|
||||
f"└─ {ai_source}\n"
|
||||
f"\n"
|
||||
f"⚡ <b>建議修復動作</b>\n"
|
||||
f"{action_heading}\n"
|
||||
f"{playbook_line}"
|
||||
f"<code>{safe_action}</code>\n"
|
||||
)
|
||||
@@ -3680,13 +3789,21 @@ class TelegramGateway:
|
||||
|
||||
if not approval_buttons_enabled:
|
||||
info_row: list[dict] = []
|
||||
secondary_row: list[dict] = []
|
||||
if incident_id:
|
||||
info_row.extend([
|
||||
{"text": "📋 詳情", "callback_data": f"detail:{incident_id}"},
|
||||
{"text": "📊 歷史", "callback_data": f"history:{incident_id}"},
|
||||
{"text": "🧰 處置包", "callback_data": f"detail:{incident_id}"},
|
||||
{"text": "🔄 重診", "callback_data": f"reanalyze:{incident_id}"},
|
||||
])
|
||||
info_row.append({"text": "🔕 靜默", "callback_data": silence_nonce})
|
||||
secondary_row.extend([
|
||||
{"text": "📊 歷史", "callback_data": f"history:{incident_id}"},
|
||||
{"text": "🔕 靜默", "callback_data": silence_nonce},
|
||||
])
|
||||
else:
|
||||
info_row.append({"text": "🔕 靜默", "callback_data": silence_nonce})
|
||||
buttons: list[list[dict]] = [info_row]
|
||||
if secondary_row:
|
||||
buttons.append(secondary_row)
|
||||
awooop_row = _awooop_truth_chain_button_row(incident_id)
|
||||
if awooop_row:
|
||||
buttons.append(awooop_row)
|
||||
@@ -7087,6 +7204,37 @@ class TelegramGateway:
|
||||
truth_chain=truth_chain,
|
||||
remediation_history=remediation_history,
|
||||
)
|
||||
quality = (
|
||||
truth_chain.get("automation_quality")
|
||||
if isinstance(truth_chain.get("automation_quality"), dict)
|
||||
else {}
|
||||
)
|
||||
reconciliation = (
|
||||
truth_chain.get("reconciliation")
|
||||
if isinstance(truth_chain.get("reconciliation"), dict)
|
||||
else {}
|
||||
)
|
||||
reconciliation_facts = (
|
||||
reconciliation.get("facts")
|
||||
if isinstance(reconciliation.get("facts"), dict)
|
||||
else {}
|
||||
)
|
||||
latest_action = str(
|
||||
reconciliation_facts.get("latest_approval_action") or ""
|
||||
)
|
||||
detail_resource = (
|
||||
", ".join(str(s) for s in incident.affected_services[:2])
|
||||
if incident.affected_services
|
||||
else incident_id
|
||||
)
|
||||
lines += _format_manual_handoff_package_lines(
|
||||
incident_id=incident_id,
|
||||
resource_name=detail_resource,
|
||||
alert_category="",
|
||||
suggested_action=latest_action,
|
||||
verdict=str(quality.get("verdict") or ""),
|
||||
compact=True,
|
||||
)
|
||||
lines += _format_km_stale_completion_lines(km_completion_summary)
|
||||
lines += _format_remediation_history_lines(remediation_history)
|
||||
gateway_summary = (
|
||||
@@ -8722,7 +8870,11 @@ class TelegramGateway:
|
||||
if action == "approve":
|
||||
status_emoji = "✅"
|
||||
status_text = f"<b>已批准</b> by {_html.escape(username)}"
|
||||
if approval_action is not None and is_no_action_approval_action(approval_action):
|
||||
no_action_approval = (
|
||||
approval_action is not None
|
||||
and is_no_action_approval_action(approval_action)
|
||||
)
|
||||
if no_action_approval:
|
||||
status_emoji = "🟠"
|
||||
suffix = "已記錄;此卡沒有可執行修復,等待補修復候選"
|
||||
else:
|
||||
@@ -8731,16 +8883,25 @@ class TelegramGateway:
|
||||
status_emoji = "❌"
|
||||
status_text = f"<b>已拒絕</b> by {_html.escape(username)}"
|
||||
suffix = ""
|
||||
no_action_approval = False
|
||||
|
||||
status_line = f"{status_emoji} {status_text} {suffix}".strip()
|
||||
|
||||
if orig_msg_id:
|
||||
try:
|
||||
# 1. 移除批准/拒絕按鈕(只保留資訊按鈕列)
|
||||
info_buttons = [[
|
||||
{"text": "📋 詳情", "callback_data": f"detail:{incident_id}"},
|
||||
{"text": "📊 歷史", "callback_data": f"history:{incident_id}"},
|
||||
]]
|
||||
if no_action_approval:
|
||||
info_buttons = [[
|
||||
{"text": "🧰 處置包", "callback_data": f"detail:{incident_id}"},
|
||||
{"text": "🔄 重診", "callback_data": f"reanalyze:{incident_id}"},
|
||||
], [
|
||||
{"text": "📊 歷史", "callback_data": f"history:{incident_id}"},
|
||||
]]
|
||||
else:
|
||||
info_buttons = [[
|
||||
{"text": "📋 詳情", "callback_data": f"detail:{incident_id}"},
|
||||
{"text": "📊 歷史", "callback_data": f"history:{incident_id}"},
|
||||
]]
|
||||
awooop_row = _awooop_truth_chain_button_row(incident_id)
|
||||
if awooop_row:
|
||||
info_buttons.append(awooop_row)
|
||||
|
||||
@@ -28,6 +28,33 @@ def test_action_required_card_exposes_ai_automation_on_fallback() -> None:
|
||||
assert "執行:<code>no_action_or_observe</code>" in body
|
||||
|
||||
|
||||
def test_repair_candidate_missing_card_exposes_manual_handoff_package() -> None:
    """A REPAIR_CANDIDATE_MISSING card must render the manual handoff package."""
    card = TelegramMessage(
        status_emoji="ℹ️",
        risk_level="LOW",
        resource_name="node-exporter-188",
        root_cause="AI 選擇不執行修復,需人工判斷是否接手",
        suggested_action="NO_ACTION - REPAIR_CANDIDATE_MISSING: LLM 分析失敗,尚未產生安全可執行修復指令",
        estimated_downtime="unknown",
        approval_id="test-approval-id",
        incident_id="INC-20260611-34BBF5",
        primary_responsibility="INFRA",
        confidence=0.0,
        alert_category="host_resource",
    )

    rendered = card.format()

    # Every fragment below must be present in the rendered card.
    expected_fragments = (
        "缺少可執行修復候選,已產生人工處置包",
        "Mode:<code>repair_candidate_missing_manual_handoff</code>",
        "人工處置包",
        "補證據:node_exporter target up",
        "AwoooP 建立修復候選",
        "按鈕:<b>處置包</b>",
        "修復候選狀態",
    )
    for fragment in expected_fragments:
        assert fragment in rendered, fragment

    # The card must not claim it is waiting for a human approval.
    assert "等待人工批准" not in rendered
|
||||
|
||||
|
||||
def test_nemotron_card_exposes_same_ai_automation_chain() -> None:
|
||||
message = TelegramMessage(
|
||||
status_emoji="🚨",
|
||||
|
||||
@@ -762,6 +762,8 @@ async def test_build_inline_keyboard_hides_approval_for_no_action() -> None:
|
||||
|
||||
assert "✅ 批准" not in button_texts
|
||||
assert "❌ 拒絕" not in button_texts
|
||||
assert "🧰 處置包" in button_texts
|
||||
assert "🔄 重診" in button_texts
|
||||
assert "🔕 靜默" in button_texts
|
||||
assert {
|
||||
"text": "🧭 Runs",
|
||||
@@ -1170,7 +1172,7 @@ class TestTelegramMessageFormat:
|
||||
assert "AI 已提出修復建議,等待人工批准" in result
|
||||
|
||||
def test_telegram_message_no_action_marks_manual_judgement(self):
|
||||
"""NO_ACTION 卡片必須一眼看得出需要人工判斷。"""
|
||||
"""NO_ACTION 卡片必須一眼看得出需要人工處置包。"""
|
||||
msg = TelegramMessage(
|
||||
status_emoji="ℹ️",
|
||||
risk_level="LOW",
|
||||
@@ -1184,7 +1186,12 @@ class TestTelegramMessageFormat:
|
||||
result = msg.format()
|
||||
|
||||
assert "處置狀態" in result
|
||||
assert "AI 無可安全執行動作,需人工判斷" in result
|
||||
assert "未自動修復,已產生人工處置包" in result
|
||||
assert "人工處置包" in result
|
||||
assert "補證據:node_exporter target up" in result
|
||||
assert "AwoooP 建立修復候選" in result
|
||||
assert "execution result、verifier、KM / PlayBook trust" in result
|
||||
assert "等待人工批准" not in result
|
||||
|
||||
def test_telegram_message_diagnosis_state_is_not_auto_repair(self):
|
||||
"""SSH 只讀診斷 lane 不得被顯示成自動修復。"""
|
||||
|
||||
Reference in New Issue
Block a user