diff --git a/services/auto_heal_service.py b/services/auto_heal_service.py
index 1e23692..9f78eff 100644
--- a/services/auto_heal_service.py
+++ b/services/auto_heal_service.py
@@ -40,6 +40,8 @@ _JUMP_HOST = os.getenv("SSH_JUMP_HOST", "192.168.0.110")
_JUMP_USER = os.getenv("SSH_JUMP_USER", "wooo")
_TARGET_HOST = os.getenv("SSH_TARGET_HOST", "192.168.0.188")
_TARGET_USER = os.getenv("SSH_TARGET_USER", "ollama")
+# SSH 私鑰路徑:優先 env,fallback 到 config 目錄(rw mount,不需重建容器)
+_SSH_KEY_PATH = os.getenv("SSH_KEY_PATH", "/app/config/autoheal_id_ed25519")
# ─── 白名單允許執行的指令前綴 ────────────────────────────
_CMD_WHITELIST = [
@@ -117,9 +119,14 @@ def _execute_ssh_cmd(cmd: str) -> Tuple[bool, str]:
try:
import paramiko
+ # Offer the dedicated key only when the file exists; look_for_keys still lets paramiko try its default keys.
+ key_path = _SSH_KEY_PATH if os.path.isfile(_SSH_KEY_PATH) else None
+ key_kwargs = {"key_filename": key_path} if key_path else {}
+
jump = paramiko.SSHClient()
jump.set_missing_host_key_policy(paramiko.AutoAddPolicy())
- jump.connect(_JUMP_HOST, username=_JUMP_USER, timeout=10)
+ jump.connect(_JUMP_HOST, username=_JUMP_USER, timeout=10,
+ look_for_keys=True, **key_kwargs)
# 透過跳板機建立隧道
transport = jump.get_transport()
@@ -129,7 +136,8 @@ def _execute_ssh_cmd(cmd: str) -> Tuple[bool, str]:
target = paramiko.SSHClient()
target.set_missing_host_key_policy(paramiko.AutoAddPolicy())
- target.connect(_TARGET_HOST, username=_TARGET_USER, sock=chan, timeout=15)
+ target.connect(_TARGET_HOST, username=_TARGET_USER, sock=chan, timeout=15,
+ look_for_keys=True, **key_kwargs)
_stdin, stdout, stderr = target.exec_command(cmd, timeout=60)
out = stdout.read().decode("utf-8", errors="replace").strip()
@@ -224,6 +232,8 @@ class AutoHealService:
)
session.add(incident)
session.commit()
+ session.refresh(incident)
+ session.expunge(incident)
sys_log.info(f"[AutoHeal] 建立 Incident id={incident.id} type={error_type}")
return incident
except Exception as e:
@@ -379,7 +389,7 @@ class AutoHealService:
except Exception as e:
session.rollback()
sys_log.error(f"[AutoHeal] write_heal_log 失敗: {e}")
- return HealLog(result=result, action_detail=action_detail)
+ return HealLog(result=result, action_detail=action_detail, duration_ms=duration_ms)
finally:
session.close()
@@ -422,19 +432,45 @@ class AutoHealService:
# ── 步驟 7:Telegram 通知 ───────────────────────────
def _notify_telegram(self, incident: Incident, playbook: Playbook,
heal_log: HealLog) -> None:
- """推播修復結果通知"""
- icon = {"success": "✅", "failed": "❌", "skipped": "⏭️"}.get(heal_log.result, "❓")
+ """Send a Telegram notification describing the outcome of a heal attempt.
+
+ The header and footer are chosen from ``heal_log.result``: "success",
+ "failed" (asks for manual intervention), or anything else (reported as
+ skipped).
+ """
sev_icon = {"P1": "🔴", "P2": "🟠", "P3": "🟡"}.get(incident.severity, "⚪")
+ result = heal_log.result
+
+ if result == "success":
+ header = "✅ [AIOps] 自動修復成功"
+ footer = "💾 知識已沉澱至 KM"
+ elif result == "failed":
+ header = "🚨 [AIOps] 自動修復失敗 — 需人工介入"
+ # Name the configured target host instead of a hard-coded "188".
+ footer = (
+ f"⚠️ 修復指令回傳錯誤,請登入 {_TARGET_HOST} 手動排查:\n"
+ f"docker restart {playbook.get_action_params().get('container', '?')}\n"
+ f"🆔 Incident #{incident.id}"
+ )
+ else:
+ header = "⏭️ [AIOps] 修復已略過"
+ footer = f"🆔 Incident #{incident.id}"
+ # Tolerate None in these fields so building the message cannot raise.
+ symptom = (incident.error_message or "")[:180]
+ duration_ms = heal_log.duration_ms or 0
msg = (
- f"{sev_icon} [EwoooC AIOps] 自動修復報告\n\n"
- f"📌 任務:{incident.task_name}\n"
- f"🚨 錯誤類型:{incident.error_type}\n"
- f"📝 症狀:{incident.error_message[:200]}\n\n"
+ f"{sev_icon} {header}\n"
+ f"━━━━━━━━━━━━━━━━━━\n"
+ f"📌 {incident.task_name}\n"
+ f"🔖 {incident.error_type} · {incident.severity}\n"
+ f"📝 {symptom}\n"
+ f"━━━━━━━━━━━━━━━━━━\n"
f"🔧 PlayBook:{playbook.name}\n"
f"⚙️ 動作:{heal_log.action_detail}\n"
- f"{icon} 結果:{heal_log.result}({heal_log.duration_ms:.0f}ms)\n\n"
- f"💾 已沉澱至 KM(auto_heal_playbook)"
+ f"⏱ 耗時:{duration_ms:.0f}ms\n"
+ f"━━━━━━━━━━━━━━━━━━\n"
+ f"{footer}"
)
_send_telegram(msg)