diff --git a/apps/api/alert_rules.yaml b/apps/api/alert_rules.yaml
index bfb8a4c8..02f97c17 100644
--- a/apps/api/alert_rules.yaml
+++ b/apps/api/alert_rules.yaml
@@ -108,25 +108,53 @@ rules:
       reasoning: "[規則匹配] Pod OOMKilled 後 ReplicaSet 將自動重建，但需同步修正資源配置防止復發。"
 
   # 2026-04-12 ogt: Host CPU 告警獨立規則 — node_exporter 告警無 pod/deployment label
-  #   原本放在 high_cpu 規則導致 {target}="unknown" → auto-repair 安全攔截
-  #   host 告警只能通知，不能 kubectl scale
-  - id: host_cpu_high
+  # 2026-04-16 ogt + Claude Sonnet 4.6: 補齊主機層所有常見 Prometheus alertname
+  #   原則：主機層告警 = 只能通知 + 建議 SSH 排查，絕對禁止 kubectl rollout restart
+  - id: host_resource_alert
     priority: 45
-    description: Host 主機 CPU 使用率過高 (node_exporter，非 K8s workload)
+    description: Host 主機資源告警 (node_exporter — CPU/記憶體/負載/磁碟增長，非 K8s workload)
     match:
       alertname:
+        # CPU 相關
         - HostHighCpuLoad
         - NodeCPUUsageHigh
         - NodeHighCpuLoad
+        # 負載相關
+        - HostHighLoadAverage
+        - NodeLoadAverageHigh
+        - HostLoadAverageHigh
+        # 記憶體相關
+        - HostOutOfMemory
+        - HostMemoryUnderMemoryPressure
+        - HostMemoryUsageHigh
+        - NodeMemoryPressure
+        # 磁碟 I/O 與容量相關
+        - HostUnusualDiskReadLatency
+        - HostUnusualDiskWriteLatency
+        - HostUnusualDiskReadRate
+        - HostUnusualDiskWriteRate
+        - HostDiskWillFillIn24Hours
+        - HostOutOfDiskSpace
+        # 網路相關
+        - HostUnusualNetworkThroughputIn
+        - HostUnusualNetworkThroughputOut
+        # 系統服務 / 核心版本 / 硬體 (EDAC) / 時鐘同步
+        - HostSystemdServiceCrashed
+        - HostKernelVersionDeviations
+        - HostOomKillDetected
+        - HostEdacCorrectableErrors
+        - HostEdacUncorrectableErrors
+        - HostClockSkewDetected
+        - HostClockNotSynchronising
     response:
-      action_title: "Host {host} CPU 過高 — 需排查高 CPU 進程"
-      description: "⚠️ 主機 {host} CPU 使用率超標。此為主機層告警，需 SSH 登入排查 (top / ps aux)。常見原因: Ollama 推理、DB 查詢、K3s GC。"
+      action_title: "⚠️ 主機告警 — 需 SSH 人工排查"
+      description: "⚠️ 主機層告警（node_exporter）。此告警源自主機資源，無法透過 kubectl 自動修復。請 SSH 登入主機排查根因：top / htop / df -h / journalctl -xe。"
       suggested_action: NO_ACTION
       kubectl_command: ""
       estimated_downtime: "N/A"
       risk: low
       responsibility: INFRA
-      reasoning: "[規則匹配] 主機 CPU 告警無法自動修復，需人工確認高 CPU 進程後決策。"
+      reasoning: "[規則匹配] 主機層資源告警無法自動修復，需人工登入確認高負載/高記憶體/磁碟根因後決策。禁止 kubectl restart（node_exporter 不是 K8s 服務）。"
 
   - id: high_cpu
     priority: 40
@@ -219,6 +247,34 @@ rules:
 
   # ── 資料庫層 ─────────────────────────────────────────────────
 
+  # 2026-04-16 ogt + Claude Sonnet 4.6: PostgreSQL 監控告警 — 磁碟/膨脹/VACUUM/複寫延遲/連線數/exporter 類，絕對不能自動重啟
+  # 根因：PostgreSQLDiskGrowthRate 落 generic_fallback → 輸出 kubectl rollout restart postgresql（錯誤！）
+  - id: postgresql_disk_monitoring
+    priority: 68
+    description: PostgreSQL 磁碟/增長率/exporter 監控告警（不重啟資料庫）
+    match:
+      alertname:
+        - PostgreSQLDiskGrowthRate
+        - PostgreSQLDiskUsageHigh
+        - PostgreSQLDiskFull
+        - PostgresExporterDown
+        - PostgreSQLExporterDown
+        - PostgreSQLTableBloat
+        - PostgreSQLVacuumRequired
+        - PostgreSQLReplicationLag
+        - PostgreSQLTooManyConnections
+    response:
+      action_title: "⚠️ PostgreSQL 監控告警 — 需人工排查，禁止重啟"
+      description: "⚠️ PostgreSQL 資源/監控告警。磁碟增長過快或 exporter 異常，重啟資料庫會造成資料風險。請登入排查磁碟用量或 WAL 狀態。"
+      suggested_action: NO_ACTION
+      kubectl_command: "kubectl exec -n {namespace} deployment/postgresql -- psql -U postgres -c 'SELECT pg_database_size(current_database()), pg_size_pretty(pg_database_size(current_database()));'"
+      estimated_downtime: "N/A"
+      risk: medium
+      responsibility: DB
+      responsibility_reasoning: "PostgreSQL 磁碟告警需 DBA 評估，自動重啟資料庫有資料丟失風險，必須人工確認"
+      secondary_teams: [INFRA]
+      reasoning: "[規則匹配] PostgreSQL 磁碟增長/監控告警，絕對禁止自動重啟資料庫。需 DBA 人工確認磁碟用量、WAL 清理、VACUUM 狀態。"
+
   - id: postgresql_down
     priority: 70
     description: PostgreSQL 服務下線
diff --git a/apps/api/pyproject.toml b/apps/api/pyproject.toml
index 2c6c4d00..533fbf26 100644
--- a/apps/api/pyproject.toml
+++ b/apps/api/pyproject.toml
@@ -43,6 +43,9 @@ dependencies = [
     "statsmodels>=0.14.0",
     "drain3>=0.9.11",
     "sse-starlette>=1.8.0",
+    # 2026-04-16 ogt + Claude Sonnet 4.6: SSH MCP sensor 修復 — asyncssh 缺失導致 sensors_succeeded=0
+    # 根因: ssh_provider.py 中 import asyncssh 在 try/except 外，所有 15 個 SSH tool 直接 ImportError
+    "asyncssh>=2.14.0",
 ]
 
 # [tool.uv.sources]
diff --git a/apps/api/src/services/decision_manager.py b/apps/api/src/services/decision_manager.py
index b127ba18..1d1d9949 100644
--- a/apps/api/src/services/decision_manager.py
+++ b/apps/api/src/services/decision_manager.py
@@ -1237,6 +1237,18 @@ class DecisionManager:
             # COMPLETED 狀態: 直接返回，避免重複建立 decision 導致 Telegram 轟炸
             if existing_token.state == DecisionState.COMPLETED:
                 return existing_token
+            # 2026-04-16 ogt + Claude Sonnet 4.6: 修復重複卡片根因 — ANALYZING 未早返回
+            # 問題：多 pod 並發時 pod-A 在 ANALYZING，pod-B/C 發現 ANALYZING 不在返回條件
+            #       → 各自建新 token → 同一 incident 跑 3 次 agent_debate → 送出 3 張 TG 卡
+            # 修復：ANALYZING 狀態也直接返回，避免重複處理（NOTE(review): 請確認卡在 ANALYZING 的 token 有逾時/回收機制，否則 worker 崩潰後該 incident 可能不再被分析 — 待確認）
+            if existing_token.state == DecisionState.ANALYZING:
+                logger.debug(
+                    "decision_analyzing_in_progress",
+                    incident_id=incident.incident_id,
+                    token=existing_token.token,
+                    reason="另一個 worker 正在分析中，跳過重複建立",
+                )
+                return existing_token
 
         # ADR-073 Phase 3-1: TYPE-1 triage guard — 純資訊告警跳過 LLM 分析
         # classify_alert_early() 已在 webhook 入口設定 notification_type