diff --git a/apps/api/src/core/prompts.py b/apps/api/src/core/prompts.py
index 66f94c3b..6dec0473 100644
--- a/apps/api/src/core/prompts.py
+++ b/apps/api/src/core/prompts.py
@@ -120,6 +120,24 @@ The `alertname` field is your PRIMARY signal. Use it to determine the problem ty
 **NEVER** use `kubectl rollout restart deployment/awoooi-prod` for database, storage, or network alerts.
 Make `action_title` describe the ACTUAL problem from alertname (not generic "自動修復 AWOOOI 服務").
 
+## 🧪 Evidence-First Protocol (CRITICAL — overrides intuition)
+
+If the prompt contains a `<raw_evidence>` block, you MUST:
+1. **Read it first** before forming any hypothesis.
+2. **Quote specific lines** from the evidence in your `reasoning` to show you used it.
+3. **Never contradict** the evidence — if kubectl shows 2 pods running, do NOT say pods are down.
+4. **Adjust confidence** based on evidence quality:
+   - Evidence clearly confirms root cause → 0.80–0.95
+   - Evidence partially supports → 0.60–0.79
+   - No evidence, or contradictory evidence → 0.30–0.59 (set `primary_responsibility = "COLLAB"`)
+
+## 🔍 Skepticism Rules
+
+- **Forbidden**: Recommending `kubectl rollout restart` when evidence shows the pod is healthy.
+- **Forbidden**: Claiming OOM without memory metrics proving it.
+- **Forbidden**: Setting `confidence > 0.59` when `<raw_evidence>` is absent or shows "error".
+- If you have no concrete evidence, set `suggested_action = "INVESTIGATE"` and provide a diagnostic `kubectl_command` (get/describe/logs/top only).
+
 ## 🔥 Short Example: High CPU -> SCALE_DEPLOYMENT, HPA, risk_level=medium
 Please carefully justify your confidence between 0.0 and 1.0 (e.g. 0.82) based on symptoms and metrics.
 
diff --git a/apps/api/src/services/consensus_engine.py b/apps/api/src/services/consensus_engine.py
index 164fee1e..4253078e 100644
--- a/apps/api/src/services/consensus_engine.py
+++ b/apps/api/src/services/consensus_engine.py
@@ -192,26 +192,26 @@ class SREAgent(ExpertAgent):
         alert_names = " ".join([s.alert_name.lower() for s in incident.signals])
         target = incident.affected_services[0] if incident.affected_services else "unknown"
 
-        # SRE 規則引擎
+        # SRE 規則引擎 — confidence 依關鍵字明確度定
         if any(kw in alert_names for kw in ["crash", "restart", "oom", "killed"]):
             action = "重新啟動服務以恢復穩定性"
             kubectl = f"kubectl rollout restart deployment/{target} -n awoooi-prod"
-            confidence = 0.0  # 🔴 規則匹配，非 AI 分析
+            confidence = 0.72  # 明確崩潰訊號，規則高可信
             risk = "medium"
         elif any(kw in alert_names for kw in ["latency", "slow", "timeout"]):
             action = "擴展副本數以分散負載"
             kubectl = f"kubectl scale deployment/{target} --replicas=3 -n awoooi-prod"
-            confidence = 0.0  # 🔴 規則匹配，非 AI 分析
+            confidence = 0.65  # 效能問題，可能多因，中等可信
             risk = "low"
         elif any(kw in alert_names for kw in ["cpu", "memory", "resource"]):
             action = "調整資源限制或擴展副本"
             kubectl = f"kubectl scale deployment/{target} --replicas=2 -n awoooi-prod"
-            confidence = 0.0  # 🔴 規則匹配，非 AI 分析
+            confidence = 0.68  # 資源告警，指標明確
             risk = "medium"
         else:
             action = "進行安全重啟以排除未知問題"
             kubectl = f"kubectl rollout restart deployment/{target} -n awoooi-prod"
-            confidence = 0.0  # 🔴 規則匹配，非 AI 分析
+            confidence = 0.45  # 無明確訊號，低可信保守處理
             risk = "medium"
 
         return AgentOpinion(
@@ -236,7 +236,6 @@ class SecurityAgent(ExpertAgent):
 
     async def analyze(self, incident: Incident) -> AgentOpinion:
         """資安視角分析"""
-        _target = incident.affected_services[0] if incident.affected_services else "unknown"
         alert_names = " ".join([s.alert_name.lower() for s in incident.signals])
 
         # 資安掃描
@@ -250,11 +249,11 @@ class SecurityAgent(ExpertAgent):
 
         if security_concerns:
             action = "建議先隔離受影響服務，啟用 NetworkPolicy 限制"
-            confidence = 0.0  # 🔴 規則匹配，非 AI 分析
+            confidence = 0.80  # 安全關鍵字強命中，資安規則高可信
             risk = "critical"
         else:
             action = "無明顯資安風險，建議 SRE 處理"
-            confidence = 0.0  # 🔴 規則匹配，非 AI 分析
+            confidence = 0.60  # 排除確認，中等可信
             risk = "low"
 
         return AgentOpinion(
@@ -289,7 +288,7 @@ class CostAgent(ExpertAgent):
             agent_type=self.agent_type,
             action=action,
             reasoning="FinOps 分析: 使用 HPA 可在負載降低後自動縮減，相比固定擴容可節省約 40% 成本",
-            confidence=0.0,  # 🔴 規則匹配，非 AI 分析
+            confidence=0.55,  # 通用建議，非症狀驅動，保守可信
             risk_assessment="成本風險: low，使用 HPA 可自動調節",
             kubectl_command=kubectl,
             priority=4,
@@ -313,11 +312,11 @@ class PerformanceAgent(ExpertAgent):
         if any(kw in alert_names for kw in ["latency", "p99", "slow"]):
             action = "建議增加資源限制並啟用 PodDisruptionBudget"
             kubectl = f"kubectl patch deployment/{target} -n awoooi-prod -p '{{\"spec\":{{\"template\":{{\"spec\":{{\"containers\":[{{\"name\":\"{target}\",\"resources\":{{\"limits\":{{\"cpu\":\"2\",\"memory\":\"2Gi\"}}}}}}]}}}}}}}}'"
-            confidence = 0.0  # 🔴 規則匹配，非 AI 分析
+            confidence = 0.70  # 效能關鍵字明確命中
         else:
             action = "當前效能指標正常，建議觀察"
             kubectl = None
-            confidence = 0.0  # 🔴 規則匹配，非 AI 分析
+            confidence = 0.50  # 無效能異常，不確定，低權重
 
         return AgentOpinion(
             agent_type=self.agent_type,
@@ -483,7 +482,7 @@ class ConsensusEngine:
         """將 action 正規化到類別"""
         action_lower = action.lower()
 
-        if any(kw in action_lower for kw in ["重啟", "restart"]):
+        if any(kw in action_lower for kw in ["重啟", "重新啟動", "restart"]):
             return "RESTART"
         elif any(kw in action_lower for kw in ["擴展", "scale", "副本"]):
             return "SCALE"
diff --git a/apps/api/src/services/openclaw.py b/apps/api/src/services/openclaw.py
index 98898626..fe844fad 100644
--- a/apps/api/src/services/openclaw.py
+++ b/apps/api/src/services/openclaw.py
@@ -1360,19 +1360,26 @@ Trace URL: {signoz_trace_url}
             else "\n\n## ⚠️ 無法取得叢集清單，target_resource 請依 alertname 推斷，勿編造。\n"
         )
 
+        # P2.1 fix 2026-04-24 ogt + Claude Sonnet 4.6: 提取 MCP evidence_summary 注入 prompt
+        # diagnosis_context 由 decision_manager 在呼叫前填入（pre_decision_investigator 產出）
+        _raw_evidence = alert_context.get("diagnosis_context", "") or ""
+        if _raw_evidence and not _raw_evidence.startswith("<raw_evidence>"):
+            _raw_evidence = f"<raw_evidence>\n{_raw_evidence}\n</raw_evidence>"
+        evidence_section = f"\n\n## 🔬 MCP 實時環境證據\n{_raw_evidence}\n" if _raw_evidence else ""
+
         # 格式化告警為 Prompt (2026-03-31 ogt: 強力截斷以符合 NVIDIA 4K 限制)
         # 優先保留 System Prompt，截斷 Alert Data
-        available_len = 3500 - len(OPENCLAW_SYSTEM_PROMPT) - len(signoz_context) - len(k8s_section)
+        available_len = 3500 - len(OPENCLAW_SYSTEM_PROMPT) - len(signoz_context) - len(k8s_section) - len(evidence_section)
         if available_len < 500:
              # 如果 SignOz 太長，也截斷它
              signoz_context = signoz_context[:500] + "... (truncated)"
-             available_len = 3500 - len(OPENCLAW_SYSTEM_PROMPT) - len(signoz_context) - len(k8s_section)
+             available_len = 3500 - len(OPENCLAW_SYSTEM_PROMPT) - len(signoz_context) - len(k8s_section) - len(evidence_section)
 
         alert_json = json.dumps(alert_context, ensure_ascii=False, indent=2)
         if len(alert_json) > available_len:
             alert_json = alert_json[:available_len] + "... (truncated)"
 
-        full_prompt = OPENCLAW_SYSTEM_PROMPT + signoz_context + k8s_section + "\n\n## Alert Data:\n" + alert_json
+        full_prompt = OPENCLAW_SYSTEM_PROMPT + signoz_context + k8s_section + evidence_section + "\n\n## Alert Data:\n" + alert_json
 
         logger.info(
             "openclaw_alert_analysis_start",