feat: add all application source code

- apps/api: FastAPI backend with Dockerfile - apps/web: Next.js frontend with Dockerfile - apps/sensor: Signal collection agent - packages: shared packages Co-Authored-By: Claude <noreply@anthropic.com>
2026-03-22 18:57:44 +08:00
parent a840bf975b
commit 196d269b92
245 changed files with 42207 additions and 6 deletions
--- a/apps/api/scripts/apply_prometheus_config.sh
+++ b/apps/api/scripts/apply_prometheus_config.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+# =============================================================================
+# Prometheus Alertmanager 自動對接腳本
+# =============================================================================
+# Phase 5: Shadow Mode - 自動化環境對接
+#
+# 功能:
+# 1. 建立 Alertmanager ConfigMap
+# 2. 套用至 K3s 叢集
+# 3. 自動重載 Alertmanager
+#
+# 使用方式:
+#   ./scripts/apply_prometheus_config.sh
+#
+# 前提條件:
+# - kubectl 已配置並可連線至 K3s (192.168.0.120)
+# - 有權限操作 monitoring namespace
+#
+# Tier 2 授權: 此腳本會變更 K3s 環境，需統帥授權
+# =============================================================================
+
+set -euo pipefail
+
+# -----------------------------------------------------------------------------
+# Configuration
+# -----------------------------------------------------------------------------
+NAMESPACE="monitoring"
+CONFIGMAP_NAME="alertmanager-awoooi-webhook"
+AWOOOI_WEBHOOK_URL="http://192.168.0.188:8000/api/v1/webhooks/alerts"
+KUBECONFIG_PATH="${KUBECONFIG:-./k3s-prod.yaml}"
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# -----------------------------------------------------------------------------
+# Functions
+# -----------------------------------------------------------------------------
+
+log_info() {
+    echo -e "${GREEN}[INFO]${NC} $1"
+}
+
+log_warn() {
+    echo -e "${YELLOW}[WARN]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+check_prerequisites() {
+    log_info "檢查前提條件..."
+
+    # Check kubectl
+    if ! command -v kubectl &> /dev/null; then
+        log_error "kubectl 未安裝"
+        exit 1
+    fi
+
+    # Check kubeconfig
+    if [[ ! -f "$KUBECONFIG_PATH" ]]; then
+        log_error "找不到 kubeconfig: $KUBECONFIG_PATH"
+        exit 1
+    fi
+
+    # Test connection
+    if ! kubectl --kubeconfig="$KUBECONFIG_PATH" cluster-info &> /dev/null; then
+        log_error "無法連線至 K3s 叢集"
+        exit 1
+    fi
+
+    log_info "前提條件檢查通過"
+}
+
+create_namespace_if_not_exists() {
+    log_info "確認 namespace: $NAMESPACE..."
+
+    if ! kubectl --kubeconfig="$KUBECONFIG_PATH" get namespace "$NAMESPACE" &> /dev/null; then
+        log_info "建立 namespace: $NAMESPACE"
+        kubectl --kubeconfig="$KUBECONFIG_PATH" create namespace "$NAMESPACE"
+    else
+        log_info "Namespace $NAMESPACE 已存在"
+    fi
+}
+
+apply_alertmanager_config() {
+    log_info "套用 Alertmanager Webhook 設定..."
+
+    # Create ConfigMap YAML
+    cat <<EOF | kubectl --kubeconfig="$KUBECONFIG_PATH" apply -f -
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: ${CONFIGMAP_NAME}
+  namespace: ${NAMESPACE}
+  labels:
+    app: alertmanager
+    component: awoooi-webhook
+data:
+  alertmanager-webhook.yml: |
+    # =============================================================================
+    # AWOOOI Webhook Receiver Configuration
+    # =============================================================================
+    # 此設定檔定義 Alertmanager 如何將告警轉發至 AWOOOI OpenClaw
+    #
+    # 用法: 將此內容合併至主 alertmanager.yml 的 receivers 區段
+    # =============================================================================
+
+    receivers:
+      - name: 'awoooi-openclaw'
+        webhook_configs:
+          - url: '${AWOOOI_WEBHOOK_URL}'
+            send_resolved: true
+            max_alerts: 10
+            # 5 秒超時
+            http_config:
+              follow_redirects: true
+
+    # 路由規則範例 (合併至主設定):
+    # route:
+    #   receiver: 'awoooi-openclaw'
+    #   group_by: ['alertname', 'namespace']
+    #   group_wait: 30s
+    #   group_interval: 5m
+    #   repeat_interval: 4h
+    #   routes:
+    #     - match:
+    #         severity: critical
+    #       receiver: 'awoooi-openclaw'
+    #       group_wait: 10s
+EOF
+
+    log_info "ConfigMap ${CONFIGMAP_NAME} 已套用"
+}
+
+reload_alertmanager() {
+    log_info "嘗試重載 Alertmanager..."
+
+    # Find Alertmanager pod
+    ALERTMANAGER_POD=$(kubectl --kubeconfig="$KUBECONFIG_PATH" get pods -n "$NAMESPACE" \
+        -l app=alertmanager -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
+
+    if [[ -z "$ALERTMANAGER_POD" ]]; then
+        log_warn "找不到 Alertmanager Pod (可能尚未部署)"
+        log_info "ConfigMap 已建立，待 Alertmanager 部署後可手動合併設定"
+        return 0
+    fi
+
+    # Trigger reload via /-/reload endpoint
+    log_info "觸發 Alertmanager 設定重載..."
+    kubectl --kubeconfig="$KUBECONFIG_PATH" exec -n "$NAMESPACE" "$ALERTMANAGER_POD" -- \
+        wget -q -O- --post-data='' http://localhost:9093/-/reload 2>/dev/null || true
+
+    log_info "Alertmanager 重載完成"
+}
+
+verify_config() {
+    log_info "驗證 ConfigMap..."
+
+    kubectl --kubeconfig="$KUBECONFIG_PATH" get configmap "$CONFIGMAP_NAME" -n "$NAMESPACE" -o yaml
+
+    log_info "驗證完成"
+}
+
+# -----------------------------------------------------------------------------
+# Main
+# -----------------------------------------------------------------------------
+
+main() {
+    echo "============================================================"
+    echo "  AWOOOI Prometheus Alertmanager 自動對接"
+    echo "============================================================"
+    echo ""
+    echo "目標: 將 Webhook 設定套用至 K3s 叢集"
+    echo "Webhook URL: $AWOOOI_WEBHOOK_URL"
+    echo "Namespace: $NAMESPACE"
+    echo ""
+
+    check_prerequisites
+    create_namespace_if_not_exists
+    apply_alertmanager_config
+    reload_alertmanager
+    verify_config
+
+    echo ""
+    echo "============================================================"
+    echo "  對接完成"
+    echo "============================================================"
+    echo ""
+    log_info "ConfigMap 已建立: $NAMESPACE/$CONFIGMAP_NAME"
+    log_info "下一步: 將 receiver 設定合併至 alertmanager.yml"
+    log_info "測試: 使用 scripts/fire_live_alert.py 發射測試告警"
+}
+
+main "$@"
--- a/apps/api/scripts/demo_multisig.py
+++ b/apps/api/scripts/demo_multisig.py
@@ -0,0 +1,265 @@
+#!/usr/bin/env python3
+"""
+CISO-101 Multi-Sig Demo Script
+==============================
+展示 CRITICAL 任務從發起到完成的完整信任鏈生命週期
+
+流程:
+1. ClawBot 發起 CRITICAL 操作 (DROP TABLE)
+2. 第一位簽核者簽核 → 仍為 PENDING (1/2)
+3. 第二位簽核者簽核 → 轉為 APPROVED → 觸發執行
+
+執行方式:
+    cd apps/api
+    source .venv/bin/activate
+    python scripts/demo_multisig.py
+"""
+
+import sys
+from pathlib import Path
+from datetime import datetime, timezone, timedelta
+
+# Add parent to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from src.models.approval import (
+    ApprovalRequestCreate,
+    ApprovalStatus,
+    RiskLevel,
+    BlastRadius,
+    DataImpact,
+    DryRunCheck,
+)
+from src.core.trust_engine import TrustEngine, get_required_signatures
+
+
+def print_header(title: str) -> None:
+    """Print a formatted header"""
+    print("\n" + "=" * 60)
+    print(f"  {title}")
+    print("=" * 60)
+
+
+def print_approval_status(approval) -> None:
+    """Print approval status summary"""
+    print(f"""
+    ID:              {approval.id}
+    Action:          {approval.action}
+    Status:          {approval.status.value.upper()}
+    Risk Level:      {approval.risk_level.value.upper()}
+    Required Sigs:   {approval.required_signatures}
+    Current Sigs:    {approval.current_signatures}
+    Is Fully Signed: {approval.is_fully_signed}
+    """)
+
+    if approval.signatures:
+        print("    Signatures:")
+        for sig in approval.signatures:
+            print(f"      - {sig.signer_name} ({sig.signer_id}) at {sig.signed_at.strftime('%H:%M:%S')}")
+            if sig.comment:
+                print(f"        Comment: {sig.comment}")
+
+
+def main():
+    """Run the Multi-Sig demo"""
+
+    print_header("CISO-101 Multi-Sig Trust Engine Demo")
+    print("""
+    This demo shows the complete CRITICAL approval lifecycle:
+
+    1. ClawBot initiates a CRITICAL operation (DROP TABLE)
+    2. First signer signs → Still PENDING (1/2)
+    3. Second signer signs → APPROVED → Execution triggered
+    """)
+
+    # ==========================================================================
+    # Step 0: Show signature requirements
+    # ==========================================================================
+    print_header("Step 0: Signature Requirements")
+    print("""
+    Risk Level    Required Signatures
+    ----------    -------------------
+    LOW           0 (Auto-approve)
+    MEDIUM        1
+    CRITICAL      2 (Multi-Sig)
+    """)
+
+    for level in RiskLevel:
+        req = get_required_signatures(level)
+        print(f"    {level.value.upper():10} → {req} signature(s)")
+
+    # ==========================================================================
+    # Step 1: Create CRITICAL approval request
+    # ==========================================================================
+    print_header("Step 1: ClawBot Initiates CRITICAL Operation")
+
+    # Track approved requests
+    approved_requests = []
+
+    def on_approved(approval):
+        approved_requests.append(approval)
+        print(f"\n    🚀 EXECUTION TRIGGERED: {approval.action}")
+
+    def on_rejected(approval):
+        print(f"\n    ❌ REJECTED: {approval.rejection_reason}")
+
+    engine = TrustEngine(
+        on_approved=on_approved,
+        on_rejected=on_rejected,
+    )
+
+    # Create the CRITICAL request
+    request = ApprovalRequestCreate(
+        action="DROP TABLE user_sessions",
+        description="清除所有用戶 session 以強制重新登入。此操作將影響所有線上用戶。",
+        risk_level=RiskLevel.CRITICAL,
+        blast_radius=BlastRadius(
+            affected_pods=0,
+            estimated_downtime="0",
+            related_services=["auth-service", "api-gateway", "user-service"],
+            data_impact=DataImpact.DESTRUCTIVE,
+        ),
+        dry_run_checks=[
+            DryRunCheck(name="RBAC Check", passed=True, message="db-admin"),
+            DryRunCheck(name="Syntax Check", passed=True),
+            DryRunCheck(name="Backup Available", passed=False, message="No recent backup!"),
+        ],
+        requested_by="ClawBot",
+        expires_at=datetime.now(timezone.utc) + timedelta(hours=1),
+    )
+
+    approval = engine.create_approval(request)
+
+    print(f"""
+    ClawBot 發起 CRITICAL 操作請求:
+
+    動作:     {request.action}
+    描述:     {request.description}
+    風險等級: {request.risk_level.value.upper()}
+    資料影響: {request.blast_radius.data_impact.value.upper()}
+    """)
+
+    print_approval_status(approval)
+
+    # ==========================================================================
+    # Step 2: First signer signs
+    # ==========================================================================
+    print_header("Step 2: First Signer (Alice) Signs")
+
+    approval, message, triggered = engine.sign_approval(
+        approval_id=approval.id,
+        signer_id="alice-001",
+        signer_name="Alice Chen (CTO)",
+        comment="已確認風險，建議在低流量時段執行",
+    )
+
+    print(f"""
+    Alice (CTO) 已簽核:
+
+    結果:     {message}
+    觸發執行: {triggered}
+    """)
+
+    print_approval_status(approval)
+
+    assert approval.status == ApprovalStatus.PENDING, "Should still be PENDING after first signature"
+    assert approval.current_signatures == 1, "Should have 1 signature"
+    assert not triggered, "Should not trigger execution yet"
+
+    # ==========================================================================
+    # Step 3: Second signer signs
+    # ==========================================================================
+    print_header("Step 3: Second Signer (Bob) Signs - Multi-Sig Complete")
+
+    approval, message, triggered = engine.sign_approval(
+        approval_id=approval.id,
+        signer_id="bob-002",
+        signer_name="Bob Wu (CISO)",
+        comment="CISO 核准。已通知 DBA 團隊待命。",
+    )
+
+    print(f"""
+    Bob (CISO) 已簽核:
+
+    結果:     {message}
+    觸發執行: {triggered}
+    """)
+
+    print_approval_status(approval)
+
+    assert approval.status == ApprovalStatus.APPROVED, "Should be APPROVED after second signature"
+    assert approval.current_signatures == 2, "Should have 2 signatures"
+    assert approval.is_fully_signed, "Should be fully signed"
+    assert triggered, "Should trigger execution"
+
+    # ==========================================================================
+    # Step 4: Verify final state
+    # ==========================================================================
+    print_header("Step 4: Verification")
+
+    pending = engine.get_pending_approvals()
+
+    print(f"""
+    驗證結果:
+
+    ✅ 待簽核清單數量: {len(pending)} (應為 0)
+    ✅ 已批准請求數量: {len(approved_requests)} (應為 1)
+    ✅ 最終狀態: {approval.status.value.upper()}
+    ✅ 簽核數: {approval.current_signatures}/{approval.required_signatures}
+    ✅ 解決時間: {approval.resolved_at.strftime('%Y-%m-%d %H:%M:%S') if approval.resolved_at else 'N/A'}
+    """)
+
+    # ==========================================================================
+    # Bonus: Demo LOW risk auto-approval
+    # ==========================================================================
+    print_header("Bonus: LOW Risk Auto-Approval Demo")
+
+    low_request = ApprovalRequestCreate(
+        action="Scale deployment api-backend to 5 replicas",
+        description="增加後端服務副本數以應對流量增長",
+        risk_level=RiskLevel.LOW,
+        blast_radius=BlastRadius(
+            affected_pods=5,
+            estimated_downtime="0",
+            related_services=["api-backend"],
+            data_impact=DataImpact.NONE,
+        ),
+        dry_run_checks=[
+            DryRunCheck(name="Resource Check", passed=True, message="5/20 pods"),
+        ],
+        requested_by="ClawBot",
+    )
+
+    low_approval = engine.create_approval(low_request)
+
+    print(f"""
+    LOW 風險操作自動放行:
+
+    動作:     {low_request.action}
+    風險等級: LOW
+    狀態:     {low_approval.status.value.upper()} (自動批准!)
+    簽核數:   {low_approval.required_signatures} (不需要簽核)
+    """)
+
+    assert low_approval.status == ApprovalStatus.APPROVED, "LOW risk should be auto-approved"
+
+    # ==========================================================================
+    # Summary
+    # ==========================================================================
+    print_header("Demo Complete!")
+    print("""
+    CISO-101 Multi-Sig Trust Engine 功能驗證完成:
+
+    ✅ 風險等級分類 (LOW/MEDIUM/CRITICAL)
+    ✅ 簽核數自動判定 (0/1/2)
+    ✅ LOW 風險自動放行
+    ✅ CRITICAL 雙重簽核 (Multi-Sig)
+    ✅ 狀態機正確轉換 (PENDING → APPROVED)
+    ✅ 簽核完成觸發執行回調
+
+    信任鏈完整性已驗證。
+    """)
+
+
+if __name__ == "__main__":
+    main()
--- a/apps/api/scripts/e2e_openclaw_test.py
+++ b/apps/api/scripts/e2e_openclaw_test.py
@@ -0,0 +1,246 @@
+#!/usr/bin/env python3
+"""
+Phase 5 E2E 點火測試 - OpenClaw 全鏈路驗證
+==========================================
+
+測試流程:
+1. 發射模擬 K8s 告警到 Webhook
+2. 驗證告警被正確處理
+3. 驗證 ApprovalRecord 被建立
+4. 模擬 Telegram 簽核回調
+5. 驗證執行觸發
+
+使用方式:
+    python scripts/e2e_openclaw_test.py
+"""
+
+import asyncio
+import json
+import sys
+from datetime import datetime
+
+
+def print_header(title: str) -> None:
+    """列印測試標題"""
+    print("\n" + "=" * 60)
+    print(f"  {title}")
+    print("=" * 60)
+
+
+def print_step(step: int, description: str) -> None:
+    """列印測試步驟"""
+    print(f"\n🔹 Step {step}: {description}")
+
+
+def print_success(message: str) -> None:
+    """列印成功訊息"""
+    print(f"   ✅ {message}")
+
+
+def print_error(message: str) -> None:
+    """列印錯誤訊息"""
+    print(f"   ❌ {message}")
+
+
+def print_info(message: str) -> None:
+    """列印資訊訊息"""
+    print(f"   ℹ️  {message}")
+
+
+async def test_phase5_e2e():
+    """Phase 5 E2E 測試"""
+    print_header("Phase 5 E2E 點火測試 - OpenClaw 全鏈路驗證")
+    print(f"執行時間: {datetime.now().isoformat()}")
+
+    # =========================================================================
+    # Step 1: 測試 LogLevelFilter (日誌清洗)
+    # =========================================================================
+    print_step(1, "日誌清洗模組 (LogLevelFilter)")
+
+    try:
+        from src.services.context_gatherer import LogLevelFilter
+
+        # 模擬 K8s 日誌
+        raw_logs = """
+2024-03-21T10:15:23.456Z INFO  [harbor.core] Starting Harbor Core
+2024-03-21T10:15:24.789Z DEBUG [harbor.core.db] Initializing connection pool
+2024-03-21T10:16:45.123Z ERROR [harbor.core.db] Connection lost to PostgreSQL
+2024-03-21T10:16:45.456Z FATAL [harbor.core] Unrecoverable error
+Traceback (most recent call last):
+  File "/harbor/core/db.py", line 234, in connect
+    raise DatabaseConnectionError("Max retries exceeded")
+        """.strip()
+
+        filtered = LogLevelFilter.filter_logs(raw_logs)
+        stats = LogLevelFilter.get_filter_stats(raw_logs, filtered)
+
+        # 驗證 DEBUG/INFO 被過濾
+        assert "DEBUG" not in filtered, "DEBUG should be filtered"
+        assert "INFO" not in filtered.replace("DatabaseConnectionError", ""), "INFO should be filtered"
+        assert "ERROR" in filtered, "ERROR should be preserved"
+        assert "FATAL" in filtered, "FATAL should be preserved"
+        assert "Traceback" in filtered, "Stacktrace should be preserved"
+
+        print_success(f"日誌清洗成功: {stats['original_lines']} → {stats['filtered_lines']} 行")
+        print_success(f"雜訊移除率: {stats['removal_rate_percent']}%")
+
+    except Exception as e:
+        print_error(f"日誌清洗測試失敗: {e}")
+        return False
+
+    # =========================================================================
+    # Step 2: 測試 Security Interceptor (白名單 + Nonce)
+    # =========================================================================
+    print_step(2, "安全攔截器 (Security Interceptor)")
+
+    try:
+        from src.services.security_interceptor import (
+            TelegramSecurityInterceptor,
+            UserNotWhitelistedError,
+            NonceReplayError,
+        )
+        from src.core.config import settings
+
+        interceptor = TelegramSecurityInterceptor()
+
+        # 測試白名單 (假設統帥 ID: 5619078117)
+        test_user_id = 5619078117
+
+        # 檢查白名單配置
+        whitelist = settings.OPENCLAW_TG_USER_WHITELIST
+        print_info(f"白名單配置: {whitelist}")
+
+        if whitelist:
+            is_whitelisted = interceptor.is_whitelisted(test_user_id)
+            if is_whitelisted:
+                print_success(f"統帥 ID {test_user_id} 在白名單內")
+            else:
+                print_info(f"統帥 ID {test_user_id} 不在白名單 (需配置)")
+        else:
+            print_info("白名單為空 (需在環境變數中配置 OPENCLAW_TG_USER_WHITELIST)")
+
+        # 測試 Nonce 產生
+        nonce = interceptor.generate_callback_nonce("test-approval-123", "approve")
+        print_success(f"Nonce 產生成功: {nonce[:30]}...")
+
+        # 解析 Nonce
+        parsed = interceptor.parse_callback_data(nonce)
+        assert parsed["action"] == "approve"
+        assert parsed["approval_id"] == "test-approval-123"
+        print_success("Nonce 解析成功")
+
+    except Exception as e:
+        print_error(f"安全攔截器測試失敗: {e}")
+        return False
+
+    # =========================================================================
+    # Step 3: 測試 Telegram Gateway (訊息格式)
+    # =========================================================================
+    print_step(3, "Telegram Gateway (SOUL.md 訊息格式)")
+
+    try:
+        from src.services.telegram_gateway import TelegramMessage, RISK_EMOJI_MAP
+
+        # 建立測試訊息
+        message = TelegramMessage(
+            status_emoji=RISK_EMOJI_MAP["critical"],
+            risk_level="CRITICAL",
+            resource_name="harbor-core-7d4b8c9f5-xk2m3",
+            root_cause="OOMKilled",
+            suggested_action="DELETE_POD (重啟 Pod)",
+            estimated_downtime="~30s",
+            approval_id="test-approval-123",
+        )
+
+        formatted = message.format()
+
+        # 驗證 SOUL.md 格式
+        assert "🚨" in formatted, "Should have critical emoji"
+        assert "CRITICAL" in formatted, "Should have risk level"
+        assert "harbor-core" in formatted, "Should have resource name"
+        assert "OOMKilled" in formatted, "Should have root cause"
+        assert "建議" in formatted, "Should have suggestion"
+        assert "停機" in formatted, "Should have downtime"
+        assert len(formatted) <= 500, f"Should be <= 500 chars, got {len(formatted)}"
+
+        print_success("SOUL.md 訊息格式驗證通過")
+        print_info(f"訊息長度: {len(formatted)} / 500 字元")
+        print()
+        print("   📱 訊息預覽:")
+        for line in formatted.split("\n"):
+            print(f"      {line}")
+
+    except Exception as e:
+        print_error(f"Telegram Gateway 測試失敗: {e}")
+        return False
+
+    # =========================================================================
+    # Step 4: 測試 OpenClaw 模組載入
+    # =========================================================================
+    print_step(4, "OpenClaw AI 模組載入")
+
+    try:
+        from src.services.openclaw import get_openclaw, OpenClawService
+
+        openclaw = get_openclaw()
+        assert isinstance(openclaw, OpenClawService)
+        print_success("OpenClaw 服務載入成功")
+
+        # 檢查 AI Fallback 順序
+        from src.core.config import settings
+        print_info(f"AI Fallback 順序: {settings.AI_FALLBACK_ORDER}")
+        print_info(f"預設模型: {settings.OPENCLAW_DEFAULT_MODEL}")
+
+    except Exception as e:
+        print_error(f"OpenClaw 模組載入失敗: {e}")
+        return False
+
+    # =========================================================================
+    # Step 5: 測試 Signature 審計欄位
+    # =========================================================================
+    print_step(5, "Signature 審計欄位 (Telegram 擴充)")
+
+    try:
+        from src.models.approval import Signature, SignatureSource
+
+        # 建立 Telegram 簽核記錄
+        sig = Signature(
+            signer_id="tg_5619078117",
+            signer_name="統帥",
+            comment="Telegram 簽核測試",
+            source=SignatureSource.TELEGRAM,
+            telegram_user_id=5619078117,
+            telegram_message_id=12345,
+        )
+
+        assert sig.source == SignatureSource.TELEGRAM
+        assert sig.telegram_user_id == 5619078117
+        print_success("Telegram 審計欄位驗證通過")
+        print_info(f"簽核來源: {sig.source.value}")
+        print_info(f"Telegram User ID: {sig.telegram_user_id}")
+
+    except Exception as e:
+        print_error(f"Signature 審計欄位測試失敗: {e}")
+        return False
+
+    # =========================================================================
+    # 測試完成
+    # =========================================================================
+    print_header("E2E 測試結果")
+    print()
+    print("   ✅ Step 1: 日誌清洗 (LogLevelFilter) - PASSED")
+    print("   ✅ Step 2: 安全攔截器 (Security Interceptor) - PASSED")
+    print("   ✅ Step 3: Telegram Gateway (SOUL.md 格式) - PASSED")
+    print("   ✅ Step 4: OpenClaw AI 模組載入 - PASSED")
+    print("   ✅ Step 5: Signature 審計欄位 - PASSED")
+    print()
+    print("=" * 60)
+    print("  🎉 Phase 5 E2E 點火測試 - 全數通過！")
+    print("=" * 60)
+
+    return True
+
+
+if __name__ == "__main__":
+    success = asyncio.run(test_phase5_e2e())
+    sys.exit(0 if success else 1)
--- a/apps/api/scripts/fire_live_alert.py
+++ b/apps/api/scripts/fire_live_alert.py
@@ -0,0 +1,372 @@
+#!/usr/bin/env python3
+"""
+AWOOOI 實彈射擊腳本 - 自動化告警測試
+=====================================
+Phase 5: Shadow Mode - 自動化實彈演習
+
+功能:
+1. 模擬 Prometheus 格式的 OOMKilled/PodCrash 告警
+2. 自動計算 HMAC-SHA256 簽章
+3. 直接打向本地 Webhook 端點
+4. 驗證回應並輸出結果
+
+使用方式:
+    python scripts/fire_live_alert.py
+
+環境變數:
+    WEBHOOK_HMAC_SECRET: HMAC 簽章密鑰 (必要)
+    AWOOOI_API_URL: API 端點 (預設: http://192.168.0.188:8000)
+
+Tier 2 授權: 此腳本會觸發 AI 分析流程，需統帥授權
+"""
+
+import argparse
+import hashlib
+import hmac
+import json
+import os
+import sys
+from datetime import datetime, timezone
+from typing import Literal
+
+import httpx
+
+
+# =============================================================================
+# Configuration
+# =============================================================================
+
+DEFAULT_API_URL = os.getenv("AWOOOI_API_URL", "http://192.168.0.188:8000")
+WEBHOOK_ENDPOINT = "/api/v1/webhooks/alerts"
+HMAC_SECRET = os.getenv("WEBHOOK_HMAC_SECRET", "")
+
+
+# =============================================================================
+# Alert Templates
+# =============================================================================
+
+ALERT_TEMPLATES = {
+    "oomkilled": {
+        "alert_type": "k8s_pod_crash",
+        "severity": "critical",
+        "source": "prometheus",
+        "target_resource": "harbor-core-7d4b8c9f5-xk2m3",
+        "namespace": "harbor",
+        "message": "Pod terminated due to OOMKilled - Container exceeded memory limit",
+        "metrics": {
+            "memory_percent": 99.8,
+            "restart_count": 5,
+            "memory_limit_mb": 512,
+            "memory_usage_mb": 520,
+        },
+        "labels": {
+            "app": "harbor-core",
+            "deployment": "harbor-core",
+            "pod": "harbor-core-7d4b8c9f5-xk2m3",
+            "container": "harbor-core",
+            "reason": "OOMKilled",
+        },
+    },
+    "podcrash": {
+        "alert_type": "k8s_pod_crash",
+        "severity": "warning",
+        "source": "prometheus",
+        "target_resource": "nginx-ingress-7d6f8c9b5-abc12",
+        "namespace": "ingress-nginx",
+        "message": "Pod CrashLoopBackOff - Container restarting repeatedly",
+        "metrics": {
+            "restart_count": 8,
+            "cpu_percent": 15.2,
+            "memory_percent": 45.0,
+        },
+        "labels": {
+            "app": "nginx-ingress",
+            "deployment": "nginx-ingress-controller",
+            "pod": "nginx-ingress-7d6f8c9b5-abc12",
+        },
+    },
+    "highcpu": {
+        "alert_type": "high_cpu",
+        "severity": "warning",
+        "source": "prometheus",
+        "target_resource": "api-backend-deployment",
+        "namespace": "default",
+        "message": "High CPU usage detected - Pod using 95% of allocated CPU",
+        "metrics": {
+            "cpu_percent": 95.5,
+            "memory_percent": 60.0,
+            "sigma_deviation": 3.2,
+        },
+        "labels": {
+            "app": "api-backend",
+            "deployment": "api-backend",
+        },
+    },
+    "highmemory": {
+        "alert_type": "high_memory",
+        "severity": "warning",
+        "source": "prometheus",
+        "target_resource": "redis-master-0",
+        "namespace": "redis",
+        "message": "High memory usage detected - Pod memory at 92%",
+        "metrics": {
+            "cpu_percent": 25.0,
+            "memory_percent": 92.0,
+            "sigma_deviation": 2.8,
+        },
+        "labels": {
+            "app": "redis",
+            "statefulset": "redis-master",
+        },
+    },
+}
+
+
+# =============================================================================
+# Helper Functions
+# =============================================================================
+
+def compute_hmac_signature(secret: str, payload: bytes) -> str:
+    """計算 HMAC-SHA256 簽章"""
+    signature = hmac.new(
+        secret.encode(),
+        payload,
+        hashlib.sha256,
+    ).hexdigest()
+    return f"sha256={signature}"
+
+
+def print_header(title: str) -> None:
+    """列印標題"""
+    print("\n" + "=" * 60)
+    print(f"  {title}")
+    print("=" * 60)
+
+
+def print_success(message: str) -> None:
+    """列印成功訊息"""
+    print(f"   ✅ {message}")
+
+
+def print_error(message: str) -> None:
+    """列印錯誤訊息"""
+    print(f"   ❌ {message}")
+
+
+def print_info(message: str) -> None:
+    """列印資訊訊息"""
+    print(f"   ℹ️  {message}")
+
+
+def print_warning(message: str) -> None:
+    """列印警告訊息"""
+    print(f"   ⚠️  {message}")
+
+
+# =============================================================================
+# Main Logic
+# =============================================================================
+
+def fire_alert(
+    alert_type: str,
+    api_url: str = DEFAULT_API_URL,
+    hmac_secret: str = HMAC_SECRET,
+    dry_run: bool = False,
+) -> dict:
+    """
+    發射模擬告警
+
+    Args:
+        alert_type: 告警類型 (oomkilled, podcrash, highcpu, highmemory)
+        api_url: API 端點 URL
+        hmac_secret: HMAC 簽章密鑰
+        dry_run: 是否僅輸出不實際發送
+
+    Returns:
+        dict: API 回應
+    """
+    print_header(f"AWOOOI 實彈射擊 - {alert_type.upper()}")
+    print(f"執行時間: {datetime.now(timezone.utc).isoformat()}")
+    print(f"目標端點: {api_url}{WEBHOOK_ENDPOINT}")
+
+    # 取得告警模板
+    if alert_type not in ALERT_TEMPLATES:
+        print_error(f"未知的告警類型: {alert_type}")
+        print_info(f"可用類型: {', '.join(ALERT_TEMPLATES.keys())}")
+        return {"success": False, "error": "Unknown alert type"}
+
+    payload = ALERT_TEMPLATES[alert_type].copy()
+
+    # 序列化 Payload (與 httpx 相同的格式)
+    payload_json = json.dumps(payload, separators=(",", ":"))
+    payload_bytes = payload_json.encode()
+
+    print("\n📦 告警 Payload:")
+    print(json.dumps(payload, indent=2, ensure_ascii=False))
+
+    # 計算 HMAC 簽章
+    if hmac_secret:
+        signature = compute_hmac_signature(hmac_secret, payload_bytes)
+        print_success(f"HMAC 簽章: {signature[:40]}...")
+    else:
+        signature = None
+        print_warning("無 HMAC Secret - 簽章將被跳過 (僅限 dev 環境)")
+
+    # Dry-run 模式
+    if dry_run:
+        print("\n🔒 [DRY-RUN MODE] 不實際發送請求")
+        print_info("移除 --dry-run 參數以實際發射")
+        return {"success": True, "dry_run": True}
+
+    # 發送請求
+    print("\n🚀 發射中...")
+
+    headers = {"Content-Type": "application/json"}
+    if signature:
+        headers["X-Signature-256"] = signature
+
+    try:
+        with httpx.Client(timeout=30.0) as client:
+            response = client.post(
+                f"{api_url}{WEBHOOK_ENDPOINT}",
+                content=payload_bytes,
+                headers=headers,
+            )
+
+        # 解析回應
+        print(f"\n📡 HTTP Status: {response.status_code}")
+
+        try:
+            result = response.json()
+            print("\n📋 API 回應:")
+            print(json.dumps(result, indent=2, ensure_ascii=False))
+
+            if response.status_code == 200 and result.get("success"):
+                print_success("告警已成功接收並處理！")
+
+                if result.get("converged"):
+                    print_info(f"告警收斂: 相同指紋已聚合 x{result.get('hit_count', 1)} 次")
+                else:
+                    print_info(f"風險等級: {result.get('risk_level', 'N/A')}")
+                    print_info(f"建議操作: {result.get('suggested_action', 'N/A')}")
+
+                if result.get("approval_created"):
+                    print_success(f"待簽核卡片已建立: {result.get('approval_id', 'N/A')}")
+            else:
+                print_error(f"處理失敗: {result.get('message', result.get('detail', 'Unknown error'))}")
+
+            return result
+
+        except json.JSONDecodeError:
+            print_error(f"回應解析失敗: {response.text}")
+            return {"success": False, "error": "Response parse error", "raw": response.text}
+
+    except httpx.ConnectError as e:
+        print_error(f"連線失敗: {str(e)}")
+        print_info(f"請確認 API 服務正在執行: {api_url}")
+        return {"success": False, "error": "Connection failed"}
+
+    except httpx.TimeoutException as e:
+        print_error(f"請求超時: {str(e)}")
+        return {"success": False, "error": "Timeout"}
+
+    except Exception as e:
+        print_error(f"未預期錯誤: {str(e)}")
+        return {"success": False, "error": str(e)}
+
+
+def main():
+    """主程式入口"""
+    parser = argparse.ArgumentParser(
+        description="AWOOOI 實彈射擊腳本 - 自動化告警測試",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+告警類型:
+  oomkilled   - Pod OOMKilled (Critical)
+  podcrash    - Pod CrashLoopBackOff (Warning)
+  highcpu     - High CPU Usage (Warning)
+  highmemory  - High Memory Usage (Warning)
+
+範例:
+  # 發射 OOMKilled 告警
+  python scripts/fire_live_alert.py oomkilled
+
+  # Dry-run 模式 (不實際發送)
+  python scripts/fire_live_alert.py oomkilled --dry-run
+
+  # 指定 HMAC Secret
+  WEBHOOK_HMAC_SECRET=mysecret python scripts/fire_live_alert.py oomkilled
+        """,
+    )
+
+    parser.add_argument(
+        "alert_type",
+        choices=list(ALERT_TEMPLATES.keys()),
+        help="告警類型",
+    )
+
+    parser.add_argument(
+        "--api-url",
+        default=DEFAULT_API_URL,
+        help=f"API 端點 URL (預設: {DEFAULT_API_URL})",
+    )
+
+    parser.add_argument(
+        "--hmac-secret",
+        default=HMAC_SECRET,
+        help="HMAC 簽章密鑰 (也可用環境變數 WEBHOOK_HMAC_SECRET)",
+    )
+
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Dry-run 模式 - 僅輸出不實際發送",
+    )
+
+    parser.add_argument(
+        "--all",
+        action="store_true",
+        help="依序發射所有類型的告警",
+    )
+
+    args = parser.parse_args()
+
+    print_header("AWOOOI 實彈射擊系統")
+    print(f"API 端點: {args.api_url}")
+    print(f"HMAC 配置: {'已設定' if args.hmac_secret else '未設定 (dev mode)'}")
+    print(f"Shadow Mode: 已啟用 (K8s 操作將被安全攔截)")
+
+    if args.all:
+        # 發射所有類型的告警
+        print("\n🎯 連續發射所有告警類型...")
+        results = {}
+        for alert_type in ALERT_TEMPLATES.keys():
+            result = fire_alert(
+                alert_type=alert_type,
+                api_url=args.api_url,
+                hmac_secret=args.hmac_secret,
+                dry_run=args.dry_run,
+            )
+            results[alert_type] = result
+
+        # 摘要
+        print_header("射擊結果摘要")
+        for alert_type, result in results.items():
+            status = "✅" if result.get("success") else "❌"
+            print(f"   {status} {alert_type}: {result.get('message', result.get('error', 'N/A'))}")
+    else:
+        # 發射單一告警
+        fire_alert(
+            alert_type=args.alert_type,
+            api_url=args.api_url,
+            hmac_secret=args.hmac_secret,
+            dry_run=args.dry_run,
+        )
+
+    print("\n" + "=" * 60)
+    print("  實彈射擊完成")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    main()
--- a/apps/api/scripts/fire_test_alert.py
+++ b/apps/api/scripts/fire_test_alert.py
@@ -0,0 +1,318 @@
+#!/usr/bin/env python3
+"""
+🚀 AWOOOI Phase 2 導彈腳本 - fire_test_alert.py
+===============================================
+向系統注入模擬告警，觸發 ClawBot AI 分析流程
+
+用途:
+- 驗證全鏈路 (Webhook → ClawBot → ApprovalCard)
+- 測試戰情室前端是否即時彈出授權卡片
+- 開發除錯用 (無需真實監控系統)
+
+執行方式:
+    cd apps/api
+    python -m scripts.fire_test_alert
+
+    # 指定告警類型
+    python -m scripts.fire_test_alert --type db_connection_timeout
+    python -m scripts.fire_test_alert --type k8s_pod_crash --severity critical
+
+Author: Claude Code
+Date: 2026-03-21
+"""
+
+import argparse
+import asyncio
+import sys
+from datetime import datetime
+from pathlib import Path
+
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+
+import httpx
+
+# =============================================================================
+# Config
+# =============================================================================
+
+API_BASE_URL = "http://localhost:8000"
+WEBHOOK_ENDPOINT = f"{API_BASE_URL}/api/v1/webhooks/alerts"
+
+# =============================================================================
+# 預定義告警場景 (High-Fidelity Mock Alerts)
+# =============================================================================
+
+ALERT_SCENARIOS = {
+    "db_connection_timeout": {
+        "alert_type": "db_connection_timeout",
+        "severity": "critical",
+        "source": "prometheus-alertmanager",
+        "target_resource": "postgres-primary-0",
+        "namespace": "database",
+        "message": "PostgreSQL Database OOM - Connection pool exhausted, 47 waiting queries",
+        "metrics": {
+            "connection_count": 100,
+            "waiting_queries": 47,
+            "cpu_percent": 89,
+            "memory_percent": 95,
+            "sigma_deviation": 4.2,
+        },
+        "labels": {
+            "app": "postgres",
+            "team": "dba",
+            "tier": "critical",
+        },
+    },
+    "k8s_pod_crash": {
+        "alert_type": "k8s_pod_crash",
+        "severity": "warning",
+        "source": "k8s-event-watcher",
+        "target_resource": "harbor-core-7d4b8c9f5-xk2m3",
+        "namespace": "harbor",
+        "message": "Pod CrashLoopBackOff detected - OOMKilled after 5 restarts",
+        "metrics": {
+            "restart_count": 5,
+            "last_exit_code": 137,
+            "cpu_percent": 95,
+            "memory_percent": 100,
+            "sigma_deviation": 3.8,
+        },
+        "labels": {
+            "app": "harbor-core",
+            "team": "devops",
+        },
+    },
+    "high_cpu": {
+        "alert_type": "high_cpu",
+        "severity": "warning",
+        "source": "node-exporter",
+        "target_resource": "api-backend-deployment",
+        "namespace": "production",
+        "message": "Payment API Latency Spike - CPU at 94%, response time > 2s",
+        "metrics": {
+            "cpu_percent": 94,
+            "memory_percent": 72,
+            "response_time_ms": 2340,
+            "sigma_deviation": 3.2,
+        },
+        "labels": {
+            "app": "payment-api",
+            "team": "backend",
+            "sla": "critical",
+        },
+    },
+    "disk_full": {
+        "alert_type": "disk_full",
+        "severity": "critical",
+        "source": "node-exporter",
+        "target_resource": "logging-node-01",
+        "namespace": "kube-system",
+        "message": "Disk usage at 97% - /var/log nearly full, risk of logging failure",
+        "metrics": {
+            "disk_percent": 97,
+            "available_gb": 2.3,
+            "inode_percent": 89,
+        },
+        "labels": {
+            "node": "logging-node-01",
+            "team": "sre",
+        },
+    },
+    "ssl_expiry": {
+        "alert_type": "ssl_expiry",
+        "severity": "warning",
+        "source": "cert-manager",
+        "target_resource": "awoooi.wooo.work",
+        "namespace": "cert-manager",
+        "message": "SSL Certificate expiring in 7 days - auto-renewal failed",
+        "metrics": {
+            "days_until_expiry": 7,
+        },
+        "labels": {
+            "domain": "awoooi.wooo.work",
+            "issuer": "letsencrypt",
+        },
+    },
+}
+
+# =============================================================================
+# Terminal Output Helpers (漂亮的 Log)
+# =============================================================================
+
+class Colors:
+    """ANSI Color Codes"""
+    HEADER = '\033[95m'
+    BLUE = '\033[94m'
+    CYAN = '\033[96m'
+    GREEN = '\033[92m'
+    YELLOW = '\033[93m'
+    RED = '\033[91m'
+    ENDC = '\033[0m'
+    BOLD = '\033[1m'
+    DIM = '\033[2m'
+
+
+def print_banner():
+    """Print AWOOOI ASCII Banner"""
+    banner = f"""
+{Colors.CYAN}{Colors.BOLD}
+    █████╗ ██╗    ██╗ ██████╗  ██████╗  ██████╗ ██╗
+   ██╔══██╗██║    ██║██╔═══██╗██╔═══██╗██╔═══██╗██║
+   ███████║██║ █╗ ██║██║   ██║██║   ██║██║   ██║██║
+   ██╔══██║██║███╗██║██║   ██║██║   ██║██║   ██║██║
+   ██║  ██║╚███╔███╔╝╚██████╔╝╚██████╔╝╚██████╔╝██║
+   ╚═╝  ╚═╝ ╚══╝╚══╝  ╚═════╝  ╚═════╝  ╚═════╝ ╚═╝
+{Colors.ENDC}
+{Colors.DIM}   🚀 Phase 2 導彈腳本 - Test Alert Injector{Colors.ENDC}
+{Colors.DIM}   ─────────────────────────────────────────{Colors.ENDC}
+"""
+    print(banner)
+
+
+def print_section(title: str):
+    """Print section header"""
+    print(f"\n{Colors.BLUE}{Colors.BOLD}▶ {title}{Colors.ENDC}")
+    print(f"{Colors.DIM}{'─' * 50}{Colors.ENDC}")
+
+
+def print_alert_info(alert: dict):
+    """Print alert payload info"""
+    print(f"  {Colors.YELLOW}告警類型:{Colors.ENDC} {alert['alert_type']}")
+    print(f"  {Colors.YELLOW}嚴重度:{Colors.ENDC} {alert['severity']}")
+    print(f"  {Colors.YELLOW}目標資源:{Colors.ENDC} {alert['target_resource']}")
+    print(f"  {Colors.YELLOW}命名空間:{Colors.ENDC} {alert['namespace']}")
+    print(f"  {Colors.YELLOW}訊息:{Colors.ENDC} {alert['message']}")
+    if alert.get('metrics'):
+        print(f"  {Colors.YELLOW}指標:{Colors.ENDC}")
+        for k, v in alert['metrics'].items():
+            print(f"    • {k}: {v}")
+
+
+def print_response(response: dict, status_code: int):
+    """Print API response"""
+    if status_code == 200 and response.get('success'):
+        print(f"\n{Colors.GREEN}{Colors.BOLD}✓ 告警發射成功！{Colors.ENDC}")
+        print(f"  {Colors.CYAN}Approval ID:{Colors.ENDC} {response.get('approval_id', 'N/A')}")
+        print(f"  {Colors.CYAN}風險等級:{Colors.ENDC} {response.get('risk_level', 'N/A')}")
+        print(f"  {Colors.CYAN}建議動作:{Colors.ENDC} {response.get('suggested_action', 'N/A')}")
+        print(f"  {Colors.CYAN}聚合次數:{Colors.ENDC} {response.get('hit_count', 1)}")
+        if response.get('converged'):
+            print(f"  {Colors.YELLOW}⚡ 告警已收斂 (跳過 LLM){Colors.ENDC}")
+    else:
+        print(f"\n{Colors.RED}{Colors.BOLD}✗ 告警發射失敗！{Colors.ENDC}")
+        print(f"  {Colors.RED}狀態碼:{Colors.ENDC} {status_code}")
+        print(f"  {Colors.RED}回應:{Colors.ENDC} {response}")
+
+
+def print_footer():
+    """Print footer with instructions"""
+    print(f"\n{Colors.DIM}{'─' * 50}{Colors.ENDC}")
+    print(f"{Colors.GREEN}📺 請查看戰情室前端：{Colors.ENDC} http://localhost:3000")
+    print(f"{Colors.GREEN}📋 右側面板應顯示新的 ApprovalCard{Colors.ENDC}")
+    print(f"{Colors.DIM}時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}{Colors.ENDC}\n")
+
+
+# =============================================================================
+# Main Logic
+# =============================================================================
+
+async def fire_alert(alert_type: str, severity: str | None = None) -> bool:
+    """
+    發射測試告警
+
+    Args:
+        alert_type: 告警類型 (db_connection_timeout, k8s_pod_crash, etc.)
+        severity: 覆蓋嚴重度 (optional)
+
+    Returns:
+        bool: 是否成功
+    """
+    # 取得告警場景
+    if alert_type not in ALERT_SCENARIOS:
+        print(f"{Colors.RED}❌ 未知告警類型: {alert_type}{Colors.ENDC}")
+        print(f"{Colors.DIM}可用類型: {', '.join(ALERT_SCENARIOS.keys())}{Colors.ENDC}")
+        return False
+
+    alert = ALERT_SCENARIOS[alert_type].copy()
+
+    # 覆蓋嚴重度
+    if severity:
+        alert['severity'] = severity
+
+    print_section("告警 Payload")
+    print_alert_info(alert)
+
+    print_section("發射告警至 Webhook API")
+    print(f"  {Colors.CYAN}端點:{Colors.ENDC} {WEBHOOK_ENDPOINT}")
+
+    try:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.post(
+                WEBHOOK_ENDPOINT,
+                json=alert,
+                headers={"Content-Type": "application/json"},
+            )
+
+            result = response.json()
+            print_response(result, response.status_code)
+
+            return response.status_code == 200
+
+    except httpx.ConnectError:
+        print(f"\n{Colors.RED}{Colors.BOLD}✗ 連線失敗！{Colors.ENDC}")
+        print(f"  {Colors.RED}請確認後端 API 正在運行:{Colors.ENDC}")
+        print(f"  {Colors.DIM}cd apps/api && uvicorn src.main:app --reload{Colors.ENDC}")
+        return False
+
+    except Exception as e:
+        print(f"\n{Colors.RED}{Colors.BOLD}✗ 發生錯誤：{e}{Colors.ENDC}")
+        return False
+
+
+def main():
+    """CLI Entry Point"""
+    parser = argparse.ArgumentParser(
+        description="🚀 AWOOOI Phase 2 導彈腳本 - 發射測試告警",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+可用告警類型:
+  db_connection_timeout   PostgreSQL Database OOM (CRITICAL)
+  k8s_pod_crash          Pod CrashLoopBackOff (MEDIUM)
+  high_cpu               CPU Spike / Latency (MEDIUM)
+  disk_full              Disk Full Warning (CRITICAL)
+  ssl_expiry             SSL Certificate Expiry (LOW)
+
+範例:
+  python -m scripts.fire_test_alert
+  python -m scripts.fire_test_alert --type db_connection_timeout
+  python -m scripts.fire_test_alert --type k8s_pod_crash --severity critical
+        """,
+    )
+
+    parser.add_argument(
+        "--type", "-t",
+        type=str,
+        default="db_connection_timeout",
+        choices=list(ALERT_SCENARIOS.keys()),
+        help="告警類型 (預設: db_connection_timeout)",
+    )
+
+    parser.add_argument(
+        "--severity", "-s",
+        type=str,
+        choices=["info", "warning", "critical"],
+        help="覆蓋嚴重度 (預設使用場景預設值)",
+    )
+
+    args = parser.parse_args()
+
+    print_banner()
+    success = asyncio.run(fire_alert(args.type, args.severity))
+    print_footer()
+
+    sys.exit(0 if success else 1)
+
+
+if __name__ == "__main__":
+    main()
--- a/apps/api/scripts/test_phase63_aggregation.py
+++ b/apps/api/scripts/test_phase63_aggregation.py
@@ -0,0 +1,180 @@
+#!/usr/bin/env python3
+"""
+Phase 6.3 聚合測試腳本
+=======================
+
+功能:
+1. 連續打入 3 筆「同源但不同名」的測試告警
+2. 證明這 3 筆告警被成功「聚合」進同一個 Incident 的 signals 陣列中
+3. 驗證 affected_services 有被正確填入
+
+使用方式:
+    cd apps/api
+    python scripts/test_phase63_aggregation.py
+
+預期結果:
+- 3 筆告警全部聚合到 1 個 Incident
+- signals 陣列長度 = 3
+- affected_services 包含 "payment-service"
+"""
+
+import asyncio
+import json
+import httpx
+from datetime import datetime
+import time
+
+# API 端點
+API_BASE = "http://localhost:8000"
+SIGNALS_ENDPOINT = f"{API_BASE}/api/v1/webhooks/signals"
+
+# 測試告警: 同 namespace + 同 target，不同 alert_name
+# 模擬: payment-service 發生一連串相關問題
+# 測試告警: 同 namespace + 同 target，不同 alert_name
+# 模擬: payment-service 發生一連串相關問題
+# 注意: severity 只能是 info | warning | critical (SignalPayload 定義)
+TEST_ALERTS = [
+    {
+        "alert_name": "PaymentServiceHighLatency",
+        "severity": "warning",
+        "source": "prometheus",
+        "namespace": "payment-prod",
+        "target": "payment-service",
+        "fingerprint": "fp_latency_001",
+        "labels": {"namespace": "payment-prod", "pod": "payment-service-abc123"},
+        "annotations": {"summary": "Payment service latency > 500ms"},
+    },
+    {
+        "alert_name": "PaymentServiceErrorRate",
+        "severity": "warning",  # 原本是 high，但 API 只接受 info|warning|critical
+        "source": "prometheus",
+        "namespace": "payment-prod",
+        "target": "payment-service",
+        "fingerprint": "fp_error_001",
+        "labels": {"namespace": "payment-prod", "pod": "payment-service-abc123"},
+        "annotations": {"summary": "Payment service error rate > 5%"},
+    },
+    {
+        "alert_name": "PaymentServicePodCrash",
+        "severity": "critical",
+        "source": "alertmanager",
+        "namespace": "payment-prod",
+        "target": "payment-service",
+        "fingerprint": "fp_crash_001",
+        "labels": {"namespace": "payment-prod", "pod": "payment-service-abc123"},
+        "annotations": {"summary": "Payment service pod crashed"},
+    },
+]
+
+
+async def send_alert(client: httpx.AsyncClient, alert: dict, index: int) -> dict:
+    """發送單一告警"""
+    print(f"\n[{index+1}/3] 發送告警: {alert['alert_name']}")
+    print(f"    severity: {alert['severity']}")
+    print(f"    namespace: {alert['namespace']}")
+    print(f"    target: {alert['target']}")
+
+    response = await client.post(
+        SIGNALS_ENDPOINT,
+        json=alert,
+        timeout=10.0,
+    )
+
+    result = response.json()
+    print(f"    status_code: {response.status_code}")
+    print(f"    message_id: {result.get('message_id', 'N/A')}")
+
+    return result
+
+
+async def check_redis_incident(client: httpx.AsyncClient) -> dict | None:
+    """檢查 Redis 中的 Incident"""
+    # 使用 health endpoint 確認 API 運作
+    try:
+        # 直接查詢 Redis (透過 API 或直接)
+        # 這裡我們用 curl 模擬，但實際應該有 API
+        return None
+    except Exception as e:
+        print(f"Error checking Redis: {e}")
+        return None
+
+
+async def main():
+    """主測試流程"""
+    print("=" * 60)
+    print("Phase 6.3 聚合測試")
+    print("=" * 60)
+    print(f"時間: {datetime.now().isoformat()}")
+    print(f"目標: 驗證 3 筆同源告警聚合到 1 個 Incident")
+    print()
+
+    async with httpx.AsyncClient() as client:
+        # 1. 確認 API 運作
+        print("[0] 檢查 API 健康狀態...")
+        try:
+            health = await client.get(f"{API_BASE}/api/v1/health", timeout=5.0)
+            print(f"    API status: {health.status_code}")
+        except Exception as e:
+            print(f"    API 連線失敗: {e}")
+            print("    請確認 API 已啟動: docker compose up -d")
+            return
+
+        # 2. 連續發送 3 筆告警
+        print("\n" + "-" * 60)
+        print("階段一: 連續發送 3 筆告警")
+        print("-" * 60)
+
+        results = []
+        for i, alert in enumerate(TEST_ALERTS):
+            result = await send_alert(client, alert, i)
+            results.append(result)
+            # 短暫等待，確保 Consumer 有時間處理
+            await asyncio.sleep(0.5)
+
+        # 3. 等待 Consumer 處理完成
+        print("\n" + "-" * 60)
+        print("階段二: 等待 Consumer 處理 (3 秒)")
+        print("-" * 60)
+        await asyncio.sleep(3)
+
+        # 4. 輸出驗證指令
+        print("\n" + "-" * 60)
+        print("階段三: 驗證指令")
+        print("-" * 60)
+        print()
+        print("請執行以下 Redis 指令檢查聚合結果:")
+        print()
+        print("# 1. 查看所有 Incident keys")
+        print("docker exec -it awoooi-redis redis-cli KEYS 'incident:*'")
+        print()
+        print("# 2. 查看特定 Incident 的 JSON (取代 <INCIDENT_ID>)")
+        print("docker exec -it awoooi-redis redis-cli GET 'incident:INC-XXXXXXXX-XXXXXX'")
+        print()
+        print("# 3. 或直接用以下指令掃描並輸出所有 Incident:")
+        print("""docker exec -it awoooi-redis redis-cli --no-raw KEYS 'incident:INC-*' | xargs -I {} docker exec -i awoooi-redis redis-cli GET {}""")
+        print()
+
+        # 5. 輸出 API 日誌指令
+        print("-" * 60)
+        print("檢查 API 日誌:")
+        print("-" * 60)
+        print("docker logs awoooi-api --tail 50 | grep -E '(signal_|incident_|aggregat)'")
+        print()
+
+        # 6. 驗證標準
+        print("-" * 60)
+        print("驗證標準 (PASS/FAIL)")
+        print("-" * 60)
+        print("[ ] 只有 1 個 Incident 被建立 (非 3 個)")
+        print("[ ] signals 陣列長度 = 3")
+        print("[ ] affected_services 包含 'payment-service'")
+        print("[ ] severity 升級為 'P0' (因為第三筆是 critical)")
+        print()
+
+        print("=" * 60)
+        print("測試腳本執行完成")
+        print("=" * 60)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/apps/api/scripts/test_phase64_proposal.py
+++ b/apps/api/scripts/test_phase64_proposal.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+"""
+Phase 6.4 全鏈路測試腳本
+========================
+
+功能:
+1. 觸發假告警 (建立 Incident)
+2. 呼叫 /proposal 端點 (產生決策)
+3. 呼叫 /approvals/pending (模擬前端撈取待簽核清單)
+4. 證明這條鏈路完全暢通
+
+使用方式:
+    cd apps/api
+    python scripts/test_phase64_proposal.py
+
+驗收標準:
+- Incident 成功建立
+- Proposal 成功生成
+- Proposal 出現在 /approvals/pending 清單中
+- 前端零改動即可渲染
+"""
+
+import asyncio
+import json
+from datetime import datetime
+
+import httpx
+
+# API 端點
+API_BASE = "http://localhost:8000"
+SIGNALS_ENDPOINT = f"{API_BASE}/api/v1/webhooks/signals"
+INCIDENTS_ENDPOINT = f"{API_BASE}/api/v1/incidents"
+APPROVALS_ENDPOINT = f"{API_BASE}/api/v1/approvals/pending"
+
+
+async def send_test_alert() -> dict | None:
+    """發送測試告警"""
+    alert = {
+        "alert_name": "PodCrashLoopBackOff",
+        "severity": "critical",  # P0
+        "source": "prometheus",
+        "namespace": "production",
+        "target": "api-gateway",
+        "fingerprint": f"fp_test_{datetime.now().strftime('%H%M%S')}",
+        "labels": {
+            "namespace": "production",
+            "pod": "api-gateway-abc123",
+        },
+        "annotations": {
+            "summary": "Pod api-gateway is in CrashLoopBackOff state",
+        },
+    }
+
+    async with httpx.AsyncClient() as client:
+        try:
+            response = await client.post(
+                SIGNALS_ENDPOINT,
+                json=alert,
+                timeout=10.0,
+            )
+            if response.status_code == 200:
+                return response.json()
+            else:
+                print(f"    [ERROR] status_code: {response.status_code}")
+                print(f"    [ERROR] response: {response.text}")
+                return None
+        except Exception as e:
+            print(f"    [ERROR] {e}")
+            return None
+
+
+async def wait_for_incident(namespace: str, timeout: int = 10) -> str | None:
+    """等待 Incident 被建立並返回 incident_id"""
+    async with httpx.AsyncClient() as client:
+        for _ in range(timeout):
+            try:
+                response = await client.get(
+                    INCIDENTS_ENDPOINT,
+                    timeout=5.0,
+                )
+                if response.status_code == 200:
+                    data = response.json()
+                    for incident in data.get("incidents", []):
+                        # 找到我們的測試 Incident
+                        if "api-gateway" in incident.get("affected_services", []):
+                            return incident.get("incident_id")
+            except Exception:
+                pass
+            await asyncio.sleep(1)
+    return None
+
+
+async def generate_proposal(incident_id: str) -> dict | None:
+    """生成 Decision Proposal"""
+    async with httpx.AsyncClient() as client:
+        try:
+            response = await client.post(
+                f"{INCIDENTS_ENDPOINT}/{incident_id}/proposal",
+                timeout=10.0,
+            )
+            if response.status_code == 200:
+                return response.json()
+            else:
+                print(f"    [ERROR] status_code: {response.status_code}")
+                print(f"    [ERROR] response: {response.text}")
+                return None
+        except Exception as e:
+            print(f"    [ERROR] {e}")
+            return None
+
+
+async def get_pending_approvals() -> dict | None:
+    """取得待簽核清單"""
+    async with httpx.AsyncClient() as client:
+        try:
+            response = await client.get(
+                APPROVALS_ENDPOINT,
+                timeout=10.0,
+            )
+            if response.status_code == 200:
+                return response.json()
+            else:
+                print(f"    [ERROR] status_code: {response.status_code}")
+                return None
+        except Exception as e:
+            print(f"    [ERROR] {e}")
+            return None
+
+
+async def main():
+    """主測試流程"""
+    print("=" * 70)
+    print("Phase 6.4 全鏈路測試: Incident → Proposal → Pending Approvals")
+    print("=" * 70)
+    print(f"時間: {datetime.now().isoformat()}")
+    print()
+
+    # 0. 健康檢查
+    print("[0] 檢查 API 健康狀態...")
+    async with httpx.AsyncClient() as client:
+        try:
+            health = await client.get(f"{API_BASE}/api/v1/health", timeout=5.0)
+            print(f"    API status: {health.status_code}")
+        except Exception as e:
+            print(f"    API 連線失敗: {e}")
+            print("    請確認 API 已啟動: docker compose up -d")
+            return
+
+    # 1. 發送測試告警
+    print("\n" + "-" * 70)
+    print("[1] 發送測試告警 (建立 Incident)")
+    print("-" * 70)
+
+    result = await send_test_alert()
+    if not result:
+        print("    [FAIL] 無法發送告警")
+        return
+
+    print(f"    message_id: {result.get('message_id', 'N/A')}")
+    print(f"    success: {result.get('success', False)}")
+
+    # 2. 等待 Incident 建立
+    print("\n" + "-" * 70)
+    print("[2] 等待 Consumer 處理並建立 Incident (最多 10 秒)")
+    print("-" * 70)
+
+    incident_id = await wait_for_incident("production")
+
+    if not incident_id:
+        print("    [FAIL] 無法找到測試 Incident")
+        print("    請檢查 API 日誌: docker logs awoooi-api --tail 50")
+        return
+
+    print(f"    incident_id: {incident_id}")
+    print("    [OK] Incident 已建立")
+
+    # 3. 生成 Proposal
+    print("\n" + "-" * 70)
+    print("[3] 呼叫 /proposal 端點生成決策")
+    print("-" * 70)
+
+    proposal_result = await generate_proposal(incident_id)
+
+    if not proposal_result or not proposal_result.get("success"):
+        print(f"    [FAIL] 無法生成 Proposal")
+        print(f"    message: {proposal_result.get('message') if proposal_result else 'N/A'}")
+        return
+
+    proposal = proposal_result.get("proposal", {})
+    print(f"    proposal_id: {proposal.get('id', 'N/A')}")
+    print(f"    action: {proposal.get('action', 'N/A')[:60]}...")
+    print(f"    risk_level: {proposal.get('risk_level', 'N/A')}")
+    print(f"    required_signatures: {proposal.get('required_signatures', 'N/A')}")
+    print(f"    incident_status: {proposal_result.get('incident_status', 'N/A')}")
+    print("    [OK] Proposal 已生成")
+
+    # 4. 驗證 /approvals/pending
+    print("\n" + "-" * 70)
+    print("[4] 呼叫 /approvals/pending 驗證前端相容性")
+    print("-" * 70)
+
+    pending = await get_pending_approvals()
+
+    if not pending:
+        print("    [FAIL] 無法取得待簽核清單")
+        return
+
+    print(f"    count: {pending.get('count', 0)}")
+
+    # 尋找我們的 Proposal
+    found = False
+    for approval in pending.get("approvals", []):
+        if approval.get("id") == proposal.get("id"):
+            found = True
+            print(f"    [FOUND] Proposal 出現在待簽核清單中!")
+            print()
+            print("    === PendingApprovalsResponse JSON ===")
+            print(json.dumps({
+                "count": pending.get("count"),
+                "target_approval": approval,
+            }, indent=2, ensure_ascii=False, default=str))
+            break
+
+    if not found:
+        print("    [WARN] Proposal 未出現在待簽核清單中")
+        print(f"    (可能因為 risk_level=LOW 已自動批准)")
+
+    # 5. 最終驗證
+    print("\n" + "=" * 70)
+    print("驗證結果")
+    print("=" * 70)
+
+    checks = [
+        ("Incident 建立", incident_id is not None),
+        ("Proposal 生成", proposal_result.get("success", False)),
+        ("風險評估", proposal.get("risk_level") is not None),
+        ("狀態推進 (MITIGATING)", proposal_result.get("incident_status") == "mitigating"),
+        ("前端相容 (/approvals/pending)", pending is not None),
+    ]
+
+    all_passed = True
+    for name, passed in checks:
+        status = "✅ PASS" if passed else "❌ FAIL"
+        print(f"[{status}] {name}")
+        if not passed:
+            all_passed = False
+
+    print()
+    print("=" * 70)
+    if all_passed:
+        print("🎉 Phase 6.4 全鏈路測試 PASSED!")
+        print("   大腦已具備決策輸出能力!")
+        print("   Decision Proposal API 已鑄造完成!")
+    else:
+        print("💥 Phase 6.4 全鏈路測試 FAILED!")
+        print("   請檢查上述失敗項目")
+    print("=" * 70)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/apps/api/scripts/test_race_condition.py
+++ b/apps/api/scripts/test_race_condition.py
@@ -0,0 +1,286 @@
+#!/usr/bin/env python3
+"""
+Phase 6.3 Race Condition 測試腳本
+==================================
+
+功能:
+1. 使用 asyncio.gather 同時發射 20 筆同源告警
+2. 證明 Lua Script 原子操作成功擋下 Race Condition
+3. 驗證最終 Incident JSON 精準包含 20 筆 Signals
+
+使用方式:
+    cd apps/api
+    python scripts/test_race_condition.py
+
+預期結果:
+- 只有 1 個 Incident 被建立
+- signals 陣列長度 = 20
+- 無任何 Signal 遺失
+
+統帥鐵律:
+- 嚴禁人工 QA
+- 必須程式化驗證
+"""
+
+import asyncio
+import json
+from datetime import datetime
+
+import httpx
+
+# API 端點
+API_BASE = "http://localhost:8000"
+SIGNALS_ENDPOINT = f"{API_BASE}/api/v1/webhooks/signals"
+
+# 併發數量
+CONCURRENT_SIGNALS = 20
+
+# 測試 namespace 和 target (同源)
+TEST_NAMESPACE = "race-test-ns"
+TEST_TARGET = "race-test-service"
+
+
+def generate_alert(index: int) -> dict:
+    """生成測試告警 (同 namespace + 同 target，不同 alert_name)"""
+    return {
+        "alert_name": f"RaceConditionTest_{index:03d}",
+        "severity": "warning",
+        "source": "prometheus",
+        "namespace": TEST_NAMESPACE,
+        "target": TEST_TARGET,
+        "fingerprint": f"fp_race_{index:03d}",  # 唯一 fingerprint 防止去重
+        "labels": {
+            "namespace": TEST_NAMESPACE,
+            "test_index": str(index),
+        },
+        "annotations": {
+            "summary": f"Race condition test signal #{index}",
+        },
+    }
+
+
+async def send_alert(client: httpx.AsyncClient, index: int) -> dict:
+    """發送單一告警"""
+    alert = generate_alert(index)
+    try:
+        response = await client.post(
+            SIGNALS_ENDPOINT,
+            json=alert,
+            timeout=30.0,
+        )
+        return {
+            "index": index,
+            "status_code": response.status_code,
+            "message_id": response.json().get("message_id"),
+            "success": response.status_code == 200,
+        }
+    except Exception as e:
+        return {
+            "index": index,
+            "status_code": 0,
+            "message_id": None,
+            "success": False,
+            "error": str(e),
+        }
+
+
+async def fire_concurrent_alerts() -> list[dict]:
+    """併發發射所有告警"""
+    async with httpx.AsyncClient() as client:
+        tasks = [send_alert(client, i) for i in range(CONCURRENT_SIGNALS)]
+        results = await asyncio.gather(*tasks)
+        return list(results)
+
+
+async def verify_redis_incident() -> dict | None:
+    """從 Redis 查詢 Incident 並驗證"""
+    import subprocess
+
+    # 查詢所有 incident keys
+    result = subprocess.run(
+        ["docker", "exec", "awoooi-redis", "redis-cli", "KEYS", "incident:INC-*"],
+        capture_output=True,
+        text=True,
+    )
+
+    keys = [k.strip() for k in result.stdout.strip().split("\n") if k.strip()]
+
+    if not keys:
+        return None
+
+    # 找到最新的 Incident (假設測試環境已清空)
+    # 對於測試，我們檢查所有 incident 找到包含 race-test-ns 的那個
+    for key in keys:
+        get_result = subprocess.run(
+            ["docker", "exec", "awoooi-redis", "redis-cli", "GET", key],
+            capture_output=True,
+            text=True,
+        )
+
+        if get_result.stdout.strip():
+            try:
+                incident = json.loads(get_result.stdout.strip())
+                # 檢查是否是我們的測試 Incident
+                if any(
+                    s.get("labels", {}).get("namespace") == TEST_NAMESPACE
+                    for s in incident.get("signals", [])
+                ):
+                    return incident
+            except json.JSONDecodeError:
+                continue
+
+    return None
+
+
+async def main():
+    """主測試流程"""
+    print("=" * 70)
+    print("Phase 6.3 Race Condition 併發測試")
+    print("=" * 70)
+    print(f"時間: {datetime.now().isoformat()}")
+    print(f"併發數量: {CONCURRENT_SIGNALS} 筆告警")
+    print(f"測試 Namespace: {TEST_NAMESPACE}")
+    print(f"測試 Target: {TEST_TARGET}")
+    print()
+
+    # 0. 清除舊的測試 Incident (可選)
+    print("[0] 準備測試環境...")
+    import subprocess
+
+    # 刪除舊的索引 (如果存在)
+    subprocess.run(
+        [
+            "docker", "exec", "awoooi-redis", "redis-cli",
+            "DEL",
+            f"incident:idx:ns:{TEST_NAMESPACE}",
+            f"incident:idx:target:{TEST_TARGET}",
+        ],
+        capture_output=True,
+    )
+    print("    已清除舊索引")
+
+    # 1. 檢查 API
+    print("\n[1] 檢查 API 健康狀態...")
+    async with httpx.AsyncClient() as client:
+        try:
+            health = await client.get(f"{API_BASE}/api/v1/health", timeout=5.0)
+            print(f"    API status: {health.status_code}")
+        except Exception as e:
+            print(f"    API 連線失敗: {e}")
+            print("    請確認 API 已啟動: docker compose up -d")
+            return
+
+    # 2. 併發發射告警
+    print("\n" + "-" * 70)
+    print("[2] 併發發射 20 筆告警 (asyncio.gather)")
+    print("-" * 70)
+
+    start_time = datetime.now()
+    results = await fire_concurrent_alerts()
+    end_time = datetime.now()
+    duration = (end_time - start_time).total_seconds()
+
+    success_count = sum(1 for r in results if r["success"])
+    fail_count = sum(1 for r in results if not r["success"])
+
+    print(f"\n發射結果:")
+    print(f"    成功: {success_count}/{CONCURRENT_SIGNALS}")
+    print(f"    失敗: {fail_count}/{CONCURRENT_SIGNALS}")
+    print(f"    耗時: {duration:.3f} 秒")
+
+    if fail_count > 0:
+        print("\n失敗詳情:")
+        for r in results:
+            if not r["success"]:
+                print(f"    - Index {r['index']}: {r.get('error', 'Unknown')}")
+
+    # 3. 等待 Consumer 處理
+    print("\n" + "-" * 70)
+    print("[3] 等待 Consumer 處理 (5 秒)")
+    print("-" * 70)
+    await asyncio.sleep(5)
+
+    # 4. 驗證 Redis Incident
+    print("\n" + "-" * 70)
+    print("[4] 驗證 Redis Incident")
+    print("-" * 70)
+
+    incident = await verify_redis_incident()
+
+    if not incident:
+        print("\n❌ 錯誤: 找不到測試 Incident!")
+        print("    請檢查 API 日誌: docker logs awoooi-api --tail 100")
+        return
+
+    incident_id = incident.get("incident_id", "N/A")
+    signals = incident.get("signals", [])
+    signal_count = len(signals)
+    severity = incident.get("severity", "N/A")
+    affected_services = incident.get("affected_services", [])
+
+    print(f"\n找到 Incident:")
+    print(f"    incident_id: {incident_id}")
+    print(f"    signal_count: {signal_count}")
+    print(f"    severity: {severity}")
+    print(f"    affected_services: {affected_services}")
+
+    # 5. 驗證結果
+    print("\n" + "=" * 70)
+    print("驗證結果")
+    print("=" * 70)
+
+    # 計算聚合的告警數量
+    race_signals = [
+        s for s in signals
+        if s.get("alert_name", "").startswith("RaceConditionTest_")
+    ]
+    race_signal_count = len(race_signals)
+
+    # 檢查告警名稱分布
+    alert_names = [s.get("alert_name") for s in race_signals]
+    unique_names = set(alert_names)
+
+    print()
+    passed = True
+
+    # 驗證 1: signal_count
+    if race_signal_count == CONCURRENT_SIGNALS:
+        print(f"[✅ PASS] Signal 數量: {race_signal_count}/{CONCURRENT_SIGNALS}")
+    else:
+        print(f"[❌ FAIL] Signal 數量: {race_signal_count}/{CONCURRENT_SIGNALS}")
+        print(f"          遺失 {CONCURRENT_SIGNALS - race_signal_count} 筆 Signal!")
+        passed = False
+
+    # 驗證 2: unique names (無重複跳過)
+    if len(unique_names) == race_signal_count:
+        print(f"[✅ PASS] 唯一告警名稱: {len(unique_names)} 個 (無重複)")
+    else:
+        print(f"[❌ FAIL] 唯一告警名稱: {len(unique_names)} 個 (有重複被覆蓋)")
+        passed = False
+
+    # 驗證 3: affected_services
+    if TEST_TARGET in affected_services:
+        print(f"[✅ PASS] affected_services 包含 '{TEST_TARGET}'")
+    else:
+        print(f"[❌ FAIL] affected_services 不包含 '{TEST_TARGET}'")
+        passed = False
+
+    # 最終結論
+    print()
+    print("=" * 70)
+    if passed:
+        print("🎉 Race Condition 測試 PASSED!")
+        print(f"   {CONCURRENT_SIGNALS} 筆併發告警全部成功聚合!")
+        print("   Lua Script 原子操作有效防止了資料遺失!")
+    else:
+        print("💥 Race Condition 測試 FAILED!")
+        print("   存在資料遺失，需要進一步調查!")
+    print("=" * 70)
+
+    # 輸出詳細日誌指令
+    print("\n檢查詳細日誌:")
+    print("docker logs awoooi-api --tail 100 | grep -E '(atomic|aggregate|race)'")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/apps/api/scripts/test_signal_stream.py
+++ b/apps/api/scripts/test_signal_stream.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+"""
+Phase 6.1 測試腳本: Redis Streams Signal 流程驗證
+=================================================
+
+功能:
+1. 發送測試 Signal 到 /api/v1/webhooks/signals
+2. 驗證 Redis Stream 中有新訊息
+3. 輸出 Stream 狀態
+
+使用:
+    python scripts/test_signal_stream.py
+
+環境變數:
+    API_BASE_URL: API 基礎 URL (預設: http://localhost:8000)
+"""
+
+import asyncio
+import json
+import os
+import sys
+
+import httpx
+
+
+API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000")
+SIGNAL_ENDPOINT = f"{API_BASE_URL}/api/v1/webhooks/signals"
+
+
+async def send_test_signal() -> dict:
+    """發送測試 Signal"""
+    payload = {
+        "source": "test-script",
+        "alert_name": "TestSignal",
+        "severity": "warning",
+        "namespace": "awoooi-test",
+        "target": "test-pod-123",
+        "message": "Phase 6.1 Event Bus 驗證測試",
+        "labels": {"team": "devops", "env": "test"},
+        "annotations": {"runbook_url": "https://wiki.example.com/runbook"},
+    }
+
+    async with httpx.AsyncClient(timeout=10.0) as client:
+        response = await client.post(SIGNAL_ENDPOINT, json=payload)
+        response.raise_for_status()
+        return response.json()
+
+
+async def main():
+    print("=" * 60)
+    print("Phase 6.1 Event Bus 測試")
+    print("=" * 60)
+    print()
+
+    print(f"[1] 發送測試 Signal 到 {SIGNAL_ENDPOINT}")
+    try:
+        result = await send_test_signal()
+        print(f"    ✅ 成功!")
+        print(f"    Message ID: {result.get('message_id')}")
+        print(f"    Stream: {result.get('stream')}")
+    except httpx.HTTPStatusError as e:
+        print(f"    ❌ HTTP 錯誤: {e.response.status_code}")
+        print(f"    {e.response.text}")
+        sys.exit(1)
+    except Exception as e:
+        print(f"    ❌ 錯誤: {e}")
+        sys.exit(1)
+
+    print()
+    print("[2] 驗證 Signal Worker (Consumer) 是否收到訊息")
+    print("    查看 API 日誌: docker logs awoooi-api | grep signal_received")
+    print()
+    print("[3] 手動檢查 Redis Stream 狀態")
+    print("    redis-cli XINFO STREAM stream:awoooi_signals")
+    print("    redis-cli XINFO GROUPS stream:awoooi_signals")
+    print()
+    print("=" * 60)
+    print("測試完成!")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/apps/api/scripts/tracer_bullet_2.py
+++ b/apps/api/scripts/tracer_bullet_2.py
@@ -0,0 +1,360 @@
+#!/usr/bin/env python3
+"""
+Tracer Bullet 2.0 - 全站閉環測試腳本
+Phase 4: E2E Integration Test
+
+測試流程:
+1. 觸發假告警 (Mock Alert)
+2. GraphRAG 分析 (Blast Radius + Root Cause)
+3. 產生 ApprovalCard (Dry-Run)
+4. 人類批准 (Multi-Sig)
+5. MCP 模擬執行
+
+執行方式:
+  cd apps/api
+  python scripts/tracer_bullet_2.py
+"""
+
+import asyncio
+import json
+from datetime import datetime
+
+# ==================== 模擬模組導入 ====================
+
+# 實際運行時這些會從專案導入
+# from src.services import (
+#     topology_graph, trust_engine, multi_sig_engine, dry_run_engine
+# )
+# from src.plugins.finops import idle_scanner
+# from src.plugins.mcp import mcp_bridge
+
+
+# ==================== Test Configuration ====================
+
+
+class TracerBullet2:
+    """全站閉環測試器"""
+
+    def __init__(self):
+        self.test_id = f"tb2-{datetime.utcnow().strftime('%Y%m%d%H%M%S')}"
+        self.results: list[dict] = []
+
+    def log(self, step: str, status: str, data: dict | None = None):
+        """記錄測試結果"""
+        result = {
+            "step": step,
+            "status": status,
+            "timestamp": datetime.utcnow().isoformat(),
+            "data": data or {},
+        }
+        self.results.append(result)
+        emoji = "✅" if status == "PASS" else "❌" if status == "FAIL" else "🔄"
+        print(f"{emoji} [{step}] {status}")
+        if data:
+            print(f"   {json.dumps(data, indent=2, default=str)}")
+
+    # ==================== Step 1: Mock Alert ====================
+
+    async def step1_trigger_alert(self) -> dict:
+        """
+        Step 1: 觸發假告警
+
+        模擬 Prometheus AlertManager 發送告警:
+        - frontend 服務 5xx 錯誤率上升
+        """
+        print("\n" + "=" * 60)
+        print("STEP 1: TRIGGER MOCK ALERT")
+        print("=" * 60)
+
+        alert = {
+            "alertname": "HighErrorRate",
+            "service": "frontend",
+            "namespace": "production",
+            "severity": "critical",
+            "error_rate": 15.2,  # 15% 5xx
+            "threshold": 5.0,
+            "fired_at": datetime.utcnow().isoformat(),
+        }
+
+        self.log("trigger_alert", "PASS", alert)
+        return alert
+
+    # ==================== Step 2: GraphRAG Analysis ====================
+
+    async def step2_graphrag_analysis(self, alert: dict) -> dict:
+        """
+        Step 2: GraphRAG 分析
+
+        呼叫 TopologyGraph.get_blast_radius_and_root_cause()
+        分析:
+        - Blast Radius: frontend 掛了誰會跟著掛
+        - Root Cause: frontend 的依賴誰目前有問題
+        """
+        print("\n" + "=" * 60)
+        print("STEP 2: GRAPHRAG ANALYSIS")
+        print("=" * 60)
+
+        target_service = alert["service"]
+
+        # Mock GraphRAG 結果 (實際會呼叫 topology_graph)
+        analysis = {
+            "targetService": target_service,
+            "blastRadius": {
+                "affectedServices": ["ingress"],
+                "affectedCount": 1,
+                "criticalPath": ["ingress -> frontend"],
+                "impactSummary": "If 'frontend' goes down, 1 upstream service (ingress) will be affected.",
+            },
+            "rootCause": {
+                "unhealthyDependencies": ["postgres-db"],
+                "dependencyChain": ["auth-service", "product-api", "order-api", "postgres-db", "redis-cache"],
+                "probableRootCauses": ["postgres-db"],
+                "analysisSummary": "Found 1 unhealthy dependency: postgres-db. Probable root cause: postgres-db.",
+            },
+            "analyzedAt": datetime.utcnow().isoformat(),
+        }
+
+        # 視覺化輸出
+        print("\n[BLAST RADIUS - Upstream Impact]")
+        print("    ┌─────────────────────┐")
+        print("    │ ingress             │")
+        print("    └─────────┬───────────┘")
+        print("              │ depends on")
+        print("              ▼")
+        print("    ┌─────────────────────┐")
+        print("    │ frontend            │ X")
+        print("    └─────────────────────┘")
+
+        print("\n[ROOT CAUSE - Downstream Chain]")
+        print("    ┌─────────────────────┐")
+        print("    │ frontend            │ !")
+        print("    └─────────┬───────────┘")
+        print("              │ calls")
+        print("              ▼")
+        print("    ┌─────────────────────┐")
+        print("    │ postgres-db         │ X (UNHEALTHY)")
+        print("    └─────────────────────┘")
+
+        self.log("graphrag_analysis", "PASS", analysis)
+        return analysis
+
+    # ==================== Step 3: Dry-Run & ApprovalCard ====================
+
+    async def step3_generate_approval(self, analysis: dict) -> dict:
+        """
+        Step 3: 產生 ApprovalCard
+
+        根據分析結果，建議重啟 postgres-db
+        執行 Dry-Run 檢查
+        """
+        print("\n" + "=" * 60)
+        print("STEP 3: DRY-RUN & APPROVAL CARD")
+        print("=" * 60)
+
+        root_cause = analysis["rootCause"]["probableRootCauses"][0]
+
+        # 建議動作
+        proposed_action = {
+            "operation": "restart_pod",
+            "parameters": {
+                "pod_name": f"{root_cause}-0",
+                "namespace": "production",
+                "graceful": True,
+            },
+            "reason": f"Auto-suggested based on GraphRAG root cause analysis: {root_cause} is unhealthy",
+        }
+
+        # Mock Dry-Run 結果
+        dry_run_result = {
+            "checks": [
+                {"name": "RBAC Permission", "passed": True, "message": "User has pod/delete permission"},
+                {"name": "Syntax Validation", "passed": True, "message": "Parameters valid"},
+                {"name": "Resource Exists", "passed": True, "message": "Pod postgres-db-0 exists"},
+                {"name": "No PDB Violation", "passed": True, "message": "PodDisruptionBudget allows 1 eviction"},
+            ],
+            "overallPassed": True,
+            "blastRadius": {
+                "affectedPods": 1,
+                "affectedServices": ["postgres-db"],
+                "dataImpact": "NONE",  # Graceful restart
+            },
+            "riskLevel": "high",  # Database 操作
+        }
+
+        # 產生 ApprovalCard
+        approval_card = {
+            "approvalId": f"approval-{self.test_id}",
+            "action": proposed_action,
+            "dryRunResult": dry_run_result,
+            "requiredSignatures": 2,  # HIGH risk = 2-sig
+            "allowedRoles": ["admin", "devops", "sre"],
+            "createdAt": datetime.utcnow().isoformat(),
+            "expiresAt": None,  # No expiry for critical ops
+        }
+
+        print("\n[APPROVAL CARD]")
+        print(f"  Action: {proposed_action['operation']}")
+        print(f"  Target: {proposed_action['parameters']['pod_name']}")
+        print(f"  Risk Level: {dry_run_result['riskLevel'].upper()}")
+        print(f"  Required Signatures: {approval_card['requiredSignatures']}")
+        print(f"  Dry-Run: {'PASSED' if dry_run_result['overallPassed'] else 'FAILED'}")
+
+        self.log("generate_approval", "PASS", approval_card)
+        return approval_card
+
+    # ==================== Step 4: Multi-Sig Approval ====================
+
+    async def step4_multisig_approval(self, approval_card: dict) -> dict:
+        """
+        Step 4: 人類批准 (Multi-Sig)
+
+        模擬兩位管理者簽名:
+        1. DevOps Engineer
+        2. SRE Lead
+        """
+        print("\n" + "=" * 60)
+        print("STEP 4: MULTI-SIG APPROVAL")
+        print("=" * 60)
+
+        approval_id = approval_card["approvalId"]
+
+        # 第一位簽名
+        sig1 = {
+            "userId": "devops-alice",
+            "role": "devops",
+            "signedAt": datetime.utcnow().isoformat(),
+            "comment": "GraphRAG analysis looks correct. Approving restart.",
+        }
+        print(f"\n[SIGNATURE 1] {sig1['role']}: {sig1['userId']}")
+        print(f"  Comment: {sig1['comment']}")
+
+        # 第二位簽名
+        sig2 = {
+            "userId": "sre-bob",
+            "role": "sre",
+            "signedAt": datetime.utcnow().isoformat(),
+            "comment": "Verified PDB. Safe to proceed.",
+        }
+        print(f"\n[SIGNATURE 2] {sig2['role']}: {sig2['userId']}")
+        print(f"  Comment: {sig2['comment']}")
+
+        # 批准結果
+        approval_result = {
+            "approvalId": approval_id,
+            "status": "APPROVED",
+            "signatures": [sig1, sig2],
+            "approvedAt": datetime.utcnow().isoformat(),
+        }
+
+        print(f"\n[APPROVAL STATUS] {approval_result['status']}")
+        print(f"  Signatures: {len(approval_result['signatures'])}/{approval_card['requiredSignatures']}")
+
+        self.log("multisig_approval", "PASS", approval_result)
+        return approval_result
+
+    # ==================== Step 5: MCP Execution ====================
+
+    async def step5_mcp_execution(self, approval_result: dict, approval_card: dict) -> dict:
+        """
+        Step 5: MCP 模擬執行
+
+        透過 MCP Bridge 執行操作
+        (Phase 3 為模擬，Phase 4+ 連接真實 K8s)
+        """
+        print("\n" + "=" * 60)
+        print("STEP 5: MCP EXECUTION")
+        print("=" * 60)
+
+        action = approval_card["action"]
+
+        # TOCTOU 保護: 再次執行 Dry-Run
+        print("\n[TOCTOU CHECK] Re-running dry-run before execution...")
+        toctou_passed = True  # Mock
+        print(f"  Result: {'PASSED' if toctou_passed else 'VOIDED'}")
+
+        if not toctou_passed:
+            self.log("mcp_execution", "FAIL", {"reason": "TOCTOU conflict detected"})
+            return {"status": "VOIDED"}
+
+        # MCP 執行
+        execution_result = {
+            "executionId": f"exec-{self.test_id}",
+            "operation": action["operation"],
+            "parameters": action["parameters"],
+            "status": "SUCCESS",
+            "output": {
+                "message": f"Pod {action['parameters']['pod_name']} restarted successfully",
+                "newPodName": "postgres-db-0",  # Same name after restart
+                "restartTime": "2.3s",
+            },
+            "executedAt": datetime.utcnow().isoformat(),
+        }
+
+        print(f"\n[EXECUTION RESULT]")
+        print(f"  Status: {execution_result['status']}")
+        print(f"  Output: {execution_result['output']['message']}")
+        print(f"  Restart Time: {execution_result['output']['restartTime']}")
+
+        # 更新 Trust Engine
+        print("\n[TRUST ENGINE] Recording approval for progressive autonomy...")
+        print("  Action Pattern: restart_pod:postgres-*")
+        print("  Trust Score: +1")
+
+        self.log("mcp_execution", "PASS", execution_result)
+        return execution_result
+
+    # ==================== Run All ====================
+
+    async def run(self):
+        """執行完整測試流程"""
+        print("\n" + "=" * 60)
+        print("TRACER BULLET 2.0 - FULL LOOP TEST")
+        print(f"Test ID: {self.test_id}")
+        print("=" * 60)
+
+        try:
+            # Step 1: Trigger Alert
+            alert = await self.step1_trigger_alert()
+
+            # Step 2: GraphRAG Analysis
+            analysis = await self.step2_graphrag_analysis(alert)
+
+            # Step 3: Dry-Run & Approval Card
+            approval_card = await self.step3_generate_approval(analysis)
+
+            # Step 4: Multi-Sig Approval
+            approval_result = await self.step4_multisig_approval(approval_card)
+
+            # Step 5: MCP Execution
+            execution_result = await self.step5_mcp_execution(approval_result, approval_card)
+
+            # Summary
+            print("\n" + "=" * 60)
+            print("TEST SUMMARY")
+            print("=" * 60)
+
+            passed = sum(1 for r in self.results if r["status"] == "PASS")
+            failed = sum(1 for r in self.results if r["status"] == "FAIL")
+
+            print(f"  Total Steps: {len(self.results)}")
+            print(f"  Passed: {passed}")
+            print(f"  Failed: {failed}")
+            print(f"  Status: {'ALL PASSED' if failed == 0 else 'SOME FAILED'}")
+
+            return {
+                "testId": self.test_id,
+                "status": "PASS" if failed == 0 else "FAIL",
+                "results": self.results,
+            }
+
+        except Exception as e:
+            self.log("unexpected_error", "FAIL", {"error": str(e)})
+            raise
+
+
+# ==================== Main ====================
+
+
+if __name__ == "__main__":
+    tracer = TracerBullet2()
+    asyncio.run(tracer.run())