Phase 6.4 - Modular Architecture: - Add lewooogo-brain adapters for LLM providers - Add lewooogo-data dual memory (Redis + PostgreSQL) - Implement consensus engine for multi-agent decisions - Add incident memory service for historical context Phase 9 - Agent Teams (Claude Agent SDK): - Add base agent class with Claude Sonnet 4 integration - Implement action planner, blast radius, and security agents - Add agent API endpoints and proposal workflow - Integrate ADR-009 OpenClaw Agent Teams architecture DevOps & CI/CD: - Add GitHub Actions CI/CD workflows (ci.yaml, cd.yaml) - Add pre-commit hooks and secrets baseline - Add docker-compose for local development - Update Kubernetes network policies Frontend Improvements: - Add auto-healing error boundary component - Update i18n messages for agent features - Enhance dual-state incident card with execution feedback Documentation: - Add 7 ADRs covering MCP, design system, architecture decisions - Update ARCHITECTURE_MEMORY.md with modular design - Add GLOBAL_RULES.md and SOUL.md for project identity Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
187 lines
6.7 KiB
YAML
187 lines
6.7 KiB
YAML
# =============================================================================
|
||
# Prometheus Alertmanager → AWOOOI Webhook 對接設定
|
||
# =============================================================================
|
||
#
|
||
# 統帥戰略 C: 影子模式 (Shadow Mode) 實彈接線
|
||
#
|
||
# 此設定檔指導如何將真實的 Prometheus Alertmanager
|
||
# 指向 AWOOOI OpenClaw Webhook 端點
|
||
#
|
||
# 安全要求:
|
||
# 1. 必須設定 HMAC Secret (WEBHOOK_HMAC_SECRET)
|
||
# 2. 生產環境強制驗證簽章 (Fail-Closed)
|
||
# 3. 影子模式預設開啟 (SHADOW_MODE_ENABLED=true)
|
||
#
|
||
# =============================================================================
|
||
|
||
# -----------------------------------------------------------------------------
|
||
# alertmanager.yml 範例設定
|
||
# -----------------------------------------------------------------------------
|
||
# 位置: /etc/alertmanager/alertmanager.yml (K3s ConfigMap)
|
||
# -----------------------------------------------------------------------------
|
||
|
||
global:
  # Default time after which an alert that carries no end time is declared
  # resolved if it has not been updated.
  resolve_timeout: 5m

route:
  # Alerts that share these label values are batched into one notification.
  group_by: ['alertname', 'namespace', 'deployment']
  # Initial wait to collect alerts for a newly created group.
  group_wait: 30s
  # Wait before notifying about new alerts added to an already-notified group.
  group_interval: 5m
  # Wait before re-sending a notification for a group that is still firing.
  repeat_interval: 4h
  # Fallback receiver for alerts that match none of the child routes below.
  receiver: 'awoooi-openclaw'

  # Routing rules: split traffic by severity.
  routes:
    # Critical alerts: notify promptly (short group_wait, hourly repeat).
    # `matchers` replaces the deprecated `match` map; each entry is an
    # equality matcher on the alert's labels.
    - matchers:
        - 'severity = "critical"'
      receiver: 'awoooi-openclaw'
      group_wait: 10s
      repeat_interval: 1h

    # Warning alerts: aggregate a little longer before notifying.
    # group_interval and repeat_interval are inherited from the parent route.
    - matchers:
        - 'severity = "warning"'
      receiver: 'awoooi-openclaw'
      group_wait: 1m

receivers:
  - name: 'awoooi-openclaw'
    webhook_configs:
      # NOTE(review): this posts directly to the AWOOOI webhook over plain
      # HTTP. The header of this file states that production enforces
      # signature verification, and Alertmanager cannot sign requests itself,
      # so in that setup this URL presumably has to point at the HMAC sidecar
      # instead - confirm before deploying.
      - url: 'http://192.168.0.188:8000/api/v1/webhooks/alerts'
        # Also notify when alerts resolve.
        send_resolved: true
        # At most 10 alerts per webhook message; alerts beyond this limit are
        # truncated from the payload (0 would mean "no limit").
        max_alerts: 10
|
||
|
||
# =======================================================================
|
||
# HMAC 簽章設定 (CISO 要求)
|
||
# =======================================================================
|
||
# Alertmanager 原生不支援 HMAC,需透過以下方式實現:
|
||
#
|
||
# 方案 A: 使用 http_config 的 authorization (Bearer Token)
#   注意: Bearer Token 是靜態共享憑證,並非對請求內容計算的 HMAC 簽章;
#   若接收端只接受 X-Signature-256 簽章,請改用方案 B。
#   http_config:
#     authorization:
#       type: Bearer
#       credentials: '<your-bearer-token>'
|
||
#
|
||
# 方案 B: 使用外部轉發服務 (推薦)
|
||
# 部署一個輕量級 sidecar 來計算 HMAC 並注入 X-Signature-256 Header
|
||
# 見下方 hmac-sidecar 說明
|
||
# =======================================================================
|
||
|
||
# -----------------------------------------------------------------------------
|
||
# K3s ConfigMap 部署範例
|
||
# -----------------------------------------------------------------------------
|
||
# kubectl apply -f - <<EOF
# apiVersion: v1
# kind: ConfigMap
# metadata:
#   name: alertmanager-config
#   namespace: monitoring
# data:
#   alertmanager.yml: |
#     <上述設定內容>
# EOF
|
||
|
||
# -----------------------------------------------------------------------------
|
||
# HMAC Sidecar 範例 (Go)
|
||
# -----------------------------------------------------------------------------
|
||
# 如果需要 HMAC 簽章,可部署此 sidecar:
|
||
#
|
||
# 流程: Alertmanager → HMAC Sidecar → AWOOOI Webhook
|
||
#
|
||
# 環境變數:
|
||
# WEBHOOK_TARGET_URL: http://192.168.0.188:8000/api/v1/webhooks/alerts
|
||
# WEBHOOK_HMAC_SECRET: <your-secret>
|
||
#
|
||
# Docker Image: ghcr.io/awoooi/hmac-sidecar:latest (待建置)
|
||
|
||
# -----------------------------------------------------------------------------
|
||
# K8s Alert Rules 範例 (PrometheusRule CRD)
|
||
# -----------------------------------------------------------------------------
|
||
# apiVersion: monitoring.coreos.com/v1
# kind: PrometheusRule
# metadata:
#   name: awoooi-alerts
#   namespace: monitoring
# spec:
#   groups:
#     - name: k8s-pod-alerts
#       rules:
#         - alert: PodCrashLooping
#           expr: |
#             increase(kube_pod_container_status_restarts_total[1h]) > 3
#           for: 5m
#           labels:
#             severity: warning
#             alert_type: k8s_pod_crash
#           annotations:
#             summary: "Pod {{ $labels.pod }} 發生 CrashLoop"
#             description: "Pod 在過去 1 小時重啟超過 3 次"
#
#         - alert: PodHighCPU
#           expr: |
#             sum(rate(container_cpu_usage_seconds_total{container!=""}[5m])) by (pod, namespace)
#             / sum(kube_pod_container_resource_limits{resource="cpu"}) by (pod, namespace) > 0.9
#           for: 10m
#           labels:
#             severity: warning
#             alert_type: high_cpu
#           annotations:
#             summary: "Pod {{ $labels.pod }} CPU 超過 90%"
#
#         - alert: PodHighMemory
#           expr: |
#             sum(container_memory_working_set_bytes{container!=""}) by (pod, namespace)
#             / sum(kube_pod_container_resource_limits{resource="memory"}) by (pod, namespace) > 0.9
#           for: 10m
#           labels:
#             severity: warning
#             alert_type: high_memory
#           annotations:
#             summary: "Pod {{ $labels.pod }} Memory 超過 90%"
#
#         - alert: NodeDiskPressure
#           expr: kube_node_status_condition{condition="DiskPressure",status="true"} == 1
#           for: 5m
#           labels:
#             severity: critical
#             alert_type: disk_full
#           annotations:
#             summary: "Node {{ $labels.node }} 磁碟壓力過高"
|
||
|
||
# -----------------------------------------------------------------------------
|
||
# 測試指令
|
||
# -----------------------------------------------------------------------------
|
||
# 1. 模擬發送告警 (無 HMAC,僅限 dev 環境):
|
||
#
|
||
# curl -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts \
|
||
# -H "Content-Type: application/json" \
|
||
# -d '{
|
||
# "alert_type": "k8s_pod_crash",
|
||
# "severity": "warning",
|
||
# "source": "prometheus",
|
||
# "target_resource": "test-pod-123",
|
||
# "namespace": "default",
|
||
# "message": "Manual test alert"
|
||
# }'
|
||
#
|
||
# 2. 帶 HMAC 簽章發送 (生產環境):
|
||
#
|
||
#    SECRET="your-hmac-secret"
#    PAYLOAD='{"alert_type":"k8s_pod_crash","severity":"warning","source":"prometheus","target_resource":"test-pod-123","namespace":"default","message":"HMAC test"}'
#    # 使用 printf 而非 echo -n: echo -n 在部分 shell 不可攜,可能改變被簽章的內容
#    SIGNATURE=$(printf '%s' "$PAYLOAD" | openssl dgst -sha256 -hmac "$SECRET" | awk '{print $2}')
#
#    curl -X POST http://192.168.0.188:8000/api/v1/webhooks/alerts \
#      -H "Content-Type: application/json" \
#      -H "X-Signature-256: sha256=$SIGNATURE" \
#      -d "$PAYLOAD"
|
||
#
|
||
# -----------------------------------------------------------------------------
|
||
# 驗證影子模式
|
||
# -----------------------------------------------------------------------------
|
||
# 查看 AWOOOI API 日誌,確認出現:
|
||
# shadow_mode_intercept | operation=DELETE_POD | message=[SHADOW MODE]
|
||
#
|
||
# 這表示 AI 決策已觸發,但 K8s 操作被安全攔截
|
||
# =============================================================================
|