163 lines
8.5 KiB
JSON
163 lines
8.5 KiB
JSON
{
|
||
"name": "事件管理與 Runbook 執行",
|
||
"nodes": [
|
||
{
|
||
"parameters": {
|
||
"httpMethod": "POST",
|
||
"path": "incident",
|
||
"options": {}
|
||
},
|
||
"id": "webhook",
|
||
"name": "事件接收器",
|
||
"type": "n8n-nodes-base.webhook",
|
||
"typeVersion": 2,
|
||
"position": [240, 300]
|
||
},
|
||
{
|
||
"parameters": {
|
||
"jsCode": "const payload = $input.first().json;\n// The Webhook node nests the POST payload under body; fall back to the item itself for manual/direct input\nconst incident = payload.body ?? payload;\nconst incidentId = `INC-${Date.now()}`;\nconst now = new Date().toLocaleString('zh-TW', { timeZone: 'Asia/Taipei' });\n\n// 事件類型與對應 Runbook\nconst runbooks = {\n 'pod_crash': {\n name: 'Pod 崩潰',\n steps: [\n '1. 檢查 Pod 日誌: kubectl logs -n momo <pod-name> --previous',\n '2. 檢查資源使用: kubectl top pod -n momo',\n '3. 檢查 OOM: kubectl describe pod -n momo <pod-name> | grep -A5 \"Last State\"',\n '4. 嘗試重啟: kubectl rollout restart deployment/momo-app -n momo',\n '5. 如持續失敗,檢查映像和配置'\n ],\n autoFix: true,\n command: 'kubectl rollout restart deployment/momo-app -n momo'\n },\n 'db_connection': {\n name: '資料庫連線問題',\n steps: [\n '1. 檢查 PostgreSQL Pod: kubectl get pod -n momo -l app=momo-postgres',\n '2. 檢查連線數: kubectl exec -n momo momo-postgres-0 -- psql -U momo -c \"SELECT count(*) FROM pg_stat_activity;\"',\n '3. 重啟 PostgreSQL: kubectl rollout restart statefulset/momo-postgres -n momo',\n '4. 檢查應用連線池配置'\n ],\n autoFix: false\n },\n 'high_latency': {\n name: '高延遲',\n steps: [\n '1. 檢查 CPU/Memory: kubectl top pod -n momo',\n '2. 檢查慢查詢: 透過 API /api/system/db/slow_queries',\n '3. 執行 VACUUM ANALYZE: kubectl exec -n momo momo-postgres-0 -- psql -U momo -d momo_analytics -c \"VACUUM ANALYZE;\"',\n '4. 考慮增加 Pod 副本數'\n ],\n autoFix: true,\n command: 'kubectl exec -n momo momo-postgres-0 -- psql -U momo -d momo_analytics -c \"VACUUM ANALYZE;\"'\n },\n 'disk_full': {\n name: '磁碟空間不足',\n steps: [\n '1. 檢查磁碟使用: df -h',\n '2. 清理 Docker: docker system prune -af',\n '3. 清理舊日誌: find /var/log -type f -mtime +7 -delete',\n '4. 清理舊備份: find /home/wooo/backups -type f -mtime +30 -delete'\n ],\n autoFix: true,\n command: 'docker system prune -af && journalctl --vacuum-time=7d'\n },\n 'ssl_expiry': {\n name: 'SSL 證書即將到期',\n steps: [\n '1. 檢查證書到期日: openssl x509 -enddate -noout -in /etc/letsencrypt/live/*/fullchain.pem',\n '2. 手動更新證書: certbot renew',\n '3. 重啟 Nginx: systemctl restart nginx'\n ],\n autoFix: false\n }\n};\n\nconst incidentType = incident.type || 'unknown';\nconst runbook = runbooks[incidentType] || {\n name: '未知事件',\n steps: ['請聯繫系統管理員進行診斷'],\n autoFix: false\n};\n\nreturn [{\n json: {\n incidentId,\n type: incidentType,\n runbook,\n environment: incident.environment || 'UAT',\n service: incident.service || 'unknown',\n description: incident.description || '',\n severity: incident.severity || 'warning',\n timestamp: now,\n canAutoFix: runbook.autoFix,\n autoFixCommand: runbook.command || null\n }\n}];"
|
||
},
|
||
"id": "match-runbook",
|
||
"name": "匹配 Runbook",
|
||
"type": "n8n-nodes-base.code",
|
||
"typeVersion": 2,
|
||
"position": [460, 300]
|
||
},
|
||
{
|
||
"parameters": {
|
||
"jsCode": "const incident = $input.first().json;\n\n// Telegram HTML parse mode rejects raw <, > and & that are not part of a supported tag\nconst esc = (value) => String(value ?? '').replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');\n\nlet message = `🚨 <b>事件通知 #${incident.incidentId}</b>\\n\\n`;\nmessage += `📍 環境: ${esc(incident.environment)}\\n`;\nmessage += `🔧 服務: ${esc(incident.service)}\\n`;\nmessage += `📋 類型: ${esc(incident.runbook.name)}\\n`;\nmessage += `📝 描述: ${esc(incident.description)}\\n\\n`;\n\nmessage += `<b>📚 Runbook 步驟:</b>\\n`;\nincident.runbook.steps.forEach(step => {\n message += `${esc(step)}\\n`;\n});\n\nif (incident.canAutoFix) {\n message += `\\n✅ 自動修復: 可用\\n`;\n message += `🔄 正在嘗試自動修復...\\n`;\n} else {\n message += `\\n⚠️ 需要人工介入\\n`;\n}\n\nmessage += `\\n⏰ 時間: ${incident.timestamp}`;\n\nreturn [{ json: { ...incident, notificationMessage: message } }];"
|
||
},
|
||
"id": "format-notification",
|
||
"name": "格式化通知",
|
||
"type": "n8n-nodes-base.code",
|
||
"typeVersion": 2,
|
||
"position": [680, 300]
|
||
},
|
||
{
|
||
"parameters": {
|
||
"url": "https://api.telegram.org/bot<TELEGRAM_BOT_TOKEN>/sendMessage",
|
||
"sendBody": true,
|
||
"specifyBody": "json",
|
||
"jsonBody": "={\n \"chat_id\": \"5619078117\",\n \"parse_mode\": \"HTML\",\n \"text\": {{ JSON.stringify($json.notificationMessage) }}\n}",
|
||
"options": {}
|
||
},
|
||
"id": "telegram-notify",
|
||
"name": "發送事件通知",
|
||
"type": "n8n-nodes-base.httpRequest",
|
||
"typeVersion": 4.2,
|
||
"position": [900, 300]
|
||
},
|
||
{
|
||
"parameters": {
|
||
"conditions": {
|
||
"options": {
|
||
"caseSensitive": true,
|
||
"leftValue": "",
|
||
"typeValidation": "strict"
|
||
},
|
||
"conditions": [
|
||
{
|
||
"id": "can-autofix",
|
||
"leftValue": "={{ $json.canAutoFix }}",
|
||
"rightValue": true,
|
||
"operator": {
|
||
"type": "boolean",
|
||
"operation": "equals"
|
||
}
|
||
}
|
||
],
|
||
"combinator": "and"
|
||
},
|
||
"options": {}
|
||
},
|
||
"id": "if-autofix",
|
||
"name": "可自動修復?",
|
||
"type": "n8n-nodes-base.if",
|
||
"typeVersion": 2,
|
||
"position": [1120, 300]
|
||
},
|
||
{
|
||
"parameters": {
|
||
"command": "={{ $json.autoFixCommand }}",
|
||
"cwd": "/home/wooo"
|
||
},
|
||
"id": "execute-fix",
|
||
"name": "執行修復",
|
||
"type": "n8n-nodes-base.executeCommand",
|
||
"typeVersion": 1,
|
||
"position": [1340, 200],
|
||
"continueOnFail": true
|
||
},
|
||
{
|
||
"parameters": {
|
||
"jsCode": "const incident = $('格式化通知').first().json;\nconst fixResult = $input.first().json;\nconst now = new Date().toLocaleString('zh-TW', { timeZone: 'Asia/Taipei' });\n\nconst success = fixResult.exitCode === 0;\n\nlet message = '';\nif (success) {\n message = `✅ <b>事件 #${incident.incidentId} 自動修復成功</b>\\n\\n`;\n message += `📋 類型: ${incident.runbook.name}\\n`;\n message += `🔧 執行: ${incident.autoFixCommand}\\n`;\n message += `⏰ 時間: ${now}`;\n} else {\n message = `🔴 <b>事件 #${incident.incidentId} 自動修復失敗</b>\\n\\n`;\n message += `📋 類型: ${incident.runbook.name}\\n`;\n message += `❌ 錯誤: ${fixResult.stderr || fixResult.stdout}\\n`;\n message += `⚠️ 需要人工介入\\n`;\n message += `⏰ 時間: ${now}`;\n}\n\nreturn [{ json: { message } }];"
|
||
},
|
||
"id": "process-fix-result",
|
||
"name": "處理修復結果",
|
||
"type": "n8n-nodes-base.code",
|
||
"typeVersion": 2,
|
||
"position": [1560, 200]
|
||
},
|
||
{
|
||
"parameters": {
|
||
"url": "https://api.telegram.org/bot<TELEGRAM_BOT_TOKEN>/sendMessage",
|
||
"sendBody": true,
|
||
"specifyBody": "json",
|
||
"jsonBody": "={\n \"chat_id\": \"5619078117\",\n \"parse_mode\": \"HTML\",\n \"text\": {{ JSON.stringify($json.message) }}\n}",
|
||
"options": {}
|
||
},
|
||
"id": "telegram-result",
|
||
"name": "發送修復結果",
|
||
"type": "n8n-nodes-base.httpRequest",
|
||
"typeVersion": 4.2,
|
||
"position": [1780, 200]
|
||
}
|
||
],
|
||
"connections": {
|
||
"事件接收器": {
|
||
"main": [
|
||
[{ "node": "匹配 Runbook", "type": "main", "index": 0 }]
|
||
]
|
||
},
|
||
"匹配 Runbook": {
|
||
"main": [
|
||
[{ "node": "格式化通知", "type": "main", "index": 0 }]
|
||
]
|
||
},
|
||
"格式化通知": {
|
||
"main": [
|
||
[{ "node": "發送事件通知", "type": "main", "index": 0 }]
|
||
]
|
||
},
|
||
"發送事件通知": {
|
||
"main": [
|
||
[{ "node": "可自動修復?", "type": "main", "index": 0 }]
|
||
]
|
||
},
|
||
"可自動修復?": {
|
||
"main": [
|
||
[{ "node": "執行修復", "type": "main", "index": 0 }],
|
||
[]
|
||
]
|
||
},
|
||
"執行修復": {
|
||
"main": [
|
||
[{ "node": "處理修復結果", "type": "main", "index": 0 }]
|
||
]
|
||
},
|
||
"處理修復結果": {
|
||
"main": [
|
||
[{ "node": "發送修復結果", "type": "main", "index": 0 }]
|
||
]
|
||
}
|
||
},
|
||
"settings": {
|
||
"executionOrder": "v1"
|
||
},
|
||
"staticData": null,
|
||
"tags": [],
|
||
"triggerCount": 0,
|
||
"updatedAt": "2026-02-07T17:00:00.000Z",
|
||
"versionId": "1"
|
||
}
|