Files
ewoooc/n8n-workflows/26-incident-runbook.json
OoO d6d8777e41
All checks were successful
CD Pipeline / deploy (push) Successful in 1m12s
V10.601 收斂 Gemini 與密鑰治理
2026-06-06 14:52:46 +08:00

163 lines
8.5 KiB
JSON
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"name": "事件管理與 Runbook 執行",
"nodes": [
{
"parameters": {
"httpMethod": "POST",
"path": "incident",
"options": {}
},
"id": "webhook",
"name": "事件接收器",
"type": "n8n-nodes-base.webhook",
"typeVersion": 2,
"position": [240, 300]
},
{
"parameters": {
"jsCode": "// Match the incoming incident to a runbook and normalise the fields that later nodes read.\n// Emits one item with: incidentId, type, runbook, environment, service, description, severity, timestamp, canAutoFix, autoFixCommand.\nconst payload = $input.first().json || {};\n// The Webhook node delivers the POST payload under `body`; fall back to the item itself when the fields are passed in directly.\nconst incident = (payload.body && typeof payload.body === 'object') ? payload.body : payload;\nconst incidentId = `INC-${Date.now()}`;\nconst now = new Date().toLocaleString('zh-TW', { timeZone: 'Asia/Taipei' });\n\n// Incident types and their runbooks\nconst runbooks = {\n  'pod_crash': {\n    name: 'Pod 崩潰',\n    steps: [\n      '1. 檢查 Pod 日誌: kubectl logs -n momo <pod-name> --previous',\n      '2. 檢查資源使用: kubectl top pod -n momo',\n      '3. 檢查 OOM: kubectl describe pod -n momo <pod-name> | grep -A5 \"Last State\"',\n      '4. 嘗試重啟: kubectl rollout restart deployment/momo-app -n momo',\n      '5. 如持續失敗,檢查映像和配置'\n    ],\n    autoFix: true,\n    command: 'kubectl rollout restart deployment/momo-app -n momo'\n  },\n  'db_connection': {\n    name: '資料庫連線問題',\n    steps: [\n      '1. 檢查 PostgreSQL Pod: kubectl get pod -n momo -l app=momo-postgres',\n      '2. 檢查連線數: kubectl exec -n momo momo-postgres-0 -- psql -U momo -c \"SELECT count(*) FROM pg_stat_activity;\"',\n      '3. 重啟 PostgreSQL: kubectl rollout restart statefulset/momo-postgres -n momo',\n      '4. 檢查應用連線池配置'\n    ],\n    autoFix: false\n  },\n  'high_latency': {\n    name: '高延遲',\n    steps: [\n      '1. 檢查 CPU/Memory: kubectl top pod -n momo',\n      '2. 檢查慢查詢: 透過 API /api/system/db/slow_queries',\n      '3. 執行 VACUUM ANALYZE: kubectl exec -n momo momo-postgres-0 -- psql -U momo -d momo_analytics -c \"VACUUM ANALYZE;\"',\n      '4. 考慮增加 Pod 副本數'\n    ],\n    autoFix: true,\n    command: 'kubectl exec -n momo momo-postgres-0 -- psql -U momo -d momo_analytics -c \"VACUUM ANALYZE;\"'\n  },\n  'disk_full': {\n    name: '磁碟空間不足',\n    steps: [\n      '1. 檢查磁碟使用: df -h',\n      '2. 清理 Docker: docker system prune -af',\n      '3. 清理舊日誌: find /var/log -type f -mtime +7 -delete',\n      '4. 清理舊備份: find /home/wooo/backups -type f -mtime +30 -delete'\n    ],\n    autoFix: true,\n    command: 'docker system prune -af && journalctl --vacuum-time=7d'\n  },\n  'ssl_expiry': {\n    name: 'SSL 證書即將到期',\n    steps: [\n      '1. 檢查證書到期日: openssl x509 -enddate -noout -in /etc/letsencrypt/live/*/fullchain.pem',\n      '2. 手動更新證書: certbot renew',\n      '3. 重啟 Nginx: systemctl restart nginx'\n    ],\n    autoFix: false\n  }\n};\n\n// Used when the incident type has no runbook of its own.\nconst unknownRunbook = {\n  name: '未知事件',\n  steps: ['請聯繫系統管理員進行診斷'],\n  autoFix: false\n};\n\nconst incidentType = incident.type || 'unknown';\n// Own-property lookup only, so a type such as 'constructor' cannot resolve to an inherited Object member.\nconst runbook = Object.prototype.hasOwnProperty.call(runbooks, incidentType)\n  ? runbooks[incidentType]\n  : unknownRunbook;\n\nreturn [{\n  json: {\n    incidentId,\n    type: incidentType,\n    runbook,\n    environment: incident.environment || 'UAT',\n    service: incident.service || 'unknown',\n    description: incident.description || '',\n    severity: incident.severity || 'warning',\n    timestamp: now,\n    // Always a real boolean: the downstream IF node compares with strict type validation.\n    canAutoFix: runbook.autoFix === true,\n    autoFixCommand: runbook.command || null\n  }\n}];"
},
"id": "match-runbook",
"name": "匹配 Runbook",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [460, 300]
},
{
"parameters": {
"jsCode": "// Build the Telegram notification text (HTML parse mode) for the matched incident.\n// Emits the incoming incident fields plus notificationMessage.\nconst incident = $input.first().json;\n\n// HTML parse mode rejects stray <, > and &, so interpolated values are escaped\n// (runbook steps contain e.g. '<pod-name>', and description comes from the webhook caller).\nconst esc = (value) => String(value ?? '')\n  .replace(/&/g, '&amp;')\n  .replace(/</g, '&lt;')\n  .replace(/>/g, '&gt;');\n\nlet message = `🚨 <b>事件通知 #${esc(incident.incidentId)}</b>\\n\\n`;\nmessage += `📍 環境: ${esc(incident.environment)}\\n`;\nmessage += `🔧 服務: ${esc(incident.service)}\\n`;\nmessage += `📋 類型: ${esc(incident.runbook.name)}\\n`;\nmessage += `📝 描述: ${esc(incident.description)}\\n\\n`;\n\nmessage += `<b>📚 Runbook 步驟:</b>\\n`;\nfor (const step of incident.runbook.steps) {\n  message += `${esc(step)}\\n`;\n}\n\nif (incident.canAutoFix) {\n  message += `\\n✅ 自動修復: 可用\\n`;\n  message += `🔄 正在嘗試自動修復...\\n`;\n} else {\n  message += `\\n⚠ 需要人工介入\\n`;\n}\n\nmessage += `\\n⏰ 時間: ${esc(incident.timestamp)}`;\n\nreturn [{ json: { ...incident, notificationMessage: message } }];"
},
"id": "format-notification",
"name": "格式化通知",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [680, 300]
},
{
"parameters": {
"method": "POST",
"url": "https://api.telegram.org/bot<TELEGRAM_BOT_TOKEN>/sendMessage",
"sendBody": true,
"specifyBody": "json",
"jsonBody": "={{ JSON.stringify({ chat_id: '5619078117', parse_mode: 'HTML', text: $json.notificationMessage }) }}",
"options": {}
},
"id": "telegram-notify",
"name": "發送事件通知",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [900, 300]
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"leftValue": "",
"typeValidation": "strict"
},
"conditions": [
{
"id": "can-autofix",
"leftValue": "={{ $('格式化通知').first().json.canAutoFix }}",
"rightValue": true,
"operator": {
"type": "boolean",
"operation": "equals"
}
}
],
"combinator": "and"
},
"options": {}
},
"id": "if-autofix",
"name": "可自動修復?",
"type": "n8n-nodes-base.if",
"typeVersion": 2,
"position": [1120, 300]
},
{
"parameters": {
"command": "={{ $('格式化通知').first().json.autoFixCommand }}",
"cwd": "/home/wooo"
},
"id": "execute-fix",
"name": "執行修復",
"type": "n8n-nodes-base.executeCommand",
"typeVersion": 1,
"position": [1340, 200],
"continueOnFail": true
},
{
"parameters": {
"jsCode": "// Turn the auto-fix command result into a Telegram message (HTML parse mode).\n// Emits one item: { message }.\n// $() looks nodes up by display name, so the incident is read from '格式化通知' rather than the node id.\nconst incident = $('格式化通知').first().json;\nconst fixResult = $input.first().json;\nconst now = new Date().toLocaleString('zh-TW', { timeZone: 'Asia/Taipei' });\n\n// HTML parse mode rejects stray <, > and &; shell commands and their output routinely contain them.\nconst esc = (value) => String(value ?? '')\n  .replace(/&/g, '&amp;')\n  .replace(/</g, '&lt;')\n  .replace(/>/g, '&gt;');\n\n// Anything other than exit code 0 (including a missing exitCode) counts as a failure.\nconst success = fixResult.exitCode === 0;\n// NOTE(review): with continueOnFail the failed item appears to carry `error` instead of stderr/stdout - confirm against the Execute Command node.\nconst failureDetail = fixResult.stderr\n  || fixResult.stdout\n  || (fixResult.error && fixResult.error.message)\n  || fixResult.error\n  || '';\n\nlet message = '';\nif (success) {\n  message = `✅ <b>事件 #${esc(incident.incidentId)} 自動修復成功</b>\\n\\n`;\n  message += `📋 類型: ${esc(incident.runbook.name)}\\n`;\n  message += `🔧 執行: ${esc(incident.autoFixCommand)}\\n`;\n  message += `⏰ 時間: ${esc(now)}`;\n} else {\n  message = `🔴 <b>事件 #${esc(incident.incidentId)} 自動修復失敗</b>\\n\\n`;\n  message += `📋 類型: ${esc(incident.runbook.name)}\\n`;\n  message += `❌ 錯誤: ${esc(failureDetail)}\\n`;\n  message += `⚠️ 需要人工介入\\n`;\n  message += `⏰ 時間: ${esc(now)}`;\n}\n\nreturn [{ json: { message } }];"
},
"id": "process-fix-result",
"name": "處理修復結果",
"type": "n8n-nodes-base.code",
"typeVersion": 2,
"position": [1560, 200]
},
{
"parameters": {
"method": "POST",
"url": "https://api.telegram.org/bot<TELEGRAM_BOT_TOKEN>/sendMessage",
"sendBody": true,
"specifyBody": "json",
"jsonBody": "={{ JSON.stringify({ chat_id: '5619078117', parse_mode: 'HTML', text: $json.message }) }}",
"options": {}
},
"id": "telegram-result",
"name": "發送修復結果",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [1780, 200]
}
],
"connections": {
"事件接收器": {
"main": [
[{ "node": "匹配 Runbook", "type": "main", "index": 0 }]
]
},
"匹配 Runbook": {
"main": [
[{ "node": "格式化通知", "type": "main", "index": 0 }]
]
},
"格式化通知": {
"main": [
[{ "node": "發送事件通知", "type": "main", "index": 0 }]
]
},
"發送事件通知": {
"main": [
[{ "node": "可自動修復?", "type": "main", "index": 0 }]
]
},
"可自動修復?": {
"main": [
[{ "node": "執行修復", "type": "main", "index": 0 }],
[]
]
},
"執行修復": {
"main": [
[{ "node": "處理修復結果", "type": "main", "index": 0 }]
]
},
"處理修復結果": {
"main": [
[{ "node": "發送修復結果", "type": "main", "index": 0 }]
]
}
},
"settings": {
"executionOrder": "v1"
},
"staticData": null,
"tags": [],
"triggerCount": 0,
"updatedAt": "2026-02-07T17:00:00.000Z",
"versionId": "1"
}