Files
ewoooc/n8n-workflows/15-k8s-health-monitor.json
OoO d6d8777e41
All checks were successful
CD Pipeline / deploy (push) Successful in 1m12s
V10.601 收斂 Gemini 與密鑰治理
2026-06-06 14:52:46 +08:00

457 lines
12 KiB
JSON

{
"name": "K8s MOMO App 健康監控 (含自動修復)",
"nodes": [
{
"parameters": {
"rule": {
"interval": [
{
"field": "minutes",
"minutesInterval": 5
}
]
}
},
"id": "schedule-trigger",
"name": "每 5 分鐘執行",
"type": "n8n-nodes-base.scheduleTrigger",
"typeVersion": 1.2,
"position": [240, 300]
},
{
"parameters": {
"url": "https://mo.wooo.work/health",
"options": {
"timeout": 15000
}
},
"id": "check-app-health",
"name": "檢查 MOMO App Health",
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [460, 300],
"continueOnFail": true
},
{
"parameters": {
"conditions": {
"options": {
"caseSensitive": true,
"leftValue": "",
"typeValidation": "strict"
},
"conditions": [
{
"id": "health-check",
"leftValue": "={{ $json.status === 'healthy' }}",
"rightValue": true,
"operator": {
"type": "boolean",
"operation": "equals"
}
}
],
"combinator": "and"
}
},
"id": "check-healthy",
"name": "服務健康?",
"type": "n8n-nodes-base.if",
"typeVersion": 2,
"position": [680, 300]
},
{
"parameters": {},
"id": "no-op-healthy",
"name": "服務正常",
"type": "n8n-nodes-base.noOp",
"typeVersion": 1,
"position": [900, 200]
},
{
  "parameters": {
    "jsCode": "// Classify why the health check failed so the alert can state a concrete reason.\nconst result = $('檢查 MOMO App Health').first();\nconst body = (result && result.json) || {};\n\nconst issue = {\n  type: 'unknown',\n  error: '',\n  needsRepair: true\n};\n\n// The alert is sent with parse_mode=HTML, so free-form text must be escaped first.\nconst escapeHtml = (text) => String(text).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');\n\n// With continueOnFail the error may arrive as a string or as an object carrying a message.\nconst rawError = body.error;\nlet errorMsg = '';\nif (typeof rawError === 'string') {\n  errorMsg = rawError;\n} else if (rawError) {\n  errorMsg = String(rawError.message || rawError.description || JSON.stringify(rawError));\n}\nconst lowered = errorMsg.toLowerCase();\n\nif (errorMsg) {\n  // Match 502 before the generic 'Gateway' test: '502 Bad Gateway' contains 'Gateway' too.\n  if (errorMsg.includes('502')) {\n    issue.type = 'bad_gateway';\n    issue.error = '502 Bad Gateway - App 無回應';\n  } else if (errorMsg.includes('504') || errorMsg.includes('Gateway')) {\n    issue.type = 'gateway_timeout';\n    issue.error = '504 Gateway Timeout - App 或 DNS 問題';\n  } else if (errorMsg.includes('ECONNREFUSED')) {\n    issue.type = 'connection_refused';\n    issue.error = '連線被拒絕 - App 未啟動';\n  } else if (lowered.includes('timeout') || lowered.includes('etimedout')) {\n    issue.type = 'timeout';\n    issue.error = '請求超時';\n  } else {\n    // Unclassified: forward the raw message, truncated and HTML-escaped.\n    issue.error = escapeHtml(errorMsg.slice(0, 500));\n  }\n} else if (!body.status) {\n  issue.type = 'no_response';\n  issue.error = '無回應或回應格式錯誤';\n} else {\n  // The endpoint answered, but with a status other than 'healthy'.\n  issue.type = 'unhealthy';\n  issue.error = escapeHtml('服務回報狀態: ' + body.status);\n}\n\nreturn [{\n  json: {\n    timestamp: new Date().toISOString(),\n    issue: issue,\n    rawResponse: body\n  }\n}];"
  },
  "id": "analyze-failure",
  "name": "分析失敗原因",
  "type": "n8n-nodes-base.code",
  "typeVersion": 2,
  "position": [900, 400]
},
{
  "parameters": {
    "chatId": "5619078117",
    "text": "=🔴 <b>MOMO App 服務異常</b>\n\n問題類型: {{ $json.issue.type }}\n錯誤: {{ $json.issue.error }}\n\n<b>自動修復中...</b>\n時間: {{ new Date().toLocaleString('zh-TW', {timeZone: 'Asia/Taipei'}) }}",
    "additionalFields": {
      "parse_mode": "HTML"
    }
  },
  "id": "alert-failure",
  "name": "發送異常告警",
  "type": "n8n-nodes-base.telegram",
  "typeVersion": 1.2,
  "position": [1120, 400],
  "credentials": {
    "telegramApi": {
      "id": "telegram-bot",
      "name": "Telegram Bot"
    }
  },
  "continueOnFail": true
},
{
"parameters": {
"authentication": "predefinedCredentialType",
"nodeCredentialType": "sshPassword",
"resource": "command",
"command": "echo '<LOGIN_PASSWORD>' | sudo -S kubectl rollout restart deployment momo-app -n momo && sleep 60 && curl -s -o /dev/null -w '%{http_code}' https://mo.wooo.work/health"
},
"id": "repair-step-1",
"name": "修復步驟1: 重啟 Pod",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [1340, 300],
"credentials": {
"sshPassword": {
"id": "uat-ssh",
"name": "UAT SSH"
}
},
"continueOnFail": true
},
{
  "parameters": {
    "conditions": {
      "options": {
        "caseSensitive": true,
        "leftValue": "",
        "typeValidation": "strict"
      },
      "conditions": [
        {
          "id": "step1-success",
          "leftValue": "={{ String($json.stdout || '').trim().endsWith('200') }}",
          "rightValue": true,
          "operator": {
            "type": "boolean",
            "operation": "equals"
          }
        }
      ],
      "combinator": "and"
    }
  },
  "id": "step1-success-check",
  "name": "步驟1成功?",
  "type": "n8n-nodes-base.if",
  "typeVersion": 2,
  "position": [1560, 300]
},
{
"parameters": {
"authentication": "predefinedCredentialType",
"nodeCredentialType": "sshPassword",
"resource": "command",
"command": "echo '<LOGIN_PASSWORD>' | sudo -S kubectl rollout restart deployment coredns -n kube-system && sleep 30 && sudo kubectl delete pods -l app=momo-app -n momo --force --grace-period=0 && sleep 60 && curl -s -o /dev/null -w '%{http_code}' https://mo.wooo.work/health"
},
"id": "repair-step-2",
"name": "修復步驟2: CoreDNS + 強制刪除 Pod",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [1780, 400],
"credentials": {
"sshPassword": {
"id": "uat-ssh",
"name": "UAT SSH"
}
},
"continueOnFail": true
},
{
  "parameters": {
    "conditions": {
      "options": {
        "caseSensitive": true,
        "leftValue": "",
        "typeValidation": "strict"
      },
      "conditions": [
        {
          "id": "step2-success",
          "leftValue": "={{ String($json.stdout || '').trim().endsWith('200') }}",
          "rightValue": true,
          "operator": {
            "type": "boolean",
            "operation": "equals"
          }
        }
      ],
      "combinator": "and"
    }
  },
  "id": "step2-success-check",
  "name": "步驟2成功?",
  "type": "n8n-nodes-base.if",
  "typeVersion": 2,
  "position": [2000, 400]
},
{
"parameters": {
"authentication": "predefinedCredentialType",
"nodeCredentialType": "sshPassword",
"resource": "command",
"command": "echo '<LOGIN_PASSWORD>' | sudo -S kubectl scale deployment momo-app -n momo --replicas=0 && sleep 10 && sudo kubectl scale deployment momo-app -n momo --replicas=1 && sleep 90 && curl -s -o /dev/null -w '%{http_code}' https://mo.wooo.work/health"
},
"id": "repair-step-3",
"name": "修復步驟3: 重建 Deployment",
"type": "n8n-nodes-base.ssh",
"typeVersion": 1,
"position": [2220, 500],
"credentials": {
"sshPassword": {
"id": "uat-ssh",
"name": "UAT SSH"
}
},
"continueOnFail": true
},
{
  "parameters": {
    "conditions": {
      "options": {
        "caseSensitive": true,
        "leftValue": "",
        "typeValidation": "strict"
      },
      "conditions": [
        {
          "id": "step3-success",
          "leftValue": "={{ String($json.stdout || '').trim().endsWith('200') }}",
          "rightValue": true,
          "operator": {
            "type": "boolean",
            "operation": "equals"
          }
        }
      ],
      "combinator": "and"
    }
  },
  "id": "step3-success-check",
  "name": "步驟3成功?",
  "type": "n8n-nodes-base.if",
  "typeVersion": 2,
  "position": [2440, 500]
},
{
  "parameters": {
    "chatId": "5619078117",
    "text": "=🟢 <b>MOMO App 已恢復</b>\n\n修復步驟: {{ $('步驟3成功?').isExecuted ? '3 (重建 Deployment)' : ($('步驟2成功?').isExecuted ? '2 (CoreDNS + 強制刪除)' : '1 (重啟 Pod)') }}\n\n時間: {{ new Date().toLocaleString('zh-TW', {timeZone: 'Asia/Taipei'}) }}",
    "additionalFields": {
      "parse_mode": "HTML"
    }
  },
  "id": "notify-success",
  "name": "通知修復成功",
  "type": "n8n-nodes-base.telegram",
  "typeVersion": 1.2,
  "position": [2660, 300],
  "credentials": {
    "telegramApi": {
      "id": "telegram-bot",
      "name": "Telegram Bot"
    }
  }
},
{
"parameters": {
"chatId": "5619078117",
"text": "=🔴 <b>MOMO App 修復失敗</b>\n\n已嘗試 3 種修復方法均失敗\n\n<b>需要人工介入</b>\nSSH: ssh wooo@192.168.0.110\n\n檢查指令:\nkubectl get pods -n momo\nkubectl logs deployment/momo-app -n momo\nkubectl describe pod -l app=momo-app -n momo\n\n時間: {{ new Date().toLocaleString('zh-TW', {timeZone: 'Asia/Taipei'}) }}",
"additionalFields": {
"parse_mode": "HTML"
}
},
"id": "notify-failure",
"name": "通知修復失敗",
"type": "n8n-nodes-base.telegram",
"typeVersion": 1.2,
"position": [2660, 600],
"credentials": {
"telegramApi": {
"id": "telegram-bot",
"name": "Telegram Bot"
}
}
}
],
"connections": {
"每 5 分鐘執行": {
"main": [
[
{
"node": "檢查 MOMO App Health",
"type": "main",
"index": 0
}
]
]
},
"檢查 MOMO App Health": {
"main": [
[
{
"node": "服務健康?",
"type": "main",
"index": 0
}
]
]
},
"服務健康?": {
"main": [
[
{
"node": "服務正常",
"type": "main",
"index": 0
}
],
[
{
"node": "分析失敗原因",
"type": "main",
"index": 0
}
]
]
},
"分析失敗原因": {
"main": [
[
{
"node": "發送異常告警",
"type": "main",
"index": 0
}
]
]
},
"發送異常告警": {
"main": [
[
{
"node": "修復步驟1: 重啟 Pod",
"type": "main",
"index": 0
}
]
]
},
"修復步驟1: 重啟 Pod": {
"main": [
[
{
"node": "步驟1成功?",
"type": "main",
"index": 0
}
]
]
},
"步驟1成功?": {
"main": [
[
{
"node": "通知修復成功",
"type": "main",
"index": 0
}
],
[
{
"node": "修復步驟2: CoreDNS + 強制刪除 Pod",
"type": "main",
"index": 0
}
]
]
},
"修復步驟2: CoreDNS + 強制刪除 Pod": {
"main": [
[
{
"node": "步驟2成功?",
"type": "main",
"index": 0
}
]
]
},
"步驟2成功?": {
"main": [
[
{
"node": "通知修復成功",
"type": "main",
"index": 0
}
],
[
{
"node": "修復步驟3: 重建 Deployment",
"type": "main",
"index": 0
}
]
]
},
"修復步驟3: 重建 Deployment": {
"main": [
[
{
"node": "步驟3成功?",
"type": "main",
"index": 0
}
]
]
},
"步驟3成功?": {
"main": [
[
{
"node": "通知修復成功",
"type": "main",
"index": 0
}
],
[
{
"node": "通知修復失敗",
"type": "main",
"index": 0
}
]
]
}
},
"active": true,
"settings": {
"executionOrder": "v1"
},
"tags": [
{
"name": "monitoring"
},
{
"name": "k8s"
},
{
"name": "auto-repair"
}
],
"pinData": {},
"meta": {
"instanceId": "momo-pro-system"
}
}