457 lines
12 KiB
JSON
457 lines
12 KiB
JSON
{
|
|
"name": "K8s MOMO App 健康監控 (含自動修復)",
|
|
"nodes": [
|
|
{
|
|
"parameters": {
|
|
"rule": {
|
|
"interval": [
|
|
{
|
|
"field": "minutes",
|
|
"minutesInterval": 5
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"id": "schedule-trigger",
|
|
"name": "每 5 分鐘執行",
|
|
"type": "n8n-nodes-base.scheduleTrigger",
|
|
"typeVersion": 1.2,
|
|
"position": [240, 300]
|
|
},
|
|
{
|
|
"parameters": {
|
|
"url": "https://mo.wooo.work/health",
|
|
"options": {
|
|
"timeout": 15000
|
|
}
|
|
},
|
|
"id": "check-app-health",
|
|
"name": "檢查 MOMO App Health",
|
|
"type": "n8n-nodes-base.httpRequest",
|
|
"typeVersion": 4.2,
|
|
"position": [460, 300],
|
|
"continueOnFail": true
|
|
},
|
|
{
|
|
"parameters": {
|
|
"conditions": {
|
|
"options": {
|
|
"caseSensitive": true,
|
|
"leftValue": "",
|
|
"typeValidation": "strict"
|
|
},
|
|
"conditions": [
|
|
{
|
|
"id": "health-check",
|
|
"leftValue": "={{ $json.status === 'healthy' }}",
|
|
"rightValue": true,
|
|
"operator": {
|
|
"type": "boolean",
|
|
"operation": "equals"
|
|
}
|
|
}
|
|
],
|
|
"combinator": "and"
|
|
}
|
|
},
|
|
"id": "check-healthy",
|
|
"name": "服務健康?",
|
|
"type": "n8n-nodes-base.if",
|
|
"typeVersion": 2,
|
|
"position": [680, 300]
|
|
},
|
|
{
|
|
"parameters": {},
|
|
"id": "no-op-healthy",
|
|
"name": "服務正常",
|
|
"type": "n8n-nodes-base.noOp",
|
|
"typeVersion": 1,
|
|
"position": [900, 200]
|
|
},
|
|
{
|
|
"parameters": {
|
|
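"jsCode": "// 分析失敗原因\nconst result = $('檢查 MOMO App Health').first();\nconst body = (result && result.json) || {};\n\nlet issue = {\n  type: 'unknown',\n  error: '',\n  needsRepair: true\n};\n\nif (body.error) {\n  // With continueOnFail the HTTP node may emit the error as an object rather than a string,\n  // so normalise it to a string before calling string methods on it.\n  const rawError = body.error;\n  const errorMsg = typeof rawError === 'string'\n    ? rawError\n    : String(rawError.message || JSON.stringify(rawError));\n\n  // Test 502 before the generic 'Gateway' match; otherwise '502 Bad Gateway' is reported as a 504.\n  if (errorMsg.includes('502')) {\n    issue.type = 'bad_gateway';\n    issue.error = '502 Bad Gateway - App 無回應';\n  } else if (errorMsg.includes('504') || errorMsg.includes('Gateway')) {\n    issue.type = 'gateway_timeout';\n    issue.error = '504 Gateway Timeout - App 或 DNS 問題';\n  } else if (errorMsg.includes('ECONNREFUSED')) {\n    issue.type = 'connection_refused';\n    issue.error = '連線被拒絕 - App 未啟動';\n  } else if (errorMsg.includes('timeout') || errorMsg.includes('ETIMEDOUT')) {\n    issue.type = 'timeout';\n    issue.error = '請求超時';\n  } else {\n    // Keep the alert short: a serialised error object can be very long.\n    issue.error = errorMsg.slice(0, 500);\n  }\n} else if (!body.status) {\n  issue.type = 'no_response';\n  issue.error = '無回應或回應格式錯誤';\n} else {\n  // The endpoint answered, but with a status other than 'healthy'.\n  issue.type = 'unhealthy';\n  issue.error = '服務回報狀態: ' + String(body.status);\n}\n\nreturn {\n  timestamp: new Date().toISOString(),\n  issue: issue,\n  rawResponse: body\n};"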
|
|
},
|
|
"id": "analyze-failure",
|
|
"name": "分析失敗原因",
|
|
"type": "n8n-nodes-base.code",
|
|
"typeVersion": 2,
|
|
"position": [900, 400]
|
|
},
|
|
{
|
|
"parameters": {
|
|
"chatId": "5619078117",
|
|
"text": "=🔴 <b>MOMO App 服務異常</b>\n\n問題類型: {{ $json.issue.type }}\n錯誤: {{ String($json.issue.error).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;') }}\n\n<b>自動修復中...</b>\n時間: {{ new Date().toLocaleString('zh-TW', {timeZone: 'Asia/Taipei'}) }}",
|
|
"additionalFields": {
|
|
"parse_mode": "HTML"
|
|
}
|
|
},
|
|
"id": "alert-failure",
|
|
"name": "發送異常告警",
|
|
"type": "n8n-nodes-base.telegram",
|
|
"typeVersion": 1.2,
|
|
"position": [1120, 400],
|
|
"credentials": {
|
|
"telegramApi": {
|
|
"id": "telegram-bot",
|
|
"name": "Telegram Bot"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"parameters": {
|
|
"authentication": "predefinedCredentialType",
|
|
"nodeCredentialType": "sshPassword",
|
|
"resource": "command",
|
|
"command": "echo '<LOGIN_PASSWORD>' | sudo -S kubectl rollout restart deployment momo-app -n momo && sleep 60 && curl -s -o /dev/null -w '%{http_code}' https://mo.wooo.work/health"
|
|
},
|
|
"id": "repair-step-1",
|
|
"name": "修復步驟1: 重啟 Pod",
|
|
"type": "n8n-nodes-base.ssh",
|
|
"typeVersion": 1,
|
|
"position": [1340, 300],
|
|
"credentials": {
|
|
"sshPassword": {
|
|
"id": "uat-ssh",
|
|
"name": "UAT SSH"
|
|
}
|
|
},
|
|
"continueOnFail": true
|
|
},
|
|
{
|
|
"parameters": {
|
|
"conditions": {
|
|
"options": {
|
|
"caseSensitive": true,
|
|
"leftValue": "",
|
|
"typeValidation": "strict"
|
|
},
|
|
"conditions": [
|
|
{
|
|
"id": "step1-success",
|
|
"leftValue": "={{ ($json.stdout || '').trim().endsWith('200') }}",
|
|
"rightValue": true,
|
|
"operator": {
|
|
"type": "boolean",
|
|
"operation": "equals"
|
|
}
|
|
}
|
|
],
|
|
"combinator": "and"
|
|
}
|
|
},
|
|
"id": "step1-success-check",
|
|
"name": "步驟1成功?",
|
|
"type": "n8n-nodes-base.if",
|
|
"typeVersion": 2,
|
|
"position": [1560, 300]
|
|
},
|
|
{
|
|
"parameters": {
|
|
"authentication": "predefinedCredentialType",
|
|
"nodeCredentialType": "sshPassword",
|
|
"resource": "command",
|
|
"command": "echo '<LOGIN_PASSWORD>' | sudo -S kubectl rollout restart deployment coredns -n kube-system && sleep 30 && sudo kubectl delete pods -l app=momo-app -n momo --force --grace-period=0 && sleep 60 && curl -s -o /dev/null -w '%{http_code}' https://mo.wooo.work/health"
|
|
},
|
|
"id": "repair-step-2",
|
|
"name": "修復步驟2: CoreDNS + 強制刪除 Pod",
|
|
"type": "n8n-nodes-base.ssh",
|
|
"typeVersion": 1,
|
|
"position": [1780, 400],
|
|
"credentials": {
|
|
"sshPassword": {
|
|
"id": "uat-ssh",
|
|
"name": "UAT SSH"
|
|
}
|
|
},
|
|
"continueOnFail": true
|
|
},
|
|
{
|
|
"parameters": {
|
|
"conditions": {
|
|
"options": {
|
|
"caseSensitive": true,
|
|
"leftValue": "",
|
|
"typeValidation": "strict"
|
|
},
|
|
"conditions": [
|
|
{
|
|
"id": "step2-success",
|
|
"leftValue": "={{ ($json.stdout || '').trim().endsWith('200') }}",
|
|
"rightValue": true,
|
|
"operator": {
|
|
"type": "boolean",
|
|
"operation": "equals"
|
|
}
|
|
}
|
|
],
|
|
"combinator": "and"
|
|
}
|
|
},
|
|
"id": "step2-success-check",
|
|
"name": "步驟2成功?",
|
|
"type": "n8n-nodes-base.if",
|
|
"typeVersion": 2,
|
|
"position": [2000, 400]
|
|
},
|
|
{
|
|
"parameters": {
|
|
"authentication": "predefinedCredentialType",
|
|
"nodeCredentialType": "sshPassword",
|
|
"resource": "command",
|
|
"command": "echo '<LOGIN_PASSWORD>' | sudo -S kubectl scale deployment momo-app -n momo --replicas=0 && sleep 10 && sudo kubectl scale deployment momo-app -n momo --replicas=1 && sleep 90 && curl -s -o /dev/null -w '%{http_code}' https://mo.wooo.work/health"
|
|
},
|
|
"id": "repair-step-3",
|
|
"name": "修復步驟3: 重建 Deployment",
|
|
"type": "n8n-nodes-base.ssh",
|
|
"typeVersion": 1,
|
|
"position": [2220, 500],
|
|
"credentials": {
|
|
"sshPassword": {
|
|
"id": "uat-ssh",
|
|
"name": "UAT SSH"
|
|
}
|
|
},
|
|
"continueOnFail": true
|
|
},
|
|
{
|
|
"parameters": {
|
|
"conditions": {
|
|
"options": {
|
|
"caseSensitive": true,
|
|
"leftValue": "",
|
|
"typeValidation": "strict"
|
|
},
|
|
"conditions": [
|
|
{
|
|
"id": "step3-success",
|
|
"leftValue": "={{ ($json.stdout || '').trim().endsWith('200') }}",
|
|
"rightValue": true,
|
|
"operator": {
|
|
"type": "boolean",
|
|
"operation": "equals"
|
|
}
|
|
}
|
|
],
|
|
"combinator": "and"
|
|
}
|
|
},
|
|
"id": "step3-success-check",
|
|
"name": "步驟3成功?",
|
|
"type": "n8n-nodes-base.if",
|
|
"typeVersion": 2,
|
|
"position": [2440, 500]
|
|
},
|
|
{
|
|
"parameters": {
|
|
"chatId": "5619078117",
|
|
"text": "=🟢 <b>MOMO App 已恢復</b>\n\n修復步驟: {{ $('步驟3成功?').isExecuted ? '3 (重建 Deployment)' : ($('步驟2成功?').isExecuted ? '2 (CoreDNS + 強制刪除)' : '1 (重啟 Pod)') }}\n\n時間: {{ new Date().toLocaleString('zh-TW', {timeZone: 'Asia/Taipei'}) }}",
|
|
"additionalFields": {
|
|
"parse_mode": "HTML"
|
|
}
|
|
},
|
|
"id": "notify-success",
|
|
"name": "通知修復成功",
|
|
"type": "n8n-nodes-base.telegram",
|
|
"typeVersion": 1.2,
|
|
"position": [2660, 300],
|
|
"credentials": {
|
|
"telegramApi": {
|
|
"id": "telegram-bot",
|
|
"name": "Telegram Bot"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"parameters": {
|
|
"chatId": "5619078117",
|
|
"text": "=🔴 <b>MOMO App 修復失敗</b>\n\n已嘗試 3 種修復方法均失敗\n\n<b>需要人工介入</b>\nSSH: ssh wooo@192.168.0.110\n\n檢查指令:\nkubectl get pods -n momo\nkubectl logs deployment/momo-app -n momo\nkubectl describe pod -l app=momo-app -n momo\n\n時間: {{ new Date().toLocaleString('zh-TW', {timeZone: 'Asia/Taipei'}) }}",
|
|
"additionalFields": {
|
|
"parse_mode": "HTML"
|
|
}
|
|
},
|
|
"id": "notify-failure",
|
|
"name": "通知修復失敗",
|
|
"type": "n8n-nodes-base.telegram",
|
|
"typeVersion": 1.2,
|
|
"position": [2660, 600],
|
|
"credentials": {
|
|
"telegramApi": {
|
|
"id": "telegram-bot",
|
|
"name": "Telegram Bot"
|
|
}
|
|
}
|
|
}
|
|
],
|
|
"connections": {
|
|
"每 5 分鐘執行": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "檢查 MOMO App Health",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"檢查 MOMO App Health": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "服務健康?",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"服務健康?": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "服務正常",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
],
|
|
[
|
|
{
|
|
"node": "分析失敗原因",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"分析失敗原因": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "發送異常告警",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"發送異常告警": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "修復步驟1: 重啟 Pod",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"修復步驟1: 重啟 Pod": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "步驟1成功?",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"步驟1成功?": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "通知修復成功",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
],
|
|
[
|
|
{
|
|
"node": "修復步驟2: CoreDNS + 強制刪除 Pod",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"修復步驟2: CoreDNS + 強制刪除 Pod": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "步驟2成功?",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"步驟2成功?": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "通知修復成功",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
],
|
|
[
|
|
{
|
|
"node": "修復步驟3: 重建 Deployment",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"修復步驟3: 重建 Deployment": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "步驟3成功?",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
},
|
|
"步驟3成功?": {
|
|
"main": [
|
|
[
|
|
{
|
|
"node": "通知修復成功",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
],
|
|
[
|
|
{
|
|
"node": "通知修復失敗",
|
|
"type": "main",
|
|
"index": 0
|
|
}
|
|
]
|
|
]
|
|
}
|
|
},
|
|
"active": true,
|
|
"settings": {
|
|
"executionOrder": "v1"
|
|
},
|
|
"tags": [
|
|
{
|
|
"name": "monitoring"
|
|
},
|
|
{
|
|
"name": "k8s"
|
|
},
|
|
{
|
|
"name": "auto-repair"
|
|
}
|
|
],
|
|
"pinData": {},
|
|
"meta": {
|
|
"instanceId": "momo-pro-system"
|
|
}
|
|
}
|