Files
awoooi/apps/api/src/services/callback_action_spec.yaml
Your Name 433f7b068e
All checks were successful
CD Pipeline / tests (push) Successful in 2m7s
Code Review / ai-code-review (push) Successful in 42s
CD Pipeline / build-and-deploy (push) Successful in 13m14s
CD Pipeline / post-deploy-checks (push) Successful in 4m29s
fix(aiops): close ssh and telegram remediation gaps
2026-05-01 16:53:02 +08:00

479 lines
12 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Telegram 分類按鈕 Action 規格註冊表
# ============================================
# Phase 5 Sprint 5.0 — 2026-04-14 Claude Sonnet 4.6
# 相關: docs/superpowers/plans/2026-04-14-PHASE-5-category-buttons-completion.md
#
# 格式:
# actions:
# <action_name>:
# label: <UI 顯示文字>
# emoji: <按鈕 emoji>
# risk: low | medium | high | critical
# callback_format: info | nonce # info=2-part (查類), nonce=4-part (寫類)
# category: <alert_category> # 綁定於 classify_alert_early 輸出
# mcp:
# provider: k8s | ssh | prometheus | signoz | database | internal
# tool: <MCP tool name>
# params: # 參數模板 (支援 {incident_id}/{labels.xxx}/{signals[0].xxx})
# <key>: <template>
# reply_format: text | code | url | truncated # 執行結果 reply 樣式
# requires_multi_sig: false # secops 類設 true
# timeout_sec: 10 # MCP 呼叫 timeout
# description: <說明>
# Registry metadata: spec version and the date this file was last edited.
# Both are quoted so they load as strings rather than a float / date.
version: "1.0"
last_updated: "2026-05-01"
# Mapping of action name -> action spec; each entry below describes one button action.
actions:
# ==========================================================================
# 查類按鈕(無副作用,2-part info callback)
# ==========================================================================
# check_process: info-format (2-part callback) action for the host_resource category.
# Calls the ssh tool ssh_get_top_processes on the alert's instance label, limited to 10 rows;
# the description states it lists the 10 processes with the highest CPU usage.
check_process:
label: "查程序"
emoji: "🔍"
risk: low
callback_format: info
category: host_resource
mcp:
provider: ssh
tool: ssh_get_top_processes
params:
host: "{labels.instance}"
limit: 10
reply_format: code
timeout_sec: 8
description: "列出主機上 CPU 最高的 10 個程序"
# check_log_nginx: info-format action for the network category.
# Calls the ssh tool ssh_get_nginx_error_log on the alert's instance label for the last
# 50 lines; the reply uses the truncated format.
check_log_nginx:
label: "查 Nginx Log"
emoji: "📋"
risk: low
callback_format: info
category: network
mcp:
provider: ssh
tool: ssh_get_nginx_error_log
params:
host: "{labels.instance}"
lines: 50
reply_format: truncated
timeout_sec: 10
description: "最後 50 行 Nginx error log"
# check_log_container: info-format action for the devops_tool category.
# Calls the ssh tool ssh_get_container_logs; host and container are both taken from
# alert labels, and 50 lines are requested.
check_log_container:
label: "查容器 Log"
emoji: "📋"
risk: low
callback_format: info
category: devops_tool
mcp:
provider: ssh
tool: ssh_get_container_logs
params:
host: "{labels.instance}"
container: "{labels.container}"
lines: 50
reply_format: truncated
timeout_sec: 10
description: "最後 50 行容器 stdout/stderr log"
# check_log_minio: info-format action for the storage category.
# Uses the ssh tool ssh_get_container_logs for the "minio" container, 50 lines.
# NOTE(review): host is a fixed address here rather than a "{labels.instance}" template,
# so this action always targets the same machine regardless of the alert.
check_log_minio:
label: "查 MinIO Log"
emoji: "📋"
risk: low
callback_format: info
category: storage
mcp:
provider: ssh
tool: ssh_get_container_logs
params:
host: "192.168.0.188"
container: "minio"
lines: 50
reply_format: truncated
timeout_sec: 10
description: "MinIO 容器最後 50 行 log"
# check_port: info-format action for the network category.
# Calls the ssh tool ssh_check_port with host and port taken from alert labels; the
# description states it checks whether the given port is in LISTEN state.
# The port template is quoted, so it is a string in this file.
check_port:
label: "查 Port"
emoji: "🔌"
risk: low
callback_format: info
category: network
mcp:
provider: ssh
tool: ssh_check_port
params:
host: "{labels.instance}"
port: "{labels.port}"
reply_format: text
timeout_sec: 5
description: "檢查主機上指定 port 是否 LISTEN"
# check_health: info-format action for the external_site category.
# Runs prometheus_query for the probe_success metric filtered by the alert's instance label.
# NOTE(review): the doubled braces in the query presumably escape literal "{" / "}" for the
# template renderer so the PromQL selector survives substitution — confirm against the renderer.
check_health:
label: "查健康狀態"
emoji: "🔍"
risk: low
callback_format: info
category: external_site
mcp:
provider: prometheus
tool: prometheus_query
params:
query: 'probe_success{{instance="{labels.instance}"}}'
reply_format: text
timeout_sec: 5
description: "Blackbox probe 健康狀態"
# check_pod_logs: info-format action for the kubernetes category.
# Calls the k8s tool k8s_get_pod_logs with namespace and pod name from alert labels,
# tailing 50 lines.
check_pod_logs:
label: "查 Pod Log"
emoji: "📋"
risk: low
callback_format: info
category: kubernetes
mcp:
provider: k8s
tool: k8s_get_pod_logs
params:
namespace: "{labels.namespace}"
pod_name: "{labels.pod}"
tail: 50
reply_format: truncated
timeout_sec: 8
description: "Pod 最後 50 行 log"
# describe_pod: info-format action for the kubernetes category.
# Calls the k8s tool k8s_describe_pod with namespace and pod name from alert labels;
# the description equates it to the full "kubectl describe pod" output.
describe_pod:
label: "詳細 Pod"
emoji: "📜"
risk: low
callback_format: info
category: kubernetes
mcp:
provider: k8s
tool: k8s_describe_pod
params:
namespace: "{labels.namespace}"
pod_name: "{labels.pod}"
reply_format: truncated
timeout_sec: 5
description: "kubectl describe pod 完整資訊"
# open_signoz: info-format action for the business category.
# Uses the internal tool build_signoz_url with the alert's service label and replies
# in url format; the description states it returns a SignOz deeplink.
open_signoz:
label: "查 SignOz"
emoji: "🔍"
risk: low
callback_format: info
category: business
mcp:
provider: internal
tool: build_signoz_url
params:
service: "{labels.service}"
reply_format: url
timeout_sec: 1
description: "返回 SignOz deeplink"
# open_flywheel: info-format action for the flywheel_health category.
# Uses the internal tool build_flywheel_url with an explicitly empty params mapping and
# replies in url format; the description states it returns the flywheel dashboard URL.
open_flywheel:
label: "飛輪面板"
emoji: "📊"
risk: low
callback_format: info
category: flywheel_health
mcp:
provider: internal
tool: build_flywheel_url
params: {}
reply_format: url
timeout_sec: 1
description: "返回飛輪儀表板 URL"
# backup_check_host_disk: info-format action for the backup_failure category.
# Calls the ssh tool ssh_get_disk_usage on the alert's instance label; the description
# states it checks host disk capacity and Docker directory size after a backup failure.
backup_check_host_disk:
label: "查主機磁碟"
emoji: "💾"
risk: low
callback_format: info
category: backup_failure
mcp:
provider: ssh
tool: ssh_get_disk_usage
params:
host: "{labels.instance}"
reply_format: code
timeout_sec: 8
description: "備份失敗時檢查主機磁碟容量與 Docker 目錄大小"
# backup_check_jobs: info-format action for the backup_failure category.
# Calls the k8s tool kubectl_get for resource "jobs" in the fixed namespace "awoooi-prod".
# NOTE(review): the description speaks of backup-related Jobs, but no selector or name
# filter is passed in params here — confirm whether the tool filters on its own.
backup_check_jobs:
label: "查備份 Job"
emoji: "📦"
risk: low
callback_format: info
category: backup_failure
mcp:
provider: k8s
tool: kubectl_get
params:
namespace: "awoooi-prod"
resource: "jobs"
reply_format: truncated
timeout_sec: 8
description: "列出 awoooi-prod 內的備份相關 Job 狀態"
# backup_check_velero: info-format action for the backup_failure category.
# Calls the k8s tool kubectl_get for the "backups.velero.io" resource in the fixed
# namespace "velero"; the description states it lists Velero backup CR status.
backup_check_velero:
label: "查 Velero"
emoji: "🧰"
risk: low
callback_format: info
category: backup_failure
mcp:
provider: k8s
tool: kubectl_get
params:
namespace: "velero"
resource: "backups.velero.io"
reply_format: truncated
timeout_sec: 8
description: "列出 Velero backup CR 狀態"
# ==========================================================================
# 寫類按鈕(有副作用,4-part nonce callback)
# ==========================================================================
# k8s_restart: nonce-format (4-part callback) write action for the kubernetes category,
# risk medium. Calls the k8s tool kubectl_restart with namespace and deployment taken
# from alert labels; the description equates it to "kubectl rollout restart deployment".
k8s_restart:
label: "重啟"
emoji: "🔄"
risk: medium
callback_format: nonce
category: kubernetes
mcp:
provider: k8s
tool: kubectl_restart
params:
namespace: "{labels.namespace}"
deployment: "{labels.deployment}"
reply_format: text
timeout_sec: 30
description: "kubectl rollout restart deployment"
# k8s_scale_up: nonce-format write action for the kubernetes category, risk medium.
# Calls the k8s tool kubectl_scale with replicas_delta 1 on the labelled deployment.
# NOTE(review): no upper-bound parameter is set here, whereas k8s_scale_down carries
# min_replicas — confirm whether the tool enforces a maximum itself.
k8s_scale_up:
label: "擴容 +1"
emoji: "📈"
risk: medium
callback_format: nonce
category: kubernetes
mcp:
provider: k8s
tool: kubectl_scale
params:
namespace: "{labels.namespace}"
deployment: "{labels.deployment}"
replicas_delta: 1
reply_format: text
timeout_sec: 10
description: "副本數 +1單調遞增"
# k8s_scale_down: nonce-format write action for the kubernetes category, risk medium.
# Calls the k8s tool kubectl_scale with replicas_delta -1 and a floor of one replica.
k8s_scale_down:
label: "縮容 -1"
emoji: "📉"
risk: medium
callback_format: nonce
category: kubernetes
mcp:
provider: k8s
tool: kubectl_scale
params:
namespace: "{labels.namespace}"
deployment: "{labels.deployment}"
replicas_delta: -1
min_replicas: 1 # scaling to 0 is forbidden
reply_format: text
timeout_sec: 10
description: "副本數 -1禁止到 0"
# k8s_rollback: nonce-format write action for the kubernetes category, risk high.
# Targets the k8s tool kubectl_rollout_undo on the labelled deployment; the description
# equates it to "kubectl rollout undo" (return to the previous revision).
# Has the longest timeout among the kubernetes write actions in this file (60 s).
k8s_rollback:
label: "回滾"
emoji: "⏪"
risk: high
callback_format: nonce
category: kubernetes
mcp:
provider: k8s
tool: kubectl_rollout_undo # this MCP tool still has to be added in Sprint 5.3
params:
namespace: "{labels.namespace}"
deployment: "{labels.deployment}"
reply_format: text
timeout_sec: 60
description: "kubectl rollout undo回到上一版"
# host_restart_service: nonce-format write action for the host_resource category, risk high.
# Calls the ssh tool ssh_systemctl_restart with host and service taken from alert labels;
# the description states it is a host-level "systemctl restart".
host_restart_service:
label: "重啟服務"
emoji: "🔄"
risk: high
callback_format: nonce
category: host_resource
mcp:
provider: ssh
tool: ssh_systemctl_restart
params:
host: "{labels.instance}"
service: "{labels.service}"
reply_format: text
timeout_sec: 30
description: "systemctl restart 服務(主機層)"
# host_clear_log: nonce-format write action for the host_resource category.
# Calls the ssh tool ssh_clear_docker_logs for the labelled container on the labelled host;
# the description states it empties the container log file.
# NOTE(review): rated risk low although it discards log data — confirm this rating is intended.
host_clear_log:
label: "清 Log"
emoji: "🗑"
risk: low
callback_format: nonce
category: host_resource
mcp:
provider: ssh
tool: ssh_clear_docker_logs
params:
host: "{labels.instance}"
container: "{labels.container}"
reply_format: text
timeout_sec: 10
description: "清空容器 log file"
# docker_restart: nonce-format write action for the devops_tool category, risk high.
# Calls the ssh tool ssh_docker_restart with host and container taken from alert labels;
# the description equates it to "docker restart <container>".
docker_restart:
label: "重啟容器"
emoji: "🔄"
risk: high
callback_format: nonce
category: devops_tool
mcp:
provider: ssh
tool: ssh_docker_restart
params:
host: "{labels.instance}"
container: "{labels.container}"
reply_format: text
timeout_sec: 30
description: "docker restart <container>"
# minio_restart: nonce-format write action for the storage category, risk high.
# Calls the ssh tool ssh_docker_restart for the "minio" container.
# NOTE(review): host is a fixed address (the same one check_log_minio uses) rather than a
# label template, so the target does not follow the alert's instance.
minio_restart:
label: "重啟 MinIO"
emoji: "🔄"
risk: high
callback_format: nonce
category: storage
mcp:
provider: ssh
tool: ssh_docker_restart
params:
host: "192.168.0.188"
container: "minio"
reply_format: text
timeout_sec: 30
description: "重啟 MinIO Docker 容器"
# reload_nginx: nonce-format write action for the network category, risk low.
# Calls the ssh tool ssh_reload_nginx on the alert's instance label; the description
# equates it to "nginx -s reload" and states it causes no downtime.
reload_nginx:
label: "重載 Nginx"
emoji: "🔄"
risk: low
callback_format: nonce
category: network
mcp:
provider: ssh
tool: ssh_reload_nginx
params:
host: "{labels.instance}"
reply_format: text
timeout_sec: 10
description: "nginx -s reload不停機"
# renew_cert: nonce-format write action for the ssl_cert category, risk medium.
# Calls the ssh tool ssh_renew_ssl with host and domain taken from alert labels; the
# description equates it to "certbot renew". Timeout is 60 s.
renew_cert:
label: "更新憑證"
emoji: "🔐"
risk: medium
callback_format: nonce
category: ssl_cert
mcp:
provider: ssh
tool: ssh_renew_ssl
params:
host: "{labels.instance}"
domain: "{labels.domain}"
reply_format: text
timeout_sec: 60
description: "certbot renew"
# ==========================================================================
# 資安按鈕(CRITICAL,Multi-Sig 必要)
# ==========================================================================
# secops_isolate: nonce-format action for the secops category, risk critical, multi-sig required.
# It does not call a k8s or ssh tool: it invokes the internal tool record_authorization with
# action "request_network_isolation", the pressing user's id and the alert's instance label.
# Per the description, this only records the authorized isolation request; the actual
# NetworkPolicy change has to go through the SecOps Runbook.
secops_isolate:
label: "申請隔離"
emoji: "🚫"
risk: critical
callback_format: nonce
category: secops
requires_multi_sig: true
mcp:
provider: internal
tool: record_authorization
params:
user_id: "{callback.user_id}"
source: "{labels.instance}"
action: "request_network_isolation"
reply_format: text
timeout_sec: 30
description: "記錄雙人授權隔離請求;實際 NetworkPolicy 變更需走 SecOps Runbook"
# secops_block_ip: nonce-format action for the secops category, risk critical, multi-sig required.
# Invokes the internal tool record_authorization with action "request_ip_block", passing the
# pressing user's id, the alert's instance label and the attacker_ip label.
# The description is worded as a recorded request because the configured tool is
# record_authorization, not an ssh/iptables tool.
# NOTE(review): confirm where the actual firewall change is carried out.
secops_block_ip:
label: "封鎖來源 IP"
emoji: "⛔"
risk: critical
callback_format: nonce
category: secops
requires_multi_sig: true
mcp:
provider: internal
tool: record_authorization
params:
user_id: "{callback.user_id}"
source: "{labels.instance}"
source_ip: "{labels.attacker_ip}"
action: "request_ip_block"
reply_format: text
timeout_sec: 30
description: "記錄來源 IP 封鎖的授權請求 (request_ip_block)"
# secops_evict: nonce-format action for the secops category, risk critical, multi-sig required.
# Unlike the other secops entries in this file, it calls a k8s tool directly: kubectl_delete
# with resource_type pod and the labelled namespace / pod name.
# The description states the pod is recreated automatically after deletion.
secops_evict:
label: "強制驅逐 Pod"
emoji: "🔄"
risk: critical
callback_format: nonce
category: secops
requires_multi_sig: true
mcp:
provider: k8s
tool: kubectl_delete
params:
namespace: "{labels.namespace}"
resource_type: pod
name: "{labels.pod}"
reply_format: text
timeout_sec: 30
description: "kubectl delete pod會自動重建"
# secops_authorize: nonce-format action for the secops category, risk high.
# Invokes the internal tool record_authorization with the pressing user's id and the alert's
# instance label; no "action" parameter is passed here, unlike the two request entries.
# Per the description, recording the authorization suppresses further alerts from the same
# source (24h).
secops_authorize:
label: "確認授權"
emoji: "✅"
risk: high
callback_format: nonce
category: secops
requires_multi_sig: false # confirming an authorization does not itself need multi-sig
mcp:
provider: internal
tool: record_authorization
params:
user_id: "{callback.user_id}"
source: "{labels.instance}"
reply_format: text
timeout_sec: 5
description: "記錄授權,後續同 source 不再告警24h"