問題: 清理腳本刪除了 $RUNNER_TEMP/* 包含 _runner_file_commands 結果: "Missing file at path: _runner_file_commands/set_output_xxx" 修正: - 移除 rm -rf $RUNNER_TEMP/* (會刪除關鍵檔案) - Pre-flight: 使用 find 排除 _runner_file_commands - 其他 Jobs: 只清理 _diag/pages Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
494 lines
21 KiB
YAML
494 lines
21 KiB
YAML
# =============================================================================
|
||
# AWOOOI CD Pipeline v2.0 (完整沿用 AIOPS 最佳實踐)
|
||
# =============================================================================
|
||
# 優化項目:
|
||
# 1. Pre-flight Check (10s Fail-Fast)
|
||
# 2. Runner 標籤 [self-hosted, harbor, k8s]
|
||
# 3. dorny/paths-filter 精確路徑偵測
|
||
# 4. API + Web 並行建構
|
||
# 5. timeout-minutes 防止卡死
|
||
# 6. Telegram + OpenClaw 通知
|
||
# 7. force_deploy 強制重建選項
|
||
# =============================================================================
|
||
|
||
# Workflow display name shown in the Actions UI.
name: CD

# Triggers: every push to main (documentation-only changes are ignored) plus
# a manual dispatch with three boolean switches.
on:
  push:
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "*.md"
  workflow_dispatch:
    inputs:
      # Force a deploy: the detect-changes job reports api/web as changed.
      force_deploy:
        description: "強制部署 (跳過路徑偵測)"
        type: boolean
        default: false
      # Skip the API image build.
      skip_api:
        description: "跳過 API 建構"
        type: boolean
        default: false
      # Skip the Web image build.
      skip_web:
        description: "跳過 Web 建構"
        type: boolean
        default: false
|
||
|
||
# One run at a time per workflow + ref.
# 2026-03-26: a newer run waits for the in-flight run instead of cancelling
# it, to avoid runner _diag/pages file collisions.
concurrency:
  group: "cd-${{ github.workflow }}-${{ github.ref }}"
  cancel-in-progress: false
|
||
|
||
# Workflow-wide environment, visible to every job and step.
env:
  # Container registry (host:port) and image name prefix.
  REGISTRY: "192.168.0.110:5000"
  IMAGE_PREFIX: "library/awoooi"
  # Build cache directory on the self-hosted runner.
  LOCAL_CACHE_DIR: "/home/wooo/build-cache/awoooi"
  # OpenClaw endpoint that receives pipeline webhooks.
  OPENCLAW_URL: "http://192.168.0.188:8088"
  # OTEL CI/CD monitoring (approved 2026-03-24; corrected 2026-03-28: SignOz runs on .188).
  OTEL_EXPORTER_OTLP_ENDPOINT: "http://192.168.0.188:24318"
  OTEL_SERVICE_NAME: "awoooi-cd"
  OTEL_RESOURCE_ATTRIBUTES: "service.version=${{ github.sha }},deployment.environment=production"
|
||
|
||
jobs:
|
||
# ==================== Pre-flight Check (10s Fail-Fast) ====================
|
||
# Fail-fast gate: cleans runner leftovers, then verifies secrets, registry
# reachability and kubectl before any build job is allowed to start.
pre-flight-check:
  name: "Pre-flight Check"
  runs-on: [self-hosted, harbor, k8s]
  timeout-minutes: 1
  steps:
    # =======================================================================
    # 2026-03-29: permanent fix for runner _diag/pages file collisions.
    # Problem: parallel jobs writing the same diagnostics file caused
    #          "file already exists".
    # Fix: forced cleanup, serialized with flock, then recreate the directory.
    # =======================================================================
    - name: "Clean Runner Diagnostics (Anti-Collision)"
      run: |
        set +e  # cleanup is best-effort; never abort the job because of it

        RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
        DIAG_DIR="$RUNNER_ROOT/_diag"
        PAGES_DIR="$DIAG_DIR/pages"
        LOCK_FILE="/tmp/runner-diag-cleanup.lock"

        echo "🧹 Cleaning Runner diagnostics..."
        echo "  RUNNER_ROOT: $RUNNER_ROOT"
        echo "  PAGES_DIR: $PAGES_DIR"

        # flock (fd 200) ensures only one cleanup runs at a time on this host.
        (
          flock -w 10 200 || { echo "⚠️ Lock timeout, proceeding anyway"; }

          # 1. Reset _diag/pages (the directory the collisions happen in).
          #    rm -rf already removes every *.log inside it.
          if [ -d "$PAGES_DIR" ]; then
            rm -rf "$PAGES_DIR" 2>/dev/null
            mkdir -p "$PAGES_DIR" 2>/dev/null
            echo "  ✅ Cleaned _diag/pages"
          fi

          # 2. Clean RUNNER_TEMP but keep runner-managed entries.
          #    _runner_file_commands backs GITHUB_OUTPUT/GITHUB_ENV; deleting it
          #    produced "Missing file at path: _runner_file_commands/set_output_xxx".
          #    NOTE(review): the runner is also expected to keep
          #    _github_workflow/event.json and _github_home here, so every
          #    _runner_* and _github_* entry is preserved - confirm on this runner.
          find "$RUNNER_TEMP" -mindepth 1 -maxdepth 1 \
            ! -name "_runner_*" ! -name "_github_*" \
            -exec rm -rf {} + 2>/dev/null || true
          echo "  ✅ Cleaned RUNNER_TEMP (preserved _runner_* and _github_*)"

          # 3. Remove Claude worktrees left in the workspace.
          rm -rf .claude/worktrees 2>/dev/null

          # 4. Remove stale (>30 min) temp files under _work.
          find "$RUNNER_ROOT/_work" -name "*.tmp" -mmin +30 -delete 2>/dev/null || true

        ) 200>"$LOCK_FILE"

        echo "✅ Runner cleanup completed"

    # =======================================================================
    # ADR-035: mandatory verification of the Telegram alert chain.
    # 2026-03-29: fixes alerts silently failing because secrets were missing.
    # =======================================================================
    - name: "Check Required Secrets"
      # Secrets are passed through env instead of being interpolated into the
      # script text, so quotes, `$` or backticks in a value cannot break or
      # inject into the shell.
      env:
        HARBOR_USER: ${{ secrets.HARBOR_USER }}
        HARBOR_PASSWORD: ${{ secrets.HARBOR_PASSWORD }}
        KUBE_CONFIG_PROD: ${{ secrets.KUBE_CONFIG_PROD }}
        OPENCLAW_TG_BOT_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
        OPENCLAW_TG_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
      run: |
        MISSING=""
        # Base secrets first, then the Telegram secrets required by ADR-035.
        for name in HARBOR_USER HARBOR_PASSWORD KUBE_CONFIG_PROD \
                    OPENCLAW_TG_BOT_TOKEN OPENCLAW_TG_CHAT_ID; do
          # ${!name} is bash indirect expansion: the value of the variable named $name.
          if [ -z "${!name}" ]; then MISSING="${MISSING}${name} "; fi
        done
        if [ -n "$MISSING" ]; then
          echo "❌ 缺少 Secrets: ${MISSING}"
          echo "🔴 告警鏈路將無法運作!請檢查 GitHub Secrets 配置"
          exit 1
        fi
        echo "✅ Secrets 檢查通過 (含 Telegram)"

    - name: "Check Harbor Connectivity"
      run: |
        # curl already prints "000" via -w when the connection fails; assigning
        # the fallback outside the substitution avoids the doubled "000000".
        HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 \
          "http://${{ env.REGISTRY }}/v2/" 2>/dev/null) || HTTP_CODE="000"
        HTTP_CODE="${HTTP_CODE:-000}"
        # 200 (open) and 401 (auth required) both prove the registry is up.
        if [ "$HTTP_CODE" != "200" ] && [ "$HTTP_CODE" != "401" ]; then
          echo "❌ Harbor 無法連線 (HTTP $HTTP_CODE)"
          exit 1
        fi
        echo "✅ Harbor 連線正常"

    - name: "Check kubectl"
      run: |
        export PATH="/home/wooo/bin:$PATH"
        # command -v is the POSIX builtin; `which` is an optional external tool.
        if ! command -v kubectl > /dev/null 2>&1; then
          echo "❌ kubectl 不在 PATH"
          exit 1
        fi
        echo "✅ kubectl 可用"

    # Best-effort Telegram alert when any step above failed.
    - name: "Notify Pre-flight Failure"
      if: failure()
      env:
        TG_BOT_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
        TG_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
      run: |
        curl -sf -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
          -d chat_id="${TG_CHAT_ID}" \
          -d text="❌ AWOOOI Pre-flight 失敗%0A%0A🔗 ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" || true
|
||
|
||
# ==================== 路徑偵測 (使用 dorny/paths-filter) ====================
|
||
# Works out which deployable parts changed (dorny/paths-filter) and exposes
# the result as job outputs for the build and deploy jobs.
detect-changes:
  name: Detect Changes
  runs-on:
    - self-hosted
    - harbor
    - k8s
  needs: pre-flight-check
  timeout-minutes: 1
  outputs:
    # A manual force_deploy overrides the filter result for api and web.
    api: "${{ inputs.force_deploy == true && 'true' || steps.filter.outputs.api }}"
    web: "${{ inputs.force_deploy == true && 'true' || steps.filter.outputs.web }}"
    k3s-system: "${{ steps.filter.outputs.k3s-system }}"
  steps:
    # 2026-03-29: reset runner diagnostics to prevent parallel-job collisions.
    # $RUNNER_TEMP/* must NOT be deleted: it contains _runner_file_commands.
    - name: "Clean Runner Diagnostics"
      run: |
        RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
        rm -rf "$RUNNER_ROOT/_diag/pages" .claude/worktrees 2>/dev/null || true
        mkdir -p "$RUNNER_ROOT/_diag/pages" 2>/dev/null || true

    - uses: actions/checkout@v4
      with:
        clean: true

    # Path groups; each group name becomes a 'true'/'false' step output.
    - id: filter
      uses: dorny/paths-filter@v3
      with:
        filters: |
          api:
            - 'apps/api/**'
            - 'packages/**'
            - 'pyproject.toml'
          web:
            - 'apps/web/**'
            - 'packages/**'
            - 'package.json'
            - 'pnpm-lock.yaml'
          k3s-system:
            - 'k8s/k3s-system/**'
|
||
|
||
# ==================== 並行建構 API ====================
|
||
# Builds and pushes the API image. Runs when the API paths changed, or when
# neither api nor web changed, unless skip_api was requested.
build-api:
  name: "Build API"
  runs-on: [self-hosted, harbor, k8s]
  needs: detect-changes
  timeout-minutes: 20  # 2026-03-29: raised; the Docker build needs more time
  if: |
    !inputs.skip_api && (
      needs.detect-changes.outputs.api == 'true' ||
      (needs.detect-changes.outputs.api == 'false' && needs.detect-changes.outputs.web == 'false')
    )
  outputs:
    image_tag: ${{ steps.tag.outputs.tag }}
  steps:
    # 2026-03-29: reset runner diagnostics to prevent parallel-job collisions.
    # $RUNNER_TEMP/* must NOT be deleted: it contains _runner_file_commands.
    - name: "Clean Runner Diagnostics"
      run: |
        RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
        rm -rf "$RUNNER_ROOT/_diag/pages" .claude/worktrees 2>/dev/null || true
        mkdir -p "$RUNNER_ROOT/_diag/pages" 2>/dev/null || true

    - uses: actions/checkout@v4

    # Tag format: <short sha>-<run id>.
    - name: Generate tag
      id: tag
      run: echo "tag=$(git rev-parse --short HEAD)-${{ github.run_id }}" >> "$GITHUB_OUTPUT"

    - name: Login to Harbor
      # Credentials go through env rather than being interpolated into the
      # script, so special characters in the password cannot break or inject
      # into the shell command.
      env:
        HARBOR_USER: ${{ secrets.HARBOR_USER }}
        HARBOR_PASSWORD: ${{ secrets.HARBOR_PASSWORD }}
      run: printf '%s' "$HARBOR_PASSWORD" | docker login "${{ env.REGISTRY }}" -u "$HARBOR_USER" --password-stdin

    - name: Build & Push (Native BuildKit)
      env:
        DOCKER_BUILDKIT: 1
      run: |
        docker build --push \
          --tag ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-api:${{ steps.tag.outputs.tag }} \
          --file apps/api/Dockerfile .
        echo "✅ API: ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-api:${{ steps.tag.outputs.tag }}"
|
||
|
||
# ==================== 並行建構 Web ====================
|
||
# Builds and pushes the Web (Next.js) image. Runs when the Web paths changed,
# or when neither api nor web changed, unless skip_web was requested.
build-web:
  name: "Build Web"
  runs-on: [self-hosted, harbor, k8s]
  needs: detect-changes
  timeout-minutes: 20  # 2026-03-29: raised; the Next.js build needs more time
  if: |
    !inputs.skip_web && (
      needs.detect-changes.outputs.web == 'true' ||
      (needs.detect-changes.outputs.api == 'false' && needs.detect-changes.outputs.web == 'false')
    )
  outputs:
    image_tag: ${{ steps.tag.outputs.tag }}
  steps:
    # 2026-03-29: reset runner diagnostics to prevent parallel-job collisions.
    # $RUNNER_TEMP/* must NOT be deleted: it contains _runner_file_commands.
    - name: "Clean Runner Diagnostics"
      run: |
        RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
        rm -rf "$RUNNER_ROOT/_diag/pages" .claude/worktrees 2>/dev/null || true
        mkdir -p "$RUNNER_ROOT/_diag/pages" 2>/dev/null || true

    - uses: actions/checkout@v4

    # Tag format: <short sha>-<run id>.
    - name: Generate tag
      id: tag
      run: echo "tag=$(git rev-parse --short HEAD)-${{ github.run_id }}" >> "$GITHUB_OUTPUT"

    - name: Login to Harbor
      # Credentials go through env rather than being interpolated into the
      # script, so special characters in the password cannot break or inject
      # into the shell command.
      env:
        HARBOR_USER: ${{ secrets.HARBOR_USER }}
        HARBOR_PASSWORD: ${{ secrets.HARBOR_PASSWORD }}
      run: printf '%s' "$HARBOR_PASSWORD" | docker login "${{ env.REGISTRY }}" -u "$HARBOR_USER" --password-stdin

    # Copy the host-side Next.js cache into the build context (best-effort).
    - name: Restore Next.js cache
      run: |
        mkdir -p apps/web/.next/cache
        [ -d "${{ env.LOCAL_CACHE_DIR }}/nextjs" ] && cp -r ${{ env.LOCAL_CACHE_DIR }}/nextjs/* apps/web/.next/cache/ 2>/dev/null || true

    - name: Build & Push (Native BuildKit)
      env:
        DOCKER_BUILDKIT: 1
      run: |
        docker build --push \
          --build-arg NEXT_PUBLIC_API_URL=https://awoooi.wooo.work \
          --build-arg NEXT_PUBLIC_SENTRY_DSN=http://da02d4e5d6542e4d1ed6b2dd6542efeb@192.168.0.110:9000/2 \
          --tag ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-web:${{ steps.tag.outputs.tag }} \
          --file apps/web/Dockerfile .
        echo "✅ Web: ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-web:${{ steps.tag.outputs.tag }}"

    # Copy the workspace cache back to the host-side cache (best-effort).
    # NOTE(review): the build runs inside `docker build`, so the workspace
    # copy of .next/cache is presumably never refreshed by it - confirm that
    # the Dockerfile exports the cache, otherwise this only re-saves the
    # restored files.
    - name: Save Next.js cache
      run: |
        mkdir -p ${{ env.LOCAL_CACHE_DIR }}/nextjs
        [ -d "apps/web/.next/cache" ] && cp -r apps/web/.next/cache/* ${{ env.LOCAL_CACHE_DIR }}/nextjs/ 2>/dev/null || true
|
||
|
||
# ==================== Deploy ====================
|
||
# Deploys to the production cluster once both build jobs have either
# succeeded or been skipped, then verifies the alert chain and notifies.
deploy-prod:
  name: Deploy to Production
  runs-on: [self-hosted, harbor, k8s]
  needs: [detect-changes, build-api, build-web]
  timeout-minutes: 10
  # always() keeps this job eligible when a build job was skipped.
  if: always() && (needs.build-api.result == 'success' || needs.build-api.result == 'skipped') && (needs.build-web.result == 'success' || needs.build-web.result == 'skipped')
  environment: production
  steps:
    # 2026-03-29: reset runner diagnostics to prevent parallel-job collisions.
    # $RUNNER_TEMP/* must NOT be deleted: it contains _runner_file_commands.
    - name: "Clean Runner Diagnostics"
      run: |
        RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
        rm -rf "$RUNNER_ROOT/_diag/pages" .claude/worktrees 2>/dev/null || true
        mkdir -p "$RUNNER_ROOT/_diag/pages" 2>/dev/null || true

    - uses: actions/checkout@v4
      with:
        clean: true

    # Writes the kubeconfig and adds tool directories to PATH for later steps.
    - name: Setup
      # The secret goes through env rather than being interpolated into the script.
      env:
        KUBE_CONFIG_B64: ${{ secrets.KUBE_CONFIG_PROD }}
      run: |
        # umask first so the kubeconfig is never readable by others, not even
        # briefly before the chmod.
        umask 077
        mkdir -p ~/.kube
        printf '%s' "$KUBE_CONFIG_B64" | base64 -d > ~/.kube/config
        chmod 600 ~/.kube/config
        # GITHUB_PATH affects the steps that follow this one.
        echo "/home/wooo/bin" >> "$GITHUB_PATH"
        echo "$HOME/.local/bin" >> "$GITHUB_PATH"

    # Same formula as the build jobs: <short sha>-<run id>.
    - name: Generate tag
      id: tag
      run: echo "tag=$(git rev-parse --short HEAD)-${{ github.run_id }}" >> "$GITHUB_OUTPUT"

    # =======================================================================
    # ADR-035: automatic injection of K8s secrets (Telegram + API keys).
    # 2026-03-29: fixes alerts failing because secrets were missing.
    # Rule: every deployment must make sure the secrets are current.
    # =======================================================================
    - name: "Sync K8s Secrets (ADR-035)"
      # All secret values arrive as environment variables. Nothing is pasted
      # into shell or JSON text, so quotes or backslashes in a value cannot
      # corrupt the command or the patch document.
      env:
        OPENCLAW_TG_BOT_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
        OPENCLAW_TG_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
        DATABASE_URL: ${{ secrets.DATABASE_URL }}
        REDIS_URL: ${{ secrets.REDIS_URL }}
        GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
        CLAUDE_API_KEY: ${{ secrets.CLAUDE_API_KEY }}
        NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
        WEBHOOK_HMAC_SECRET: ${{ secrets.WEBHOOK_HMAC_SECRET }}
        SENTRY_DSN: ${{ secrets.SENTRY_DSN }}
      run: |
        echo "🔐 同步 K8s Secrets..."

        # Merge-patch document for the keys that must always be current
        # (2026-03-29, ADR-036: NVIDIA_API_KEY added). json.dumps does the
        # JSON escaping.
        PATCH=$(python3 -c '
        import json, os
        keys = ("OPENCLAW_TG_BOT_TOKEN", "OPENCLAW_TG_CHAT_ID", "NVIDIA_API_KEY")
        print(json.dumps({"stringData": {k: os.environ.get(k, "") for k in keys}}))
        ')

        # Create the secret when it does not exist yet; otherwise patch it.
        if ! kubectl get secret awoooi-secrets -n awoooi-prod > /dev/null 2>&1; then
          echo "📦 創建 awoooi-secrets..."
          kubectl create secret generic awoooi-secrets -n awoooi-prod \
            --from-literal=OPENCLAW_TG_BOT_TOKEN="$OPENCLAW_TG_BOT_TOKEN" \
            --from-literal=OPENCLAW_TG_CHAT_ID="$OPENCLAW_TG_CHAT_ID" \
            --from-literal=DATABASE_URL="$DATABASE_URL" \
            --from-literal=REDIS_URL="$REDIS_URL" \
            --from-literal=GEMINI_API_KEY="$GEMINI_API_KEY" \
            --from-literal=CLAUDE_API_KEY="$CLAUDE_API_KEY" \
            --from-literal=NVIDIA_API_KEY="$NVIDIA_API_KEY" \
            --from-literal=WEBHOOK_HMAC_SECRET="$WEBHOOK_HMAC_SECRET" \
            --from-literal=SENTRY_DSN="$SENTRY_DSN"
        else
          echo "🔄 更新 awoooi-secrets..."
          kubectl patch secret awoooi-secrets -n awoooi-prod --type='merge' -p="$PATCH"
        fi
        echo "✅ K8s Secrets 同步完成"

    - name: Deploy
      run: |
        cd k8s/awoooi-prod
        TAG="${{ steps.tag.outputs.tag }}"

        # Only retarget images that were actually built in this run
        # (avoids ImagePullBackOff on a tag that was never pushed).
        if [ "${{ needs.build-api.result }}" = "success" ]; then
          echo "📦 更新 API image: ${{ env.IMAGE_PREFIX }}-api:${TAG}"
          kustomize edit set image \
            "192.168.0.110:5000/library/api:IMAGE_TAG_PLACEHOLDER=${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-api:${TAG}"
        else
          echo "⏭️ 跳過 API image 更新 (build skipped)"
        fi

        if [ "${{ needs.build-web.result }}" = "success" ]; then
          echo "📦 更新 Web image: ${{ env.IMAGE_PREFIX }}-web:${TAG}"
          kustomize edit set image \
            "192.168.0.110:5000/library/web:IMAGE_TAG_PLACEHOLDER=${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-web:${TAG}"
        else
          echo "⏭️ 跳過 Web image 更新 (build skipped)"
        fi

        kubectl apply -k .

    # 2026-03-29: NetworkPolicy is applied on its own so kustomize
    # commonLabels cannot break its DNS rule.
    - name: Apply NetworkPolicy
      run: |
        echo "🔒 套用 NetworkPolicy (繞過 kustomize commonLabels)..."
        kubectl apply -f k8s/awoooi-prod/02-network-policy.yaml
        echo "✅ NetworkPolicy 已套用"

    # 2026-03-26: CoreDNS GitOps sync (ADR-026).
    - name: Sync CoreDNS Config
      if: needs.detect-changes.outputs.k3s-system == 'true'
      run: |
        echo "📦 同步 CoreDNS 配置到 K3s..."
        # HelmChartConfig is a regular K8s resource, so it is applied directly.
        kubectl apply -f k8s/k3s-system/coredns-custom.yaml
        echo "✅ CoreDNS 配置已同步"

    # Rollout failures stay non-fatal (as before) but are no longer silent:
    # each one now raises a workflow warning annotation.
    - name: Wait for rollout
      run: |
        kubectl rollout status deployment/awoooi-web -n awoooi-prod --timeout=300s \
          || echo "::warning title=Rollout::awoooi-web rollout did not complete within 300s"
        kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=300s \
          || echo "::warning title=Rollout::awoooi-api rollout did not complete within 300s"

    - name: Health check
      run: |
        sleep 15
        API_POD=$(kubectl get pods -n awoooi-prod -l app=awoooi-api -o jsonpath='{.items[0].metadata.name}')
        # Uses Python httpx: the container has no curl but does have httpx.
        kubectl exec -n awoooi-prod "$API_POD" -c api -- python -c "import httpx; r=httpx.get('http://localhost:8000/api/v1/health', timeout=5); print(r.status_code)" || echo "Health check failed but deployment succeeded"

    # =======================================================================
    # ADR-035: end-to-end verification of the Telegram alert chain.
    # 2026-03-29: after deploying, a test alert must go through successfully.
    # Rule: on failure, alert directly so the team knows the chain is broken.
    # =======================================================================
    - name: "Verify Telegram Alert Chain (ADR-035)"
      env:
        TG_BOT_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
        TG_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
      run: |
        echo "🔍 驗證 Telegram 告警鏈路..."

        # Post a test alert to the AWOOOI API from inside the API pod.
        API_POD=$(kubectl get pods -n awoooi-prod -l app=awoooi-api -o jsonpath='{.items[0].metadata.name}')

        # The Python program is double-quoted on purpose: the shell expands
        # $(date +%s) before Python runs.
        RESULT=$(kubectl exec -n awoooi-prod "$API_POD" -c api -- python -c "
        import httpx
        import json
        r = httpx.post(
            'http://localhost:8000/api/v1/webhooks/alertmanager',
            json={
                'receiver': 'cd-test',
                'status': 'firing',
                'alerts': [{
                    'status': 'firing',
                    'labels': {
                        'alertname': 'CD_E2E_Test_$(date +%s)',
                        'severity': 'info',
                        'namespace': 'cd-test',
                        'deployment': 'e2e-verify'
                    },
                    'annotations': {
                        'summary': 'CD 部署後 E2E 驗證 - ${{ github.sha }}'
                    }
                }]
            },
            timeout=60
        )
        print(json.dumps(r.json()))
        " 2>&1) || RESULT='{"success":false}'

        echo "API Response: $RESULT"

        # Anything that is not JSON with success == true counts as a failure.
        SUCCESS=$(echo "$RESULT" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('success', False))" 2>/dev/null || echo "False")

        if [ "$SUCCESS" != "True" ]; then
          echo "🔴 Telegram 告警鏈路驗證失敗!"
          echo "可能原因: Token 未配置、API 超時、LLM 服務不可用"
          # Alert Telegram directly, bypassing the API under test.
          curl -sf -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
            -d chat_id="${TG_CHAT_ID}" \
            -d text="🔴 *AWOOOI 告警鏈路驗證失敗*%0A%0A部署完成但告警鏈路可能斷裂!%0A請檢查 API Pod 日誌。%0A%0A🔗 ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
            -d parse_mode="Markdown" || true
        else
          echo "✅ Telegram 告警鏈路驗證成功"
        fi

    # Best-effort webhook to OpenClaw with the final job status.
    - name: Notify OpenClaw
      if: always()
      run: |
        STATUS="${{ job.status }}"
        curl -sf -X POST "${{ env.OPENCLAW_URL }}/api/v1/webhook/pipeline" \
          -H "Content-Type: application/json" \
          -d "{
            \"event\": \"completed\",
            \"status\": \"${STATUS}\",
            \"pipeline_id\": \"${{ github.run_id }}\",
            \"pipeline_url\": \"${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}\",
            \"commit\": \"${{ github.sha }}\",
            \"branch\": \"${{ github.ref_name }}\"
          }" || true

    # Best-effort Telegram summary of the deployment result.
    - name: Notify Telegram
      if: always()
      env:
        TG_BOT_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
        TG_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
        JOB_STATUS: ${{ job.status }}
        RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
      run: |
        # Escape the characters that Telegram's legacy Markdown treats as
        # markup, so a commit subject or author name cannot break parsing.
        md_escape() { sed -e 's/[_*`[]/\\&/g'; }

        # Commit info. head -c cuts at 50 BYTES and may split a multi-byte
        # UTF-8 character; iconv -c drops the resulting invalid tail so the
        # message stays valid UTF-8.
        COMMIT_MSG=$(git log -1 --pretty=format:'%s' | head -c 50 | iconv -c -f UTF-8 -t UTF-8 2>/dev/null | md_escape || true)
        AUTHOR=$(git log -1 --pretty=format:'%an' | md_escape || true)
        DEPLOY_TIME=$(TZ='Asia/Taipei' date '+%Y-%m-%d %H:%M')
        SHORT_SHA="${GITHUB_SHA:0:7}"

        if [ "$JOB_STATUS" = "success" ]; then
          TITLE="✅ *AWOOOI 部署成功*"
        else
          TITLE="❌ *AWOOOI 部署失敗*"
        fi

        # Real newlines here; --data-urlencode below does the percent-encoding,
        # so '&', '%' or '+' in the commit subject cannot corrupt the form body.
        MSG=$(printf '%s\n\n📦 版本: %s\n⏰ 時間: %s\n👤 作者: %s\n🔖 SHA: %s\n\n🔗 [查看 Workflow](%s)' \
          "$TITLE" "$COMMIT_MSG" "$DEPLOY_TIME" "$AUTHOR" "$SHORT_SHA" "$RUN_URL")

        curl -sf -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
          --data-urlencode "chat_id=${TG_CHAT_ID}" \
          --data-urlencode "text=${MSG}" \
          --data-urlencode "parse_mode=Markdown" || true
|