Files
awoooi/.github/workflows/cd.yaml.disabled
OG T 25e69e6870
Some checks failed
E2E Health Check / e2e-health (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled
feat(cicd): ADR-039 完成 - GitHub Actions 停用,Gitea 主倉
- 停用所有 GitHub Actions workflows (.disabled)
- 更新 CLAUDE.md 添加 Gitea CI/CD 章節
- 更新 LOGBOOK.md 記錄遷移狀態
- Gitea 版本: 1.25.5
- Runner 版本: v0.3.1 (host 網絡模式)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-30 01:07:32 +08:00

609 lines
27 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# =============================================================================
# AWOOOI CD Pipeline v2.1 (序列建構修復 Runner 衝突)
# =============================================================================
# 優化項目:
# 1. Pre-flight Check (10s Fail-Fast)
# 2. Runner 標籤 [self-hosted, harbor, k8s]
# 3. dorny/paths-filter 精確路徑偵測
# 4. Web → API 序列建構 (2026-03-29 修復 _runner_file_commands 衝突)
# 5. timeout-minutes 防止卡死
# 6. Telegram + OpenClaw 通知
# 7. force_deploy 強制重建選項
# =============================================================================
# Workflow identity and triggers.
# - push: runs for commits on main; changes limited to docs/** or to *.md
#   files do not trigger a run (in GitHub filter patterns `*` does not cross
#   `/`, so '*.md' covers Markdown files at the repository root only).
# - workflow_dispatch: manual run with three boolean switches.
name: CD
on:
  push:
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "*.md"
  workflow_dispatch:
    inputs:
      # Force a deployment, bypassing path detection.
      force_deploy:
        description: "強制部署 (跳過路徑偵測)"
        type: boolean
        default: false
      # Skip the API image build.
      skip_api:
        description: "跳過 API 建構"
        type: boolean
        default: false
      # Skip the Web image build.
      skip_web:
        description: "跳過 Web 建構"
        type: boolean
        default: false
# =============================================================================
# Concurrency governance (architecture review 2026-03-29, P0)
# Problem: with an unconditional cancel-in-progress: true, a force_deploy run
#          was cancelled by any newer push.
# Fix:     force_deploy runs use their own group ('cd-force-deploy') and are
#          never cancelled; ordinary pushes share a per-workflow/per-ref group
#          and cancel each other (this also avoids runner conflicts).
# =============================================================================
concurrency:
  group: >-
    ${{ inputs.force_deploy && 'cd-force-deploy' || format('cd-{0}-{1}', github.workflow, github.ref) }}
  cancel-in-progress: ${{ !inputs.force_deploy }}
# Workflow-level environment, visible to every job and step below.
env:
  # Image registry host:port and the repository prefix for the built images.
  REGISTRY: "192.168.0.110:5000"
  IMAGE_PREFIX: "library/awoooi"
  # Directory on the self-hosted runner used to keep build caches between runs.
  LOCAL_CACHE_DIR: "/home/wooo/build-cache/awoooi"
  # Base URL of the OpenClaw service that receives pipeline webhooks.
  OPENCLAW_URL: "http://192.168.0.188:8088"
  # OTEL CI/CD monitoring (approved 2026-03-24; corrected 2026-03-28: SignOz runs on .188)
  OTEL_EXPORTER_OTLP_ENDPOINT: "http://192.168.0.188:24318"
  OTEL_SERVICE_NAME: "awoooi-cd"
  OTEL_RESOURCE_ATTRIBUTES: "service.version=${{ github.sha }},deployment.environment=production"
jobs:
# ==================== Pre-flight Check (10s Fail-Fast) ====================
  # Gate job: every other job depends on it (directly or transitively).
  # It verifies that the required secrets exist, that the registry answers,
  # and that kubectl is on the runner, and reports a failure to Telegram.
  #
  # Secrets are handed to the shell through `env:` rather than expanded inline
  # with ${{ }}: an inline value containing a quote, `$` or a backtick would
  # otherwise be interpreted as shell syntax.
  pre-flight-check:
    name: "Pre-flight Check"
    runs-on: [self-hosted, harbor, k8s]
    timeout-minutes: 1
    steps:
      # =====================================================================
      # 2026-03-29: fix for runner _diag/pages file conflicts (v3)
      # Important: only _diag/pages is cleaned; RUNNER_TEMP is left alone
      # because it is shared between jobs and wiping it would break them.
      # =====================================================================
      - name: "Clean Runner Diagnostics"
        run: |
          # The runner root is two directory levels above RUNNER_TEMP.
          RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
          PAGES_DIR="$RUNNER_ROOT/_diag/pages"
          # Recreate _diag/pages empty (the only directory that needs cleaning)
          rm -rf "$PAGES_DIR" 2>/dev/null || true
          mkdir -p "$PAGES_DIR" 2>/dev/null || true
          # Remove Claude worktrees from the local project directory
          rm -rf .claude/worktrees 2>/dev/null || true
          echo "✅ Cleaned _diag/pages"

      # =====================================================================
      # ADR-035: mandatory verification of the Telegram alert chain
      # 2026-03-29: missing secrets previously left alerting silently broken
      # =====================================================================
      - name: "Check Required Secrets"
        env:
          HARBOR_USER: ${{ secrets.HARBOR_USER }}
          HARBOR_PASSWORD: ${{ secrets.HARBOR_PASSWORD }}
          KUBE_CONFIG_PROD: ${{ secrets.KUBE_CONFIG_PROD }}
          OPENCLAW_TG_BOT_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
          OPENCLAW_TG_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
        run: |
          MISSING=""
          # Base secrets first, then the Telegram pair required by ADR-035.
          for NAME in HARBOR_USER HARBOR_PASSWORD KUBE_CONFIG_PROD \
                      OPENCLAW_TG_BOT_TOKEN OPENCLAW_TG_CHAT_ID; do
            # An unset secret arrives as an empty environment variable.
            if [ -z "$(printenv "$NAME")" ]; then
              MISSING="${MISSING}${NAME} "
            fi
          done
          if [ -n "$MISSING" ]; then
            echo "❌ 缺少 Secrets: ${MISSING}"
            echo "🔴 告警鏈路將無法運作!請檢查 GitHub Secrets 配置"
            exit 1
          fi
          echo "✅ Secrets 檢查通過 (含 Telegram)"

      - name: "Check Harbor Connectivity"
        run: |
          # curl itself prints 000 when no HTTP response was received, so the
          # fallback must not append a second "000" to the captured output.
          HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 \
            "http://${{ env.REGISTRY }}/v2/" 2>/dev/null || true)
          HTTP_CODE="${HTTP_CODE:-000}"
          # 200 and 401 both prove the registry API is answering.
          if [ "$HTTP_CODE" != "200" ] && [ "$HTTP_CODE" != "401" ]; then
            echo "❌ Harbor 無法連線 (HTTP $HTTP_CODE)"
            exit 1
          fi
          echo "✅ Harbor 連線正常"

      - name: "Check kubectl"
        run: |
          export PATH="/home/wooo/bin:$PATH"
          if ! command -v kubectl > /dev/null 2>&1; then
            echo "❌ kubectl 不在 PATH"
            exit 1
          fi
          echo "✅ kubectl 可用"

      # Best-effort alert; `|| true` keeps a Telegram outage (or the missing
      # Telegram secrets detected above) from masking the original failure.
      - name: "Notify Pre-flight Failure"
        if: failure()
        env:
          TG_BOT_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
          TG_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
        run: |
          curl -sf -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
            -d chat_id="${TG_CHAT_ID}" \
            -d text="❌ AWOOOI Pre-flight 失敗%0A%0A🔗 ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" || true
# ==================== Wave C.2: Monitoring coverage check (ADR-037) ====================
  # 2026-03-29: runs the repository's monitoring generator in validate-only CI
  # mode after pre-flight; the step messages state the expected threshold
  # (>= 90%). A failure is reported to Telegram on a best-effort basis.
  monitoring-coverage:
    name: "Monitoring Coverage"
    runs-on: [self-hosted, harbor, k8s]
    needs: pre-flight-check
    timeout-minutes: 2
    steps:
      - uses: actions/checkout@v4

      - name: "Check Monitoring Coverage"
        run: |
          python3 ops/monitoring/generate_monitoring.py --validate-only --ci
          echo "✅ 監控覆蓋率檢查通過 (>= 90%)"

      - name: "Notify Coverage Failure"
        if: failure()
        env:
          # Passed via env so the values are never parsed as shell syntax.
          TG_BOT_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
          TG_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
        run: |
          # `-d` sends the body as-is, so it must already be form-encoded:
          # a literal percent sign is %25 (a bare "%%" is a malformed escape),
          # and %0A is a newline.
          curl -sf -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
            -d chat_id="${TG_CHAT_ID}" \
            -d text="⚠️ 監控覆蓋率低於 90%25,請更新 service-registry.yaml%0A%0A🔗 ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" || true
# ==================== Change detection (dorny/paths-filter) ====================
  # Publishes three outputs consumed by the build and deploy jobs:
  #   api / web   - 'true' when force_deploy is set, otherwise the result of
  #                 the matching paths-filter rule
  #   k3s-system  - result of the k3s-system paths-filter rule
  detect-changes:
    name: Detect Changes
    needs:
      - pre-flight-check
      - monitoring-coverage
    runs-on: [self-hosted, harbor, k8s]
    timeout-minutes: 1
    outputs:
      api: ${{ inputs.force_deploy == true && 'true' || steps.filter.outputs.api }}
      web: ${{ inputs.force_deploy == true && 'true' || steps.filter.outputs.web }}
      k3s-system: ${{ steps.filter.outputs.k3s-system }}
    steps:
      # 2026-03-29: clean runner diagnostics files (prevents parallel-job conflicts).
      # Note: $RUNNER_TEMP/* must NOT be deleted, it holds _runner_file_commands.
      - name: "Clean Runner Diagnostics"
        run: |
          RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
          DIAG_PAGES="$RUNNER_ROOT/_diag/pages"
          rm -rf "$DIAG_PAGES" .claude/worktrees 2>/dev/null || true
          mkdir -p "$DIAG_PAGES" 2>/dev/null || true

      - uses: actions/checkout@v4
        with:
          clean: true

      - id: filter
        uses: dorny/paths-filter@v3
        with:
          filters: |
            api:
              - 'apps/api/**'
              - 'packages/**'
              - 'pyproject.toml'
            web:
              - 'apps/web/**'
              - 'packages/**'
              - 'package.json'
              - 'pnpm-lock.yaml'
            k3s-system:
              - 'k8s/k3s-system/**'
# ==================== Serial build: API (must wait for Web) ====================
  # 2026-03-29: builds run serially to fix runner _runner_file_commands
  # conflicts. Root cause: the "Set up job" phase of parallel jobs overwrote
  # each other's RUNNER_TEMP files. Reference: ops/runner/README.md
  #
  # Run condition:
  #   * `needs: build-web` exists only to serialise the two builds. Without a
  #     status-check function in `if`, a *skipped* build-web would implicitly
  #     skip this job as well, so an API-only change would never be built.
  #     `!cancelled()` disables that implicit check; the explicit result tests
  #     below restore the intended gating (detect-changes succeeded, build-web
  #     succeeded or was skipped).
  #   * Build when not disabled via skip_api and either the api filter matched
  #     or neither filter matched.
  build-api:
    name: "Build API"
    runs-on: [self-hosted, harbor, k8s]
    needs: [detect-changes, build-web]
    timeout-minutes: 20
    if: |
      !cancelled() &&
      needs.detect-changes.result == 'success' &&
      (needs.build-web.result == 'success' || needs.build-web.result == 'skipped') &&
      !inputs.skip_api && (
        needs.detect-changes.outputs.api == 'true' ||
        (needs.detect-changes.outputs.api == 'false' && needs.detect-changes.outputs.web == 'false')
      )
    outputs:
      # <short sha>-<run id>, as produced by the "Generate tag" step.
      image_tag: ${{ steps.tag.outputs.tag }}
    steps:
      # 2026-03-29: clean runner diagnostics files (prevents parallel-job conflicts).
      # Note: $RUNNER_TEMP/* must NOT be deleted, it holds _runner_file_commands.
      - name: "Clean Runner Diagnostics"
        run: |
          RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
          rm -rf "$RUNNER_ROOT/_diag/pages" .claude/worktrees 2>/dev/null || true
          mkdir -p "$RUNNER_ROOT/_diag/pages" 2>/dev/null || true

      - uses: actions/checkout@v4

      - name: Generate tag
        id: tag
        run: echo "tag=$(git rev-parse --short HEAD)-${{ github.run_id }}" >> "$GITHUB_OUTPUT"

      - name: Login to Harbor
        env:
          # Passed via env so the credentials are never parsed as shell syntax.
          HARBOR_USER: ${{ secrets.HARBOR_USER }}
          HARBOR_PASSWORD: ${{ secrets.HARBOR_PASSWORD }}
        run: |
          printf '%s' "$HARBOR_PASSWORD" \
            | docker login "${{ env.REGISTRY }}" -u "$HARBOR_USER" --password-stdin

      - name: Build & Push (Native BuildKit)
        env:
          DOCKER_BUILDKIT: 1
        run: |
          docker build --push \
            --tag ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-api:${{ steps.tag.outputs.tag }} \
            --file apps/api/Dockerfile .
          echo "✅ API: ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-api:${{ steps.tag.outputs.tag }}"
# ==================== Build Web (runs first) ====================
  # 2026-03-29: the API build depends on this job so the two run serially.
  # Builds when not disabled via skip_web and either the web filter matched
  # or neither filter matched.
  build-web:
    name: "Build Web"
    runs-on: [self-hosted, harbor, k8s]
    needs: detect-changes
    timeout-minutes: 20
    if: |
      !inputs.skip_web && (
        needs.detect-changes.outputs.web == 'true' ||
        (needs.detect-changes.outputs.api == 'false' && needs.detect-changes.outputs.web == 'false')
      )
    outputs:
      # <short sha>-<run id>, as produced by the "Generate tag" step.
      image_tag: ${{ steps.tag.outputs.tag }}
    steps:
      # 2026-03-29: clean runner diagnostics files (prevents parallel-job conflicts).
      # Note: $RUNNER_TEMP/* must NOT be deleted, it holds _runner_file_commands.
      - name: "Clean Runner Diagnostics"
        run: |
          RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
          rm -rf "$RUNNER_ROOT/_diag/pages" .claude/worktrees 2>/dev/null || true
          mkdir -p "$RUNNER_ROOT/_diag/pages" 2>/dev/null || true

      - uses: actions/checkout@v4

      - name: Generate tag
        id: tag
        run: echo "tag=$(git rev-parse --short HEAD)-${{ github.run_id }}" >> "$GITHUB_OUTPUT"

      - name: Login to Harbor
        env:
          # Passed via env so the credentials are never parsed as shell syntax.
          HARBOR_USER: ${{ secrets.HARBOR_USER }}
          HARBOR_PASSWORD: ${{ secrets.HARBOR_PASSWORD }}
        run: |
          printf '%s' "$HARBOR_PASSWORD" \
            | docker login "${{ env.REGISTRY }}" -u "$HARBOR_USER" --password-stdin

      # Seed apps/web/.next/cache from the runner-local cache directory.
      # Best effort: a missing or empty cache is not an error.
      - name: Restore Next.js cache
        run: |
          mkdir -p apps/web/.next/cache
          [ -d "${{ env.LOCAL_CACHE_DIR }}/nextjs" ] && cp -r ${{ env.LOCAL_CACHE_DIR }}/nextjs/* apps/web/.next/cache/ 2>/dev/null || true

      - name: Build & Push (Native BuildKit)
        env:
          DOCKER_BUILDKIT: 1
        run: |
          docker build --push \
            --build-arg NEXT_PUBLIC_API_URL=https://awoooi.wooo.work \
            --build-arg NEXT_PUBLIC_SENTRY_DSN=http://da02d4e5d6542e4d1ed6b2dd6542efeb@192.168.0.110:9000/2 \
            --tag ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-web:${{ steps.tag.outputs.tag }} \
            --file apps/web/Dockerfile .
          echo "✅ Web: ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-web:${{ steps.tag.outputs.tag }}"

      # Copy apps/web/.next/cache back to the runner-local cache directory.
      # NOTE(review): `docker build` does not write into the host checkout, so
      # this presumably only copies back what the restore step put there;
      # confirm whether the Dockerfile is expected to export the cache.
      - name: Save Next.js cache
        run: |
          mkdir -p ${{ env.LOCAL_CACHE_DIR }}/nextjs
          [ -d "apps/web/.next/cache" ] && cp -r apps/web/.next/cache/* ${{ env.LOCAL_CACHE_DIR }}/nextjs/ 2>/dev/null || true
# ==================== Deploy ====================
  # Syncs Kubernetes secrets, applies the kustomize overlay with the images
  # built in this run, verifies the alert chain and sends notifications.
  #
  # Run condition:
  #   * Build jobs may legitimately be skipped, so the implicit success()
  #     check is replaced by explicit result tests.
  #   * `!cancelled()` is used instead of `always()` so a cancelled run does
  #     not still deploy.
  #   * detect-changes must have succeeded. Otherwise a failed pre-flight
  #     (which skips every later job) would satisfy "both builds skipped"
  #     and this job would sync secrets and restart the API pods.
  #
  # All secrets reach the shell through `env:`; none is expanded inline.
  deploy-prod:
    name: Deploy to Production
    runs-on: [self-hosted, harbor, k8s]
    needs: [detect-changes, build-api, build-web]
    timeout-minutes: 10
    concurrency:
      group: runner-awoooi-cd-mutex
      cancel-in-progress: false
    if: |
      !cancelled() &&
      needs.detect-changes.result == 'success' &&
      (needs.build-api.result == 'success' || needs.build-api.result == 'skipped') &&
      (needs.build-web.result == 'success' || needs.build-web.result == 'skipped')
    environment: production
    steps:
      # 2026-03-29: clean runner diagnostics files (prevents parallel-job conflicts).
      # Note: $RUNNER_TEMP/* must NOT be deleted, it holds _runner_file_commands.
      - name: "Clean Runner Diagnostics"
        run: |
          RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
          rm -rf "$RUNNER_ROOT/_diag/pages" .claude/worktrees 2>/dev/null || true
          mkdir -p "$RUNNER_ROOT/_diag/pages" 2>/dev/null || true

      - uses: actions/checkout@v4
        with:
          clean: true

      # Writes the kubeconfig and extends PATH for the following steps.
      # (PATH changes only take effect in later steps via GITHUB_PATH; this
      # step itself runs nothing that needs them.)
      - name: Setup
        env:
          KUBE_CONFIG_PROD: ${{ secrets.KUBE_CONFIG_PROD }}
        run: |
          mkdir -p ~/.kube
          echo "$KUBE_CONFIG_PROD" | base64 -d > ~/.kube/config
          chmod 600 ~/.kube/config
          echo "/home/wooo/bin" >> "$GITHUB_PATH"
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      # Same formula as the build jobs, so the tag matches the pushed images.
      - name: Generate tag
        id: tag
        run: echo "tag=$(git rev-parse --short HEAD)-${{ github.run_id }}" >> "$GITHUB_OUTPUT"

      # =====================================================================
      # ADR-035: automatic injection of K8s secrets (Telegram + API keys)
      # 2026-03-29: missing secrets previously left alerting silently broken
      # Rule: every deployment must make sure the secrets are current
      # =====================================================================
      - name: "Sync K8s Secrets (ADR-035)"
        env:
          OPENCLAW_TG_BOT_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
          OPENCLAW_TG_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
          DATABASE_URL: ${{ secrets.DATABASE_URL }}
          REDIS_URL: ${{ secrets.REDIS_URL }}
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          CLAUDE_API_KEY: ${{ secrets.CLAUDE_API_KEY }}
          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
          WEBHOOK_HMAC_SECRET: ${{ secrets.WEBHOOK_HMAC_SECRET }}
          SENTRY_DSN: ${{ secrets.SENTRY_DSN }}
          SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
        run: |
          echo "🔐 同步 K8s Secrets..."
          # Create the secret when it does not exist yet, otherwise patch it.
          if ! kubectl get secret awoooi-secrets -n awoooi-prod > /dev/null 2>&1; then
            echo "📦 創建 awoooi-secrets..."
            kubectl create secret generic awoooi-secrets -n awoooi-prod \
              --from-literal=OPENCLAW_TG_BOT_TOKEN="$OPENCLAW_TG_BOT_TOKEN" \
              --from-literal=OPENCLAW_TG_CHAT_ID="$OPENCLAW_TG_CHAT_ID" \
              --from-literal=DATABASE_URL="$DATABASE_URL" \
              --from-literal=REDIS_URL="$REDIS_URL" \
              --from-literal=GEMINI_API_KEY="$GEMINI_API_KEY" \
              --from-literal=CLAUDE_API_KEY="$CLAUDE_API_KEY" \
              --from-literal=NVIDIA_API_KEY="$NVIDIA_API_KEY" \
              --from-literal=WEBHOOK_HMAC_SECRET="$WEBHOOK_HMAC_SECRET" \
              --from-literal=SENTRY_DSN="$SENTRY_DSN" \
              --from-literal=SENTRY_AUTH_TOKEN="$SENTRY_AUTH_TOKEN"
          else
            echo "🔄 更新 awoooi-secrets..."
            # Merge-patch only the keys below so they are always current
            # (2026-03-29: ADR-036 NVIDIA + GEMINI + SENTRY secrets).
            # The JSON is produced by json.dumps so that quotes or backslashes
            # inside a secret value cannot break the patch document.
            PATCH=$(python3 -c 'import json, os; print(json.dumps({"stringData": {k: os.environ.get(k, "") for k in ("OPENCLAW_TG_BOT_TOKEN", "OPENCLAW_TG_CHAT_ID", "GEMINI_API_KEY", "NVIDIA_API_KEY", "SENTRY_AUTH_TOKEN")}}))')
            kubectl patch secret awoooi-secrets -n awoooi-prod --type='merge' -p="$PATCH"
          fi
          echo "✅ K8s Secrets 同步完成"
          # 2026-03-29: pods must be restarted to pick up updated secret values
          echo "🔄 重啟 API Pod 以讀取新 Secrets..."
          kubectl rollout restart deployment/awoooi-api -n awoooi-prod || true
          kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=120s || echo "⚠️ Rollout 超時,繼續部署"

      # =====================================================================
      # 2026-03-29 architecture review: P0 fix - double-skip guard
      # Problem: when both the API and the Web build are skipped, the overlay
      #          still contains IMAGE_TAG_PLACEHOLDER, so kubectl apply would
      #          deploy an invalid image -> ImagePullBackOff
      # Fix:     detect the double skip and do not apply the Deployment
      # =====================================================================
      - name: Deploy
        run: |
          cd k8s/awoooi-prod
          TAG="${{ steps.tag.outputs.tag }}"
          IMAGES_UPDATED=0
          # Only rewrite images that were actually built (avoids ImagePullBackOff)
          if [ "${{ needs.build-api.result }}" = "success" ]; then
            echo "📦 更新 API image: ${{ env.IMAGE_PREFIX }}-api:${TAG}"
            kustomize edit set image \
              "192.168.0.110:5000/library/api:IMAGE_TAG_PLACEHOLDER=${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-api:${TAG}"
            IMAGES_UPDATED=$((IMAGES_UPDATED + 1))
          else
            echo "⏭️ 跳過 API image 更新 (build skipped)"
          fi
          if [ "${{ needs.build-web.result }}" = "success" ]; then
            echo "📦 更新 Web image: ${{ env.IMAGE_PREFIX }}-web:${TAG}"
            kustomize edit set image \
              "192.168.0.110:5000/library/web:IMAGE_TAG_PLACEHOLDER=${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-web:${TAG}"
            IMAGES_UPDATED=$((IMAGES_UPDATED + 1))
          else
            echo "⏭️ 跳過 Web image 更新 (build skipped)"
          fi
          # P0 guard: with no image updated, never run kubectl apply.
          # `exit 0` ends this step only; the following steps still run.
          if [ "$IMAGES_UPDATED" -eq 0 ]; then
            echo "⚠️ 雙 Build 都跳過,跳過 Deployment apply (防止 ImagePullBackOff)"
            echo "   只同步了 Secrets/ConfigMapPod 保持現有版本"
            exit 0
          fi
          kubectl apply -k .

      # 2026-03-29: NetworkPolicy is applied on its own (kustomize
      # commonLabels would otherwise break the DNS rule)
      - name: Apply NetworkPolicy
        run: |
          echo "🔒 套用 NetworkPolicy (繞過 kustomize commonLabels)..."
          kubectl apply -f k8s/awoooi-prod/02-network-policy.yaml
          echo "✅ NetworkPolicy 已套用"

      # 2026-03-26: CoreDNS GitOps sync (ADR-026)
      - name: Sync CoreDNS Config
        if: needs.detect-changes.outputs.k3s-system == 'true'
        run: |
          echo "📦 同步 CoreDNS 配置到 K3s..."
          # HelmChartConfig is a regular K8s resource, so it is applied directly
          kubectl apply -f k8s/k3s-system/coredns-custom.yaml
          echo "✅ CoreDNS 配置已同步"

      # Best effort: a rollout timeout does not fail the job.
      - name: Wait for rollout
        run: |
          kubectl rollout status deployment/awoooi-web -n awoooi-prod --timeout=300s || true
          kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=300s || true

      - name: Health check
        run: |
          sleep 15
          API_POD=$(kubectl get pods -n awoooi-prod -l app=awoooi-api -o jsonpath='{.items[0].metadata.name}')
          # Probe with Python httpx (the container has no curl but ships httpx)
          kubectl exec -n awoooi-prod "$API_POD" -c api -- python -c "import httpx; r=httpx.get('http://localhost:8000/api/v1/health', timeout=5); print(r.status_code)" || echo "Health check failed but deployment succeeded"

      # =====================================================================
      # ADR-037 Wave B.2: Alert Chain Smoke Test
      # 2026-03-29: end-to-end check of the alert chain (Wave A.6 script)
      # Runs four requests inside the API pod; a failure sends a Telegram
      # warning but does not fail the deployment.
      # =====================================================================
      - name: "Alert Chain Smoke Test (ADR-037)"
        env:
          TG_BOT_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
          TG_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
        run: |
          echo "🔍 執行告警鏈路 Smoke Test..."
          API_POD=$(kubectl get pods -n awoooi-prod -l app=awoooi-api -o jsonpath='{.items[0].metadata.name}')
          # Exercise each webhook endpoint
          kubectl exec -n awoooi-prod "$API_POD" -c api -- python -c "
          import httpx
          import sys
          BASE = 'http://localhost:8000'
          TIMEOUT = 30
          results = []
          # 1. Health
          try:
              r = httpx.get(f'{BASE}/api/v1/health', timeout=TIMEOUT)
              results.append(('health', r.status_code == 200))
          except Exception as e:
              results.append(('health', False))
              print(f'Health: {e}')
          # 2. Alertmanager Webhook
          try:
              r = httpx.post(f'{BASE}/api/v1/webhooks/alertmanager', json={
                  'version': '4', 'status': 'firing',
                  'alerts': [{'status': 'firing', 'labels': {'alertname': 'E2E_CD_TEST', 'severity': 'info'}}]
              }, timeout=TIMEOUT)
              results.append(('alertmanager', r.status_code == 200))
          except Exception as e:
              results.append(('alertmanager', False))
              print(f'Alertmanager: {e}')
          # 3. SignOz Webhook Health
          try:
              r = httpx.get(f'{BASE}/api/v1/webhooks/signoz/health', timeout=TIMEOUT)
              results.append(('signoz_health', r.status_code == 200))
          except Exception as e:
              results.append(('signoz_health', False))
              print(f'SignOz Health: {e}')
          # 4. SignOz Webhook POST (P0-1 fix 2026-03-29)
          try:
              r = httpx.post(f'{BASE}/api/v1/webhooks/signoz/alert', json={
                  'alertname': 'E2E_CD_TEST', 'status': 'firing',
                  'labels': {'severity': 'info', 'service_name': 'cd-test'},
                  'annotations': {'summary': 'CD Pipeline E2E Test'}
              }, timeout=TIMEOUT)
              results.append(('signoz_post', r.status_code == 200))
          except Exception as e:
              results.append(('signoz_post', False))
              print(f'SignOz POST: {e}')
          # Summary
          passed = sum(1 for _, ok in results if ok)
          total = len(results)
          print(f'Smoke Test: {passed}/{total} passed')
          for name, ok in results:
              print(f'  {\"✅\" if ok else \"❌\"} {name}')
          sys.exit(0 if passed == total else 1)
          " || {
            echo "⚠️ Smoke Test 部分失敗,但不阻擋部署"
            # Send a warning
            curl -sf -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
              -d chat_id="${TG_CHAT_ID}" \
              -d text="⚠️ *AWOOOI Alert Chain Smoke Test 部分失敗*%0A%0A部署已完成但部分 Webhook 可能有問題。%0A%0A🔗 ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
              -d parse_mode="Markdown" || true
          }

      # =====================================================================
      # ADR-035: end-to-end verification of the Telegram alert chain
      # 2026-03-29: after deployment, posts a test alert to the API and reads
      # the `success` field of the JSON reply.
      # Rule: on failure, alert directly so the team knows the chain is broken
      # =====================================================================
      - name: "Verify Telegram Alert Chain (ADR-035)"
        env:
          TG_BOT_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
          TG_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
        run: |
          echo "🔍 驗證 Telegram 告警鏈路..."
          # Post a test alert to the AWOOOI API from inside the API pod.
          # The alert name gets a timestamp from the runner shell, which
          # expands it before Python runs.
          API_POD=$(kubectl get pods -n awoooi-prod -l app=awoooi-api -o jsonpath='{.items[0].metadata.name}')
          RESULT=$(kubectl exec -n awoooi-prod "$API_POD" -c api -- python -c "
          import httpx
          import json
          r = httpx.post(
              'http://localhost:8000/api/v1/webhooks/alertmanager',
              json={
                  'receiver': 'cd-test',
                  'status': 'firing',
                  'alerts': [{
                      'status': 'firing',
                      'labels': {
                          'alertname': 'CD_E2E_Test_$(date +%s)',
                          'severity': 'info',
                          'namespace': 'cd-test',
                          'deployment': 'e2e-verify'
                      },
                      'annotations': {
                          'summary': 'CD 部署後 E2E 驗證 - ${{ github.sha }}'
                      }
                  }]
              },
              timeout=60
          )
          print(json.dumps(r.json()))
          " 2>&1) || RESULT='{"success":false}'
          echo "API Response: $RESULT"
          # Anything that is not parseable JSON with success == true counts as failure
          SUCCESS=$(echo "$RESULT" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('success', False))" 2>/dev/null || echo "False")
          if [ "$SUCCESS" != "True" ]; then
            echo "🔴 Telegram 告警鏈路驗證失敗!"
            echo "可能原因: Token 未配置、API 超時、LLM 服務不可用"
            # Alert Telegram directly (bypassing the API)
            curl -sf -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
              -d chat_id="${TG_CHAT_ID}" \
              -d text="🔴 *AWOOOI 告警鏈路驗證失敗*%0A%0A部署完成但告警鏈路可能斷裂%0A請檢查 API Pod 日誌。%0A%0A🔗 ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
              -d parse_mode="Markdown" || true
          else
            echo "✅ Telegram 告警鏈路驗證成功"
          fi

      # Best-effort pipeline webhook; runs whatever the job outcome.
      - name: Notify OpenClaw
        if: always()
        run: |
          STATUS="${{ job.status }}"
          curl -sf -X POST "${{ env.OPENCLAW_URL }}/api/v1/webhook/pipeline" \
            -H "Content-Type: application/json" \
            -d "{
              \"event\": \"completed\",
              \"status\": \"${STATUS}\",
              \"pipeline_id\": \"${{ github.run_id }}\",
              \"pipeline_url\": \"${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}\",
              \"commit\": \"${{ github.sha }}\",
              \"branch\": \"${{ github.ref_name }}\"
            }" || true

      # Final status message, sent whatever the job outcome.
      - name: Notify Telegram
        if: always()
        env:
          TG_BOT_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
          TG_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: |
          # Collect commit information.
          # The subject is cut to 50 *characters*: cutting by bytes (head -c)
          # can split a multi-byte UTF-8 character and produce invalid text.
          COMMIT_MSG=$(git log -1 --pretty=format:'%s' \
            | python3 -c 'import sys; sys.stdout.buffer.write(sys.stdin.buffer.read().decode("utf-8", "replace")[:50].encode("utf-8"))')
          AUTHOR=$(git log -1 --pretty=format:'%an')
          DEPLOY_TIME=$(TZ='Asia/Taipei' date '+%Y-%m-%d %H:%M')
          SHORT_SHA=$(echo "${{ github.sha }}" | head -c 7)
          if [ "${{ job.status }}" = "success" ]; then
            TITLE="✅ *AWOOOI 部署成功*"
          else
            TITLE="❌ *AWOOOI 部署失敗*"
          fi
          MSG=$(printf '%s\n\n📦 版本: %s\n⏰ 時間: %s\n👤 作者: %s\n🔖 SHA: %s\n\n🔗 [查看 Workflow](%s)' \
            "$TITLE" "$COMMIT_MSG" "$DEPLOY_TIME" "$AUTHOR" "$SHORT_SHA" "$RUN_URL")
          # --data-urlencode encodes the values, so characters such as & % +
          # in the commit subject or author name cannot corrupt the form body.
          # NOTE(review): Markdown metacharacters (_ * [) in the subject or
          # author are still passed to Telegram's Markdown parser unescaped.
          curl -sf -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
            --data-urlencode "chat_id=${TG_CHAT_ID}" \
            --data-urlencode "text=${MSG}" \
            --data-urlencode "parse_mode=Markdown" || true

      # =====================================================================
      # 2026-03-29: Gitea mirror (B2 backup strategy)
      # Rule: GitHub is the primary repository, Gitea a read-only backup (no CI)
      # The push targets the URL directly instead of a named remote, so the
      # token is not written to .git/config in the runner workspace and a
      # stale remote from an earlier run cannot be reused.
      # NOTE(review): actions/checkout fetches a shallow clone by default; if
      # the mirror is missing ancestor commits this push may be rejected.
      # Consider fetch-depth: 0 on the checkout step.
      # =====================================================================
      - name: "Mirror to Gitea (B2 Backup)"
        if: success()
        env:
          GITEA_MIRROR_TOKEN: ${{ secrets.GITEA_MIRROR_TOKEN }}
        run: |
          echo "🪞 Mirror to Gitea..."
          git push "http://wooo:${GITEA_MIRROR_TOKEN}@192.168.0.110:3001/wooo/awoooi.git" main --force \
            || echo "⚠️ Gitea mirror failed (non-blocking)"