Files
awoooi/.github/workflows/daily-e2e-health.yaml
2026-05-02 15:20:01 +08:00

188 lines
6.4 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# =============================================================================
# AWOOOI Daily E2E Health Check (Phase 18.3)
# =============================================================================
# 🎯 每日端到端驗證: Alert → AI → Approval → Execution
#
# 觸發時機:
# - 每日 00:30 UTC (08:30 台北)
# - 手動觸發
#
# 驗證內容:
# - E2E Tool Call 完整流程
# - Safe Mode 防護機制
# - 目標資源驗證
#
# 失敗通知: Telegram
# Workflow display name.
name: Daily E2E Health Check

on:
  schedule:
    # Every day at 00:30 UTC (08:30 Taipei).
    - cron: "30 0 * * *"
  # Manual trigger; both inputs are optional overrides.
  workflow_dispatch:
    inputs:
      api_url:
        description: "API URL to test"
        required: false
        default: "http://192.168.0.121:32334"
      dry_run:
        description: "Dry run mode (skip actual approval)"
        required: false
        default: "true"
        type: choice
        options:
          - "true"
          - "false"

# All runs share one concurrency group; a newly started run cancels a run
# that is still in progress.
concurrency:
  group: daily-e2e
  cancel-in-progress: true

env:
  PYTHON_VERSION: "3.11"
  # 2026-03-29 Claude Code: the VIP is unstable, so connect directly to
  # node 121 for now.
  DEFAULT_API_URL: "http://192.168.0.121:32334"
jobs:
  e2e-health-check:
    name: E2E Health Check
    # Runs on a self-hosted runner that carries all three labels.
    runs-on:
      - self-hosted
      - harbor
      - k8s
    # The whole job is aborted after 15 minutes.
    timeout-minutes: 15
    steps:
# 2026-03-29 Claude Code: works around a file conflict under _diag/pages.
- name: "Clean Runner Diagnostics"
  run: |
    # The target directory sits two levels above $RUNNER_TEMP.
    RUNNER_ROOT="$(dirname "$(dirname "$RUNNER_TEMP")")"
    DIAG_PAGES="$RUNNER_ROOT/_diag/pages"
    # Best effort: recreate the directory empty; any error is ignored.
    rm -rf "$DIAG_PAGES" 2>/dev/null || true
    mkdir -p "$DIAG_PAGES" 2>/dev/null || true
# Check out the repository into the workspace.
- uses: actions/checkout@v4

# Install the Python version pinned in the workflow-level env.
- name: Setup Python
  uses: actions/setup-python@v5
  with:
    python-version: "${{ env.PYTHON_VERSION }}"

- name: Install uv
  uses: astral-sh/setup-uv@v3

# Sync the API project's dependencies with uv.
- name: Install dependencies
  working-directory: apps/api
  run: uv sync
# Diagnostic only: prints pod / service / endpoint state and never fails the job.
- name: Check K8s Status
  run: |
    NAMESPACE=awoooi-prod
    # 2026-03-29 Claude Code: corrected the Service name to awoooi-api-svc.
    SERVICE=awoooi-api-svc

    # Run one kubectl query; a failure is reported but does not fail the step.
    kube_show() {
      kubectl "$@" || echo "⚠️ kubectl 失敗"
    }

    echo "🔍 檢查 K8s 服務狀態..."
    # Uses the runner's local kubectl configuration (same as CD).
    if kubectl cluster-info > /dev/null 2>&1; then
      echo "📦 Pod 狀態:"
      kube_show get pods -n "$NAMESPACE" -l app=awoooi-api -o wide
      echo ""
      echo "🌐 Service 狀態:"
      kube_show get svc -n "$NAMESPACE" "$SERVICE" -o wide
      echo ""
      echo "📋 Endpoints:"
      kube_show get endpoints -n "$NAMESPACE" "$SERVICE"
    else
      # No cluster access from this runner: skip the diagnostics.
      echo "⚠️ kubectl 無法連線叢集,跳過 K8s 診斷"
      echo " (Runner 可能沒有 kubectl 存取權限)"
      exit 0
    fi
# Probes <API_URL>/api/v1/health up to three times. On HTTP 200 the URL is
# exported as the step output `working_api_url`; otherwise the step fails.
- name: Check API Health
  id: health
  env:
    # Passed through the environment instead of being interpolated into the
    # script text, so a crafted workflow_dispatch input cannot inject shell
    # commands.
    API_URL: "${{ github.event.inputs.api_url || env.DEFAULT_API_URL }}"
  run: |
    # Per-run response file: a fixed /tmp path on a self-hosted runner could
    # show a stale body from an earlier run when curl never connects.
    RESPONSE_FILE="$(mktemp)"
    trap 'rm -f "$RESPONSE_FILE"' EXIT
    HTTP_CODE=""

    echo "🔗 檢查 API 健康狀態..."
    echo "📍 Runner: $(hostname)"
    echo "🌐 Target: $API_URL"
    echo "🕐 Time: $(date '+%H:%M:%S')"

    # 2026-03-29 Claude Code: retry up to 3 times, 2 seconds apart.
    for i in 1 2 3; do
      echo "📡 嘗試連接 #$i..."
      # Drop the previous attempt's body before retrying.
      : > "$RESPONSE_FILE"
      # --max-time bounds the whole request; --connect-timeout alone does
      # not stop a response that stalls after the connection is made.
      HTTP_CODE=$(curl -s -w "%{http_code}" -o "$RESPONSE_FILE" \
        --connect-timeout 10 --max-time 30 \
        "$API_URL/api/v1/health" 2>&1) || true
      echo "📊 HTTP Code: [$HTTP_CODE]"
      if [ "$HTTP_CODE" = "200" ]; then
        echo "✅ API 可用: $API_URL"
        cat "$RESPONSE_FILE"
        echo ""
        echo "working_api_url=$API_URL" >> "$GITHUB_OUTPUT"
        exit 0
      fi
      if [ "$i" -lt 3 ]; then
        echo "⏳ 等待 2 秒後重試..."
        sleep 2
      fi
    done

    echo "❌ API 無法連線: $API_URL (HTTP $HTTP_CODE)"
    echo "📋 Response:"
    if [ -s "$RESPONSE_FILE" ]; then
      cat "$RESPONSE_FILE"
    else
      echo "(empty)"
    fi
    exit 1
# Runs scripts.e2e_tool_call_verification against the URL verified by the
# health step, retrying up to three times before failing the step.
- name: Run E2E Verification
  id: e2e
  working-directory: apps/api
  env:
    PYTHONPATH: ${{ github.workspace }}/apps/api
    WEBHOOK_HMAC_SECRET: ${{ secrets.WEBHOOK_HMAC_SECRET }}
    # Workflow values are handed over as environment variables instead of
    # being interpolated into the script text (avoids shell injection).
    # The WF_ prefix keeps them from colliding with variables the Python
    # process itself might read.
    WF_API_URL: "${{ steps.health.outputs.working_api_url }}"
    WF_DRY_RUN: "${{ github.event.inputs.dry_run || 'true' }}"
  run: |
    # Use the URL that the health check already verified.
    API_URL="$WF_API_URL"
    DRY_RUN="$WF_DRY_RUN"

    # Only the exact value "true" selects dry-run; anything else runs the
    # real flow, as before.
    # NOTE(review): consider requiring an explicit "false" for the real flow.
    if [ "$DRY_RUN" = "true" ]; then
      MODE_FLAG="--dry-run"
    else
      MODE_FLAG="--no-dry-run"
    fi

    echo "🎯 執行 E2E Tool Call Verification v2.0"
    echo " API: $API_URL (verified working)"
    echo " Dry Run: $DRY_RUN"

    # 2026-03-29 Claude Code: retry the E2E run up to 3 times, 5 seconds apart.
    for i in 1 2 3; do
      echo "📡 E2E 嘗試 #$i..."
      if uv run python -m scripts.e2e_tool_call_verification \
        --api-url "$API_URL" "$MODE_FLAG"; then
        echo "✅ E2E 驗證通過"
        exit 0
      fi
      if [ "$i" -lt 3 ]; then
        echo "⏳ 等待 5 秒後重試..."
        sleep 5
      fi
    done

    echo "❌ E2E 驗證失敗 (3 次嘗試後)"
    exit 1
# Always writes a short Markdown table to the job summary, including when
# an earlier step failed.
- name: Summary
  if: always()
  env:
    # Values go through the environment so the user-supplied URL is never
    # spliced into the script text.
    API_URL: "${{ github.event.inputs.api_url || env.DEFAULT_API_URL }}"
    HEALTH_OUTCOME: "${{ steps.health.outcome }}"
    E2E_OUTCOME: "${{ steps.e2e.outcome }}"
  run: |
    {
      echo "## Daily E2E Health Check 完成"
      echo ""
      echo "| 項目 | 值 |"
      echo "|------|-----|"
      echo "| API URL | $API_URL |"
      echo "| 時間 | $(TZ='Asia/Taipei' date '+%Y-%m-%d %H:%M:%S') |"
      # The E2E step is skipped when the health check fails, so the health
      # outcome is reported on its own row.
      echo "| API 健康檢查 | $HEALTH_OUTCOME |"
      echo "| 結果 | $E2E_OUTCOME |"
    } >> "$GITHUB_STEP_SUMMARY"
# Phase 18.3.2: send a Telegram message when any earlier step has failed.
- name: Notify on Failure
  if: failure()
  env:
    # Secrets and the run URL are passed through the environment rather than
    # interpolated into the script text.
    TG_BOT_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
    TG_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
    RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  run: |
    # Without credentials there is nothing to send; warn instead of calling
    # a malformed bot URL.
    if [ -z "$TG_BOT_TOKEN" ] || [ -z "$TG_CHAT_ID" ]; then
      echo "::warning::Telegram secrets are not configured; skipping failure notification"
      exit 0
    fi

    NOW="$(TZ='Asia/Taipei' date '+%Y-%m-%d %H:%M')"
    # Real newlines here; --data-urlencode below encodes them, along with
    # the spaces and non-ASCII text.
    MESSAGE="$(printf '❌ Daily E2E Health Check 失敗\n時間: %s\n詳情: %s' "$NOW" "$RUN_URL")"

    # --fail surfaces Telegram API errors, --max-time bounds the request,
    # and the response JSON is discarded instead of printed to the log.
    curl --silent --show-error --fail --max-time 30 --output /dev/null \
      -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
      --data-urlencode "chat_id=${TG_CHAT_ID}" \
      --data-urlencode "text=${MESSAGE}"