# =============================================================================
# AWOOOI Daily E2E Health Check (Phase 18.3)
# =============================================================================
# 🎯 Daily end-to-end verification: Alert → AI → Approval → Execution
#
# Triggers:
#   - Daily at 00:30 UTC (08:30 Taipei)
#   - Manual dispatch
#
# What is verified:
#   - The full E2E tool-call flow
#   - Safe Mode protections
#   - Target resource validation
#
# Failure notification: Telegram

name: Daily E2E Health Check

# Triggers: a daily schedule plus manual dispatch with optional overrides.
on:
  schedule:
    # Every day at 00:30 UTC (08:30 in Taipei).
    - cron: '30 0 * * *'
  workflow_dispatch:
    inputs:
      # Base URL of the API under test.
      api_url:
        description: 'API URL to test'
        required: false
        default: 'http://192.168.0.121:32334'
      # String-valued choice; the job compares it against the literal "true".
      dry_run:
        description: 'Dry run mode (skip actual approval)'
        required: false
        default: 'true'
        type: choice
        options: ['true', 'false']

# All runs share one concurrency group; starting a new run cancels a run of
# this workflow that is still in progress.
concurrency:
  group: daily-e2e
  cancel-in-progress: true

# Workflow-wide defaults shared by every step.
env:
  # Quoted so the version stays a string rather than being read as a float.
  PYTHON_VERSION: '3.11'
  # 2026-03-29 Claude Code: the VIP is unstable, so connect directly to
  # node 121 for now.
  DEFAULT_API_URL: 'http://192.168.0.121:32334'

jobs:
  # Single job: prepare the runner, probe the API health endpoint, run the
  # E2E verification script, write a summary, and notify Telegram on failure.
  e2e-health-check:
    name: E2E Health Check
    runs-on: [self-hosted, harbor, k8s]
    timeout-minutes: 15
    steps:
      # 2026-03-29 Claude Code: work around the _diag/pages file conflict.
      # RUNNER_ROOT is the directory two levels above RUNNER_TEMP.
      - name: "Clean Runner Diagnostics"
        run: |
          RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
          rm -rf "$RUNNER_ROOT/_diag/pages" 2>/dev/null || true
          mkdir -p "$RUNNER_ROOT/_diag/pages" 2>/dev/null || true

      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install uv
        uses: astral-sh/setup-uv@v3

      - name: Install dependencies
        working-directory: apps/api
        run: uv sync

      # Diagnostics only: this step never fails the job.
      - name: Check K8s Status
        run: |
          echo "🔍 檢查 K8s 服務狀態..."

          # Use the runner's local kubectl configuration (same as CD).
          # If the cluster cannot be reached, skip the K8s diagnostics.
          if ! kubectl cluster-info > /dev/null 2>&1; then
            echo "⚠️ kubectl 無法連線叢集,跳過 K8s 診斷"
            echo " (Runner 可能沒有 kubectl 存取權限)"
            exit 0
          fi

          echo "📦 Pod 狀態:"
          kubectl get pods -n awoooi-prod -l app=awoooi-api -o wide || echo "⚠️ kubectl 失敗"

          echo ""
          echo "🌐 Service 狀態:"
          # 2026-03-29 Claude Code: corrected the Service name to awoooi-api-svc.
          kubectl get svc -n awoooi-prod awoooi-api-svc -o wide || echo "⚠️ kubectl 失敗"

          echo ""
          echo "📋 Endpoints:"
          kubectl get endpoints -n awoooi-prod awoooi-api-svc || echo "⚠️ kubectl 失敗"

      # Probes <API_URL>/api/v1/health and exposes the URL as the
      # `working_api_url` output once it answers with HTTP 200.
      - name: Check API Health
        id: health
        env:
          # Passed through the environment instead of being expanded inline,
          # so a manually supplied api_url is never parsed as shell code.
          API_URL: ${{ github.event.inputs.api_url || env.DEFAULT_API_URL }}
        run: |
          # Keep the response body in the job temp directory and delete it
          # before every attempt, so a body left behind by an earlier run on
          # this self-hosted runner is never reported as the current one.
          RESPONSE_FILE="$RUNNER_TEMP/health_response.txt"

          echo "🔗 檢查 API 健康狀態..."
          echo "📍 Runner: $(hostname)"
          echo "🌐 Target: $API_URL"
          echo "🕐 Time: $(date '+%H:%M:%S')"

          # 2026-03-29 Claude Code: retry up to 3 times, 2 seconds apart.
          for i in 1 2 3; do
            echo "📡 嘗試連接 #$i..."
            rm -f "$RESPONSE_FILE"
            # --max-time bounds the whole request so a stalled response cannot
            # eat the job timeout; stderr is discarded so HTTP_CODE holds only
            # the status code written by -w.
            HTTP_CODE=$(curl -s -w "%{http_code}" -o "$RESPONSE_FILE" \
              --connect-timeout 10 --max-time 30 \
              "$API_URL/api/v1/health" 2>/dev/null) || true
            echo "📊 HTTP Code: [$HTTP_CODE]"

            if [ "$HTTP_CODE" = "200" ]; then
              echo "✅ API 可用: $API_URL"
              cat "$RESPONSE_FILE"
              echo ""
              echo "working_api_url=$API_URL" >> "$GITHUB_OUTPUT"
              exit 0
            fi

            if [ "$i" -lt 3 ]; then
              echo "⏳ 等待 2 秒後重試..."
              sleep 2
            fi
          done

          echo "❌ API 無法連線: $API_URL (HTTP $HTTP_CODE)"
          echo "📋 Response:"
          cat "$RESPONSE_FILE" 2>/dev/null || echo "(empty)"
          exit 1

      # Runs the verification script against the URL the health check
      # confirmed, retrying up to 3 times.
      - name: Run E2E Verification
        id: e2e
        working-directory: apps/api
        env:
          PYTHONPATH: ${{ github.workspace }}/apps/api
          WEBHOOK_HMAC_SECRET: ${{ secrets.WEBHOOK_HMAC_SECRET }}
          # URL already verified by the health-check step.
          API_URL: ${{ steps.health.outputs.working_api_url }}
          # Scheduled runs have no inputs, so they fall back to 'true'.
          DRY_RUN: ${{ github.event.inputs.dry_run || 'true' }}
        run: |
          echo "🎯 執行 E2E Tool Call Verification v2.0"
          echo " API: $API_URL (verified working)"
          echo " Dry Run: $DRY_RUN"

          # Any value other than the literal "true" selects the real
          # (non-dry-run) flow.
          if [ "$DRY_RUN" = "true" ]; then
            MODE_FLAG="--dry-run"
          else
            MODE_FLAG="--no-dry-run"
          fi

          # 2026-03-29 Claude Code: retry the E2E run up to 3 times.
          for i in 1 2 3; do
            echo "📡 E2E 嘗試 #$i..."
            if uv run python -m scripts.e2e_tool_call_verification --api-url "$API_URL" "$MODE_FLAG"; then
              echo "✅ E2E 驗證通過"
              exit 0
            fi

            if [ "$i" -lt 3 ]; then
              echo "⏳ 等待 5 秒後重試..."
              sleep 5
            fi
          done

          echo "❌ E2E 驗證失敗 (3 次嘗試後)"
          exit 1

      # Always writes a small result table to the job summary.
      - name: Summary
        if: always()
        env:
          API_URL: ${{ github.event.inputs.api_url || env.DEFAULT_API_URL }}
          E2E_OUTCOME: ${{ steps.e2e.outcome }}
        run: |
          {
            echo "## Daily E2E Health Check 完成"
            echo ""
            echo "| 項目 | 值 |"
            echo "|------|-----|"
            echo "| API URL | $API_URL |"
            echo "| 時間 | $(TZ='Asia/Taipei' date '+%Y-%m-%d %H:%M:%S') |"
            echo "| 結果 | $E2E_OUTCOME |"
          } >> "$GITHUB_STEP_SUMMARY"

      # Phase 18.3.2: send a Telegram notification when the job fails.
      - name: Notify on Failure
        if: failure()
        env:
          TG_BOT_TOKEN: ${{ secrets.OPENCLAW_TG_BOT_TOKEN }}
          TG_CHAT_ID: ${{ secrets.OPENCLAW_TG_CHAT_ID }}
          RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        run: |
          # Without credentials there is nothing to send to; skip quietly
          # instead of posting to a malformed bot URL.
          if [ -z "$TG_BOT_TOKEN" ] || [ -z "$TG_CHAT_ID" ]; then
            echo "::warning::Telegram secrets are not configured; skipping failure notification"
            exit 0
          fi

          SENT_AT=$(TZ='Asia/Taipei' date '+%Y-%m-%d %H:%M')
          # Real newlines plus --data-urlencode replace the hand-written %0A,
          # so every character of the message is encoded correctly.
          MESSAGE=$(printf '❌ Daily E2E Health Check 失敗\n時間: %s\n詳情: %s' "$SENT_AT" "$RUN_URL")

          curl -s --max-time 15 -X POST "https://api.telegram.org/bot${TG_BOT_TOKEN}/sendMessage" \
            --data-urlencode "chat_id=${TG_CHAT_ID}" \
            --data-urlencode "text=${MESSAGE}"