問題: CI/CD workflows 指向錯誤的 OTEL 端點 - ci.yaml: 121:4318 → 188:24318 - cd.yaml: 121:4318 → 188:24318 SigNoz 實際運行在 192.168.0.188 (AI+Web 中心) 更新: - Skill 04 v1.8 加入可觀測性端點規範 - LOGBOOK 記錄配置修正 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
340 lines
13 KiB
YAML
340 lines
13 KiB
YAML
# =============================================================================
|
||
# AWOOOI CD Pipeline v2.0 (完整沿用 AIOPS 最佳實踐)
|
||
# =============================================================================
|
||
# 優化項目:
|
||
# 1. Pre-flight Check (10s Fail-Fast)
|
||
# 2. Runner 標籤 [self-hosted, harbor, k8s]
|
||
# 3. dorny/paths-filter 精確路徑偵測
|
||
# 4. API + Web 並行建構
|
||
# 5. timeout-minutes 防止卡死
|
||
# 6. Telegram + OpenClaw 通知
|
||
# 7. force_deploy 強制重建選項
|
||
# =============================================================================
|
||
|
||
name: CD

# Triggers: pushes to main (except docs/ and root-level *.md changes) and
# manual dispatch with optional force/skip switches.
on:
  push:
    branches:
      - main
    paths-ignore:
      - "docs/**"
      - "*.md"
  workflow_dispatch:
    inputs:
      force_deploy:
        description: "強制部署 (跳過路徑偵測)"
        type: boolean
        default: false
      skip_api:
        description: "跳過 API 建構"
        type: boolean
        default: false
      skip_web:
        description: "跳過 Web 建構"
        type: boolean
        default: false

concurrency:
  group: cd-${{ github.workflow }}-${{ github.ref }}
  # 2026-03-26: queue new runs instead of cancelling the running one, to avoid
  # Runner _diag/pages file conflicts.
  cancel-in-progress: false

env:
  REGISTRY: "192.168.0.110:5000"
  IMAGE_PREFIX: library/awoooi
  LOCAL_CACHE_DIR: /home/wooo/build-cache/awoooi
  OPENCLAW_URL: "http://192.168.0.188:8088"
  # OTEL CI/CD monitoring (approved 2026-03-24; corrected 2026-03-28: SigNoz
  # runs on 192.168.0.188).
  OTEL_EXPORTER_OTLP_ENDPOINT: "http://192.168.0.188:24318"
  OTEL_SERVICE_NAME: awoooi-cd
  OTEL_RESOURCE_ATTRIBUTES: service.version=${{ github.sha }},deployment.environment=production
|
||
|
||
jobs:
|
||
# ==================== Pre-flight Check (10s Fail-Fast) ====================
# Gate job that every other job depends on (directly or transitively).
# Verifies required secrets, Harbor reachability and kubectl availability,
# and sends a Telegram alert if any of those checks fails.
pre-flight-check:
  name: "Pre-flight Check"
  runs-on: [self-hosted, harbor, k8s]
  timeout-minutes: 1
  steps:
    # 2026-03-26: clean temp directories to avoid file conflicts (pages + temp)
    - name: "Clean Runner temp"
      run: |
        RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
        # ${RUNNER_TEMP:?} aborts the step instead of expanding to "/*" should
        # the variable ever be empty or unset.
        rm -rf "${RUNNER_TEMP:?}"/* 2>/dev/null || true
        rm -rf "$RUNNER_ROOT/_diag/pages"/* 2>/dev/null || true
        rm -rf .claude/worktrees 2>/dev/null || true

    - name: "Check Required Secrets"
      # Secrets are handed over through env instead of being interpolated into
      # the script text, so quotes or shell metacharacters inside a value
      # cannot change what the script does.
      env:
        HARBOR_USER: ${{ secrets.HARBOR_USER }}
        HARBOR_PASSWORD: ${{ secrets.HARBOR_PASSWORD }}
        KUBE_CONFIG_PROD: ${{ secrets.KUBE_CONFIG_PROD }}
      run: |
        MISSING=""
        if [ -z "$HARBOR_USER" ]; then MISSING="${MISSING}HARBOR_USER "; fi
        if [ -z "$HARBOR_PASSWORD" ]; then MISSING="${MISSING}HARBOR_PASSWORD "; fi
        if [ -z "$KUBE_CONFIG_PROD" ]; then MISSING="${MISSING}KUBE_CONFIG_PROD "; fi
        if [ -n "$MISSING" ]; then
          echo "❌ 缺少 Secrets: ${MISSING}"
          exit 1
        fi
        echo "✅ Secrets 檢查通過"

    - name: "Check Harbor Connectivity"
      run: |
        # curl already prints 000 through -w when the connection fails, so only
        # its exit status is suppressed here; echoing another "000" would make
        # the reported code "000000".
        HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 \
          "http://${{ env.REGISTRY }}/v2/" 2>/dev/null || true)
        # Covers the case where curl produced no output at all.
        HTTP_CODE="${HTTP_CODE:-000}"
        # 401 is accepted as "reachable": the endpoint answered, just unauthenticated.
        if [ "$HTTP_CODE" != "200" ] && [ "$HTTP_CODE" != "401" ]; then
          echo "❌ Harbor 無法連線 (HTTP $HTTP_CODE)"
          exit 1
        fi
        echo "✅ Harbor 連線正常"

    - name: "Check kubectl"
      run: |
        export PATH="/home/wooo/bin:$PATH"
        # command -v is the POSIX way to test for an executable on PATH.
        if ! command -v kubectl > /dev/null 2>&1; then
          echo "❌ kubectl 不在 PATH"
          exit 1
        fi
        echo "✅ kubectl 可用"

    # Best-effort alert: "|| true" keeps a Telegram outage from masking the
    # original failure.
    - name: "Notify Pre-flight Failure"
      if: failure()
      run: |
        curl -sf -X POST "https://api.telegram.org/bot${{ secrets.OPENCLAW_TG_BOT_TOKEN }}/sendMessage" \
          -d chat_id="${{ secrets.OPENCLAW_TG_CHAT_ID }}" \
          -d text="❌ AWOOOI Pre-flight 失敗%0A%0A🔗 ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" || true
|
||
|
||
# ==================== Path detection (dorny/paths-filter) ====================
# Exposes three outputs for downstream jobs: api, web and k3s-system.
# api and web are forced to 'true' when the force_deploy input is set;
# otherwise all three come from the paths-filter step.
detect-changes:
  name: Detect Changes
  runs-on: [self-hosted, harbor, k8s]
  needs: pre-flight-check
  timeout-minutes: 1
  outputs:
    api: ${{ inputs.force_deploy == true && 'true' || steps.filter.outputs.api }}
    web: ${{ inputs.force_deploy == true && 'true' || steps.filter.outputs.web }}
    k3s-system: ${{ steps.filter.outputs.k3s-system }}
  steps:
    # 2026-03-26: clean temp directories (temp + pages)
    - name: "Clean Runner temp"
      run: |
        RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
        # ${RUNNER_TEMP:?} aborts the step instead of expanding to "/*" should
        # the variable ever be empty or unset.
        rm -rf "${RUNNER_TEMP:?}"/* "$RUNNER_ROOT/_diag/pages"/* .claude/worktrees 2>/dev/null || true

    - uses: actions/checkout@v4
      with:
        clean: true

    - uses: dorny/paths-filter@v3
      id: filter
      with:
        # packages/** is listed under both api and web, so a change there
        # marks both as changed.
        filters: |
          api:
            - 'apps/api/**'
            - 'packages/**'
            - 'pyproject.toml'
          web:
            - 'apps/web/**'
            - 'packages/**'
            - 'package.json'
            - 'pnpm-lock.yaml'
          k3s-system:
            - 'k8s/k3s-system/**'
|
||
|
||
# ==================== Parallel build: API ====================
# Builds and pushes the API image. Runs unless skip_api is set, when either the
# api filter matched or neither the api nor the web filter matched.
build-api:
  name: "Build API"
  runs-on: [self-hosted, harbor, k8s]
  needs: detect-changes
  timeout-minutes: 10
  if: |
    !inputs.skip_api && (
      needs.detect-changes.outputs.api == 'true' ||
      (needs.detect-changes.outputs.api == 'false' && needs.detect-changes.outputs.web == 'false')
    )
  outputs:
    image_tag: ${{ steps.tag.outputs.tag }}
  steps:
    # 2026-03-26: clean temp directories (temp + pages)
    - name: "Clean Runner temp"
      run: |
        RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
        # ${RUNNER_TEMP:?} aborts the step instead of expanding to "/*" should
        # the variable ever be empty or unset.
        rm -rf "${RUNNER_TEMP:?}"/* "$RUNNER_ROOT/_diag/pages"/* .claude/worktrees 2>/dev/null || true

    - uses: actions/checkout@v4

    # Tag format: <short commit sha>-<workflow run id>
    - name: Generate tag
      id: tag
      run: echo "tag=$(git rev-parse --short HEAD)-${{ github.run_id }}" >> "$GITHUB_OUTPUT"

    - name: Login to Harbor
      # Credentials go through env so that special characters in them are
      # never parsed as shell syntax.
      env:
        HARBOR_USER: ${{ secrets.HARBOR_USER }}
        HARBOR_PASSWORD: ${{ secrets.HARBOR_PASSWORD }}
      run: |
        printf '%s\n' "$HARBOR_PASSWORD" \
          | docker login ${{ env.REGISTRY }} -u "$HARBOR_USER" --password-stdin

    - name: Build & Push (Native BuildKit)
      env:
        DOCKER_BUILDKIT: 1
      run: |
        docker build --push \
          --tag ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-api:${{ steps.tag.outputs.tag }} \
          --file apps/api/Dockerfile .
        echo "✅ API: ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-api:${{ steps.tag.outputs.tag }}"
|
||
|
||
# ==================== Parallel build: Web ====================
# Builds and pushes the Web image. Runs unless skip_web is set, when either the
# web filter matched or neither the api nor the web filter matched.
build-web:
  name: "Build Web"
  runs-on: [self-hosted, harbor, k8s]
  needs: detect-changes
  timeout-minutes: 15
  if: |
    !inputs.skip_web && (
      needs.detect-changes.outputs.web == 'true' ||
      (needs.detect-changes.outputs.api == 'false' && needs.detect-changes.outputs.web == 'false')
    )
  outputs:
    image_tag: ${{ steps.tag.outputs.tag }}
  steps:
    # 2026-03-26: clean temp directories (temp + pages)
    - name: "Clean Runner temp"
      run: |
        RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
        # ${RUNNER_TEMP:?} aborts the step instead of expanding to "/*" should
        # the variable ever be empty or unset.
        rm -rf "${RUNNER_TEMP:?}"/* "$RUNNER_ROOT/_diag/pages"/* .claude/worktrees 2>/dev/null || true

    - uses: actions/checkout@v4

    # Tag format: <short commit sha>-<workflow run id>
    - name: Generate tag
      id: tag
      run: echo "tag=$(git rev-parse --short HEAD)-${{ github.run_id }}" >> "$GITHUB_OUTPUT"

    - name: Login to Harbor
      # Credentials go through env so that special characters in them are
      # never parsed as shell syntax.
      env:
        HARBOR_USER: ${{ secrets.HARBOR_USER }}
        HARBOR_PASSWORD: ${{ secrets.HARBOR_PASSWORD }}
      run: |
        printf '%s\n' "$HARBOR_PASSWORD" \
          | docker login ${{ env.REGISTRY }} -u "$HARBOR_USER" --password-stdin

    # Best-effort: a missing or empty cache directory is not an error.
    - name: Restore Next.js cache
      run: |
        mkdir -p apps/web/.next/cache
        [ -d "${{ env.LOCAL_CACHE_DIR }}/nextjs" ] && cp -r "${{ env.LOCAL_CACHE_DIR }}/nextjs"/* apps/web/.next/cache/ 2>/dev/null || true

    - name: Build & Push (Native BuildKit)
      env:
        DOCKER_BUILDKIT: 1
      run: |
        docker build --push \
          --build-arg NEXT_PUBLIC_API_URL=https://awoooi.wooo.work \
          --build-arg NEXT_PUBLIC_SENTRY_DSN=http://da02d4e5d6542e4d1ed6b2dd6542efeb@192.168.0.110:9000/2 \
          --tag ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-web:${{ steps.tag.outputs.tag }} \
          --file apps/web/Dockerfile .
        echo "✅ Web: ${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-web:${{ steps.tag.outputs.tag }}"

    # Best-effort: copies whatever is in the workspace cache directory back to
    # the runner-local cache.
    - name: Save Next.js cache
      run: |
        mkdir -p "${{ env.LOCAL_CACHE_DIR }}/nextjs"
        [ -d "apps/web/.next/cache" ] && cp -r apps/web/.next/cache/* "${{ env.LOCAL_CACHE_DIR }}/nextjs"/ 2>/dev/null || true
|
||
|
||
# ==================== Deploy ====================
# Applies the awoooi-prod kustomization with the freshly built image tags,
# optionally syncs the CoreDNS config, waits for rollouts, runs a health check
# and reports the result to OpenClaw and Telegram.
deploy-prod:
  name: Deploy to Production
  runs-on: [self-hosted, harbor, k8s]
  needs: [detect-changes, build-api, build-web]
  timeout-minutes: 10
  # always() is needed so that a skipped build does not skip the deploy.
  # detect-changes must have succeeded: without that check a failed pre-flight
  # or detect-changes job leaves both builds "skipped", which would otherwise
  # satisfy this condition and deploy anyway.
  # NOTE(review): with skip_api and skip_web both set, the deploy still runs
  # with no image updated — confirm that is intended.
  if: >-
    always() &&
    needs.detect-changes.result == 'success' &&
    (needs.build-api.result == 'success' || needs.build-api.result == 'skipped') &&
    (needs.build-web.result == 'success' || needs.build-web.result == 'skipped')
  environment: production
  steps:
    # 2026-03-26: clean temp directories (temp + pages)
    - name: "Clean Runner temp"
      run: |
        RUNNER_ROOT=$(dirname "$(dirname "$RUNNER_TEMP")")
        # ${RUNNER_TEMP:?} aborts the step instead of expanding to "/*" should
        # the variable ever be empty or unset.
        rm -rf "${RUNNER_TEMP:?}"/* "$RUNNER_ROOT/_diag/pages"/* .claude/worktrees 2>/dev/null || true

    - uses: actions/checkout@v4
      with:
        clean: true

    - name: Setup
      # The kubeconfig secret goes through env so its content is never parsed
      # as shell syntax.
      env:
        KUBE_CONFIG_PROD: ${{ secrets.KUBE_CONFIG_PROD }}
      run: |
        mkdir -p ~/.kube
        printf '%s\n' "$KUBE_CONFIG_PROD" | base64 -d > ~/.kube/config
        chmod 600 ~/.kube/config
        # GITHUB_PATH makes these directories available to all later steps.
        echo "/home/wooo/bin" >> "$GITHUB_PATH"
        echo "$HOME/.local/bin" >> "$GITHUB_PATH"

    # Recomputed with the same formula as the build jobs
    # (<short commit sha>-<workflow run id>).
    - name: Generate tag
      id: tag
      run: echo "tag=$(git rev-parse --short HEAD)-${{ github.run_id }}" >> "$GITHUB_OUTPUT"

    - name: Deploy
      run: |
        cd k8s/awoooi-prod
        TAG="${{ steps.tag.outputs.tag }}"

        # Only update images that were actually built (avoids ImagePullBackOff)
        if [ "${{ needs.build-api.result }}" = "success" ]; then
          echo "📦 更新 API image: ${{ env.IMAGE_PREFIX }}-api:${TAG}"
          kustomize edit set image \
            "192.168.0.110:5000/library/api:IMAGE_TAG_PLACEHOLDER=${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-api:${TAG}"
        else
          echo "⏭️ 跳過 API image 更新 (build skipped)"
        fi

        if [ "${{ needs.build-web.result }}" = "success" ]; then
          echo "📦 更新 Web image: ${{ env.IMAGE_PREFIX }}-web:${TAG}"
          kustomize edit set image \
            "192.168.0.110:5000/library/web:IMAGE_TAG_PLACEHOLDER=${{ env.REGISTRY }}/${{ env.IMAGE_PREFIX }}-web:${TAG}"
        else
          echo "⏭️ 跳過 Web image 更新 (build skipped)"
        fi

        kubectl apply -k .

    # 2026-03-26: CoreDNS GitOps sync (ADR-026)
    - name: Sync CoreDNS Config
      if: needs.detect-changes.outputs.k3s-system == 'true'
      run: |
        echo "📦 同步 CoreDNS 配置到 K3s..."
        # HelmChartConfig is a Kubernetes resource, so it is applied directly
        kubectl apply -f k8s/k3s-system/coredns-custom.yaml
        echo "✅ CoreDNS 配置已同步"

    - name: Wait for rollout
      run: |
        # Still best-effort (the step never fails), but a rollout that does not
        # complete is surfaced as a workflow warning instead of being hidden.
        kubectl rollout status deployment/awoooi-web -n awoooi-prod --timeout=300s \
          || echo "::warning::awoooi-web rollout did not complete successfully within 300s"
        kubectl rollout status deployment/awoooi-api -n awoooi-prod --timeout=300s \
          || echo "::warning::awoooi-api rollout did not complete successfully within 300s"

    - name: Health check
      run: |
        sleep 15
        API_POD=$(kubectl get pods -n awoooi-prod -l app=awoooi-api -o jsonpath='{.items[0].metadata.name}')
        # Uses Python httpx (the container has no curl, but does have httpx)
        kubectl exec -n awoooi-prod "$API_POD" -c api -- python -c "import httpx; r=httpx.get('http://localhost:8000/api/v1/health', timeout=5); print(r.status_code)" || echo "Health check failed but deployment succeeded"

    # Best-effort webhook: "|| true" keeps an OpenClaw outage from failing the job.
    - name: Notify OpenClaw
      if: always()
      run: |
        STATUS="${{ job.status }}"
        curl -sf -X POST "${{ env.OPENCLAW_URL }}/api/v1/webhook/pipeline" \
          -H "Content-Type: application/json" \
          -d "{
            \"event\": \"completed\",
            \"status\": \"${STATUS}\",
            \"pipeline_id\": \"${{ github.run_id }}\",
            \"pipeline_url\": \"${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}\",
            \"commit\": \"${{ github.sha }}\",
            \"branch\": \"${{ github.ref_name }}\"
          }" || true

    - name: Notify Telegram
      if: always()
      run: |
        # Collect commit information
        # NOTE(review): head -c truncates by bytes and can split a multi-byte
        # UTF-8 character in the subject — confirm Telegram accepts the result.
        COMMIT_MSG=$(git log -1 --pretty=format:'%s' | head -c 50)
        AUTHOR=$(git log -1 --pretty=format:'%an')
        DEPLOY_TIME=$(TZ='Asia/Taipei' date '+%Y-%m-%d %H:%M')
        SHORT_SHA=$(echo "${{ github.sha }}" | head -c 7)

        # Only the title differs between the success and failure messages.
        if [ "${{ job.status }}" = "success" ]; then
          TITLE="✅ *AWOOOI 部署成功*"
        else
          TITLE="❌ *AWOOOI 部署失敗*"
        fi
        # %0A is a URL-encoded newline; curl -d sends the text as-is.
        MSG="${TITLE}%0A%0A📦 版本: ${COMMIT_MSG}%0A⏰ 時間: ${DEPLOY_TIME}%0A👤 作者: ${AUTHOR}%0A🔖 SHA: ${SHORT_SHA}%0A%0A🔗 [查看 Workflow](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})"
        curl -sf -X POST "https://api.telegram.org/bot${{ secrets.OPENCLAW_TG_BOT_TOKEN }}/sendMessage" \
          -d chat_id="${{ secrets.OPENCLAW_TG_CHAT_ID }}" \
          -d text="${MSG}" \
          -d parse_mode="Markdown" || true
|