Files
ewoooc/.gitea/workflows/cd.yaml
OoO 6bce46bbc7
All checks were successful
CD Pipeline / deploy (push) Successful in 2m29s
fix(runtime): 強化健康檢查監控韌性
2026-05-01 14:46:49 +08:00

356 lines
17 KiB
YAML
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# =============================================================================
# EwoooC CD Pipeline (Gitea Actions)
# =============================================================================
# 流程: Sync Files → 188 Docker Restart → Health Check
# 部署架構: Docker Compose on ollama@192.168.0.188 (Volume Mount)
# 加速措施: Python 檔案走 rsync,僅 Dockerfile / requirements.txt / docker-compose.yml 變動才重建 image
# 參考: AWOOOI cd.yaml pattern (ADR-008 — Docker Compose 非 K8s)
# 注意: runner image 仍可能顯示 awoooi/ci-runner;判斷專案請看 task repo/workspace 是否為 wooo/ewoooc。
# Runner 必須使用 ewoooc-host 專屬 label,避免 user-level runner 混接 AWOOOI workflow。
#
# 安全注意:
# SSH_HOST_KEY secret: 請執行 ssh-keyscan 192.168.0.188 並將輸出存入 Gitea Secret
# 若未設定,自動 ssh-keyscan(私有網段可接受的降級)
#
# 已知風險:
# cancel-in-progress: rsync 非原子,若新 push 在傳輸中取消,188 可能處於半更新狀態
# Workflow display name.
name: CD Pipeline

# Triggers: pushes to main that touch a deployable path, or a manual dispatch.
on:
  push:
    branches:
      - main
    paths:
      # Application code (volume-mounted)
      - "app.py"
      - "auth.py"
      - "config.py"
      - "scheduler.py"
      - "run_scheduler.py"
      - "run_telegram_bot.py"
      - "gunicorn.conf.py"
      - "services/**"
      - "routes/**"
      - "database/**"
      - "templates/**"
      - "static/**"
      - "monitoring/prometheus.yml"
      - "monitoring/blackbox.yml"
      # Files whose change requires an image rebuild
      - "Dockerfile"
      - "requirements.txt"
      - "docker-compose.yml"
      # Script tooling
      - "scripts/**"
      # Claude Code commands / hooks
      - ".claude/**"
      # The workflows themselves
      - ".gitea/workflows/**"
      # docs/, memory/, ADRs, k8s/ etc. are not listed and do not trigger a run.
  workflow_dispatch:
    inputs:
      force_rebuild:
        description: "強制重建 Docker Image不論變更檔案"
        type: boolean
        default: false

# A new push cancels the job already in flight, so only the latest revision
# gets deployed.
concurrency:
  group: cd-deploy-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Single deployment job; the whole run is capped at 20 minutes.
  deploy:
    # Dedicated runner label for this project.
    runs-on: ewoooc-host
    timeout-minutes: 20
    steps:
# Check out with depth 2 so that HEAD~1 exists locally; later steps run
# `git diff --name-only HEAD~1 HEAD` to find the changed files.
- uses: actions/checkout@v4
  with:
    fetch-depth: 2
# Collect commit metadata for the notification steps.
# Outputs:
#   short_sha  - first 7 characters of GITHUB_SHA
#   message    - commit subject, truncated to at most 60 characters
#   start_time - epoch seconds, used later to compute the deploy duration
- name: 取得 Commit 資訊
  id: commit
  run: |
    # Truncate by character, not by byte: a byte cut (head -c) can split a
    # multi-byte UTF-8 character in a CJK subject and leave invalid UTF-8 in
    # the JSON payloads built later. Bash substring expansion is
    # character-aware under a UTF-8 locale; if C.UTF-8 is unavailable bash
    # only warns and falls back to byte semantics.
    export LC_ALL=C.UTF-8
    SUBJECT=$(git log -1 --pretty=%s)
    {
      echo "short_sha=${GITHUB_SHA::7}"
      echo "message=${SUBJECT:0:60}"
      echo "start_time=$(date +%s)"
    } >> "$GITHUB_OUTPUT"
# Decide whether the Docker image must be rebuilt.
# force_rebuild wins; otherwise the changed-file list decides.
# Outputs:
#   type  - "rebuild" or "sync"
#   label - human-readable label used in the start notification
- name: 偵測部署類型
  id: deploy_type
  env:
    # Expression values are passed through env, never pasted into the script.
    FORCE_REBUILD: ${{ github.event.inputs.force_rebuild }}
    BEFORE_SHA: ${{ github.event.before }}
  run: |
    if [[ "$FORCE_REBUILD" == "true" ]]; then
      echo "type=rebuild" >> "$GITHUB_OUTPUT"
      echo "label=🔨 強制重建 Docker Image" >> "$GITHUB_OUTPUT"
      exit 0
    fi

    have_commit() { git cat-file -e "$1^{commit}" 2>/dev/null; }

    # A push can carry several commits. Diffing only HEAD~1..HEAD would miss
    # a Dockerfile/requirements change made in an earlier commit of the same
    # push, so prefer the pre-push SHA when the event provides one and the
    # commit can be obtained. Otherwise fall back to HEAD~1.
    BASE="HEAD~1"
    if [[ -n "$BEFORE_SHA" && ! "$BEFORE_SHA" =~ ^0+$ ]]; then
      have_commit "$BEFORE_SHA" \
        || git fetch --quiet --depth=1 origin "$BEFORE_SHA" 2>/dev/null \
        || true
      if have_commit "$BEFORE_SHA"; then
        BASE="$BEFORE_SHA"
      fi
    fi

    CHANGED=$(git diff --name-only "$BASE" HEAD 2>/dev/null || echo "")
    if grep -qE '^(Dockerfile|requirements\.txt|docker-compose\.yml)$' <<< "$CHANGED"; then
      echo "type=rebuild" >> "$GITHUB_OUTPUT"
      echo "label=🔨 重建 Docker Image" >> "$GITHUB_OUTPUT"
    else
      echo "type=sync" >> "$GITHUB_OUTPUT"
      echo "label=📁 同步 Python 檔案" >> "$GITHUB_OUTPUT"
    fi
# Install the deploy key and host verification data
# (C2 fix: StrictHostKeyChecking no has been removed).
# Secrets:
#   DEPLOY_SSH_KEY - private key used for every ssh/rsync call in this job
#   SSH_HOST_KEY   - optional pinned known_hosts line(s) for 192.168.0.188
- name: 設定 SSH 金鑰
  env:
    DEPLOY_SSH_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
    SSH_HOST_KEY: ${{ secrets.SSH_HOST_KEY }}
  run: |
    # Create the key file already restricted, so it is never readable by
    # others, not even briefly before a chmod.
    umask 077
    mkdir -p ~/.ssh
    chmod 700 ~/.ssh
    printf '%s\n' "$DEPLOY_SSH_KEY" > ~/.ssh/id_deploy
    chmod 600 ~/.ssh/id_deploy

    # Remove any previous entry for the host before adding the current one.
    # Plain appending on a persistent runner grows known_hosts on every run
    # and keeps trusting old keys after the host key is rotated.
    touch ~/.ssh/known_hosts
    ssh-keygen -R 192.168.0.188 -f ~/.ssh/known_hosts >/dev/null 2>&1 || true

    # Host verification: prefer the pinned SSH_HOST_KEY secret; otherwise
    # scan the host dynamically (accepted downgrade on the private network).
    if [[ -n "$SSH_HOST_KEY" ]]; then
      printf '%s\n' "$SSH_HOST_KEY" >> ~/.ssh/known_hosts
    else
      ssh-keyscan -H 192.168.0.188 >> ~/.ssh/known_hosts 2>/dev/null
    fi
    chmod 644 ~/.ssh/known_hosts

    cat > ~/.ssh/config << 'EOF'
    Host 192.168.0.188
      HostName 192.168.0.188
      User ollama
      IdentityFile ~/.ssh/id_deploy
      ConnectTimeout 10
    EOF
# Announce the start of the deployment on Telegram.
# C1 fix: every expression value goes through the env block and is never
# embedded directly in the shell script.
# The notification is best-effort: continue-on-error plus a curl timeout keep
# a Telegram outage or hang from aborting or stalling the deployment.
- name: 通知部署開始
  continue-on-error: true
  env:
    COMMIT_MSG: ${{ steps.commit.outputs.message }}
    COMMIT_SHA: ${{ steps.commit.outputs.short_sha }}
    COMMIT_ACTOR: ${{ github.actor }}
    DEPLOY_LABEL: ${{ steps.deploy_type.outputs.label }}
    TG_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
    TG_CHAT: ${{ secrets.TELEGRAM_CHAT_ID }}
  run: |
    # The message is sent with parse_mode=HTML, so escape &, < and > in the
    # commit subject.
    COMMIT_ESC=$(printf '%s' "$COMMIT_MSG" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
    MSG=$(printf '🚀 <b>EwoooC 部署開始</b>\n├ 📝 <code>%s</code>\n├ 🔖 <code>%s</code>\n├ 👤 %s\n└ %s' \
      "${COMMIT_ESC}" "${COMMIT_SHA}" "${COMMIT_ACTOR}" "${DEPLOY_LABEL}")
    # jq builds the JSON body so quoting inside the message cannot break it.
    curl -fS --max-time 10 -X POST "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \
      -H "Content-Type: application/json" \
      -d "$(jq -n --arg c "$TG_CHAT" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML"}')"
# ── Install deployment tooling ───────────────────────────────────────────────
# Notes on the rsync flags used by the following steps:
#   --ignore-errors: meant to stop a single unwritable attribute from aborting
#                    the whole deploy. NOTE(review): the rsync manual describes
#                    this flag as affecting --delete only; confirm.
#   --inplace:       avoids app.py/config.py single-file bind mounts staying on
#                    the old version because of inode replacement.
- name: 安裝 rsync / ssh
  run: |
    set -e
    apt-get update -qq
    apt-get install -y -qq rsync openssh-client
# ── Mode A: sync files only (most common path, ~10s) ─────────────────────────
- name: 同步 Python 檔案至 188
  if: steps.deploy_type.outputs.type == 'sync'
  run: |
    # --inplace writes into the existing inode so single-file bind mounts
    # (app.py/config.py) see the new content.
    # --ignore-errors only affects --delete handling in rsync; it does not
    # make rsync tolerant of transfer errors, so the exit code is checked.
    #
    # A blanket "|| true" would hide every failure (SSH unreachable, disk
    # full, protocol error) and the next step would reload containers on
    # missing or half-synced code. Only the partial-transfer codes are
    # tolerated:
    #   23 = some files/attributes could not be transferred
    #   24 = some source files vanished during the transfer
    rc=0
    rsync -avz --ignore-errors \
      --inplace \
      -e "ssh -i ~/.ssh/id_deploy" \
      --exclude='.git/' \
      --exclude='.gitea/' \
      --exclude='.claude/' \
      --exclude='data/' \
      --exclude='logs/' \
      --exclude='backups/' \
      --exclude='config/google_credentials.json' \
      --exclude='config/google_token.pickle' \
      --exclude='venv/' \
      --exclude='__pycache__/' \
      --exclude='*.pyc' \
      --exclude='.env' \
      --exclude='*.db' \
      --exclude='*.db-journal' \
      --exclude='*.md' \
      --exclude='docs/' \
      --exclude='memory/' \
      --exclude='k8s/' \
      --exclude='n8n-workflows/' \
      --exclude='aiops-core/' \
      --exclude='monitoring/alertmanager/' \
      --exclude='._*' \
      ./ ollama@192.168.0.188:/home/ollama/momo-pro/ || rc=$?
    case "$rc" in
      0) ;;
      23|24)
        echo "⚠️ rsync reported a partial transfer (exit $rc); continuing"
        ;;
      *)
        echo "❌ rsync failed (exit $rc); aborting before containers are reloaded"
        exit "$rc"
        ;;
    esac
# Apply the synced files on the host without rebuilding the image.
- name: 重啟容器Sync 模式)
  if: steps.deploy_type.outputs.type == 'sync'
  run: |
    # ROOT CAUSE FIX (2026-04-28): `up -d` creates a container that does not
    #   exist yet.
    # ZERO-DOWNTIME SYNC (2026-04-30): the app reloads its workers through a
    #   Gunicorn HUP instead of restarting the momo-pro-system container, so an
    #   ordinary Python/template sync does not cause a short 502.
    # MOUNT DRIFT GUARD (2026-04-30): an old container without the
    #   app.py/config.py bind mounts keeps serving the files baked into the
    #   image; the app container is force-recreated only when that drift is
    #   detected.
    ssh -i ~/.ssh/id_deploy ollama@192.168.0.188 '
      set -e
      cd /home/ollama/momo-pro
      has_mount() {
        docker inspect momo-pro-system --format "{{range .Mounts}}{{println .Destination}}{{end}}" | grep -qx "$1"
      }
      docker compose up -d --no-deps scheduler telegram-bot
      if has_mount "/app/app.py" && has_mount "/app/config.py"; then
        docker compose up -d --no-deps momo-app
      else
        echo "⚠️ momo-app mount drift detected; recreating app container once to apply bind mounts"
        docker compose up -d --no-deps --force-recreate momo-app
      fi
      docker kill -s HUP momo-pro-system
      docker compose restart scheduler telegram-bot
      echo "✅ Sync 已套用app 熱重載scheduler/telegram-bot 已重啟)"
    '
# ── Mode B: rebuild the Docker image (Dockerfile / requirements.txt changed) ─
- name: 同步所有檔案並重建 Image
  if: steps.deploy_type.outputs.type == 'rebuild'
  run: |
    # H5: ADR-011 guard. momo-db must be running before a rebuild is allowed.
    ssh -i ~/.ssh/id_deploy ollama@192.168.0.188 \
      "docker ps --format '{{.Names}}' | grep -q '^momo-db$' || \
       (echo 'ABORT: momo-db not running' && exit 1)"

    # H1: same exclude list as the sync mode (incl. .gitea/ .claude/ docs/ *.md).
    # The rsync exit code is checked instead of being discarded with
    # "|| true": a failed transfer would otherwise rebuild the image from
    # stale sources. Only partial-transfer codes are tolerated:
    #   23 = some files/attributes could not be transferred
    #   24 = some source files vanished during the transfer
    rc=0
    rsync -avz --ignore-errors \
      --inplace \
      -e "ssh -i ~/.ssh/id_deploy" \
      --exclude='.git/' \
      --exclude='.gitea/' \
      --exclude='.claude/' \
      --exclude='data/' \
      --exclude='logs/' \
      --exclude='backups/' \
      --exclude='config/google_credentials.json' \
      --exclude='config/google_token.pickle' \
      --exclude='venv/' \
      --exclude='__pycache__/' \
      --exclude='*.pyc' \
      --exclude='.env' \
      --exclude='*.db' \
      --exclude='*.db-journal' \
      --exclude='*.md' \
      --exclude='docs/' \
      --exclude='memory/' \
      --exclude='k8s/' \
      --exclude='n8n-workflows/' \
      --exclude='aiops-core/' \
      --exclude='monitoring/alertmanager/' \
      --exclude='._*' \
      ./ ollama@192.168.0.188:/home/ollama/momo-pro/ || rc=$?
    case "$rc" in
      0) ;;
      23|24)
        echo "⚠️ rsync reported a partial transfer (exit $rc); continuing"
        ;;
      *)
        echo "❌ rsync failed (exit $rc); aborting before the image is rebuilt"
        exit "$rc"
        ;;
    esac

    # H2: build first and switch containers only after the build succeeded,
    # so the no-cache build time does not turn into 502 downtime.
    ssh -i ~/.ssh/id_deploy ollama@192.168.0.188 \
      "cd /home/ollama/momo-pro && \
       docker compose build --no-cache momo-app && \
       (docker stop momo-pro-system momo-scheduler momo-telegram-bot 2>/dev/null || true) && \
       (docker rm momo-pro-system momo-scheduler momo-telegram-bot 2>/dev/null || true) && \
       docker compose up -d --no-deps --force-recreate momo-app scheduler telegram-bot && \
       echo '✅ Image 重建完成(三容器)'"
# Bring up and restart Prometheus / blackbox-exporter, but only when one of
# their config files changed in the last commit.
- name: 重新載入監控設定
  run: |
    DIFF_FILES=$(git diff --name-only HEAD~1 HEAD 2>/dev/null || echo "")

    # Guard clause: nothing to do when neither monitoring config was touched.
    if ! grep -qE '^(monitoring/prometheus\.yml|monitoring/blackbox\.yml)$' <<< "$DIFF_FILES"; then
      echo " Monitoring 設定未變更,略過重新載入"
      exit 0
    fi

    RELOAD_CMD="cd /home/ollama/momo-pro/monitoring && docker compose up -d prometheus blackbox-exporter && docker compose restart prometheus blackbox-exporter && echo '✅ Monitoring 設定已重新載入'"
    ssh -i ~/.ssh/id_deploy ollama@192.168.0.188 "$RELOAD_CMD"
# ── Health check (H3: HTTP probes plus a state check of the three containers) ─
- name: 健康檢查
  run: |
    echo "⏳ 等待服務啟動30s..."
    sleep 30

    # Up to 12 attempts, 15s apart. Both probes must return 200:
    #   internal - curl run inside the momo-pro-system container
    #   external - the public URL as seen from the runner
    for i in $(seq 1 12); do
      INTERNAL_CODE=$(ssh -i ~/.ssh/id_deploy ollama@192.168.0.188 \
        "docker exec momo-pro-system curl -s -o /dev/null -w '%{http_code}' --max-time 8 http://127.0.0.1:80/health" 2>/dev/null || true)
      EXTERNAL_CODE=$(curl -s -o /dev/null -w "%{http_code}" https://mo.wooo.work/health --max-time 10 2>/dev/null || true)
      # An empty result (e.g. ssh itself failed) is reported as 000.
      INTERNAL_CODE=${INTERNAL_CODE:-000}
      EXTERNAL_CODE=${EXTERNAL_CODE:-000}
      if [ "$INTERNAL_CODE" = "200" ] && [ "$EXTERNAL_CODE" = "200" ]; then
        echo "✅ HTTP 健康檢查通過internal=$INTERNAL_CODE, external=$EXTERNAL_CODE"
        break
      fi
      echo "⏳ 嘗試 $i/12internal=$INTERNAL_CODE external=$EXTERNAL_CODE等待 15s..."
      [ "$i" -eq 12 ] && echo "❌ HTTP 健康檢查失敗" && exit 1
      sleep 15
    done

    # Verify that all three application containers are actually running.
    #   --filter status=running: plain `docker ps` also lists restarting and
    #     paused containers, so a crash-looping container would be counted.
    #   grep -x: match whole names only, so a stray container such as
    #     momo-scheduler-old cannot pad the count.
    # The failure branch lists with -a so exited containers show up too.
    ssh -i ~/.ssh/id_deploy ollama@192.168.0.188 \
      'RUNNING=$(docker ps --filter status=running --format "{{.Names}}" | grep -cxE "momo-(pro-system|scheduler|telegram-bot)" || true); \
       if [ "$RUNNING" -lt 3 ]; then \
         docker ps -a --format "{{.Names}}\t{{.Status}}" | grep momo-; \
         echo "❌ 容器未全部就緒Running: $RUNNING/3"; exit 1; \
       else \
         echo "✅ 三容器均正常運行($RUNNING/3"; \
       fi'
# ── Trigger the post-deploy code review ──────────────────────────────────────
# Best-effort (continue-on-error): a failing webhook never fails the deploy.
- name: 觸發 AI Code Review
  if: success()
  continue-on-error: true
  env:
    WEBHOOK_TOKEN: ${{ secrets.INTERNAL_WEBHOOK_TOKEN }}
    COMMIT_SHA_FULL: ${{ github.sha }}
    BRANCH_NAME: ${{ github.ref_name }}
    DEPLOY_TYPE: ${{ steps.deploy_type.outputs.type }}
  run: |
    CHANGED=$(git diff --name-only HEAD~1 HEAD 2>/dev/null || echo "")
    # grep exits 1 when no changed file matches (e.g. a template-only commit).
    # If the step shell runs with pipefail, that status would abort the
    # script before the webhook call and before the fallback message, so it
    # is neutralised here; jq then yields an empty array.
    FILES_JSON=$(echo "$CHANGED" | { grep -E '\.(py|yaml|yml|json)$' || true; } | \
      jq -Rs '[split("\n")[] | select(. != "")]')
    curl -fS --max-time 10 \
      -X POST "https://mo.wooo.work/code-review/api/internal/trigger" \
      -H "Content-Type: application/json" \
      -H "X-Internal-Token: ${WEBHOOK_TOKEN}" \
      -d "$(jq -n \
        --arg sha "$COMMIT_SHA_FULL" \
        --argjson files "$FILES_JSON" \
        --arg branch "$BRANCH_NAME" \
        --arg type "$DEPLOY_TYPE" \
        '{commit_sha:$sha,changed_files:$files,branch:$branch,deploy_type:$type}')" \
      && echo "✅ Code Review Pipeline 已觸發" \
      || echo "⚠️ Code Review webhook 呼叫失敗(不影響部署結果)"
# ── Success notification (C1 fix: values isolated in the env block) ──────────
# Best-effort: without continue-on-error, a Telegram failure after a healthy
# deploy would fail the job and trigger the failure() rollback and failure
# notice for a deployment that actually succeeded.
- name: 通知部署成功
  if: success()
  continue-on-error: true
  env:
    COMMIT_MSG: ${{ steps.commit.outputs.message }}
    COMMIT_SHA: ${{ steps.commit.outputs.short_sha }}
    START_TIME: ${{ steps.commit.outputs.start_time }}
    TG_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
    TG_CHAT: ${{ secrets.TELEGRAM_CHAT_ID }}
  run: |
    END_TIME=$(date +%s)
    # Fall back to END_TIME (duration 0) if the start time output is missing,
    # rather than failing on an arithmetic syntax error.
    DURATION=$((END_TIME - ${START_TIME:-$END_TIME}))
    # The message is sent with parse_mode=HTML, so escape &, < and > in the
    # commit subject.
    COMMIT_ESC=$(printf '%s' "$COMMIT_MSG" | sed 's/&/\&amp;/g; s/</\&lt;/g; s/>/\&gt;/g')
    MSG=$(printf '✅ <b>EwoooC 部署成功</b>\n├ 📝 <code>%s</code>\n├ 🔖 <code>%s</code>\n├ ⏱ 耗時 %ss\n└ 🌐 https://mo.wooo.work' \
      "${COMMIT_ESC}" "${COMMIT_SHA}" "${DURATION}")
    curl -fS --max-time 10 -X POST "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \
      -H "Content-Type: application/json" \
      -d "$(jq -n --arg c "$TG_CHAT" --arg t "$MSG" '{chat_id:$c,text:$t,parse_mode:"HTML"}')"
# ── H4: emergency recovery attempt ───────────────────────────────────────────
# Runs only after a failed step and tries to bring the three containers back.
# Every failure here is swallowed so the step itself never errors.
- name: 緊急回滾嘗試
  if: failure()
  run: |
    echo "⚠️ 部署失敗,嘗試 compose up -d 回復三容器..."
    # `up -d` rather than `restart`, so containers that no longer exist are
    # created again.
    RECOVER_CMD="cd /home/ollama/momo-pro && docker compose up -d --no-deps momo-app scheduler telegram-bot 2>&1 || true"
    ssh -i ~/.ssh/id_deploy ollama@192.168.0.188 "$RECOVER_CMD" || true
# ── Failure notification (C1 fix: values isolated in the env block) ──────────
- name: 通知部署失敗
  if: failure()
  env:
    COMMIT_MSG: ${{ steps.commit.outputs.message }}
    COMMIT_SHA: ${{ steps.commit.outputs.short_sha }}
    TG_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
    TG_CHAT: ${{ secrets.TELEGRAM_CHAT_ID }}
  run: |
    # HTML-escape the commit subject; the message uses parse_mode=HTML.
    SAFE_SUBJECT=$(printf '%s' "$COMMIT_MSG" \
      | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g')
    TEXT=$(printf '❌ <b>EwoooC 部署失敗</b>\n├ 📝 <code>%s</code>\n├ 🔖 <code>%s</code>\n└ 🔍 請查看 Gitea Actions 日誌' \
      "${SAFE_SUBJECT}" "${COMMIT_SHA}")
    # Build the JSON body with jq first, then post it.
    PAYLOAD=$(jq -n --arg c "$TG_CHAT" --arg t "$TEXT" '{chat_id:$c,text:$t,parse_mode:"HTML"}')
    curl --fail --show-error --request POST \
      --header "Content-Type: application/json" \
      --data "$PAYLOAD" \
      "https://api.telegram.org/bot${TG_TOKEN}/sendMessage"