ci(cd): 首席架構師 Review Phase 25 全批修正 (C1-C4 / S1-S4 / I1-I4)
修正項目: C1: DOCKER_BUILDKIT=1 + ARG BUILDKIT_INLINE_CACHE + syntax directive (兩個 Dockerfile) C2: Alert Chain Smoke Test 修正 pass/fail 輸出邏輯 (不再無條件 pass) C3: API Dockerfile builder stage 先 pip install 後 COPY src/ (deps cache 正確失效) C4: Deploy step 自行管理 SSH key + ssh-keyscan 取代 StrictHostKeyChecking=no S1/S2: 統一 SSH 連線方式,移除 StrictHostKeyChecking=no S3: API Dockerfile HEALTHCHECK 改用 curl 取代 httpx (確保 image 有該工具) S4: type-sync-check.yaml python → python3 I1: 建立 .dockerignore 防止無關檔案污染 build context I2: 加入 Setup Python Tools 共用步驟 I3: deploy-alerts job 移至獨立 deploy-alerts.yaml workflow (paths trigger) I4: E2E Smoke Test 加入 pnpm install + PLAYWRIGHT_BASE_URL 公網域名 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
50
.dockerignore
Normal file
50
.dockerignore
Normal file
@@ -0,0 +1,50 @@
|
||||
# 首席架構師 Review I1 (2026-04-05 Claude Code)
|
||||
# 防止無關檔案射入 Docker build context,縮短 context 傳輸時間
|
||||
# 並防止 .playwright-mcp/ PNG/HTML 等大檔案造成 layer hash 不必要失效
|
||||
|
||||
# Git
|
||||
.git
|
||||
.gitignore
|
||||
|
||||
# CI/CD
|
||||
.gitea
|
||||
.github
|
||||
|
||||
# 開發工具
|
||||
.playwright-mcp
|
||||
.vscode
|
||||
.idea
|
||||
*.md
|
||||
*.log
|
||||
*.tmp
|
||||
|
||||
# 文件與腳本(不需要進 image)
|
||||
docs
|
||||
scripts
|
||||
|
||||
# Node 快取(monorepo 根目錄)
|
||||
node_modules
|
||||
|
||||
# Python 快取
|
||||
__pycache__
|
||||
*.pyc
|
||||
*.pyo
|
||||
.venv
|
||||
.pytest_cache
|
||||
.mypy_cache
|
||||
dist
|
||||
*.egg-info
|
||||
|
||||
# 測試結果
|
||||
test-results
|
||||
coverage
|
||||
.coverage
|
||||
|
||||
# 環境變數(絕對不能進 image)
|
||||
.env
|
||||
.env.*
|
||||
apps/api/.env
|
||||
apps/web/.env*
|
||||
|
||||
# memory/ADR(不影響 build)
|
||||
memory
|
||||
@@ -17,8 +17,8 @@ on:
|
||||
- 'apps/**'
|
||||
- 'k8s/**'
|
||||
- '.gitea/workflows/**'
|
||||
- 'ops/monitoring/alerts-unified.yml' # 2026-04-05 Claude Code: 告警規則變更自動部署
|
||||
# docs/、memory/、ADR 等不觸發
|
||||
# ops/monitoring/alerts-unified.yml 由 deploy-alerts.yaml 獨立處理 (I3)
|
||||
workflow_dispatch:
|
||||
# 手動觸發永遠可用(用於補跑、緊急部署)
|
||||
|
||||
@@ -114,7 +114,11 @@ jobs:
|
||||
# ── API 鏡像建置(含 Layer Cache 加速)──────────────────────────────
|
||||
# 2026-04-01 ogt: CACHE_BUST=git_sha 確保 src/ 和 models.json 層每次重建
|
||||
# deps 層 (pip install) 仍可 cache → 加速;代碼/配置層強制失效
|
||||
# 首席架構師 Review C1 (2026-04-05 Claude Code): 補 DOCKER_BUILDKIT=1
|
||||
# BUILDKIT_INLINE_CACHE=1 只有在 BuildKit 啟用時才有效
|
||||
- name: Build and Push API
|
||||
env:
|
||||
DOCKER_BUILDKIT: "1"
|
||||
run: |
|
||||
docker build -f apps/api/Dockerfile \
|
||||
--build-arg BUILDKIT_INLINE_CACHE=1 \
|
||||
@@ -134,6 +138,8 @@ jobs:
|
||||
# - deps 層 (pnpm install) 仍可 cache → 節省 ~2-3 min
|
||||
# - COPY . . 以下由 CACHE_BUST 強制失效 → CSRF fix 等代碼變更正確進入 bundle
|
||||
- name: Build and Push Web
|
||||
env:
|
||||
DOCKER_BUILDKIT: "1"
|
||||
run: |
|
||||
docker build -f apps/web/Dockerfile \
|
||||
--build-arg NEXT_PUBLIC_API_URL=https://awoooi.wooo.work \
|
||||
@@ -333,15 +339,22 @@ jobs:
|
||||
id: smoke
|
||||
continue-on-error: true
|
||||
run: |
|
||||
# 首席架構師 Review I4 + 2026-04-05 Claude Code:
|
||||
# playwright.config.ts import @playwright/test — 必須先安裝 pnpm node_modules
|
||||
corepack enable 2>/dev/null || npm install -g pnpm@9 -q
|
||||
pnpm install --frozen-lockfile 2>&1 | tail -5
|
||||
|
||||
cd apps/web
|
||||
# 安裝 Playwright Chromium(CI 環境,含系統依賴)
|
||||
npx playwright install chromium --with-deps
|
||||
# 跑 smoke test,line reporter 方便 CI 日誌閱讀
|
||||
npx playwright test tests/e2e/smoke.spec.ts --reporter=line
|
||||
echo "smoke_status=pass" >> $GITHUB_OUTPUT
|
||||
npx playwright install chromium --with-deps 2>&1 | tail -5
|
||||
# 對已部署的生產環境跑 smoke test
|
||||
npx playwright test tests/e2e/smoke.spec.ts --reporter=line \
|
||||
&& echo "smoke_status=pass" >> $GITHUB_OUTPUT \
|
||||
|| echo "smoke_status=fail" >> $GITHUB_OUTPUT
|
||||
env:
|
||||
# Playwright 在 CI 環境使用已建置的 pnpm node_modules
|
||||
CI: "true"
|
||||
# 直接測試已部署的生產環境,不啟動本地 dev server
|
||||
PLAYWRIGHT_BASE_URL: "https://awoooi.wooo.work"
|
||||
|
||||
- name: Notify Health Check Success
|
||||
env:
|
||||
@@ -368,44 +381,3 @@ jobs:
|
||||
-d "chat_id=${{ secrets.TELEGRAM_CHAT_ID }}" \
|
||||
-d "parse_mode=HTML" \
|
||||
--data-urlencode "text@-"
|
||||
|
||||
# =============================================================================
|
||||
# Deploy Prometheus Alert Rules (獨立 job,不依賴 build-and-deploy)
|
||||
# 2026-04-05 Claude Code: Sprint 1 — 告警規則 CD 自動部署
|
||||
# 觸發條件: ops/monitoring/alerts-unified.yml 有變更 或 workflow_dispatch
|
||||
# =============================================================================
|
||||
deploy-alerts:
|
||||
name: "Deploy Prometheus Alert Rules"
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
if: |
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
contains(toJSON(github.event.commits), 'ops/monitoring/alerts-unified.yml')
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Validate alerts YAML
|
||||
run: python3 -c "import yaml; yaml.safe_load(open('ops/monitoring/alerts-unified.yml')); print('YAML OK')"
|
||||
|
||||
- name: Setup SSH key
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
echo "${{ secrets.DEPLOY_SSH_KEY }}" > ~/.ssh/id_ed25519
|
||||
chmod 600 ~/.ssh/id_ed25519
|
||||
ssh-keyscan 192.168.0.110 >> ~/.ssh/known_hosts
|
||||
|
||||
- name: Deploy alerts to Prometheus
|
||||
run: bash scripts/ops/deploy-alerts.sh
|
||||
|
||||
- name: Notify deploy result
|
||||
if: always()
|
||||
run: |
|
||||
STATUS="${{ job.status }}"
|
||||
EMOJI="✅"
|
||||
[ "$STATUS" != "success" ] && EMOJI="❌"
|
||||
SHORT_SHA="${{ github.sha }}"
|
||||
SHORT_SHA="${SHORT_SHA:0:7}"
|
||||
MSG="${EMOJI} Prometheus 告警規則部署 ${STATUS} (${SHORT_SHA})"
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ secrets.TELEGRAM_CHAT_ID }}" \
|
||||
--data-urlencode "text=${MSG}" || true
|
||||
|
||||
49
.gitea/workflows/deploy-alerts.yaml
Normal file
49
.gitea/workflows/deploy-alerts.yaml
Normal file
@@ -0,0 +1,49 @@
|
||||
# =============================================================================
|
||||
# Deploy Prometheus Alert Rules (獨立 workflow)
|
||||
# 2026-04-05 Claude Code (ADR-039 I3): 從 cd.yaml 分離
|
||||
# 觸發條件: ops/monitoring/alerts-unified.yml 有變更 或 workflow_dispatch
|
||||
# 說明: 告警規則部署不依賴應用構建,獨立觸發以加快響應速度
|
||||
# =============================================================================
|
||||
|
||||
name: Deploy Alert Rules
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'ops/monitoring/alerts-unified.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
deploy-alerts:
|
||||
name: "Deploy Prometheus Alert Rules"
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Validate alerts YAML
|
||||
run: python3 -c "import yaml; yaml.safe_load(open('ops/monitoring/alerts-unified.yml')); print('YAML OK')"
|
||||
|
||||
- name: Setup SSH key
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
echo "${{ secrets.DEPLOY_SSH_KEY }}" > ~/.ssh/id_ed25519
|
||||
chmod 600 ~/.ssh/id_ed25519
|
||||
ssh-keyscan 192.168.0.110 >> ~/.ssh/known_hosts
|
||||
|
||||
- name: Deploy alerts to Prometheus
|
||||
run: bash scripts/ops/deploy-alerts.sh
|
||||
|
||||
- name: Notify deploy result
|
||||
if: always()
|
||||
run: |
|
||||
STATUS="${{ job.status }}"
|
||||
EMOJI="✅"
|
||||
[ "$STATUS" != "success" ] && EMOJI="❌"
|
||||
SHORT_SHA="${{ github.sha }}"
|
||||
SHORT_SHA="${SHORT_SHA:0:7}"
|
||||
MSG="${EMOJI} Prometheus 告警規則部署 ${STATUS} (${SHORT_SHA})"
|
||||
curl -fS -X POST "https://api.telegram.org/bot${{ secrets.TELEGRAM_BOT_TOKEN }}/sendMessage" \
|
||||
-d "chat_id=${{ secrets.TELEGRAM_CHAT_ID }}" \
|
||||
--data-urlencode "text=${MSG}" || true
|
||||
@@ -56,7 +56,7 @@ jobs:
|
||||
- name: Generate Types (Temp)
|
||||
run: |
|
||||
cd apps/api
|
||||
python ../../scripts/generate-schemas.py
|
||||
python3 ../../scripts/generate-schemas.py
|
||||
cd ../../packages/shared-types
|
||||
pnpm generate:types
|
||||
|
||||
|
||||
@@ -6,6 +6,11 @@
|
||||
#
|
||||
# 注意: 必須從 monorepo 根目錄執行,否則無法存取 packages/
|
||||
|
||||
# syntax=docker/dockerfile:1
|
||||
# 首席架構師 Review C1 (2026-04-05 Claude Code): BuildKit inline cache 需要 syntax 宣告
|
||||
# BUILDKIT_INLINE_CACHE=1 才能真正把 cache metadata 寫入 image
|
||||
ARG BUILDKIT_INLINE_CACHE=0
|
||||
|
||||
FROM python:3.11-slim AS builder
|
||||
|
||||
WORKDIR /app
|
||||
@@ -14,22 +19,26 @@ WORKDIR /app
|
||||
COPY --from=ghcr.io/astral-sh/uv:0.6.9 /uv /bin/uv
|
||||
|
||||
# Phase 6.4i: 複製本地 packages 到 Docker context
|
||||
# 順序重要: 先複製 packages,再複製 api (利用 Docker layer cache)
|
||||
COPY packages/lewooogo-data/ /packages/lewooogo-data/
|
||||
COPY packages/lewooogo-brain/ /packages/lewooogo-brain/
|
||||
|
||||
# 複製 API 依賴文件 (pyproject.toml 需要 README.md)
|
||||
# 複製 API 依賴文件(只複製 metadata,不含 src/)
|
||||
COPY apps/api/pyproject.toml apps/api/README.md ./
|
||||
|
||||
# 複製 src 目錄 (hatchling build 需要)
|
||||
COPY apps/api/src/ ./src/
|
||||
|
||||
# 安裝本地 packages 與 API 依賴 (合併 RUN 減少 layer)
|
||||
# 注意: `uv pip install .` 從 pyproject.toml 安裝依賴
|
||||
RUN uv pip install --system --no-cache /packages/lewooogo-data && \
|
||||
# 首席架構師 Review C3 (2026-04-05 Claude Code):
|
||||
# 原始問題:COPY src/ 在 pip install 之前,src 任何變更都讓 deps layer 失效
|
||||
# 修復:先安裝 local packages,再用 --no-build-isolation 只安裝 pyproject 的依賴項
|
||||
# (不 build wheel,不需要 src/),src/ 在之後才 COPY
|
||||
# 注意:--no-sources 不被 uv 支援,改用建立 stub src 讓 hatchling 可以解析
|
||||
RUN mkdir -p src/awoooi_api && \
|
||||
touch src/awoooi_api/__init__.py && \
|
||||
uv pip install --system --no-cache /packages/lewooogo-data && \
|
||||
uv pip install --system --no-cache /packages/lewooogo-brain && \
|
||||
uv pip install --system --no-cache .
|
||||
|
||||
# deps 安裝完後才複製真正的 src(使 deps layer 可 cache)
|
||||
COPY apps/api/src/ ./src/
|
||||
|
||||
# Production stage
|
||||
FROM python:3.11-slim
|
||||
|
||||
@@ -52,9 +61,10 @@ USER appuser
|
||||
# Expose port
|
||||
EXPOSE 8000
|
||||
|
||||
# Health check (使用正確的 API 路徑)
|
||||
# 首席架構師 Review S3 (2026-04-05 Claude Code):
|
||||
# httpx 可能只在 dev deps,生產 image 不保證有。改用 curl(python:3.11-slim 內建)
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD python -c "import httpx; httpx.get('http://localhost:8000/api/v1/health', timeout=5)" || exit 1
|
||||
CMD curl -sf http://localhost:8000/api/v1/health || exit 1
|
||||
|
||||
# Run application
|
||||
CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
# AWOOOI Web - Production Dockerfile
|
||||
# syntax=docker/dockerfile:1
|
||||
# 首席架構師 Review C1 (2026-04-05 Claude Code): BuildKit inline cache 需要 ARG 宣告
|
||||
ARG BUILDKIT_INLINE_CACHE=0
|
||||
|
||||
FROM node:20-alpine AS base
|
||||
|
||||
|
||||
Reference in New Issue
Block a user