fix(k8s+ops): 修復告警鏈路 + Gitea runner 自動啟動
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 21s

## 修復項目

1. NetworkPolicy allow-nginx-ingress 加入 192.168.0.110
   - Alertmanager (在 110) 需要從 110 直接 POST webhook 到 API pod
   - 修復前: 110 被 NetworkPolicy default-deny 阻擋,webhook timeout
   - 修復後: 110 加入 ingress 白名單,告警鏈路恢復

2. awoooi-startup-110.sh 加入 Gitea Act Runner
   - Step 6: 啟動 /home/wooo/act-runner (gitea-runner container)
   - 修復前: 重開機後 runner 離線,CD pipeline 全面失效
   - 修復後: runner 自動重啟,若配置過期自動清除重新註冊

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-04-05 01:42:52 +08:00
parent be3aa6069b
commit ad4abefcd9
2 changed files with 52 additions and 6 deletions

View File

@@ -43,11 +43,14 @@ spec:
- protocol: TCP
port: 8000
# 允許 K3s NodePort 流量 (SNAT 後源 IP 變為節點 IP)
# 2026-04-05 Claude Code: 加入 110 — Alertmanager 需要從 110 送 webhook 到 API
- from:
- ipBlock:
cidr: 192.168.0.120/32 # K3s Master (mon)
- ipBlock:
cidr: 192.168.0.121/32 # K3s Worker (mon1)
- ipBlock:
cidr: 192.168.0.110/32 # DevOps 主機 (Alertmanager 來源)
ports:
- protocol: TCP
port: 3000

View File

@@ -100,7 +100,7 @@ fi
# ──────────────────────────────────────────────
# STEP 4: 其他服務 (Gitea, Langfuse, Monitoring)
# ──────────────────────────────────────────────
log "[4/5] 啟動其他服務..."
log "[4/6] 啟動其他服務Gitea, Langfuse, Monitoring..."
GITEA_DIR="/home/wooo/gitea"
if [ -f "$GITEA_DIR/docker-compose.yml" ]; then
@@ -120,13 +120,22 @@ MONITORING_DIR="/home/wooo/monitoring"
# Start the monitoring stack when its compose file is present, then confirm
# that Alertmanager answers on its health endpoint (localhost:9093).
# Reads MONITORING_DIR and calls log, both provided by the surrounding script.
if [ -f "$MONITORING_DIR/docker-compose.yml" ]; then
  # Only run compose once we are really inside the project directory; a
  # failed cd would otherwise make compose act on the current directory.
  if cd "$MONITORING_DIR"; then
    docker compose up -d 2>&1 | tail -3
    log "✅ Monitoring 啟動指令已發送"
    log "✅ Monitoring (Prometheus/Grafana/Alertmanager) 啟動指令已發送"
    sleep 10
    # Probe Alertmanager; allow a single 20-second grace period before
    # reporting it as not ready.
    if curl -sf --max-time 5 http://localhost:9093/-/healthy >/dev/null 2>&1; then
      log "✅ Alertmanager healthy"
    else
      log "⚠️ Alertmanager 尚未就緒,等待 20 秒..."
      sleep 20
      # Explicit if/else instead of `probe && ok || fail`: in that form the
      # failure branch also runs whenever the success command itself
      # returns non-zero.
      if curl -sf --max-time 5 http://localhost:9093/-/healthy >/dev/null 2>&1; then
        log "✅ Alertmanager 就緒"
      else
        log "❌ Alertmanager 未就緒,需手動檢查"
      fi
    fi
  else
    log "❌ 無法進入 $MONITORING_DIR,略過 Monitoring 啟動"
  fi
fi
# ──────────────────────────────────────────────
# STEP 5: SignOz
# ──────────────────────────────────────────────
log "[5/5] 啟動 SignOz..."
log "[5/6] 啟動 SignOz..."
SIGNOZ_DIR="/home/wooo/signoz/deploy/docker"
if [ -f "$SIGNOZ_DIR/docker-compose.yaml" ]; then
cd "$SIGNOZ_DIR"
@@ -134,12 +143,46 @@ if [ -f "$SIGNOZ_DIR/docker-compose.yaml" ]; then
log "✅ SignOz 啟動指令已發送"
fi
# ──────────────────────────────────────────────
# STEP 6: Gitea Act Runner (CI/CD 核心)
# 2026-04-05 Claude Code: 加入 — 解決重開機後 Gitea runner 離線、CD 失效
# 重要:必須在 Gitea server 啟動後才能啟動 runner
# ──────────────────────────────────────────────
log "[6/6] 啟動 Gitea Act Runner..."
RUNNER_DIR="/home/wooo/act-runner"
# Gitea address the runner registration is expected to point at.
RUNNER_EXPECTED_URL="http://192.168.0.110:3001"

#######################################
# Print the "address" field stored in a runner registration file.
# Arguments: $1 - path to the JSON registration file
# Outputs:   the address on stdout; an empty string when the file cannot
#            be read or parsed, or has no "address" key
# Returns:   always 0
#######################################
runner_registered_address() {
  # The path is passed as an argument rather than spliced into the Python
  # source, so quotes or backslashes in it cannot break the snippet.
  python3 -c 'import json, sys
with open(sys.argv[1]) as fh:
    print(json.load(fh).get("address", ""))' "$1" 2>/dev/null || true
}

if [ ! -f "$RUNNER_DIR/docker-compose.yml" ]; then
  log "⚠️ 找不到 act-runner compose 檔案: $RUNNER_DIR/docker-compose.yml"
elif ! cd "$RUNNER_DIR"; then
  # Without this guard a failed cd would make compose act on the current
  # directory instead of the runner project.
  log "❌ 無法進入 $RUNNER_DIR,略過 Gitea Act Runner 啟動"
else
  # If the stored registration points at a different address than expected,
  # remove it so the runner registers again on start-up.
  RUNNER_FILE="$RUNNER_DIR/data/.runner"
  if [ -f "$RUNNER_FILE" ]; then
    if command -v python3 >/dev/null 2>&1; then
      OLD_URL=$(runner_registered_address "$RUNNER_FILE")
      if [ "$OLD_URL" != "$RUNNER_EXPECTED_URL" ]; then
        log "⚠️ runner 配置過期 ($OLD_URL),清除重新註冊..."
        # Best effort: a failed removal must not abort the startup sequence.
        rm -f -- "$RUNNER_FILE" || true
      fi
    else
      # The file cannot be inspected without python3; keep it rather than
      # throwing away a registration that may be perfectly valid.
      log "⚠️ 找不到 python3,無法檢查 runner 配置,保留現有 .runner"
    fi
  fi
  docker compose up -d 2>&1 | tail -3
  sleep 15
  # Check the most recent log lines for a connection marker. stderr is merged
  # into the pipe because docker logs replays the container's stderr stream
  # on its own stderr; discarding it could hide the lines being searched for.
  if docker logs --tail 5 gitea-runner 2>&1 | grep -Eq 'SUCCESS|Connected|Listening'; then
    log "✅ Gitea Act Runner 已連線"
  else
    log "⚠️ Gitea Act Runner 可能尚未連線,查看: docker logs gitea-runner"
  fi
fi
# ──────────────────────────────────────────────
# 完成
# ──────────────────────────────────────────────
# Final banner: announce completion, then log each service entry point in
# turn before ending the script successfully.
log "=== 192.168.0.110 啟動序列完成 ==="
for summary_entry in \
  "Harbor: http://192.168.0.110:5000" \
  "Gitea: http://192.168.0.110:3001" \
  "Grafana: http://192.168.0.110:3002" \
  "Harbor: http://192.168.0.110:5000" \
  "Gitea: http://192.168.0.110:3001" \
  "Grafana: http://192.168.0.110:3002" \
  "Alertmanager: http://192.168.0.110:9093" \
  "Gitea Runner: docker logs gitea-runner"
do
  log "$summary_entry"
done
exit 0