fix(k8s+ops): 修復告警鏈路 + Gitea runner 自動啟動
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 21s
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 21s
## 修復項目

1. NetworkPolicy allow-nginx-ingress 加入 192.168.0.110
   - Alertmanager (在 110) 需要從 110 直接 POST webhook 到 API pod
   - 修復前: 110 被 NetworkPolicy default-deny 阻擋,webhook timeout
   - 修復後: 110 加入 ingress 白名單,告警鏈路恢復

2. awoooi-startup-110.sh 加入 Gitea Act Runner
   - Step 6: 啟動 /home/wooo/act-runner (gitea-runner container)
   - 修復前: 重開機後 runner 離線,CD pipeline 全面失效
   - 修復後: runner 自動重啟,若配置過期自動清除重新註冊

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -43,11 +43,14 @@ spec:
|
||||
- protocol: TCP
|
||||
port: 8000
|
||||
# 允許 K3s NodePort 流量 (SNAT 後源 IP 變為節點 IP)
|
||||
# 2026-04-05 Claude Code: 加入 110 — Alertmanager 需要從 110 送 webhook 到 API
|
||||
- from:
|
||||
- ipBlock:
|
||||
cidr: 192.168.0.120/32 # K3s Master (mon)
|
||||
- ipBlock:
|
||||
cidr: 192.168.0.121/32 # K3s Worker (mon1)
|
||||
- ipBlock:
|
||||
cidr: 192.168.0.110/32 # DevOps 主機 (Alertmanager 來源)
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 3000
|
||||
|
||||
@@ -100,7 +100,7 @@ fi
|
||||
# ──────────────────────────────────────────────
|
||||
# STEP 4: 其他服務(Gitea, Langfuse, Monitoring)
|
||||
# ──────────────────────────────────────────────
|
||||
log "[4/5] 啟動其他服務..."
|
||||
log "[4/6] 啟動其他服務(Gitea, Langfuse, Monitoring)..."
|
||||
|
||||
GITEA_DIR="/home/wooo/gitea"
|
||||
if [ -f "$GITEA_DIR/docker-compose.yml" ]; then
|
||||
@@ -120,13 +120,22 @@ MONITORING_DIR="/home/wooo/monitoring"
|
||||
if [ -f "$MONITORING_DIR/docker-compose.yml" ]; then
|
||||
cd "$MONITORING_DIR"
|
||||
docker compose up -d 2>&1 | tail -3
|
||||
log "✅ Monitoring 啟動指令已發送"
|
||||
log "✅ Monitoring (Prometheus/Grafana/Alertmanager) 啟動指令已發送"
|
||||
sleep 10
|
||||
# 驗證 Alertmanager 是否啟動
|
||||
if curl -sf --max-time 5 http://localhost:9093/-/healthy >/dev/null 2>&1; then
|
||||
log "✅ Alertmanager healthy"
|
||||
else
|
||||
log "⚠️ Alertmanager 尚未就緒,等待 20 秒..."
|
||||
sleep 20
|
||||
curl -sf --max-time 5 http://localhost:9093/-/healthy >/dev/null 2>&1 && log "✅ Alertmanager 就緒" || log "❌ Alertmanager 未就緒,需手動檢查"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# STEP 5: SignOz
|
||||
# ──────────────────────────────────────────────
|
||||
log "[5/5] 啟動 SignOz..."
|
||||
log "[5/6] 啟動 SignOz..."
|
||||
SIGNOZ_DIR="/home/wooo/signoz/deploy/docker"
|
||||
if [ -f "$SIGNOZ_DIR/docker-compose.yaml" ]; then
|
||||
cd "$SIGNOZ_DIR"
|
||||
@@ -134,12 +143,46 @@ if [ -f "$SIGNOZ_DIR/docker-compose.yaml" ]; then
|
||||
log "✅ SignOz 啟動指令已發送"
|
||||
fi
|
||||
|
||||
# ──────────────────────────────────────────────
# STEP 6: Gitea Act Runner (CI/CD core)
# 2026-04-05 Claude Code: added so the runner comes back after a reboot
#   (previously it stayed offline and the CD pipeline stopped working).
# Important: the runner must only be started after the Gitea server is up.
# ──────────────────────────────────────────────
log "[6/6] 啟動 Gitea Act Runner..."
RUNNER_DIR="/home/wooo/act-runner"
# Gitea address a valid runner registration is expected to point at.
RUNNER_GITEA_URL="http://192.168.0.110:3001"
if [ -f "$RUNNER_DIR/docker-compose.yml" ]; then
  # If an old .runner registration points at a different address, remove it
  # so the runner registers again on start.
  RUNNER_FILE="$RUNNER_DIR/data/.runner"
  if [ -f "$RUNNER_FILE" ]; then
    if command -v python3 >/dev/null 2>&1; then
      # The path is passed as argv[1] rather than spliced into the Python
      # source, so quotes or backslashes in it cannot break the program.
      # An unreadable/invalid file yields "" and is treated as stale.
      OLD_URL=$(python3 -c 'import json, sys
with open(sys.argv[1]) as fh:
    print(json.load(fh).get("address", ""))' "$RUNNER_FILE" 2>/dev/null || echo "")
      if [ "$OLD_URL" != "$RUNNER_GITEA_URL" ]; then
        log "⚠️ runner 配置過期 ($OLD_URL),清除重新註冊..."
        rm -f -- "$RUNNER_FILE" || true
      fi
    else
      # Without python3 the registration cannot be inspected; keep it rather
      # than deleting a possibly valid registration.
      log "⚠️ 找不到 python3,略過 runner 配置檢查(保留現有註冊)"
    fi
  fi

  # Only run compose when the directory change succeeded; otherwise compose
  # would act on whatever directory an earlier step left as the cwd.
  if cd "$RUNNER_DIR"; then
    docker compose up -d 2>&1 | tail -3
    sleep 15

    # Verify the runner has connected to Gitea.
    # `docker logs` replays the container's stderr on its own stderr, so both
    # streams are captured; NOTE(review): assumes act_runner logs to stderr.
    RUNNER_LOG=$(docker logs --tail 5 gitea-runner 2>&1 || true)
    case "$RUNNER_LOG" in
      *SUCCESS* | *Connected* | *Listening*)
        log "✅ Gitea Act Runner 已連線"
        ;;
      *)
        log "⚠️ Gitea Act Runner 可能尚未連線,查看: docker logs gitea-runner"
        ;;
    esac
  else
    log "❌ 無法進入目錄 $RUNNER_DIR,略過 Gitea Act Runner 啟動"
  fi
else
  log "⚠️ 找不到 act-runner compose 檔案: $RUNNER_DIR/docker-compose.yml"
fi
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# 完成
|
||||
# ──────────────────────────────────────────────
|
||||
log "=== 192.168.0.110 啟動序列完成 ==="
|
||||
log "Harbor: http://192.168.0.110:5000"
|
||||
log "Gitea: http://192.168.0.110:3001"
|
||||
log "Grafana: http://192.168.0.110:3002"
|
||||
log "Harbor: http://192.168.0.110:5000"
|
||||
log "Gitea: http://192.168.0.110:3001"
|
||||
log "Grafana: http://192.168.0.110:3002"
|
||||
log "Alertmanager: http://192.168.0.110:9093"
|
||||
log "Gitea Runner: docker logs gitea-runner"
|
||||
|
||||
exit 0
|
||||
|
||||
Reference in New Issue
Block a user