From 2adbf1e6cd874a0b84df0f071ee3e1327ac8dccb Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 6 May 2026 15:04:38 +0800 Subject: [PATCH] fix(cd): timeout 188 ops sync --- .gitea/workflows/cd.yaml | 17 ++++++++++++++--- docs/LOGBOOK.md | 12 ++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/.gitea/workflows/cd.yaml b/.gitea/workflows/cd.yaml index 564b9299..58e55ecd 100644 --- a/.gitea/workflows/cd.yaml +++ b/.gitea/workflows/cd.yaml @@ -811,23 +811,34 @@ jobs: echo "$SSH_KEY_188" > "${HOME}/.ssh/deploy_key_188" chmod 600 "${HOME}/.ssh/deploy_key_188" ssh-keyscan 192.168.0.188 >> ~/.ssh/known_hosts 2>/dev/null + SSH_188_OPTS=( + -i "${HOME}/.ssh/deploy_key_188" + -o BatchMode=yes + -o ConnectTimeout=10 + -o ServerAliveInterval=10 + -o ServerAliveCountMax=3 + ) + + timeout 30s ssh "${SSH_188_OPTS[@]}" ollama@192.168.0.188 \ + "mkdir -p ~/awoooi-ops" \ + || echo "⚠️ 188 ops 目錄確認失敗" # 同步 docker-health-monitor.sh - scp -i "${HOME}/.ssh/deploy_key_188" \ + timeout 60s scp "${SSH_188_OPTS[@]}" \ scripts/ops/docker-health-monitor.sh \ ollama@192.168.0.188:~/awoooi-ops/docker-health-monitor.sh \ && echo "✅ docker-health-monitor.sh 已同步" \ || echo "⚠️ docker-health-monitor.sh 同步失敗" # 同步 pg-backup.sh - scp -i "${HOME}/.ssh/deploy_key_188" \ + timeout 60s scp "${SSH_188_OPTS[@]}" \ scripts/ops/pg-backup.sh \ ollama@192.168.0.188:~/awoooi-ops/pg-backup.sh \ && echo "✅ pg-backup.sh 已同步" \ || echo "⚠️ pg-backup.sh 同步失敗" # 確保執行權限 - ssh -i "${HOME}/.ssh/deploy_key_188" ollama@192.168.0.188 \ + timeout 30s ssh "${SSH_188_OPTS[@]}" ollama@192.168.0.188 \ "chmod +x ~/awoooi-ops/docker-health-monitor.sh ~/awoooi-ops/pg-backup.sh && echo '✅ 權限設定完成'" \ || echo "⚠️ 權限設定失敗" diff --git a/docs/LOGBOOK.md b/docs/LOGBOOK.md index 053dd386..b6a2060b 100644 --- a/docs/LOGBOOK.md +++ b/docs/LOGBOOK.md @@ -1,3 +1,15 @@ +## 2026-05-06 | Gitea CD 188 ops sync 加上 timeout 防卡死 + +**背景**:`d441f706` 的主 CD 已完成 tests 與 deploy marker,但 runner 卡在 `Sync Ops Scripts to 188` 的裸 `scp`;188 剛經歷重開後,沒有 timeout 的 sftp 子程序會阻塞 `post-deploy-checks`。 + +**本次修補**: +- `.gitea/workflows/cd.yaml` 的 188 ops sync 步驟新增 `BatchMode=yes`、`ConnectTimeout=10`、`ServerAliveInterval=10`、`ServerAliveCountMax=3`。 +- `scp` 包 `timeout 60s`,`ssh mkdir/chmod` 包 `timeout 30s`;同步失敗仍只警告,不阻塞主部署。 + +**驗證**: +- `python` YAML parse `.gitea/workflows/cd.yaml` OK。 +- 既有 live 卡住的 runner 子程序需清掉,讓下一輪 CD 用新 workflow 收斂。 + ## 2026-05-06 | 188 legacy Ollama 退場 Gate 與 dev 路由修正 **背景**:Telegram 告警已不再應出現 `Router:OLLAMA_188`;統帥要求 188 Ollama 移除,正式順序維持 GCP-A → GCP-B → 111 → Gemini 備援。