fix(ci): stop exposing deploy ssh key env
Some checks failed
CD Pipeline / workflow-shape (push) Successful in 0s
CD Pipeline / cancel-stale-cd (push) Has been skipped
CD Pipeline / tests (push) Successful in 38s
CD Pipeline / build-and-deploy (push) Failing after 13m4s
CD Pipeline / post-deploy-checks (push) Has been skipped

This commit is contained in:
Your Name
2026-07-01 15:41:54 +08:00
parent 55b60f9e99
commit 06819ea96c
5 changed files with 58 additions and 23 deletions

View File

@@ -112,7 +112,6 @@ jobs:
# 注入 Dev K8s Secrets
- name: Inject Dev K8s Secrets
env:
DEPLOY_SSH_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
SRE_GROUP_CHAT_ID: ${{ secrets.SRE_GROUP_CHAT_ID }}
@@ -129,10 +128,17 @@ jobs:
print(base64.b64encode(data).decode(), end="")
PY
}
write_deploy_key() {
prepare_deploy_key() {
mkdir -p ~/.ssh
umask 077
printf '%s\n' "${DEPLOY_SSH_KEY}" > ~/.ssh/deploy_key
local source_key="${AWOOOI_DEPLOY_SSH_KEY_PATH:-${HOME}/.ssh/deploy_key}"
if [ ! -r "${source_key}" ]; then
echo "❌ deploy ssh key file missing: ${source_key}" >&2
exit 1
fi
if [ "${source_key}" != "${HOME}/.ssh/deploy_key" ]; then
cp "${source_key}" "${HOME}/.ssh/deploy_key"
fi
chmod 600 ~/.ssh/deploy_key
}
TG_BOT_TOKEN_B64="$(secret_b64_env TELEGRAM_BOT_TOKEN)"
@@ -141,7 +147,7 @@ jobs:
GEMINI_API_KEY_B64="$(secret_b64_env GEMINI_API_KEY)"
mkdir -p ~/.ssh
write_deploy_key
prepare_deploy_key
# Keep deploy-time host keys separate from the runner user's global
# known_hosts, which is also used by reboot/cold-start checks.
DEPLOY_KNOWN_HOSTS="${HOME}/.ssh/deploy_known_hosts"

View File

@@ -1331,7 +1331,6 @@ jobs:
AWOOOP_OPERATOR_API_KEY: ${{ secrets.AWOOOP_OPERATOR_API_KEY }}
CLAUDE_API_KEY: ${{ secrets.CLAUDE_API_KEY }}
DATABASE_URL: ${{ secrets.DATABASE_URL }}
DEPLOY_SSH_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
JWT_ALGORITHM: ${{ secrets.JWT_ALGORITHM }}
JWT_SECRET: ${{ secrets.JWT_SECRET }}
@@ -1378,10 +1377,17 @@ jobs:
printf '%s' "${secret_value}" | base64 | tr -d '\n'
fi
}
write_deploy_key() {
prepare_deploy_key() {
mkdir -p "${HOME}/.ssh"
umask 077
printf '%s\n' "${DEPLOY_SSH_KEY}" > "${HOME}/.ssh/deploy_key"
local source_key="${AWOOOI_DEPLOY_SSH_KEY_PATH:-${HOME}/.ssh/deploy_key}"
if [ ! -r "${source_key}" ]; then
echo "❌ deploy ssh key file missing: ${source_key}" >&2
exit 1
fi
if [ "${source_key}" != "${HOME}/.ssh/deploy_key" ]; then
cp "${source_key}" "${HOME}/.ssh/deploy_key"
fi
chmod 600 "${HOME}/.ssh/deploy_key"
}
@@ -1411,7 +1417,7 @@ jobs:
SRE_GROUP_CHAT_ID_B64="$(secret_b64_env SRE_GROUP_CHAT_ID)"
# S1/S2: 統一命名 deploy_key,改用 ssh-keyscan 與強制 host key 驗證。
write_deploy_key
prepare_deploy_key
# 2026-05-13 Codex: keyscan must include ED25519 explicitly. Some
# OpenSSH builds otherwise record only RSA/ECDSA, then strict deploy
# SSH fails with "No ED25519 host key is known" after image push.
@@ -1656,17 +1662,23 @@ jobs:
- name: Deploy to K8s (ArgoCD GitOps)
env:
CD_PUSH_TOKEN: ${{ secrets.CD_PUSH_TOKEN }}
DEPLOY_SSH_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
run: |
write_deploy_key() {
prepare_deploy_key() {
mkdir -p "${HOME}/.ssh"
umask 077
printf '%s\n' "${DEPLOY_SSH_KEY}" > "${HOME}/.ssh/deploy_key"
local source_key="${AWOOOI_DEPLOY_SSH_KEY_PATH:-${HOME}/.ssh/deploy_key}"
if [ ! -r "${source_key}" ]; then
echo "❌ deploy ssh key file missing: ${source_key}" >&2
exit 1
fi
if [ "${source_key}" != "${HOME}/.ssh/deploy_key" ]; then
cp "${source_key}" "${HOME}/.ssh/deploy_key"
fi
chmod 600 "${HOME}/.ssh/deploy_key"
}
mkdir -p ~/.ssh
write_deploy_key
prepare_deploy_key
# 2026-05-13 Codex: mirror Inject K8s Secrets host-key handling so the
# deploy job never reaches SSH with a known_hosts file missing ED25519.
# 2026-06-13 Codex: use the deploy-only known_hosts file so this
@@ -2195,13 +2207,18 @@ jobs:
# evidence and notification signal, but no longer blocks CD completion.
- name: Alert Chain Smoke Test
id: alert_chain_smoke
env:
DEPLOY_SSH_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
run: |
write_deploy_key() {
prepare_deploy_key() {
mkdir -p "${HOME}/.ssh"
umask 077
printf '%s\n' "${DEPLOY_SSH_KEY}" > "${HOME}/.ssh/deploy_key"
local source_key="${AWOOOI_DEPLOY_SSH_KEY_PATH:-${HOME}/.ssh/deploy_key}"
if [ ! -r "${source_key}" ]; then
echo "❌ deploy ssh key file missing: ${source_key}" >&2
exit 1
fi
if [ "${source_key}" != "${HOME}/.ssh/deploy_key" ]; then
cp "${source_key}" "${HOME}/.ssh/deploy_key"
fi
chmod 600 "${HOME}/.ssh/deploy_key"
}
collect_observability_statuses() {
@@ -2230,7 +2247,7 @@ jobs:
OTEL_COLLECTOR_STATUSES=""
EVENT_EXPORTER_STATUSES=""
write_deploy_key
prepare_deploy_key
DEPLOY_KNOWN_HOSTS="${HOME}/.ssh/deploy_known_hosts"
if ssh-keyscan -T 5 -t ed25519,rsa,ecdsa "${K8S_SSH_HOST}" > "${DEPLOY_KNOWN_HOSTS}" 2>/dev/null && test -s "${DEPLOY_KNOWN_HOSTS}"; then
SSH_OPTS="-i ${HOME}/.ssh/deploy_key -o BatchMode=yes -o StrictHostKeyChecking=yes -o UserKnownHostsFile=${DEPLOY_KNOWN_HOSTS} -o ConnectTimeout=10"

View File

@@ -31,12 +31,15 @@ jobs:
python3 -c "import yaml; yaml.safe_load(open('ops/monitoring/slo-rules.yml')); print('SLO YAML OK')"
- name: Setup SSH key
env:
DEPLOY_SSH_KEY: ${{ secrets.DEPLOY_SSH_KEY }}
run: |
mkdir -p ~/.ssh
umask 077
printf '%s\n' "${DEPLOY_SSH_KEY}" > ~/.ssh/id_ed25519
source_key="${AWOOOI_DEPLOY_SSH_KEY_PATH:-${HOME}/.ssh/deploy_key}"
if [ ! -r "${source_key}" ]; then
echo "deploy ssh key file missing: ${source_key}" >&2
exit 1
fi
cp "${source_key}" ~/.ssh/id_ed25519
chmod 600 ~/.ssh/id_ed25519
ssh-keyscan 192.168.0.110 >> ~/.ssh/known_hosts

View File

@@ -7,10 +7,11 @@
- Gitea API / internal API 均讀回 `{"version":"1.25.5"}`;9 個 expected private repos 均可透過 Gitea SSH 讀回 heads:`awoooi`、`ewoooc`、`2026FIFAWorldCup`、`agent-bounty-protocol`、`AwoooGo`、`stockplatform-v2`、`vibework`、`momo-pro-system`、`tsenyang-website`。
- 188 backup exporter 讀回 `awoooi_gitea_bundle_expected_repo_missing_count=0`、`failed_repo_count=0`、`checksum_missing_count=0`、`all_expected_ok=1`;Gitea private bundle backup 沒有再只靠 public repo search 判斷。
- 推上 Gitea main 後 CD `#4256` 在 API redaction 單測失敗;已補 `agent-autonomous-runtime-control` public value redaction,避免 `secret_value` 類 public-forbidden term 出現在對外 runtime-control payload。
- Gitea job log readback 發現 multi-line deploy SSH key 不應透過 step `env` 傳遞;已移除 `cd.yaml`、`cd-dev.yaml`、`deploy-alerts.yaml` 的 `DEPLOY_SSH_KEY` raw env,改用 runner 上既有 deploy key 檔案路徑,並升級 `check-gitea-step-env-secrets.js`,對 `DEPLOY_SSH_KEY` step env fail-closed。
**仍維持 / 未完成**
- `registry.wooo.work/v2/`、`harbor.wooo.work/api/v2.0/health` 仍回 502;110 `5000/5001` 仍 closed;這是 Harbor/registry cold-start / auto-recovery 缺口,不能宣稱全 110 服務完成。
- 110 SSH 在 post-boot 高負載窗口仍會 timeout;不得因此重開 legacy / generic runner;runner 仍放最後。
- `registry.wooo.work/v2/`、`harbor.wooo.work/api/v2.0/health` 曾在 post-boot 期間持續 502;15:30 後已讀回 Harbor health `200`,但仍需把 Harbor cold-start / 110 SSH timeout 納入後續 SLO scorecard;不得只用 Gitea 200 宣稱全 110 服務完成。
- 110 SSH 在 post-boot 高負載窗口 timeout;不得因此重開 legacy / generic runner;runner 仍放最後。
- 未讀 secret / token / `.env` / raw sessions / SQLite / auth;未使用 GitHub / `gh` / GitHub API;未刪 repo、未 restore、未 prune、未 DB write。
**下一步**

View File

@@ -15,6 +15,7 @@ const workflowDir = path.join(root, ".gitea", "workflows");
const violations = [];
const routeViolations = [];
const secretExprPattern = /\$\{\{\s*secrets\./;
const forbiddenStepEnvSecrets = new Set(["DEPLOY_SSH_KEY"]);
for (const fileName of fs.readdirSync(workflowDir).sort()) {
if (!fileName.endsWith(".yml") && !fileName.endsWith(".yaml")) {
@@ -70,11 +71,18 @@ for (const fileName of fs.readdirSync(workflowDir).sort()) {
if (block && block.section !== "env" && secretExprPattern.test(line)) {
violations.push(`${filePath}:${index + 1}:${block.section}`);
}
if (block && block.section === "env") {
const envKey = trimmed.split(":", 1)[0];
if (forbiddenStepEnvSecrets.has(envKey)) {
violations.push(`${filePath}:${index + 1}:env:${envKey}`);
}
}
});
}
if (violations.length > 0) {
console.error("Gitea workflow exposes secrets through run/with text:");
console.error("Gitea workflow exposes secrets through unsafe run/with/env transport:");
for (const violation of violations) {
console.error(` - ${violation}`);
}