fix(api): serialize startup bootstrap ddl
All checks were successful
CD Pipeline / tests (push) Successful in 5m29s
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / build-and-deploy (push) Successful in 4m9s
CD Pipeline / post-deploy-checks (push) Successful in 1m57s

This commit is contained in:
Your Name
2026-05-24 17:10:26 +08:00
parent 5b8f14e32e
commit 9b01f1fa46
4 changed files with 155 additions and 8 deletions

View File

@@ -157,6 +157,9 @@ async def get_db_context(project_id: str | None = None) -> AsyncGenerator[AsyncS
# Initialization
# =============================================================================
_DB_BOOTSTRAP_LOCK_NAME = "awoooi:init_db:ddl"
async def init_db() -> None:
"""
Initialize database tables
@@ -165,6 +168,28 @@ async def init_db() -> None:
"""
engine = get_engine()
async with engine.connect() as lock_conn:
# 2026-05-24 ogt + Codex: 兩個 API replica 同時啟動時PostgreSQL 會在
# ALTER TABLE ... IF NOT EXISTS 上互相等待並 deadlock。整段 bootstrap
# DDL 必須序列化,避免 rollout 因一個 pod CrashLoop 變成 1/2 ready。
await lock_conn.execute(
text("SELECT pg_advisory_lock(hashtext(:lock_name))"),
{"lock_name": _DB_BOOTSTRAP_LOCK_NAME},
)
try:
await _run_init_db_ddl(engine)
finally:
await lock_conn.execute(
text("SELECT pg_advisory_unlock(hashtext(:lock_name))"),
{"lock_name": _DB_BOOTSTRAP_LOCK_NAME},
)
async def _run_init_db_ddl(engine: AsyncEngine) -> None:
"""
Run idempotent DB bootstrap DDL while caller holds the bootstrap advisory lock.
"""
# 2026-04-15 ogt: 多 replica 並行啟動競爭修復
# 問題:單一大 transaction 裡兩個 pod 同時建 table → 其中一個 CREATE INDEX 失敗
# PostgreSQL 中 transaction 內任何錯誤導致整個 transaction ROLLBACK