fix(api): serialize startup bootstrap ddl
This commit is contained in:
@@ -157,6 +157,9 @@ async def get_db_context(project_id: str | None = None) -> AsyncGenerator[AsyncS
|
||||
# Initialization
|
||||
# =============================================================================
|
||||
|
||||
_DB_BOOTSTRAP_LOCK_NAME = "awoooi:init_db:ddl"
|
||||
|
||||
|
||||
async def init_db() -> None:
|
||||
"""
|
||||
Initialize database tables
|
||||
@@ -165,6 +168,28 @@ async def init_db() -> None:
|
||||
"""
|
||||
engine = get_engine()
|
||||
|
||||
async with engine.connect() as lock_conn:
|
||||
# 2026-05-24 ogt + Codex: 兩個 API replica 同時啟動時,PostgreSQL 會在
|
||||
# ALTER TABLE ... IF NOT EXISTS 上互相等待並 deadlock。整段 bootstrap
|
||||
# DDL 必須序列化,避免 rollout 因一個 pod CrashLoop 變成 1/2 ready。
|
||||
await lock_conn.execute(
|
||||
text("SELECT pg_advisory_lock(hashtext(:lock_name))"),
|
||||
{"lock_name": _DB_BOOTSTRAP_LOCK_NAME},
|
||||
)
|
||||
try:
|
||||
await _run_init_db_ddl(engine)
|
||||
finally:
|
||||
await lock_conn.execute(
|
||||
text("SELECT pg_advisory_unlock(hashtext(:lock_name))"),
|
||||
{"lock_name": _DB_BOOTSTRAP_LOCK_NAME},
|
||||
)
|
||||
|
||||
|
||||
async def _run_init_db_ddl(engine: AsyncEngine) -> None:
|
||||
"""
|
||||
Run idempotent DB bootstrap DDL while caller holds the bootstrap advisory lock.
|
||||
"""
|
||||
|
||||
# 2026-04-15 ogt: 多 replica 並行啟動競爭修復
|
||||
# 問題:單一大 transaction 裡兩個 pod 同時建 table → 其中一個 CREATE INDEX 失敗
|
||||
# PostgreSQL 中 transaction 內任何錯誤導致整個 transaction ROLLBACK
|
||||
|
||||
Reference in New Issue
Block a user