diff --git a/apps/api/src/db/base.py b/apps/api/src/db/base.py index ede5270f..0990bd93 100644 --- a/apps/api/src/db/base.py +++ b/apps/api/src/db/base.py @@ -143,21 +143,33 @@ async def init_db() -> None: Call this at application startup. """ engine = get_engine() + + # 2026-04-15 ogt: 多 replica 並行啟動競爭修復 + # 問題:單一大 transaction 裡兩個 pod 同時建 table → 其中一個 CREATE INDEX 失敗 + # PostgreSQL 中 transaction 內任何錯誤導致整個 transaction ROLLBACK + # → table + index 全消失 → 下次重啟同樣問題 → 無限 CrashLoop + # 修法:每個 table 獨立 transaction;先 DROP INDEX IF EXISTS 清殘留孤兒 index; + # 捕捉 "already exists" 讓並行 pod 優雅跳過 + async with engine.connect() as probe_conn: + existing = set(await probe_conn.run_sync( + lambda c: __import__('sqlalchemy', fromlist=['inspect']).inspect(c).get_table_names() + )) + + for table in Base.metadata.sorted_tables: + if table.name not in existing: + try: + async with engine.begin() as conn: + # 先清殘留孤兒 index(前次 CrashLoop 留下的部分狀態) + for index in table.indexes: + await conn.execute(text(f'DROP INDEX IF EXISTS "{index.name}"')) + await conn.run_sync(table.create) + except Exception as exc: + if "already exists" in str(exc).lower(): + pass # 並行 pod 已建好,忽略 + else: + raise + async with engine.begin() as conn: - # SQLAlchemy 2.0 問題:create_all(checkfirst=True) 跳過 CREATE TABLE, - # 但仍對 __table_args__ Index 物件發出獨立 CREATE INDEX → CrashLoopBackOff - # 修法:先 inspect 取得現有 tables,只對不存在的 table 呼叫 table.create() - # 這樣 index 只隨新 table 一起建立,永遠不會 duplicate - # 2026-04-15 Claude Sonnet 4.6(亞太)Phase 3 修復 - def _create_missing_tables(sync_conn): - from sqlalchemy import inspect as sa_inspect - existing = set(sa_inspect(sync_conn).get_table_names()) - for table in Base.metadata.sorted_tables: - if table.name not in existing: - table.create(sync_conn) - - await conn.run_sync(_create_missing_tables) - # 2026-04-02 Claude Code: 確保 risklevel enum 包含 'high' 值 # Phase 23 新增,避免舊 DB 缺少此值導致 InvalidTextRepresentation await conn.execute(