Files
awoooi/scripts/backfill_alertname.py
Your Name 1a2b04f5cf
Some checks failed
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m14s
CD Pipeline / build-and-deploy (push) Failing after 3m33s
CD Pipeline / post-deploy-checks (push) Has been skipped
fix(awooop): persist signal metadata and auto-repair prestate
2026-05-18 10:59:54 +08:00

112 lines
4.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
ADR-073 回填腳本:修補存量 incidents 三個 NULL 欄位
- alertname
- notification_type
- alert_category
根本原因save_to_episodic_memory() 建立 IncidentRecord 時漏掉這三個欄位。
2026-04-12 ogt (ADR-073 修補 Fix #1)
執行方式:
kubectl exec -n awoooi-prod <api-pod> -- python3 /app/scripts/backfill_alertname.py
"""
import asyncio
import sys
sys.path.insert(0, "/app")
from sqlalchemy import text
from src.db.base import get_db_context
from src.services.incident_service import classify_alert_early
async def main() -> None:
async with get_db_context() as db:
# --- Step 1: 統計目前 NULL 狀況 ---
null_r = await db.execute(text("""
SELECT
COUNT(*) FILTER (WHERE alertname IS NULL) AS alertname_null,
COUNT(*) FILTER (WHERE notification_type IS NULL) AS notification_type_null,
COUNT(*) FILTER (WHERE alert_category IS NULL) AS alert_category_null,
COUNT(*) AS total
FROM incidents
"""))
row = null_r.fetchone()
print(f"總計: {row.total}")
print(f" alertname NULL: {row.alertname_null}")
print(f" notification_type NULL: {row.notification_type_null}")
print(f" alert_category NULL: {row.alert_category_null}")
# --- Step 2: SQL 回填 alertname ---
# signals 是 JSONB arraySignal 模型序列化後的 key 是 alert_name
# labels dict 裡也有 alertname key
result = await db.execute(text("""
UPDATE incidents
SET alertname = COALESCE(
signals->0->'labels'->>'alertname',
signals->0->>'alert_name',
signals->0->>'alertname'
)
WHERE alertname IS NULL
AND signals IS NOT NULL
AND json_array_length(signals) > 0
"""))
await db.commit()
print(f"\n✅ alertname 回填: {result.rowcount}")
# --- Step 3: Python 回填 notification_type + alert_category ---
# 需要 Python 跑分類邏輯,從 DB 讀出 alertname + severity 批次更新
rows_r = await db.execute(text("""
SELECT incident_id, alertname, severity
FROM incidents
WHERE (notification_type IS NULL OR alert_category IS NULL)
AND alertname IS NOT NULL
"""))
rows = rows_r.fetchall()
print(f"\n待分類回填: {len(rows)}")
updated = 0
for row in rows:
alert_category, notification_type = classify_alert_early(
alertname=row.alertname or "",
severity=row.severity or "warning",
labels={},
)
await db.execute(text("""
UPDATE incidents
SET notification_type = :notification_type,
alert_category = :alert_category
WHERE incident_id = :incident_id
"""), {
"notification_type": notification_type,
"alert_category": alert_category,
"incident_id": row.incident_id,
})
updated += 1
await db.commit()
print(f"✅ notification_type + alert_category 回填: {updated}")
# --- Step 4: 最終統計 ---
final_r = await db.execute(text("""
SELECT
COUNT(*) FILTER (WHERE alertname IS NULL) AS alertname_null,
COUNT(*) FILTER (WHERE notification_type IS NULL) AS notification_type_null,
COUNT(*) FILTER (WHERE alert_category IS NULL) AS alert_category_null
FROM incidents
"""))
f = final_r.fetchone()
print("\n最終 NULL 統計:")
print(f" alertname NULL: {f.alertname_null}")
print(f" notification_type NULL: {f.notification_type_null}")
print(f" alert_category NULL: {f.alert_category_null}")
if f.alertname_null == 0 and f.notification_type_null == 0 and f.alert_category_null == 0:
print("\n✅ 三個欄位全部回填完成")
else:
print("\n⚠️ 部分記錄仍為 NULL (可能 signals 為空或格式異常)")
if __name__ == "__main__":
asyncio.run(main())