問題根因:
1. create_incident_for_approval 只存 Redis,不存 PostgreSQL → TTL 7天後消失,Playbook 萃取永遠找不到 Incident
2. ApprovalRecord 無 incident_id 欄位 → _trigger_playbook_extraction 靠 regex 掃中文文字找 INC-,永遠失敗
3. operation_parser namespace fallback 是 "default" → 所有 deployment 在 awoooi-prod,203 次執行全失敗

修復:
- Incident 同時寫入 Redis + PostgreSQL (save_to_episodic_memory)
- ApprovalRecord 加入 incident_id 欄位 (model + ORM + migration)
- alertmanager_webhook 建立 Approval 後回寫 incident_id
- _trigger_playbook_extraction 直接用 approval.incident_id
- operation_parser DEFAULT_NAMESPACE = "awoooi-prod"

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
31 lines
1.1 KiB
SQL
31 lines
1.1 KiB
SQL
-- =============================================================================
-- Phase 26: Complete the end-to-end Incident → KM chain
-- 2026-04-06 ogt: Fix the triple deadlock — alerts must be written to the DB and produce KM entries
-- =============================================================================

-- 1. Add the incident_id column to approval_records.
--    IF NOT EXISTS lets this statement be re-run without error once the
--    column is already present.
ALTER TABLE approval_records
    ADD COLUMN IF NOT EXISTS incident_id TEXT;

-- Partial index: rows whose incident_id is NULL are left out of the index.
CREATE INDEX IF NOT EXISTS idx_approval_records_incident_id
    ON approval_records (incident_id)
    WHERE incident_id IS NOT NULL;

-- 2. Make sure the incidents table has a source column
--    (alertmanager / manual, etc.).
--    NOTE(review): when the column is newly added, existing rows also receive
--    the 'alertmanager' default — confirm that is the intended backfill.
ALTER TABLE incidents
    ADD COLUMN IF NOT EXISTS source TEXT DEFAULT 'alertmanager';

-- 3. Make sure knowledge_entries has a related_approval_id column.
--    IF NOT EXISTS lets this statement be re-run without error once the
--    column is already present.
ALTER TABLE knowledge_entries
    ADD COLUMN IF NOT EXISTS related_approval_id TEXT;

-- Partial index: rows whose related_approval_id is NULL are left out of the index.
CREATE INDEX IF NOT EXISTS idx_knowledge_entries_related_approval
    ON knowledge_entries (related_approval_id)
    WHERE related_approval_id IS NOT NULL;

-- Completion check.
-- Verifies that the three columns this migration is responsible for exist
-- before reporting success. A runner that keeps going after a failed statement
-- (e.g. psql with ON_ERROR_STOP off) would otherwise still print "completed".
DO $$
DECLARE
    -- Comma-separated "table.column" list of expected columns that were not found.
    missing_columns TEXT;
BEGIN
    SELECT string_agg(
               expected.table_name || '.' || expected.column_name,
               ', '
               ORDER BY expected.table_name, expected.column_name
           )
    INTO missing_columns
    FROM (
        VALUES
            ('approval_records',  'incident_id'),
            ('incidents',         'source'),
            ('knowledge_entries', 'related_approval_id')
    ) AS expected (table_name, column_name)
    WHERE NOT EXISTS (
        SELECT 1
        FROM information_schema.columns AS c
        -- Only look in schemas on the current search path, i.e. the schemas the
        -- unqualified table names used by this migration can resolve to.
        WHERE CAST(c.table_schema AS TEXT) = ANY (CAST(current_schemas(true) AS TEXT[]))
          AND CAST(c.table_name AS TEXT) = expected.table_name
          AND CAST(c.column_name AS TEXT) = expected.column_name
    );

    -- string_agg over zero rows yields NULL, so NULL means nothing is missing.
    IF missing_columns IS NOT NULL THEN
        RAISE EXCEPTION 'Phase 26 migration incomplete, missing columns: %', missing_columns;
    END IF;

    RAISE NOTICE 'Phase 26 migration completed: incident_id + source + related_approval_id';
END $$;