Files
awoooi/apps/api/migrations/phase11b_backfill_alert_operation_log.sql
OG T f20121ad41
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 1m29s
feat(audit): Phase 11 告警操作完整溯源 — alert_operation_log + 歷史回填
統帥指令「所有告警訊息通通寫入資料庫,並記錄相關操作」

變更:
- phase11_alert_operation_log.sql: 新表 (Event Sourcing,不可變)
- phase11b_backfill_alert_operation_log.sql: 歷史回填 654 筆
  - 14 筆 ALERT_RECEIVED (incidents)
  - 265 筆 TELEGRAM_SENT (approval_records)
  - 265 筆 USER_ACTION (approval_records)
  - 110 筆 EXECUTION_COMPLETED (audit_logs)
- db/models.py: AlertOperationLog SQLAlchemy model
- repositories/alert_operation_log_repository.py: append/list_by_incident/get_stats
- webhooks.py: _try_auto_repair_background 寫入 AUTO_REPAIR_TRIGGERED + EXECUTION_COMPLETED + TELEGRAM_RESULT_SENT
- webhooks.py: _push_to_telegram_background 寫入 TELEGRAM_SENT
- telegram.py: handle_callback 寫入 USER_ACTION (approve/reject)

已執行 migration: awoooi_prod@192.168.0.188 

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 11:22:03 +08:00

153 lines
5.4 KiB
SQL

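-- Phase 11b: backfill historical data into alert_operation_log
-- Created: 2026-04-08 (Taipei time zone)
-- Author: Claude Code — per the commander's directive "write every past alert message into the database"
--
-- Data sources:
-- incidents (14 rows) → ALERT_RECEIVED events
-- approval_records (265 rows) → TELEGRAM_SENT + USER_ACTION events
-- audit_logs (110 rows) → EXECUTION_COMPLETED events (this script generates no EXECUTION_STARTED rows)
--
-- Note: every INSERT ends with ON CONFLICT DO NOTHING. Because id is a freshly
-- generated UUID on each execution, that clause by itself cannot recognise rows
-- that an earlier run of this script already inserted.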
-- ============================================================
-- Step 1: incidents -> ALERT_RECEIVED
-- ============================================================
-- Emits one ALERT_RECEIVED event per row of incidents. Alert name, labels and
-- annotations are read from the first element of the signals array; missing
-- values fall back to the defaults given in the COALESCE calls.
--
-- Re-run safety: id is a new gen_random_uuid() on every execution, so a
-- conflict on id can never occur and ON CONFLICT DO NOTHING alone would let a
-- second run duplicate every row. The NOT EXISTS guard skips incidents that
-- already have an ALERT_RECEIVED event carrying the backfill marker this
-- statement writes into context.
INSERT INTO alert_operation_log (
    id, incident_id, event_type, actor, action_detail, success, context, created_at
)
SELECT
    gen_random_uuid()::text,
    i.incident_id,
    'ALERT_RECEIVED',
    COALESCE(i.source, 'alertmanager'),
    COALESCE(i.signals->0->>'alert_name', 'unknown'),
    TRUE,
    jsonb_build_object(
        'severity', i.severity::text,
        'status', i.status::text,
        'alert_name', COALESCE(i.signals->0->>'alert_name', 'unknown'),
        'namespace', COALESCE(i.signals->0->'labels'->>'namespace', 'default'),
        'resource', COALESCE(i.signals->0->'labels'->>'resource', ''),
        'message', COALESCE(i.signals->0->'annotations'->>'message', ''),
        'source', COALESCE(i.source, 'alertmanager'),
        'signal_count', json_array_length(i.signals),
        'backfill', TRUE,            -- marker used by the NOT EXISTS guard below
        'backfill_at', NOW()::text
    ),
    i.created_at                     -- keep the incident's original timestamp
FROM incidents AS i
WHERE NOT EXISTS (
    SELECT 1
    FROM alert_operation_log AS existing
    WHERE existing.incident_id = i.incident_id
      AND existing.event_type::text = 'ALERT_RECEIVED'
      AND existing.context->>'backfill' = 'true'
)
ON CONFLICT DO NOTHING;
-- ============================================================
-- Step 2: approval_records -> TELEGRAM_SENT
-- (each approval record is treated as one approval card having been pushed)
-- ============================================================
-- Re-run safety: id is a new gen_random_uuid() on every execution, so a
-- conflict on id can never occur and ON CONFLICT DO NOTHING alone would let a
-- second run duplicate every row. The NOT EXISTS guard skips approvals that
-- already have a TELEGRAM_SENT event carrying the backfill marker this
-- statement writes into context.
INSERT INTO alert_operation_log (
    id, incident_id, approval_id, event_type, actor, action_detail, success, context, created_at
)
SELECT
    gen_random_uuid()::text,
    ar.incident_id,
    ar.id,
    'TELEGRAM_SENT',
    'system',
    'approval_card_sent',
    TRUE,
    jsonb_build_object(
        'action', ar.action,
        'risk_level', ar.risk_level::text,
        'requested_by', ar.requested_by,
        'hit_count', ar.hit_count,
        'backfill', TRUE,            -- marker used by the NOT EXISTS guard below
        'backfill_at', NOW()::text
    ),
    ar.created_at                    -- the card is dated at approval creation time
FROM approval_records AS ar
WHERE NOT EXISTS (
    SELECT 1
    FROM alert_operation_log AS existing
    WHERE existing.approval_id = ar.id
      AND existing.event_type::text = 'TELEGRAM_SENT'
      AND existing.context->>'backfill' = 'true'
)
ON CONFLICT DO NOTHING;
-- ============================================================
-- Step 3: approval_records (resolved) -> USER_ACTION
-- ============================================================
-- Emits one USER_ACTION event for every approval whose status is APPROVED,
-- REJECTED, EXECUTION_SUCCESS or EXECUTION_FAILED. The two EXECUTION_* states
-- are recorded as 'approve' because execution only follows an approval.
--
-- Re-run safety: id is a new gen_random_uuid() on every execution, so a
-- conflict on id can never occur and ON CONFLICT DO NOTHING alone would let a
-- second run duplicate every row. The NOT EXISTS guard skips approvals that
-- already have a USER_ACTION event carrying the backfill marker this
-- statement writes into context.
--
-- NOTE(review): actor is taken from requested_by, i.e. the requester of the
-- approval; the identity of whoever approved/rejected is presumably held in
-- signatures — confirm whether actor should be derived from there instead.
INSERT INTO alert_operation_log (
    id, incident_id, approval_id, event_type, actor, action_detail, success, context, created_at
)
SELECT
    gen_random_uuid()::text,
    ar.incident_id,
    ar.id,
    'USER_ACTION',
    COALESCE(ar.requested_by, 'unknown'),
    -- action_detail: the ELSE branch is unreachable under the WHERE filter
    -- below and only matters if that filter is ever widened.
    CASE ar.status::text
        WHEN 'APPROVED' THEN 'approve'
        WHEN 'REJECTED' THEN 'reject'
        WHEN 'EXECUTION_SUCCESS' THEN 'approve'
        WHEN 'EXECUTION_FAILED' THEN 'approve'
        ELSE ar.status::text
    END,
    -- success reflects the approval decision, not the execution outcome.
    CASE ar.status::text
        WHEN 'APPROVED' THEN TRUE
        WHEN 'EXECUTION_SUCCESS' THEN TRUE
        WHEN 'REJECTED' THEN FALSE
        WHEN 'EXECUTION_FAILED' THEN TRUE  -- approved, but the execution failed
        ELSE NULL
    END,
    jsonb_build_object(
        'status', ar.status::text,
        'risk_level', ar.risk_level::text,
        'rejection_reason', COALESCE(ar.rejection_reason, ''),
        'signatures', ar.signatures,
        'resolved_at', COALESCE(ar.resolved_at::text, ''),
        'backfill', TRUE,            -- marker used by the NOT EXISTS guard below
        'backfill_at', NOW()::text
    ),
    -- Best available decision time: resolution, else last update, else creation.
    COALESCE(ar.resolved_at, ar.updated_at, ar.created_at)
FROM approval_records AS ar
WHERE ar.status::text IN ('APPROVED', 'REJECTED', 'EXECUTION_SUCCESS', 'EXECUTION_FAILED')
  AND NOT EXISTS (
      SELECT 1
      FROM alert_operation_log AS existing
      WHERE existing.approval_id = ar.id
        AND existing.event_type::text = 'USER_ACTION'
        AND existing.context->>'backfill' = 'true'
  )
ON CONFLICT DO NOTHING;
-- ============================================================
-- Step 4: audit_logs -> EXECUTION_COMPLETED
-- ============================================================
-- Emits one EXECUTION_COMPLETED event per row of audit_logs, copying success
-- and error_message as-is and packing the remaining execution details into
-- context.
--
-- Re-run safety: id is a new gen_random_uuid() on every execution, so a
-- conflict on id can never occur and ON CONFLICT DO NOTHING alone would let a
-- second run duplicate every row. The NOT EXISTS guard skips audit rows that
-- already have an EXECUTION_COMPLETED event carrying the backfill marker this
-- statement writes into context.
--
-- NOTE(review): incident_id is not populated for these rows, so they will not
-- be found by incident-based lookups; it could presumably be resolved through
-- approval_records via approval_id — confirm before adding.
INSERT INTO alert_operation_log (
    id, approval_id, audit_log_id, event_type, actor, action_detail, success, error_message, context, created_at
)
SELECT
    gen_random_uuid()::text,
    al.approval_id,
    al.id,
    'EXECUTION_COMPLETED',
    COALESCE(al.executed_by, 'system'),
    -- action_detail is rendered as "<operation_type>/<target_resource>"
    COALESCE(al.operation_type, 'unknown') || '/' || COALESCE(al.target_resource, ''),
    al.success,
    al.error_message,
    jsonb_build_object(
        'operation_type', al.operation_type,
        'target_resource', al.target_resource,
        'namespace', al.namespace,
        'execution_duration_ms', al.execution_duration_ms,
        'dry_run_passed', al.dry_run_passed,
        'authorization_channel', COALESCE(al.authorization_channel, ''),
        'retry_count', al.retry_count,
        'failure_classification', COALESCE(al.failure_classification, ''),
        'auto_repair_attempted', al.auto_repair_attempted,
        'backfill', TRUE,            -- marker used by the NOT EXISTS guard below
        'backfill_at', NOW()::text
    ),
    al.created_at                    -- keep the audit row's original timestamp
FROM audit_logs AS al
WHERE NOT EXISTS (
    SELECT 1
    FROM alert_operation_log AS existing
    WHERE existing.audit_log_id = al.id
      AND existing.event_type::text = 'EXECUTION_COMPLETED'
      AND existing.context->>'backfill' = 'true'
)
ON CONFLICT DO NOTHING;
-- ============================================================
-- Verification
-- ============================================================
-- Summarises alert_operation_log per event type: number of rows plus the
-- earliest and latest created_at. Rows are ordered by the textual event type.
SELECT
    CAST(aol.event_type AS text) AS event_type,
    COUNT(*)                     AS count,
    MIN(aol.created_at)          AS oldest,
    MAX(aol.created_at)          AS newest
FROM alert_operation_log AS aol
GROUP BY aol.event_type
ORDER BY CAST(aol.event_type AS text);