diff --git a/apps/api/migrations/adr093_notification_routing.sql b/apps/api/migrations/adr093_notification_routing.sql
new file mode 100644
index 00000000..22946a52
--- /dev/null
+++ b/apps/api/migrations/adr093_notification_routing.sql
@@ -0,0 +1,81 @@
+-- ADR-093: Notification Matrix Migration
+-- =========================================
+-- 1. Create the approval_records table (BIGINT telegram_chat_id, supports negative group IDs)
+-- 2. Create the awoooi_migrator role
+-- 2026-04-25 ogt + Claude Sonnet 4.6
+
+-- awoooi_migrator role (implements the ADR-090b plan)
+DO $$
+BEGIN
+    IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'awoooi_migrator') THEN
+        CREATE ROLE awoooi_migrator LOGIN;
+    END IF;
+    -- Grant CONNECT on the database being migrated instead of a hard-coded name,
+    -- so the script does not abort where no database is called awoooi_prod.
+    EXECUTE format('GRANT CONNECT ON DATABASE %I TO awoooi_migrator', current_database());
+END
+$$;
+
+GRANT USAGE ON SCHEMA public TO awoooi_migrator;
+GRANT CREATE ON SCHEMA public TO awoooi_migrator;
+
+-- approval_records main table (created fresh, BIGINT from the start)
+-- Note: the test schema setup_test_schema.sql is updated to BIGINT in the same change
+CREATE TABLE IF NOT EXISTS approval_records (
+    id                  VARCHAR(36) PRIMARY KEY,
+    action              VARCHAR(500) NOT NULL,
+    description         TEXT NOT NULL,
+    status              VARCHAR(20) NOT NULL DEFAULT 'PENDING',
+    risk_level          VARCHAR(20) NOT NULL,
+    required_signatures INTEGER DEFAULT 1,
+    current_signatures  INTEGER DEFAULT 0,
+    signatures          JSON DEFAULT '[]',
+    blast_radius        JSON DEFAULT '{}',
+    dry_run_checks      JSON DEFAULT '[]',
+    requested_by        VARCHAR,
+    rejection_reason    TEXT,
+    extra_metadata      JSON DEFAULT '{}',
+    fingerprint         VARCHAR,
+    hit_count           INTEGER DEFAULT 1,
+    last_seen_at        TIMESTAMPTZ,
+    approval_level      VARCHAR DEFAULT 'standard',
+    approval_votes      JSONB,
+    required_votes      INTEGER DEFAULT 1,
+    incident_id         VARCHAR,
+    telegram_message_id INTEGER,
+    telegram_chat_id    BIGINT,  -- supports negative group IDs (the former INTEGER overflowed int32)
+    matched_playbook_id VARCHAR(36),
+    created_at          TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at          TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    expires_at          TIMESTAMPTZ,
+    resolved_at         TIMESTAMPTZ
+);
+
+-- If the table already exists (older environments), upgrade the column type
+DO $$
+BEGIN
+    IF EXISTS (
+        SELECT 1 FROM information_schema.columns
+        WHERE table_schema = current_schema()  -- the schema the CREATE TABLE above targets
+          AND table_name = 'approval_records'
+          AND column_name = 'telegram_chat_id'
+          AND data_type = 'integer'
+    ) THEN
+        ALTER TABLE approval_records
+            ALTER COLUMN telegram_chat_id TYPE BIGINT;
+        RAISE NOTICE 'approval_records.telegram_chat_id upgraded INTEGER → BIGINT';
+    END IF;
+END
+$$;
+
+-- Indexes
+CREATE INDEX IF NOT EXISTS idx_approval_records_status ON approval_records(status);
+CREATE INDEX IF NOT EXISTS idx_approval_records_incident ON approval_records(incident_id);
+CREATE INDEX IF NOT EXISTS idx_approval_records_fingerprint ON approval_records(fingerprint);
+CREATE INDEX IF NOT EXISTS idx_approval_records_playbook ON approval_records(matched_playbook_id);
+
+GRANT SELECT, INSERT, UPDATE, DELETE ON approval_records TO awoooi;
+GRANT SELECT, INSERT, UPDATE ON approval_records TO awoooi_migrator;
+
+COMMENT ON TABLE approval_records IS 'ADR-093 2026-04-25: telegram_chat_id 改 BIGINT 支援群組負數 ID';
+COMMENT ON COLUMN approval_records.telegram_chat_id IS 'BIGINT: 支援 SRE 群組 ID (-1003711974679) 不 overflow';
diff --git a/apps/api/src/core/config.py b/apps/api/src/core/config.py
index d5f47fea..db4c7705 100644
--- a/apps/api/src/core/config.py
+++ b/apps/api/src/core/config.py
@@ -426,6 +426,12 @@ class Settings(BaseSettings):
         default="",
         description="AwoooI SRE 戰情室群組 Chat ID",
     )
+    # ADR-093 gradual cutover: when True, group routing follows notification_matrix.
+    # When False (default) the legacy behavior is kept (TYPE-3/4/4D/8M are DM-only).
+    TG_GROUP_CUTOVER: bool = Field(
+        default=False,
+        description="ADR-093: True 時啟用 notification_matrix 路由矩陣,取代 telegram_gateway 硬碼",
+    )
 
     def get_tg_user_whitelist(self) -> list[int]:
         """Parse comma-separated or JSON array user IDs to list[int]"""
diff --git a/apps/api/src/db/models.py b/apps/api/src/db/models.py
index db30e1a3..0d853e0f 100644
--- a/apps/api/src/db/models.py
+++ b/apps/api/src/db/models.py
@@ -16,6 +16,7 @@ from uuid import uuid4
 
 from sqlalchemy import (
     JSON,
+    BigInteger,
     DateTime,
     Index,
     Integer,
@@ -162,9 +163,9 @@ class ApprovalRecord(Base):
         comment="Telegram message_id of the approval card sent to operator",
     )
     telegram_chat_id: Mapped[int | None] = mapped_column(
-        Integer,
+        BigInteger,
         nullable=True,
-        comment="Telegram chat_id where the approval card was sent",
+        comment="Telegram chat_id where the approval card was sent (BIGINT: 支援群組負數 ID)",
     )
 
     # B2 fix 2026-04-24 ogt + Claude Sonnet 4.6: Playbook 學習閉環斷鏈修復
diff --git a/apps/api/src/services/approval_db.py b/apps/api/src/services/approval_db.py
index 197d5714..b99a602a 100644
--- a/apps/api/src/services/approval_db.py
+++ b/apps/api/src/services/approval_db.py
@@ -727,8 +727,8 @@ class ApprovalDBService:
         以 incident_id 查找最新 PENDING approval record 並回填。
         """
         async with get_db_context() as db:
-            # 2026-04-10 Claude Sonnet 4.6: 用 raw SQL 避免 SQLAlchemy 推斷 INTEGER
-            # telegram_chat_id 為 BIGINT,ORM update() 會誤用 $N::INTEGER 導致 int32 overflow
+            # Raw SQL is kept for this back-fill. Since ADR-093 telegram_chat_id is BIGINT
+            # in both the ORM model and the table, so the explicit CAST was dropped.
             from sqlalchemy import text as _text
             params: dict = {
                 "incident_id": incident_id,
@@ -738,7 +738,7 @@ class ApprovalDBService:
             chat_clause = ""
             if telegram_chat_id is not None:
                 params["telegram_chat_id"] = telegram_chat_id
-                chat_clause = ", telegram_chat_id = CAST(:telegram_chat_id AS BIGINT)"
+                chat_clause = ", telegram_chat_id = :telegram_chat_id"
             await db.execute(
                 _text(f"""
                     UPDATE approval_records
diff --git a/apps/api/src/services/heartbeat_report_service.py b/apps/api/src/services/heartbeat_report_service.py
index b89116dd..1b6b1297 100644
--- a/apps/api/src/services/heartbeat_report_service.py
+++ b/apps/api/src/services/heartbeat_report_service.py
@@ -572,7 +572,7 @@ class HeartbeatReportService:
             from src.core.redis_client import get_redis
             redis = get_redis()
             # polling leader lock
-            leader = await redis.get("telegram:polling_leader")
+            leader = await redis.get("telegram:polling:leader")
             if leader:
                 s.polling_ok = True
                 s.status = f"✅ Polling 活躍 (leader: {leader.decode()[:20] if isinstance(leader, bytes) else str(leader)[:20]})"
diff --git a/apps/api/src/services/notification_matrix.py b/apps/api/src/services/notification_matrix.py
new file mode 100644
index 00000000..c82b6a25
--- /dev/null
+++ b/apps/api/src/services/notification_matrix.py
@@ -0,0 +1,92 @@
+"""
+Notification routing matrix — ADR-093
+======================================
+A single matrix decides where each notification type is delivered, replacing the
+24 hard-coded chat_id sites in telegram_gateway.py.
+
+Design:
+- tg_group_cutover=False (default) keeps the legacy routing, for a gradual rollout.
+- tg_group_cutover=True lets the matrix fully control routing.
+- Unknown notification types default to the group.
+
+2026-04-25 ogt + Claude Sonnet 4.6
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from enum import Enum
+
+
+class Destination(str, Enum):
+    """Where a notification is delivered."""
+
+    DM = "dm"  # OPENCLAW_TG_CHAT_ID (personal DM)
+    GROUP = "group"  # SRE_GROUP_CHAT_ID
+    BOTH = "both"  # group and DM (the group copy may drop its buttons)
+
+
+@dataclass(frozen=True)
+class RoutingRule:
+    """Routing decision for one notification type."""
+
+    destination: Destination
+    # For BOTH: whether the group copy drops its callback buttons.
+    strip_buttons_for_group: bool = False
+
+
+# ADR-093 D1-D4 routing matrix.
+# TYPE-5S stays on DM for now (to be revisited in v2).
+# NOTE(review): the original note also listed TYPE-7E as staying on DM, but its rule
+# below is BOTH with buttons kept for the group — confirm which one is intended.
+NOTIFICATION_ROUTING: dict[str, RoutingRule] = {
+    "TYPE-1": RoutingRule(Destination.GROUP),
+    "TYPE-2": RoutingRule(Destination.BOTH, strip_buttons_for_group=True),
+    "TYPE-3": RoutingRule(Destination.GROUP),
+    "TYPE-4": RoutingRule(Destination.GROUP),
+    "TYPE-4D": RoutingRule(Destination.GROUP),
+    "TYPE-5S": RoutingRule(Destination.DM),
+    "TYPE-6B": RoutingRule(Destination.GROUP),
+    "TYPE-7E": RoutingRule(Destination.BOTH, strip_buttons_for_group=False),
+    "TYPE-8M": RoutingRule(Destination.GROUP),
+}
+
+_DEFAULT_RULE = RoutingRule(Destination.GROUP)
+
+# Types the gateway kept DM-only before ADR-093 (interactive cards with buttons).
+_LEGACY_DM_ONLY_TYPES = frozenset({"TYPE-3", "TYPE-4", "TYPE-4D", "TYPE-8M"})
+
+
+def get_routing_rule(notification_type: str) -> RoutingRule:
+    """Return the rule for *notification_type*; unknown types go to the group."""
+    return NOTIFICATION_ROUTING.get(notification_type, _DEFAULT_RULE)
+
+
+def resolve_chat_ids(
+    notification_type: str,
+    dm_chat_id: str,
+    group_chat_id: str,
+    *,
+    tg_group_cutover: bool = False,
+) -> list[str]:
+    """Return the chat ids this notification should be sent to.
+
+    With ``tg_group_cutover=False`` the legacy routing applies: TYPE-3/4/4D/8M go
+    to the DM only and every other type goes to the group only. With
+    ``tg_group_cutover=True`` the routing matrix decides; for BOTH the group id
+    comes first. Empty ids are skipped and a repeated id is returned once.
+    """
+    if tg_group_cutover:
+        destination = get_routing_rule(notification_type).destination
+    elif notification_type in _LEGACY_DM_ONLY_TYPES:
+        destination = Destination.DM
+    else:
+        destination = Destination.GROUP
+
+    if destination == Destination.DM:
+        candidates = [dm_chat_id]
+    elif destination == Destination.GROUP:
+        candidates = [group_chat_id]
+    else:  # BOTH: group first, then DM
+        candidates = [group_chat_id, dm_chat_id]
+
+    # dict.fromkeys drops a repeated id while keeping order.
+    return [chat_id for chat_id in dict.fromkeys(candidates) if chat_id]
diff --git a/apps/api/src/services/telegram_gateway.py b/apps/api/src/services/telegram_gateway.py
index 71978de0..ddf5dbf8 100644
--- a/apps/api/src/services/telegram_gateway.py
+++ b/apps/api/src/services/telegram_gateway.py
@@ -40,6 +40,7 @@ from src.services.security_interceptor import (
     get_security_interceptor,
 )
 from src.services.chat_manager import get_chat_manager
+from src.services.notification_matrix import resolve_chat_ids  # ADR-093 routing matrix
 
 # =============================================================================
 # Snooze/Silence Redis Keys (2026-03-27 P1 優化)
@@ -1823,11 +1824,28 @@ class TelegramGateway:
             # 2026-04-12 ogt: ADR-075 斷點 C 修復 — 含按鈕的互動型通知禁止發群組(nonce 洩漏)
             # TYPE-1/TYPE-2 → 可發群組(純資訊,無 callback 按鈕)
             # TYPE-3/TYPE-4/TYPE-4D/TYPE-8M → 僅 DM,不發群組
+            # ADR-093 gradual cutover: with TG_GROUP_CUTOVER=true the group decision comes from
+            # notification_matrix.resolve_chat_ids() instead of the _interactive_types blacklist.
+            # NOTE(review): strip_buttons_for_group is not applied on this path — confirm the
+            # group card carries no callback buttons before enabling the flag (ADR-075).
             _interactive_types = {"TYPE-3", "TYPE-4", "TYPE-4D", "TYPE-8M"}
             _is_interactive = notification_type in _interactive_types or (
                 not notification_type and alert_category  # 有分類但無明確型別 → 視為互動型
             )
-            if settings.SRE_GROUP_CHAT_ID and not _is_interactive:
+            if getattr(settings, "TG_GROUP_CUTOVER", False):
+                # Only the group decision matters here, so the DM id is passed empty; a
+                # non-empty result means the matrix routes this type to the group.
+                _send_to_group = bool(
+                    resolve_chat_ids(
+                        notification_type or "",
+                        "",
+                        str(settings.SRE_GROUP_CHAT_ID or ""),
+                        tg_group_cutover=True,
+                    )
+                )
+            else:
+                _send_to_group = bool(settings.SRE_GROUP_CHAT_ID) and not _is_interactive
+            if _send_to_group:
                 asyncio.create_task(
                     self._send_approval_card_to_group(
                         approval_id=approval_id,
diff --git a/apps/api/tests/integration/setup_test_schema.sql b/apps/api/tests/integration/setup_test_schema.sql
index c88176fc..3f4495c0 100644
--- a/apps/api/tests/integration/setup_test_schema.sql
+++ b/apps/api/tests/integration/setup_test_schema.sql
@@ -69,7 +69,7 @@ CREATE TABLE IF NOT EXISTS approval_records (
     required_votes INTEGER DEFAULT 1,
     incident_id VARCHAR,
     telegram_message_id INTEGER,
-    telegram_chat_id INTEGER,
+    telegram_chat_id BIGINT, -- ADR-093 2026-04-25: supports negative group IDs
     matched_playbook_id VARCHAR(36),
     created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
     updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),