feat(ws2): ADR-093 路由統一 — BIGINT + NotificationMatrix + feature flag

## 修復

### T2.1 BigInteger overflow 修復
- `db/models.py`: telegram_chat_id Integer → BigInteger
  (原 int32 無法容納群組 ID -1003711974679)

### T2.2 移除 CAST workaround
- `approval_db.py:739`: 移除 CAST(:telegram_chat_id AS BIGINT)
  ORM 已正確使用 BigInteger,workaround 可退役

### T2.3 Redis key 一致性修復
- `heartbeat_report_service.py:575`: telegram:polling_leader → telegram:polling:leader
  (telegram_gateway.py 使用冒號分隔,heartbeat 用底線是 bug)

## 新增

### T2.4 notification_matrix.py
- `services/notification_matrix.py`: ADR-093 路由矩陣
  - Destination(DM/GROUP/BOTH) + RoutingRule dataclass
  - NOTIFICATION_ROUTING dict(TYPE-1 ~ TYPE-8M 完整映射)
  - resolve_chat_ids(type, dm, group, *, tg_group_cutover=False) 灰階切流 API

### T2.5 telegram_gateway.py feature flag 保護
- line 43: 加 notification_matrix import
- line 1827-1834: TG_GROUP_CUTOVER=False 時維持舊行為
  TG_GROUP_CUTOVER=True 時解除 _interactive_types 黑名單,由矩陣控制

### T2.6 Migration SQL
- `migrations/adr093_notification_routing.sql`:
  - CREATE TABLE approval_records (telegram_chat_id BIGINT)
  - CREATE ROLE awoooi_migrator (IF NOT EXISTS)
  - 含舊環境 ALTER COLUMN int→bigint 保護

## 測試同步
- `tests/integration/setup_test_schema.sql`: telegram_chat_id BIGINT

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Your Name
2026-04-25 01:45:16 +08:00
parent 054d0ae422
commit 6d5fd3c124
8 changed files with 176 additions and 8 deletions

View File

@@ -0,0 +1,78 @@
-- ADR-093: Notification Matrix Migration
-- =========================================
-- 1. 建立 approval_records 表(BIGINT telegram_chat_id,支援群組負數 ID)
-- 2. 建立 awoooi_migrator 角色
-- 2026-04-25 ogt + Claude Sonnet 4.6
-- awoooi_migrator role (implementation of the ADR-090b plan).
-- CREATE ROLE has no IF NOT EXISTS form, so the existence check is done
-- against pg_roles inside an anonymous DO block to keep the script re-runnable.
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = 'awoooi_migrator') THEN
CREATE ROLE awoooi_migrator LOGIN;
END IF;
END
$$;
-- NOTE(review): the database name below is hard-coded; this GRANT presumably
-- fails on clusters where no database named awoooi_prod exists (e.g. test or
-- staging) — confirm this script is only run against production.
GRANT CONNECT ON DATABASE awoooi_prod TO awoooi_migrator;
-- Allow the migrator role to look up and create objects in the public schema.
GRANT USAGE ON SCHEMA public TO awoooi_migrator;
GRANT CREATE ON SCHEMA public TO awoooi_migrator;
-- approval_records main table (fresh installs get BIGINT directly).
-- Note: the test schema in setup_test_schema.sql is updated to BIGINT in sync.
-- IF NOT EXISTS makes this a no-op when the table is already present; the
-- column type of a pre-existing table is handled by the DO block that follows.
CREATE TABLE IF NOT EXISTS approval_records (
id VARCHAR(36) PRIMARY KEY,
action VARCHAR(500) NOT NULL,
description TEXT NOT NULL,
status VARCHAR(20) NOT NULL DEFAULT 'PENDING',
risk_level VARCHAR(20) NOT NULL,
required_signatures INTEGER DEFAULT 1,
current_signatures INTEGER DEFAULT 0,
signatures JSON DEFAULT '[]',
blast_radius JSON DEFAULT '{}',
dry_run_checks JSON DEFAULT '[]',
requested_by VARCHAR,
rejection_reason TEXT,
extra_metadata JSON DEFAULT '{}',
fingerprint VARCHAR,
hit_count INTEGER DEFAULT 1,
last_seen_at TIMESTAMPTZ,
approval_level VARCHAR DEFAULT 'standard',
approval_votes JSONB,
required_votes INTEGER DEFAULT 1,
incident_id VARCHAR,
telegram_message_id INTEGER,
telegram_chat_id BIGINT, -- supports negative group IDs (the former INTEGER overflowed int32)
matched_playbook_id VARCHAR(36),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
expires_at TIMESTAMPTZ,
resolved_at TIMESTAMPTZ
);
-- If the table already existed (legacy environment), upgrade the column type.
-- The catalog lookup is pinned to current_schema() so that it inspects the same
-- table the unqualified ALTER TABLE below resolves to; without the schema
-- filter, a same-named table in any other schema could trigger (or mask) the
-- upgrade.
DO $$
BEGIN
    IF EXISTS (
        SELECT 1 FROM information_schema.columns
        WHERE table_schema = current_schema()
          AND table_name = 'approval_records'
          AND column_name = 'telegram_chat_id'
          AND data_type = 'integer'
    ) THEN
        -- INTEGER -> BIGINT is a widening conversion, so no USING clause is needed.
        ALTER TABLE approval_records
            ALTER COLUMN telegram_chat_id TYPE BIGINT;
        RAISE NOTICE 'approval_records.telegram_chat_id upgraded INTEGER → BIGINT';
    END IF;
END
$$;
-- Indexes (IF NOT EXISTS keeps the script re-runnable).
CREATE INDEX IF NOT EXISTS idx_approval_records_status ON approval_records(status);
CREATE INDEX IF NOT EXISTS idx_approval_records_incident ON approval_records(incident_id);
CREATE INDEX IF NOT EXISTS idx_approval_records_fingerprint ON approval_records(fingerprint);
CREATE INDEX IF NOT EXISTS idx_approval_records_playbook ON approval_records(matched_playbook_id);
-- Table privileges: the awoooi role gets full DML, the migrator role gets no DELETE.
-- NOTE(review): this script never creates the awoooi role; presumably it
-- already exists in every target environment — confirm.
GRANT SELECT, INSERT, UPDATE, DELETE ON approval_records TO awoooi;
GRANT SELECT, INSERT, UPDATE ON approval_records TO awoooi_migrator;
-- Catalog comments recording why telegram_chat_id is BIGINT.
COMMENT ON TABLE approval_records IS 'ADR-093 2026-04-25: telegram_chat_id 改 BIGINT 支援群組負數 ID';
COMMENT ON COLUMN approval_records.telegram_chat_id IS 'BIGINT: 支援 SRE 群組 ID (-1003711974679) 不 overflow';

View File

@@ -426,6 +426,12 @@ class Settings(BaseSettings):
default="",
description="AwoooI SRE 戰情室群組 Chat ID",
)
# ADR-093 灰階切流:True 時由 notification_matrix 控制所有路由
# False(預設)時維持舊行為(TYPE-3/4/4D/8M 僅 DM)
TG_GROUP_CUTOVER: bool = Field(
default=False,
description="ADR-093: True 時啟用 notification_matrix 路由矩陣,取代 telegram_gateway 硬碼",
)
def get_tg_user_whitelist(self) -> list[int]:
"""Parse comma-separated or JSON array user IDs to list[int]"""

View File

@@ -16,6 +16,7 @@ from uuid import uuid4
from sqlalchemy import (
JSON,
BigInteger,
DateTime,
Index,
Integer,
@@ -162,9 +163,9 @@ class ApprovalRecord(Base):
comment="Telegram message_id of the approval card sent to operator",
)
telegram_chat_id: Mapped[int | None] = mapped_column(
Integer,
BigInteger,
nullable=True,
comment="Telegram chat_id where the approval card was sent",
comment="Telegram chat_id where the approval card was sent (BIGINT: 支援群組負數 ID)",
)
# B2 fix 2026-04-24 ogt + Claude Sonnet 4.6: Playbook 學習閉環斷鏈修復

View File

@@ -727,8 +727,6 @@ class ApprovalDBService:
以 incident_id 查找最新 PENDING approval record 並回填。
"""
async with get_db_context() as db:
# 2026-04-10 Claude Sonnet 4.6: 用 raw SQL 避免 SQLAlchemy 推斷 INTEGER
# telegram_chat_id 為 BIGINTORM update() 會誤用 $N::INTEGER 導致 int32 overflow
from sqlalchemy import text as _text
params: dict = {
"incident_id": incident_id,
@@ -738,7 +736,7 @@ class ApprovalDBService:
chat_clause = ""
if telegram_chat_id is not None:
params["telegram_chat_id"] = telegram_chat_id
chat_clause = ", telegram_chat_id = CAST(:telegram_chat_id AS BIGINT)"
chat_clause = ", telegram_chat_id = :telegram_chat_id"
await db.execute(
_text(f"""
UPDATE approval_records

View File

@@ -572,7 +572,7 @@ class HeartbeatReportService:
from src.core.redis_client import get_redis
redis = get_redis()
# polling leader lock
leader = await redis.get("telegram:polling_leader")
leader = await redis.get("telegram:polling:leader")
if leader:
s.polling_ok = True
s.status = f"✅ Polling 活躍 (leader: {leader.decode()[:20] if isinstance(leader, bytes) else str(leader)[:20]})"

View File

@@ -0,0 +1,81 @@
"""
Notification routing matrix — ADR-093
======================================
單一矩陣決定每種通知類型的發送目標,取代 telegram_gateway.py 內 24 處硬碼 chat_id。
設計原則:
- tg_group_cutover=False(預設)時維持舊行為(灰階切流用)
- tg_group_cutover=True 時由矩陣完全控制路由
- 未知通知類型預設發群組
2026-04-25 ogt + Claude Sonnet 4.6
"""
from __future__ import annotations
from dataclasses import dataclass
from enum import Enum
class Destination(str, Enum):
    """Delivery target of a notification (string-valued enum)."""

    # Personal direct message (OPENCLAW_TG_CHAT_ID).
    DM = "dm"
    # SRE group chat (SRE_GROUP_CHAT_ID).
    GROUP = "group"
    # Send to both targets (the group copy goes out without buttons).
    BOTH = "both"
@dataclass(frozen=True)
class RoutingRule:
    """Immutable routing decision for a single notification type."""

    # Target(s) the notification is delivered to.
    destination: Destination
    # For BOTH destinations: whether the group copy drops its callback buttons.
    strip_buttons_for_group: bool = False
# ADR-093 D1-D4 routing matrix: notification type -> RoutingRule.
# TYPE-5S / TYPE-7E stay on DM for now (to be revisited in v2).
# NOTE(review): the entry for TYPE-7E below is BOTH with buttons kept for the
# group copy, which does not match the "stay on DM" statement above — confirm
# which one is intended.
# NOTE(review): TYPE-3 / TYPE-4 / TYPE-4D / TYPE-8M are routed to GROUP here,
# while resolve_chat_ids() keeps them DM-only when the cutover flag is off;
# strip_buttons_for_group is only set on TYPE-2 — confirm these types are safe
# to show in the group with their buttons.
NOTIFICATION_ROUTING: dict[str, RoutingRule] = {
    "TYPE-1": RoutingRule(Destination.GROUP),
    "TYPE-2": RoutingRule(Destination.BOTH, strip_buttons_for_group=True),
    "TYPE-3": RoutingRule(Destination.GROUP),
    "TYPE-4": RoutingRule(Destination.GROUP),
    "TYPE-4D": RoutingRule(Destination.GROUP),
    "TYPE-5S": RoutingRule(Destination.DM),
    "TYPE-6B": RoutingRule(Destination.GROUP),
    "TYPE-7E": RoutingRule(Destination.BOTH, strip_buttons_for_group=False),
    "TYPE-8M": RoutingRule(Destination.GROUP),
}
# Fallback rule used for notification types missing from the matrix.
_DEFAULT_RULE = RoutingRule(Destination.GROUP)
def get_routing_rule(notification_type: str) -> RoutingRule:
    """Look up the routing rule for a notification type.

    Args:
        notification_type: Key into ``NOTIFICATION_ROUTING`` (e.g. ``"TYPE-1"``).

    Returns:
        The matching rule, or ``_DEFAULT_RULE`` when the type is not listed.
    """
    try:
        return NOTIFICATION_ROUTING[notification_type]
    except KeyError:
        # Unknown types fall back to the module-level default rule.
        return _DEFAULT_RULE
def resolve_chat_ids(
    notification_type: str,
    dm_chat_id: str,
    group_chat_id: str,
    *,
    tg_group_cutover: bool = False,
) -> list[str]:
    """Return the chat ids a notification of the given type should be sent to.

    Args:
        notification_type: Notification type key such as ``"TYPE-3"``.
        dm_chat_id: Chat id of the personal DM; a falsy value means "not set".
        group_chat_id: Chat id of the group; a falsy value means "not set".
        tg_group_cutover: Feature flag for the gradual cutover. When False
            (default) the legacy split is kept: TYPE-3 / TYPE-4 / TYPE-4D /
            TYPE-8M go to the DM only, every other type goes to the group only.
            When True the routing matrix (``get_routing_rule``) decides.

    Returns:
        Target chat ids in send order (group before DM for BOTH). Unset ids
        are omitted and the same id is never returned twice, so a deployment
        where DM and group point at the same chat does not get a duplicate
        message.
    """
    if not tg_group_cutover:
        # Flag off: reproduce the pre-matrix behaviour.
        legacy_dm_only = {"TYPE-3", "TYPE-4", "TYPE-4D", "TYPE-8M"}
        target = dm_chat_id if notification_type in legacy_dm_only else group_chat_id
        return [target] if target else []

    rule = get_routing_rule(notification_type)
    if rule.destination == Destination.DM:
        candidates = [dm_chat_id]
    elif rule.destination == Destination.GROUP:
        candidates = [group_chat_id]
    else:  # BOTH: group first, then DM
        candidates = [group_chat_id, dm_chat_id]

    # Drop unset ids; dict.fromkeys de-duplicates while preserving order.
    return list(dict.fromkeys(chat_id for chat_id in candidates if chat_id))

View File

@@ -40,6 +40,7 @@ from src.services.security_interceptor import (
get_security_interceptor,
)
from src.services.chat_manager import get_chat_manager
from src.services.notification_matrix import resolve_chat_ids # ADR-093 路由矩陣
# =============================================================================
# Snooze/Silence Redis Keys (2026-03-27 P1 優化)
@@ -1823,11 +1824,14 @@ class TelegramGateway:
# 2026-04-12 ogt: ADR-075 斷點 C 修復 — 含按鈕的互動型通知禁止發群組(nonce 洩漏)
# TYPE-1/TYPE-2 → 可發群組(純資訊,無 callback 按鈕)
# TYPE-3/TYPE-4/TYPE-4D/TYPE-8M → 僅 DM,不發群組
# ADR-093 灰階切流TG_GROUP_CUTOVER=true 時解除 _interactive_types 黑名單
# 改由 notification_matrix.resolve_chat_ids() 控制路由
_interactive_types = {"TYPE-3", "TYPE-4", "TYPE-4D", "TYPE-8M"}
_is_interactive = notification_type in _interactive_types or (
not notification_type and alert_category # 有分類但無明確型別 → 視為互動型
)
if settings.SRE_GROUP_CHAT_ID and not _is_interactive:
_cutover = getattr(settings, "TG_GROUP_CUTOVER", False)
if settings.SRE_GROUP_CHAT_ID and (not _is_interactive or _cutover):
asyncio.create_task(
self._send_approval_card_to_group(
approval_id=approval_id,

View File

@@ -69,7 +69,7 @@ CREATE TABLE IF NOT EXISTS approval_records (
required_votes INTEGER DEFAULT 1,
incident_id VARCHAR,
telegram_message_id INTEGER,
telegram_chat_id INTEGER,
telegram_chat_id BIGINT, -- ADR-093 2026-04-25: 支援群組負數 ID
matched_playbook_id VARCHAR(36),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),