""" Incident Engine - Phase 16 lewooogo-brain 整合版 ================================================ Phase R-R2 (2026-04-01 ogt): 移除內嵌 IncidentEngine 重複邏輯, 全面切換至 lewooogo-brain IncidentEngine。 完整舊版本歸檔: src/services/_archived/incident_engine_v1.py Phase R-R2.1 (2026-04-01 ogt): 修復 P0-02 ImportError 保護 + P1-01 死碼移除 + P1-03 save_incident key prefix 不一致 (IncidentMemoryAdapter 委派給 self._memory) 架構: - IncidentMemoryAdapter: 橋接 DualIncidentMemory 接口到 lewooogo-brain IncidentEngine - BlastRadiusAdapter: 包裝 topology_graph 注入 lewooogo-brain - get_incident_engine(): 統一入口 (Singleton) 統帥鐵律: - 禁止告警風暴: 相關告警必須聚合,減少 Incident 數量 - 禁止 O(N) 掃描: 所有查詢必須 O(1) - 禁止 Race Condition: 所有寫入必須原子操作 """ from typing import Any, Protocol, runtime_checkable import structlog from src.core.redis_client import get_redis from src.models.incident import Incident from src.services.graph_rag import BlastRadiusResult, topology_graph from src.services.incident_memory import get_incident_memory from src.utils.incident_converter import brain_to_local logger = structlog.get_logger(__name__) # ============================================================================= # Constants # ============================================================================= # Redis Key Patterns (索引 Key,與 lewooogo-brain DualIncidentMemory 保持一致) INCIDENT_INDEX_NS = "incident:idx:ns:" # namespace → incident_id INCIDENT_INDEX_TARGET = "incident:idx:target:" # target → incident_id # 聚合時間窗口 AGGREGATION_WINDOW_MINUTES = 30 AGGREGATION_WINDOW_SECONDS = AGGREGATION_WINDOW_MINUTES * 60 # ============================================================================= # Protocol Interface (Phase 17 P1 - 紅區治理) # ============================================================================= @runtime_checkable class IIncidentEngine(Protocol): """ IncidentEngine 介面定義 用途: - 依賴注入 (DI) 時的型別約束 - 測試時 Mock 的型別檢查 - 符合 leWOOOgo 積木化規範 Tier 3 紅區服務: 修改需首席架構師簽核 @see feedback_lewooogo_modular_enforcement.md @see docs/RED_ZONES.md """ async def process_signal( self, signal_data: dict[str, Any], ) -> Incident | None: """處理 Signal: 原子建立或聚合 Incident (返回 LocalIncident,ADR-046)""" ... async def get_incident(self, incident_id: str) -> Incident | None: """取得指定 Incident (返回 LocalIncident,ADR-046)""" ... async def update_status( self, incident_id: str, status: Any, # IncidentStatus enum (lewooogo-brain) ) -> bool: """更新 Incident 狀態 (2026-04-01 ogt: 簽名與 lewooogo-brain 對齊, ADR-046 P2-02)""" ... # ============================================================================= # Phase 16: 絞殺者模式 - Adapter 實作 # ============================================================================= class IncidentMemoryAdapter: """ Incident Memory Adapter - 實作 lewooogo-brain 的 IIncidentMemory Protocol Phase 16 R1.3: 橋接 DualIncidentMemory 接口到 lewooogo-brain IncidentEngine Phase R-R2.1 (2026-04-01 ogt): 修復 save_incident key prefix 不一致 - 全部委派給 self._memory (lewooogo-brain DualIncidentMemory) - 不再直接存取 Redis,確保 key prefix 一致性 NOTE: self._memory 是 lewooogo-brain.adapters.incident_memory.DualIncidentMemory key_prefix = "awoooi:incidents" (由 get_incident_memory() 初始化時設定) """ def __init__(self, memory: Any) -> None: self._memory = memory async def load_incident(self, incident_id: str) -> Any: """ 從 Working Memory 載入 Incident (返回 BrainIncident,供 brain engine 內部使用) 注意: 此 adapter 注入 brain engine,brain 內部呼叫時期望 BrainIncident。 本地服務透過 IncidentEngineAdapter.get_incident() 取得 LocalIncident。 (ADR-046 - 2026-04-01 ogt) """ return await self._memory.load_incident(incident_id) async def save_incident(self, incident: Any, ttl_seconds: int = 604800) -> bool: """儲存 Incident 到 Working Memory (委派給 DualIncidentMemory,保持 key prefix 一致)""" return await self._memory.save_incident(incident, ttl_seconds) async def persist_incident(self, incident: Any) -> bool: """持久化到 Episodic Memory (PostgreSQL,透過 IncidentDbAdapter)""" return await self._memory.persist_incident(incident) async def find_related_incident( self, namespace: str, target: str, window_minutes: int = 30, # noqa: ARG002 ) -> Any: """尋找相關的活躍 Incident (用於聚合)""" redis_client = get_redis() # 嘗試 namespace 索引 ns_key = f"{INCIDENT_INDEX_NS}{namespace}" incident_id = await redis_client.get(ns_key) if not incident_id: # 嘗試 target 索引 target_key = f"{INCIDENT_INDEX_TARGET}{target}" incident_id = await redis_client.get(target_key) if incident_id: if isinstance(incident_id, bytes): incident_id = incident_id.decode() return await self.load_incident(incident_id) return None async def update_index( self, incident_id: str, namespace: str, target: str, ) -> bool: """更新反向索引""" try: redis_client = get_redis() ttl = AGGREGATION_WINDOW_SECONDS ns_key = f"{INCIDENT_INDEX_NS}{namespace}" target_key = f"{INCIDENT_INDEX_TARGET}{target}" await redis_client.set(ns_key, incident_id, ex=ttl, nx=True) await redis_client.set(target_key, incident_id, ex=ttl, nx=True) return True except Exception as e: logger.exception("update_index_error", error=str(e)) return False class BlastRadiusAdapter: """ Blast Radius Adapter - 實作 lewooogo-brain 的 IBlastRadiusAnalyzer Protocol Phase 16 R1.3: 包裝現有 topology_graph 版本: v1.0 建立: 2026-03-26 (台北時區) 建立者: Claude Code """ def __init__(self, graph=None) -> None: self._graph = graph or topology_graph def analyze(self, target: str) -> list[str]: """分析受影響的服務列表 (外部依賴: topology_graph GraphRAG,失敗時降級返回 [target])""" try: result: BlastRadiusResult = self._graph.get_blast_radius(target) return result.affected_services except Exception as e: logger.warning("blast_radius_analysis_failed", target=target, error=str(e)) return [target] if target != "unknown" else [] # ============================================================================= # IncidentEngineAdapter (ADR-046: brain 輸出轉換為 LocalIncident) # ============================================================================= class IncidentEngineAdapter: """ Incident Engine Adapter - 包裝 lewooogo-brain IncidentEngine ADR-046 (2026-04-01 ogt): brain 引擎輸出 BrainIncident, 透過 brain_to_local() 轉換為 LocalIncident,供本地服務消費。 這是本地服務與 brain 引擎之間的邊界層。 讓 IIncidentEngine Protocol 的返回型別宣告 Incident 得以成立。 """ def __init__(self, brain_engine: Any) -> None: self._engine = brain_engine async def process_signal(self, signal_data: dict[str, Any]) -> Incident | None: """處理 Signal,返回 LocalIncident (brain 輸出轉換)""" brain_incident = await self._engine.process_signal(signal_data) if brain_incident is None: return None return brain_to_local(brain_incident) async def get_incident(self, incident_id: str) -> Incident | None: """取得 Incident,返回 LocalIncident (brain 輸出轉換)""" brain_incident = await self._engine.get_incident(incident_id) if brain_incident is None: return None return brain_to_local(brain_incident) async def update_status(self, incident_id: str, status: Any) -> bool: """更新狀態 (直接委派,brain status 與 local status 值相容)""" return await self._engine.update_status(incident_id, status) # ============================================================================= # Singleton (Phase R-R2: 僅保留 lewooogo-brain 版本) # ============================================================================= _new_incident_engine: IncidentEngineAdapter | None = None def get_incident_engine() -> IncidentEngineAdapter: """ 取得 Incident Engine 實例 (Singleton) Phase R-R2: 統一使用 lewooogo-brain IncidentEngine。 ADR-046: 返回 IncidentEngineAdapter,輸出已轉換為 LocalIncident。 回滾方式: git revert Phase R-R2 commit + redeploy。 Raises: ImportError: lewooogo-brain 未安裝 RuntimeError: 引擎初始化失敗 """ global _new_incident_engine if _new_incident_engine is None: try: from lewooogo_brain.engines import IncidentEngine as NewIncidentEngine memory_adapter = IncidentMemoryAdapter(get_incident_memory()) blast_adapter = BlastRadiusAdapter() brain_engine = NewIncidentEngine( memory=memory_adapter, blast_analyzer=blast_adapter, logger=logger, ) _new_incident_engine = IncidentEngineAdapter(brain_engine) logger.info("incident_engine_initialized", version="lewooogo-brain", adapter="IncidentEngineAdapter") except ImportError as e: logger.error("lewooogo_brain_engines_not_available", error=str(e)) raise except Exception as e: logger.error("incident_engine_init_failed", error=str(e)) raise return _new_incident_engine