Files
awoooi/apps/api/src/services/incident_engine.py
OG T a94bb57d8b feat(types): ADR-046 IncidentConverter + IncidentEngineAdapter
實作 ADR-046 Option B: IncidentConverter 轉換層,解決
BrainIncident (lewooogo-brain) 與 LocalIncident (apps/api) 型別邊界問題。

變更:
- 新增 src/utils/incident_converter.py
  - brain_to_local(): BrainIncident → LocalIncident
  - local_to_brain(): LocalIncident → BrainIncident
  - ESCALATED → MITIGATING 映射 (brain 無 ESCALATED)
- incident_engine.py: 新增 IncidentEngineAdapter 包裝層
  - process_signal() / get_incident() 輸出轉換為 LocalIncident
  - get_incident_engine() 返回 IncidentEngineAdapter
- incident_memory.py: 加入 brain_to_local import,更新 _record_to_incident 說明
- ADR-046: 標記三個轉換點全部完成

解鎖: #123 proposal_service.py 清理 (下一步)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-31 22:47:54 +08:00

279 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Incident Engine - Phase 16 lewooogo-brain 整合版
================================================
Phase R-R2 (2026-04-01 ogt): 移除內嵌 IncidentEngine 重複邏輯,
全面切換至 lewooogo-brain IncidentEngine。
完整舊版本歸檔: src/services/_archived/incident_engine_v1.py
Phase R-R2.1 (2026-04-01 ogt): 修復 P0-02 ImportError 保護 + P1-01 死碼移除
+ P1-03 save_incident key prefix 不一致 (IncidentMemoryAdapter 委派給 self._memory)
架構:
- IncidentMemoryAdapter: 橋接 DualIncidentMemory 接口到 lewooogo-brain IncidentEngine
- BlastRadiusAdapter: 包裝 topology_graph 注入 lewooogo-brain
- get_incident_engine(): 統一入口 (Singleton)
統帥鐵律:
- 禁止告警風暴: 相關告警必須聚合,減少 Incident 數量
- 禁止 O(N) 掃描: 所有查詢必須 O(1)
- 禁止 Race Condition: 所有寫入必須原子操作
"""
from typing import Any, Protocol, runtime_checkable
import structlog
from src.core.redis_client import get_redis
from src.models.incident import Incident
from src.services.graph_rag import BlastRadiusResult, topology_graph
from src.services.incident_memory import get_incident_memory
from src.utils.incident_converter import brain_to_local
logger = structlog.get_logger(__name__)
# =============================================================================
# Constants
# =============================================================================
# Redis key prefixes for the reverse-lookup indexes. The namespace or target
# name is appended to the prefix to form the full key. Per the original note,
# these are meant to stay consistent with lewooogo-brain's DualIncidentMemory.
INCIDENT_INDEX_NS = "incident:idx:ns:" # namespace → incident_id
INCIDENT_INDEX_TARGET = "incident:idx:target:" # target → incident_id
# Aggregation time window; the seconds value is used as the TTL of index keys.
AGGREGATION_WINDOW_MINUTES = 30
AGGREGATION_WINDOW_SECONDS = AGGREGATION_WINDOW_MINUTES * 60
# =============================================================================
# Protocol Interface (Phase 17 P1 - 紅區治理)
# =============================================================================
@runtime_checkable
class IIncidentEngine(Protocol):
    """
    Structural interface for an incident engine.

    Purpose:
    - Type constraint for dependency injection (DI)
    - Type checking of mocks in tests
    - Conforms to the leWOOOgo modular building-block convention

    Tier 3 red-zone service: changes require sign-off from the chief architect.

    @see feedback_lewooogo_modular_enforcement.md
    @see docs/RED_ZONES.md
    """

    async def process_signal(self, signal_data: dict[str, Any]) -> Incident | None:
        """Handle a signal by creating or aggregating an Incident.

        Returns the local ``Incident`` model (ADR-046), or ``None``.
        """
        ...

    async def get_incident(self, incident_id: str) -> Incident | None:
        """Fetch the Incident with the given id as the local model (ADR-046)."""
        ...

    async def update_status(self, incident_id: str, status: Any) -> bool:
        """Update the status of an Incident.

        ``status`` is annotated ``Any``; the original note says it is the
        lewooogo-brain ``IncidentStatus`` enum and that the signature was
        aligned with lewooogo-brain on 2026-04-01 (ADR-046 P2-02).
        """
        ...
# =============================================================================
# Phase 16: 絞殺者模式 - Adapter 實作
# =============================================================================
class IncidentMemoryAdapter:
    """
    Bridge between the local incident memory and the lewooogo-brain engine.

    Intended to satisfy lewooogo-brain's IIncidentMemory Protocol (Phase 16
    R1.3). Load, save and persist are delegated to the wrapped memory object
    so that its key prefix is the only one in use (Phase R-R2.1 fix); only the
    reverse-lookup indexes are read and written through Redis directly here.

    NOTE: per the original author, the wrapped object is lewooogo-brain's
    DualIncidentMemory with key_prefix "awoooi:incidents", configured by
    get_incident_memory().
    """

    def __init__(self, memory: Any) -> None:
        # The wrapped memory backend that every storage call is forwarded to.
        self._memory = memory

    async def load_incident(self, incident_id: str) -> Any:
        """
        Load an incident from the wrapped memory and return it unconverted.

        No conversion to the local model happens here: this adapter is handed
        to the brain engine, which (per ADR-046) expects its own incident
        type. Local services should use IncidentEngineAdapter.get_incident().
        """
        return await self._memory.load_incident(incident_id)

    async def save_incident(self, incident: Any, ttl_seconds: int = 604800) -> bool:
        """Save an incident via the wrapped memory (default TTL 604800 s = 7 days)."""
        return await self._memory.save_incident(incident, ttl_seconds)

    async def persist_incident(self, incident: Any) -> bool:
        """Persist an incident via the wrapped memory.

        The original note describes this as the episodic store (PostgreSQL
        through IncidentDbAdapter).
        """
        return await self._memory.persist_incident(incident)

    async def find_related_incident(
        self,
        namespace: str,
        target: str,
        window_minutes: int = 30,  # noqa: ARG002
    ) -> Any:
        """
        Look up an incident that a new signal could be aggregated into.

        The namespace index is consulted first; the target index is only
        queried when the namespace index has no entry. ``window_minutes`` is
        accepted for interface compatibility but is not used here.

        Returns:
            Whatever ``load_incident`` yields for the indexed id, or ``None``
            when neither index holds an entry.
        """
        client = get_redis()

        raw_id = await client.get(f"{INCIDENT_INDEX_NS}{namespace}")
        if not raw_id:
            # Fall back to the per-target index.
            raw_id = await client.get(f"{INCIDENT_INDEX_TARGET}{target}")
        if not raw_id:
            return None

        # The Redis client may hand back bytes depending on its configuration.
        resolved_id = raw_id.decode() if isinstance(raw_id, bytes) else raw_id
        return await self.load_incident(resolved_id)

    async def update_index(
        self,
        incident_id: str,
        namespace: str,
        target: str,
    ) -> bool:
        """
        Write the namespace and target reverse-index entries for an incident.

        Each key is written with ``nx=True`` and a TTL of
        AGGREGATION_WINDOW_SECONDS, so an existing entry is left untouched.

        Returns:
            ``True`` when both writes completed without raising, ``False``
            when any exception occurred (the exception is logged).
        """
        try:
            client = get_redis()
            index_keys = (
                f"{INCIDENT_INDEX_NS}{namespace}",
                f"{INCIDENT_INDEX_TARGET}{target}",
            )
            for key in index_keys:
                await client.set(key, incident_id, ex=AGGREGATION_WINDOW_SECONDS, nx=True)
        except Exception as exc:
            logger.exception("update_index_error", error=str(exc))
            return False
        return True
class BlastRadiusAdapter:
    """
    Adapter that exposes a topology graph as a blast-radius analyzer.

    Intended to satisfy lewooogo-brain's IBlastRadiusAnalyzer Protocol
    (Phase 16 R1.3) by wrapping the existing topology graph.

    Version: v1.0
    Created: 2026-03-26 (Taipei time zone)
    Author: Claude Code
    """

    def __init__(self, graph=None) -> None:
        """
        Args:
            graph: Object providing ``get_blast_radius(target)``. When
                ``None``, the module-level ``topology_graph`` is used.
        """
        # Compare against None explicitly: ``graph or topology_graph`` would
        # silently discard a caller-supplied graph that evaluates as falsy
        # (e.g. a graph class defining ``__len__`` while it is still empty).
        self._graph = topology_graph if graph is None else graph

    def analyze(self, target: str) -> list[str]:
        """
        Return the services affected by a failure of ``target``.

        The lookup depends on the wrapped graph; on any exception the failure
        is logged as a warning and the result degrades to ``[target]``, or to
        an empty list when ``target`` is the literal ``"unknown"``.
        """
        try:
            result: BlastRadiusResult = self._graph.get_blast_radius(target)
            return result.affected_services
        except Exception as e:
            logger.warning("blast_radius_analysis_failed", target=target, error=str(e))
            return [target] if target != "unknown" else []
# =============================================================================
# IncidentEngineAdapter (ADR-046: brain 輸出轉換為 LocalIncident)
# =============================================================================
class IncidentEngineAdapter:
    """
    Boundary layer wrapping the lewooogo-brain IncidentEngine.

    ADR-046 (2026-04-01 ogt): the brain engine produces its own incident
    type; results are passed through ``brain_to_local()`` here so that local
    services only ever see the local ``Incident`` model. This is what lets the
    ``Incident`` return annotations of the IIncidentEngine Protocol hold.
    """

    def __init__(self, brain_engine: Any) -> None:
        # The wrapped brain engine; every call below is forwarded to it.
        self._engine = brain_engine

    async def process_signal(self, signal_data: dict[str, Any]) -> Incident | None:
        """Forward a signal to the engine and convert a non-None result to the local model."""
        produced = await self._engine.process_signal(signal_data)
        return brain_to_local(produced) if produced is not None else None

    async def get_incident(self, incident_id: str) -> Incident | None:
        """Fetch an incident from the engine and convert a non-None result to the local model."""
        fetched = await self._engine.get_incident(incident_id)
        return brain_to_local(fetched) if fetched is not None else None

    async def update_status(self, incident_id: str, status: Any) -> bool:
        """Delegate a status update straight to the engine.

        The original note states that brain and local status values are
        compatible. NOTE(review): no status mapping is applied here; confirm
        that every local status value is accepted by the brain engine.
        """
        return await self._engine.update_status(incident_id, status)
# =============================================================================
# Singleton (Phase R-R2: 僅保留 lewooogo-brain 版本)
# =============================================================================
_new_incident_engine: IncidentEngineAdapter | None = None


def get_incident_engine() -> IncidentEngineAdapter:
    """
    Return the process-wide incident engine, building it on first use.

    Phase R-R2: the lewooogo-brain IncidentEngine is the only implementation.
    ADR-046: the returned object is an IncidentEngineAdapter, so its output
    is already converted to the local Incident model.
    Rollback: git revert the Phase R-R2 commit and redeploy.

    Raises:
        ImportError: raised while importing lewooogo_brain.engines or during
            construction; logged, then re-raised.
        Exception: any other initialization failure is logged and re-raised
            unchanged (it is not wrapped in another exception type).
    """
    global _new_incident_engine

    # Fast path: the engine has already been built.
    if _new_incident_engine is not None:
        return _new_incident_engine

    try:
        # Imported lazily so that a missing package only fails on first use.
        from lewooogo_brain.engines import IncidentEngine as NewIncidentEngine

        brain_engine = NewIncidentEngine(
            memory=IncidentMemoryAdapter(get_incident_memory()),
            blast_analyzer=BlastRadiusAdapter(),
            logger=logger,
        )
        _new_incident_engine = IncidentEngineAdapter(brain_engine)
        logger.info(
            "incident_engine_initialized",
            version="lewooogo-brain",
            adapter="IncidentEngineAdapter",
        )
    except ImportError as exc:
        logger.error("lewooogo_brain_engines_not_available", error=str(exc))
        raise
    except Exception as exc:
        logger.error("incident_engine_init_failed", error=str(exc))
        raise

    return _new_incident_engine