339 lines
12 KiB
Python
339 lines
12 KiB
Python
"""
|
||
Incident Memory Provider - 事件記憶體提供者
|
||
============================================
|
||
Phase 6.4e: DualIncidentMemory 整合
|
||
Phase 16 R1.2: 絞殺者模式 (Strangler Fig Pattern) 2026-03-26
|
||
Phase R-R2 (2026-04-01 ogt): 移除內嵌 DualIncidentMemory 重複邏輯,
|
||
全面切換至 lewooogo-brain。回滾方式: git revert + redeploy。
|
||
|
||
設計:
|
||
- IncidentDbAdapter: SQLAlchemy Bridge,注入 lewooogo-brain DualIncidentMemory
|
||
- 雙層記憶體: Working (Redis) + Episodic (PostgreSQL)
|
||
- 反向索引: namespace:target -> incident_id
|
||
|
||
統帥鐵律:
|
||
- Working Memory (Redis): 7 天 TTL
|
||
- Episodic Memory (PostgreSQL): 永久
|
||
- 反向索引: 30 分鐘 TTL (聚合窗口)
|
||
"""
|
||
|
||
from typing import Any
|
||
|
||
import structlog
|
||
|
||
from src.core.redis_client import get_redis
|
||
from src.db.base import get_db_context
|
||
from src.db.models import IncidentRecord
|
||
from src.models.incident import Incident
|
||
|
||
# Module-level structlog logger; every event in this module is emitted through it
# as an event name plus keyword fields.
logger = structlog.get_logger(__name__)
|
||
|
||
|
||
def _signal_to_dict(signal: Any) -> dict[str, Any]:
|
||
"""Normalize brain/local Signal objects and raw dicts into one shape."""
|
||
if isinstance(signal, dict):
|
||
return signal
|
||
if hasattr(signal, "model_dump"):
|
||
return signal.model_dump(mode="json")
|
||
return {
|
||
"alert_name": getattr(signal, "alert_name", None),
|
||
"severity": getattr(signal, "severity", None),
|
||
"source": getattr(signal, "source", None),
|
||
"labels": getattr(signal, "labels", None) or {},
|
||
"annotations": getattr(signal, "annotations", None) or {},
|
||
"fingerprint": getattr(signal, "fingerprint", None),
|
||
}
|
||
|
||
|
||
def _derive_incident_alert_metadata(incident: Incident) -> dict[str, Any]:
    """Build alert metadata for an incident from its first signal.

    Only ``incident.signals[0]`` is inspected (normalised through
    ``_signal_to_dict``); an incident without signals yields defaults.

    Args:
        incident: Incident whose ``signals`` and ``severity`` are read.

    Returns:
        A dict with the keys ``alertname``, ``severity``, ``alert_category``,
        ``notification_type``, ``description`` and ``actor``. Text fields are
        ``None`` when no value could be derived; the two classification fields
        are ``None`` when no alert name is available; ``actor`` falls back to
        ``"signal_worker"``.
    """

    def text_or_none(value: Any) -> Any:
        # Falsy values ("" / None) are reported as None, everything else as str.
        return str(value) if value else None

    signals = incident.signals
    head = signals[0] if signals else None
    payload = _signal_to_dict(head) if head else {}
    labels = payload.get("labels") or {}
    annotations = payload.get("annotations") or {}

    # Alert name: label first, then either spelling of the signal field.
    alertname = labels.get("alertname")
    if not alertname:
        alertname = payload.get("alert_name")
    if not alertname:
        alertname = payload.get("alertname")
    if not alertname:
        alertname = ""

    # Severity precedence: signal -> incident -> label -> "warning".
    raw_severity = payload.get("severity")
    if not raw_severity:
        raw_severity = getattr(incident.severity, "value", incident.severity)
    if not raw_severity:
        raw_severity = labels.get("severity")
    if not raw_severity:
        raw_severity = "warning"
    # Unwrap enum-like values that carry their payload in ``.value``.
    severity = getattr(raw_severity, "value", raw_severity)

    alert_category = notification_type = None
    if alertname:
        # Imported lazily, only when there is something to classify.
        from src.services.incident_service import classify_alert_early

        alert_category, notification_type = classify_alert_early(
            str(alertname),
            str(severity),
            labels,
        )

    # Description precedence: message -> description -> summary.
    description = annotations.get("message")
    if not description:
        description = annotations.get("description")
    if not description:
        description = annotations.get("summary")
    if not description:
        description = ""

    actor = payload.get("source") or labels.get("source") or "signal_worker"

    return {
        "alertname": text_or_none(alertname),
        "severity": text_or_none(severity),
        "alert_category": alert_category,
        "notification_type": notification_type,
        "description": text_or_none(description),
        "actor": actor,
    }
|
||
|
||
|
||
async def _add_signal_timeline_event(
    incident: Incident,
    metadata: dict[str, Any],
) -> None:
    """Seed a "Signal received" timeline entry for an incident (best effort).

    Nothing happens when ``metadata`` carries no ``alertname``. Any exception
    raised while importing the timeline service or adding the event is logged
    as a warning and swallowed, so this coroutine never raises for those
    failures.

    Args:
        incident: Incident supplying ``incident_id`` and ``severity``.
        metadata: Alert metadata; ``alertname``, ``description`` and ``actor``
            are read from it.
    """
    alertname = metadata.get("alertname")
    if alertname:
        try:
            # Lazy import: a failure here is treated like any other seed failure.
            from src.services.approval_db import get_timeline_service

            timeline = get_timeline_service()
            await timeline.add_event(
                event_type="webhook",
                status="success",
                title=f"Signal received: {alertname}",
                description=metadata.get("description"),
                actor=metadata.get("actor"),
                actor_role="signal_worker",
                # Enum-like severities are unwrapped to their ``.value``.
                risk_level=getattr(incident.severity, "value", incident.severity),
                incident_id=incident.incident_id,
            )
        except Exception as exc:
            logger.warning(
                "incident_signal_timeline_seed_failed",
                incident_id=incident.incident_id,
                alertname=alertname,
                error=str(exc),
            )
|
||
|
||
|
||
# =============================================================================
# Phase 16: IncidentDbAdapter (dependency-injection implementation)
# =============================================================================
|
||
|
||
class IncidentDbAdapter:
    """SQLAlchemy bridge implementing lewooogo-brain's ``IIncidentDbAdapter``.

    Phase 16: wraps the apps/api SQLAlchemy model operations in an adapter that
    is injected into lewooogo-brain's ``DualIncidentMemory``.

    Both public coroutines report failure through their return value
    (``None`` / ``False``) after logging it, instead of raising.
    """

    async def load(self, incident_id: str) -> Incident | None:
        """Load an incident from PostgreSQL.

        Args:
            incident_id: Value matched against ``IncidentRecord.incident_id``.

        Returns:
            The incident built by ``_record_to_incident`` (which constructs the
            lewooogo-brain ``Incident``), or ``None`` when no row matches or
            when the lookup fails (logged as ``db_adapter_load_failed``).
        """
        try:
            async with get_db_context() as db:
                record = await self._fetch_record(db, incident_id)
                # Convert while the session context is still open.
                if record:
                    return self._record_to_incident(record)
                return None
        except Exception as e:
            logger.error("db_adapter_load_failed", incident_id=incident_id, error=str(e))
            return None

    async def save(self, incident: Incident) -> bool:
        """Upsert an incident into PostgreSQL.

        An existing row (matched on ``incident_id``) is updated in place;
        otherwise a new ``IncidentRecord`` is added and a timeline entry is
        seeded through ``_add_signal_timeline_event``.

        Alert metadata is optional enrichment: if deriving it fails, the
        failure is logged and the incident is still saved without it (the
        update path backfills empty metadata columns on a later save).

        Args:
            incident: Incident to persist.

        Returns:
            ``True`` on success, ``False`` when persisting failed (logged as
            ``db_adapter_save_failed``).
        """
        try:
            metadata = _derive_incident_alert_metadata(incident)
        except Exception as e:
            # Classification problems must not prevent the incident itself
            # from being stored, nor escape a method that returns bool.
            logger.warning(
                "db_adapter_alert_metadata_failed",
                incident_id=incident.incident_id,
                error=str(e),
            )
            metadata = {}

        created = False
        try:
            async with get_db_context() as db:
                existing = await self._fetch_record(db, incident.incident_id)
                if existing:
                    self._apply_update(existing, incident, metadata)
                else:
                    db.add(self._build_record(incident, metadata))
                    created = True

            # Seed the timeline only for newly created rows, once the session
            # context has exited without raising.
            if created:
                await _add_signal_timeline_event(incident, metadata)
            logger.debug("db_adapter_save_success", incident_id=incident.incident_id)
            return True

        except Exception as e:
            logger.error("db_adapter_save_failed", incident_id=incident.incident_id, error=str(e))
            return False

    @staticmethod
    async def _fetch_record(db: Any, incident_id: str) -> IncidentRecord | None:
        """Return the ``IncidentRecord`` with this ``incident_id``, or ``None``."""
        from sqlalchemy import select

        stmt = select(IncidentRecord).where(
            IncidentRecord.incident_id == incident_id
        )
        result = await db.execute(stmt)
        return result.scalar_one_or_none()

    @staticmethod
    def _apply_update(
        existing: IncidentRecord,
        incident: Incident,
        metadata: dict[str, Any],
    ) -> None:
        """Copy the mutable incident fields onto an already stored row."""
        existing.status = incident.status.value
        existing.severity = incident.severity.value
        existing.signals = [s.model_dump(mode="json") for s in incident.signals]
        existing.affected_services = incident.affected_services
        existing.updated_at = incident.updated_at
        # Timestamps are only ever set, never cleared, by an update.
        if incident.resolved_at:
            existing.resolved_at = incident.resolved_at
        if incident.closed_at:
            existing.closed_at = incident.closed_at
        # Metadata columns are backfilled only while they are still empty.
        for column in ("alertname", "notification_type", "alert_category"):
            value = metadata.get(column)
            if value and not getattr(existing, column):
                setattr(existing, column, value)

    @staticmethod
    def _build_record(incident: Incident, metadata: dict[str, Any]) -> IncidentRecord:
        """Create a new ``IncidentRecord`` from an incident and its metadata."""
        # Optional attributes: absent or falsy values are stored as None.
        decision_chain = getattr(incident, "decision_chain", None)
        outcome = getattr(incident, "outcome", None)
        return IncidentRecord(
            incident_id=incident.incident_id,
            status=incident.status.value,
            severity=incident.severity.value,
            signals=[s.model_dump(mode="json") for s in incident.signals],
            affected_services=incident.affected_services,
            decision_chain=(
                decision_chain.model_dump(mode="json") if decision_chain else None
            ),
            proposal_ids=[str(pid) for pid in incident.proposal_ids],
            outcome=outcome.model_dump(mode="json") if outcome else None,
            created_at=incident.created_at,
            updated_at=incident.updated_at,
            resolved_at=incident.resolved_at,
            closed_at=incident.closed_at,
            ttl_days=getattr(incident, "ttl_days", 30),
            vectorized=getattr(incident, "vectorized", False),
            alertname=metadata.get("alertname"),
            notification_type=metadata.get("notification_type"),
            alert_category=metadata.get("alert_category"),
        )

    def _record_to_incident(self, record: IncidentRecord) -> Any:
        """Convert a DB record into a BrainIncident (the lewooogo-brain version).

        Note: a BrainIncident is returned for internal use by lewooogo-brain's
        DualIncidentMemory. Local services consume it after converting through
        ``IncidentConverter.brain_to_local()``. (ADR-046 - 2026-04-01 ogt)

        Args:
            record: Stored row to convert.

        Returns:
            A lewooogo-brain ``Incident``; list columns that are ``None`` in
            the row are passed as empty lists.
        """
        from lewooogo_brain.interfaces.incident_processor import (
            Incident as BrainIncident,
            IncidentStatus as BrainIncidentStatus,
            Severity as BrainSeverity,
            Signal as BrainSignal,
        )

        signals = [BrainSignal.model_validate(s) for s in record.signals or []]

        return BrainIncident(
            incident_id=record.incident_id,
            status=BrainIncidentStatus(record.status),
            severity=BrainSeverity(record.severity),
            signals=signals,
            affected_services=record.affected_services or [],
            proposal_ids=record.proposal_ids or [],
            created_at=record.created_at,
            updated_at=record.updated_at,
            resolved_at=record.resolved_at,
            closed_at=record.closed_at,
        )
|
||
|
||
|
||
# =============================================================================
# Singleton (Phase R-R2: only the lewooogo-brain version is kept)
# =============================================================================
|
||
|
||
# Module-level cache of the lewooogo-brain DualIncidentMemory instance; stays None
# until _get_new_engine_memory() has constructed it.
_new_engine_memory: Any | None = None
# Adapter passed to DualIncidentMemory as ``db_adapter``; created on first use by
# _get_new_engine_memory().
_db_adapter: IncidentDbAdapter | None = None
|
||
|
||
|
||
def get_incident_memory() -> Any:
    """Return the shared DualIncidentMemory instance (singleton).

    Phase R-R2: the lewooogo-brain package version is used exclusively.
    Rollback procedure: git revert the Phase R-R2 commit + redeploy.

    Returns:
        Whatever ``_get_new_engine_memory()`` returns.
    """
    memory = _get_new_engine_memory()
    return memory
|
||
|
||
|
||
def _get_new_engine_memory() -> Any:
    """Return the lewooogo-brain DualIncidentMemory, building it on first call.

    Notes:
    - Requires lewooogo-brain to be installed (configured in the Dockerfile).
    - PostgreSQL access is injected through IncidentDbAdapter (Phase 16 DI
      pattern).

    Returns:
        The cached ``DualIncidentMemory`` instance.

    Raises:
        ImportError: lewooogo-brain cannot be imported (logged as
            ``lewooogo_brain_not_available``, then re-raised).
        Exception: any other initialisation failure (logged as
            ``new_engine_init_failed``, then re-raised).
    """
    global _new_engine_memory, _db_adapter

    # Fast path: already initialised.
    if _new_engine_memory is not None:
        return _new_engine_memory

    try:
        from lewooogo_brain.adapters.incident_memory import (
            DualIncidentMemory as BrainDualIncidentMemory,
        )

        redis = get_redis()

        # Reuse an adapter left over from an earlier, failed initialisation.
        if _db_adapter is None:
            _db_adapter = IncidentDbAdapter()

        _new_engine_memory = BrainDualIncidentMemory(
            redis_client=redis,
            db_adapter=_db_adapter,
            key_prefix="awoooi:incidents",
        )

        logger.info(
            "incident_memory_initialized",
            engine="lewooogo_brain_package",
            db_adapter="IncidentDbAdapter",
            redis_connected=True,
        )
    except ImportError as e:
        logger.error("lewooogo_brain_not_available", error=str(e))
        raise
    except Exception as e:
        logger.error("new_engine_init_failed", error=str(e))
        raise

    return _new_engine_memory
|