""" Sentry Webhook Service ====================== Phase 23 (ADR-048): Sentry → OpenClaw AI Triage 業務邏輯層 遵循 leWOOOgo 積木化原則: - Service 層負責: 解析、分析、建立 Incident、組裝訊息 - Router 層只做: 接收、驗證、呼叫 Service、回傳 - 禁止 Router 層直接存取 Redis/DB 版本: v1.0 建立: 2026-04-01 (台北時區) 建立者: Claude Code (Phase 23 ADR-048) """ import hashlib import hmac import structlog from src.core.config import settings from src.models.approval import ( ApprovalRequestCreate, BlastRadius, DataImpact, RiskLevel, ) from src.services.approval_db import get_approval_service from src.services.openclaw_http_service import get_openclaw_http_service logger = structlog.get_logger(__name__) # Sentry Level → Risk Level 映射 (ADR-048) SENTRY_LEVEL_TO_RISK: dict[str, RiskLevel] = { "fatal": RiskLevel.CRITICAL, "error": RiskLevel.HIGH, "warning": RiskLevel.MEDIUM, "info": RiskLevel.LOW, } # ============================================================================= # Data Models (純資料容器,不含業務邏輯) # ============================================================================= class SentryIssueContext: """ 解析後的 Sentry Issue 上下文 從 Sentry webhook payload 提取關鍵欄位, 供後續 OpenClaw 分析與 Incident 建立使用。 """ def __init__( self, issue_id: str, title: str, culprit: str, level: str, project: str, first_seen: str | None, count: int, message: str | None, platform: str | None, tags: list, stacktrace: list[dict], ) -> None: self.issue_id = issue_id self.title = title self.culprit = culprit self.level = level self.project = project self.first_seen = first_seen self.count = count self.message = message self.platform = platform self.tags = tags self.stacktrace = stacktrace def to_dict(self) -> dict: return { "issue_id": self.issue_id, "title": self.title, "culprit": self.culprit, "level": self.level, "project": self.project, "first_seen": self.first_seen, "count": self.count, "message": self.message, "platform": self.platform, "tags": self.tags, "stacktrace": self.stacktrace, } class AIDecision: """ OpenClaw AI 分析結果 封裝 OpenClaw 對 Sentry Issue 的分析輸出, 包含根因、影響範圍、修復建議、預防措施。 """ def __init__( self, root_cause: str, impact: str, fix_suggestion: str, prevention: str, confidence: float, analyzed_by: str, ) -> None: self.root_cause = root_cause self.impact = impact self.fix_suggestion = fix_suggestion self.prevention = prevention self.confidence = confidence self.analyzed_by = analyzed_by # ============================================================================= # SentryWebhookService # ============================================================================= class SentryWebhookService: """ Sentry Webhook 業務邏輯 Service 職責: 1. parse_sentry_issue() - 解析 webhook payload → SentryIssueContext 2. analyze_with_openclaw() - 呼叫 OpenClaw 分析 → AIDecision | None 3. create_incident() - 建立 Approval (Incident) 記錄 4. build_telegram_message() - 組裝 Telegram 告警訊息 leWOOOgo 積木化原則: - 禁止直接存取 Redis/DB,透過對應 Service 呼叫 - 每個方法單一職責 """ def parse_sentry_issue(self, payload: dict) -> SentryIssueContext | None: """ 解析 Sentry Issue Alert Payload → SentryIssueContext Args: payload: Sentry webhook raw JSON dict Returns: SentryIssueContext 若解析成功,否則 None Sentry Payload 結構: { "action": "triggered", "data": { "issue": { id, title, culprit, level, firstSeen, count, project }, "event": { message, platform, tags, exception } } } """ try: issue_data = payload.get("data", {}).get("issue", {}) event_data = payload.get("data", {}).get("event", {}) issue_id = issue_data.get("id") if not issue_id: logger.warning("sentry_parse_missing_issue_id") return None return SentryIssueContext( issue_id=str(issue_id), title=issue_data.get("title", "Unknown Error"), culprit=issue_data.get("culprit", "unknown"), level=issue_data.get("level", "error"), project=issue_data.get("project", {}).get("slug", "unknown"), first_seen=issue_data.get("firstSeen"), count=int(issue_data.get("count", 1)), message=event_data.get("message"), platform=event_data.get("platform"), tags=event_data.get("tags", []), stacktrace=self._extract_stacktrace(event_data), ) except Exception as e: logger.exception("sentry_parse_failed", error=str(e)) return None def _extract_stacktrace(self, event_data: dict) -> list[dict]: """提取 Stack Trace 最後 5 個 frame""" try: values = event_data.get("exception", {}).get("values", []) if not values: return [] frames = values[0].get("stacktrace", {}).get("frames", []) return [ { "filename": f.get("filename"), "function": f.get("function"), "lineno": f.get("lineno"), "context_line": f.get("context_line"), } for f in frames[-5:] ] except Exception: return [] async def analyze_with_openclaw( self, issue: SentryIssueContext, ) -> AIDecision | None: """ 透過 OpenClawHttpService 分析 Sentry Issue Args: issue: 已解析的 SentryIssueContext Returns: AIDecision 若分析成功,否則 None (降級:無 AI 分析仍繼續流程) """ try: service = get_openclaw_http_service() data = await service.analyze_error( error_context=issue.to_dict(), prefer_local=True, timeout=60.0, ) if not data: return None return AIDecision( root_cause=data.get("root_cause", "無法判斷根本原因"), impact=data.get("impact", "影響範圍未知"), fix_suggestion=data.get("fix_suggestion", "請人工排查"), prevention=data.get("prevention", "待補充"), confidence=float(data.get("confidence", 0.0)), analyzed_by=data.get("analyzed_by", "unknown"), ) except Exception as e: logger.exception("sentry_openclaw_analyze_failed", error=str(e)) return None async def create_incident( self, issue: SentryIssueContext, decision: AIDecision | None, anomaly_frequency: dict | None = None, ) -> str: """ 建立 Approval (Incident) 記錄 Args: issue: Sentry Issue 上下文 decision: AI 分析結果 (可為 None,降級處理) anomaly_frequency: 頻率統計 dict (可為 None) Returns: str: Approval ID """ import uuid try: approval_service = get_approval_service() # 基礎風險等級 risk_level = SENTRY_LEVEL_TO_RISK.get(issue.level, RiskLevel.MEDIUM) # 根據頻率升級 (ADR-037) if anomaly_frequency: escalation = anomaly_frequency.get("escalation_level") if escalation == "PERMANENT_FIX": risk_level = RiskLevel.CRITICAL elif escalation == "ESCALATE" and risk_level != RiskLevel.CRITICAL: risk_level = RiskLevel.HIGH metadata: dict = { "source": "sentry", "alert_type": f"sentry_{issue.level}", "sentry_issue_id": issue.issue_id, "sentry_project": issue.project, "culprit": issue.culprit, "error_count": issue.count, "first_seen": issue.first_seen, "stacktrace": issue.stacktrace, "llm_provider": decision.analyzed_by if decision else "pending", "llm_confidence": decision.confidence if decision else 0.0, } if anomaly_frequency: metadata["anomaly_frequency"] = anomaly_frequency approval_request = ApprovalRequestCreate( action=f"Sentry {issue.level.upper()} Alert: {issue.culprit}", description=( f"{issue.title}\n\n" f"Root Cause: {decision.root_cause if decision else '待分析'}\n" f"Suggestion: {decision.fix_suggestion if decision else '待 AI 分析'}" ), risk_level=risk_level, blast_radius=BlastRadius( affected_pods=1, estimated_downtime="0", related_services=[issue.project], data_impact=DataImpact.READ_ONLY, ), dry_run_checks=[], requested_by="sentry-webhook", metadata=metadata, ) approval = await approval_service.create_approval(request=approval_request) approval_id = str(approval.id) logger.info( "sentry_incident_created", approval_id=approval_id, issue_id=issue.issue_id, risk_level=risk_level.value, ) return approval_id except Exception as e: logger.exception("sentry_incident_creation_failed", error=str(e)) return f"temp-{uuid.uuid4().hex[:8]}" def build_telegram_message( self, issue: SentryIssueContext, decision: AIDecision | None, approval_id: str, anomaly_frequency: dict | None = None, ) -> str: """ 組裝 Telegram 告警訊息 (純文字摘要) 格式符合 feedback_telegram_alert_format.md 規範。 實際傳送使用 TelegramGateway.send_approval_card(), 此方法提供純文字版本供日誌記錄與測試。 Args: issue: Sentry Issue 上下文 decision: AI 分析結果 approval_id: Approval ID anomaly_frequency: 頻率統計 Returns: str: 格式化訊息文字 """ level_emoji = {"fatal": "💀", "error": "❌", "warning": "⚠️"}.get(issue.level, "🐛") freq_text = "" if anomaly_frequency and anomaly_frequency.get("count_24h", 0) > 1: freq_text = ( f"\n📊 頻率: " f"1h:{anomaly_frequency.get('count_1h', 0)} / " f"24h:{anomaly_frequency.get('count_24h', 0)} / " f"7d:{anomaly_frequency.get('count_7d', 0)}" ) analysis_text = "" if decision: analysis_text = ( f"\n───────────────────────────\n" f"🧠 OpenClaw 分析 (信心: {decision.confidence:.0%}):\n" f"「{decision.root_cause[:120]}」\n" f"💡 建議: {decision.fix_suggestion[:100]}" ) return ( f"═══════════════════════════\n" f"{level_emoji} SENTRY {issue.level.upper()} 告警\n" f"═══════════════════════════\n" f"📦 專案: {issue.project}\n" f"📍 {issue.culprit}\n" f"🔖 {issue.title[:100]}" f"{freq_text}" f"{analysis_text}\n" f"───────────────────────────\n" f"🆔 Approval: {approval_id}" ) # ============================================================================= # Webhook Secret 驗證 (ADR-048) # ============================================================================= class SentrySignatureError(Exception): """Sentry Webhook 簽章驗證失敗""" def verify_sentry_signature(body: bytes, signature_header: str | None) -> bool: """ 驗證 Sentry Webhook 請求的 HMAC-SHA256 簽章 Sentry 使用 Header: sentry-hook-signature (hmac-sha256 hex digest) Fail-Closed 安全策略 (對齊 GitHub Webhook ADR): - 生產環境: Secret 未設定 → 拒絕 - 開發環境: Secret 未設定 → 允許 (僅供本地測試) Args: body: Request body bytes signature_header: sentry-hook-signature header 值 Returns: bool: 驗證通過 Raises: SentrySignatureError: 驗證失敗 """ secret = settings.SENTRY_WEBHOOK_SECRET if not secret: if settings.ENVIRONMENT == "prod": logger.critical( "sentry_webhook_secret_missing_in_production", message="CRITICAL: SENTRY_WEBHOOK_SECRET missing in production!", ) raise SentrySignatureError( "SENTRY_WEBHOOK_SECRET missing in production environment" ) # 開發環境允許跳過 logger.warning( "sentry_signature_skipped_dev_only", reason="SENTRY_WEBHOOK_SECRET not configured (dev mode only)", ) return True if not signature_header: raise SentrySignatureError("Missing sentry-hook-signature header") expected = hmac.new( secret.encode(), body, hashlib.sha256, ).hexdigest() if not hmac.compare_digest(signature_header, expected): raise SentrySignatureError("Invalid sentry-hook-signature") return True # ============================================================================= # Singleton # ============================================================================= _sentry_webhook_service: SentryWebhookService | None = None def get_sentry_webhook_service() -> SentryWebhookService: """取得 SentryWebhookService 實例 (Singleton)""" global _sentry_webhook_service if _sentry_webhook_service is None: _sentry_webhook_service = SentryWebhookService() return _sentry_webhook_service