Phase S 技術債修復 (首席架構師審查 82→完整): - S-01: generate_alert_fingerprint 移至 AlertAnalyzer.generate_fingerprint() staticmethod - S-04: 移除 Pydantic v2 deprecated json_encoders (直接用原生 datetime 序列化) Sentry MCP 整合 (Phase 23): - ADR-048: Sentry→OpenClaw AI Triage 架構決策 - sentry_webhook_service.py: parse/analyze/create_incident/build_message Service 層 - config.py: SENTRY_WEBHOOK_SECRET (Fail-Closed HMAC-SHA256) Playwright MCP 整合 (短期): - smoke.spec.ts: 5 頁面 E2E smoke test (home/dashboard/incidents/approvals/terminal) - cd.yaml: E2E Smoke Test 步驟 + Telegram 🎭 Smoke 狀態通知 長期規劃 ADR: - ADR-049: Figma Code Connect 設計系統同步 - ADR-050: Telegram 互動式 Incident 2.0 (6鍵 Inline Keyboard) - ADR-051: Context7 依賴升級顧問 (Next.js 14→15, FastAPI 0.115→0.128) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
453 lines
15 KiB
Python
453 lines
15 KiB
Python
"""
|
||
Sentry Webhook Service
|
||
======================
|
||
Phase 23 (ADR-048): Sentry → OpenClaw AI Triage 業務邏輯層
|
||
|
||
遵循 leWOOOgo 積木化原則:
|
||
- Service 層負責: 解析、分析、建立 Incident、組裝訊息
|
||
- Router 層只做: 接收、驗證、呼叫 Service、回傳
|
||
- 禁止 Router 層直接存取 Redis/DB
|
||
|
||
版本: v1.0
|
||
建立: 2026-04-01 (台北時區)
|
||
建立者: Claude Code (Phase 23 ADR-048)
|
||
"""
|
||
|
||
import hashlib
|
||
import hmac
|
||
|
||
import structlog
|
||
|
||
from src.core.config import settings
|
||
from src.models.approval import (
|
||
ApprovalRequestCreate,
|
||
BlastRadius,
|
||
DataImpact,
|
||
RiskLevel,
|
||
)
|
||
from src.services.approval_db import get_approval_service
|
||
from src.services.openclaw_http_service import get_openclaw_http_service
|
||
|
||
logger = structlog.get_logger(__name__)

# Sentry issue level -> approval risk level (ADR-048).
# Only the four levels below are mapped; any other level value is left to the
# fallback chosen by whoever looks the level up in this table.
SENTRY_LEVEL_TO_RISK: dict[str, RiskLevel] = {
    "fatal": RiskLevel.CRITICAL,
    "error": RiskLevel.HIGH,
    "warning": RiskLevel.MEDIUM,
    "info": RiskLevel.LOW,
}
|
||
|
||
|
||
# =============================================================================
# Data Models (plain data containers, no business logic)
# =============================================================================
|
||
|
||
class SentryIssueContext:
|
||
"""
|
||
解析後的 Sentry Issue 上下文
|
||
|
||
從 Sentry webhook payload 提取關鍵欄位,
|
||
供後續 OpenClaw 分析與 Incident 建立使用。
|
||
"""
|
||
|
||
def __init__(
|
||
self,
|
||
issue_id: str,
|
||
title: str,
|
||
culprit: str,
|
||
level: str,
|
||
project: str,
|
||
first_seen: str | None,
|
||
count: int,
|
||
message: str | None,
|
||
platform: str | None,
|
||
tags: list,
|
||
stacktrace: list[dict],
|
||
) -> None:
|
||
self.issue_id = issue_id
|
||
self.title = title
|
||
self.culprit = culprit
|
||
self.level = level
|
||
self.project = project
|
||
self.first_seen = first_seen
|
||
self.count = count
|
||
self.message = message
|
||
self.platform = platform
|
||
self.tags = tags
|
||
self.stacktrace = stacktrace
|
||
|
||
def to_dict(self) -> dict:
|
||
return {
|
||
"issue_id": self.issue_id,
|
||
"title": self.title,
|
||
"culprit": self.culprit,
|
||
"level": self.level,
|
||
"project": self.project,
|
||
"first_seen": self.first_seen,
|
||
"count": self.count,
|
||
"message": self.message,
|
||
"platform": self.platform,
|
||
"tags": self.tags,
|
||
"stacktrace": self.stacktrace,
|
||
}
|
||
|
||
|
||
class AIDecision:
    """
    Result of the OpenClaw AI analysis.

    Wraps what OpenClaw produced for a Sentry issue: root cause, impact,
    fix suggestion and prevention measures, plus the reported confidence and
    the name of the analyzer that produced it.
    """

    def __init__(
        self,
        root_cause: str,
        impact: str,
        fix_suggestion: str,
        prevention: str,
        confidence: float,
        analyzed_by: str,
    ) -> None:
        """Store every argument unchanged on the attribute of the same name."""
        self.root_cause = root_cause
        self.impact = impact
        self.fix_suggestion = fix_suggestion
        self.prevention = prevention
        self.confidence = confidence
        self.analyzed_by = analyzed_by

    def __repr__(self) -> str:
        """Return a short, log-friendly summary of the decision."""
        return (
            f"{type(self).__name__}("
            f"analyzed_by={self.analyzed_by!r}, "
            f"confidence={self.confidence!r}, "
            f"root_cause={self.root_cause!r})"
        )
|
||
|
||
|
||
# =============================================================================
# SentryWebhookService
# =============================================================================
|
||
|
||
class SentryWebhookService:
    """
    Business-logic service for Sentry webhooks.

    Responsibilities:
        1. parse_sentry_issue()     - webhook payload -> SentryIssueContext
        2. analyze_with_openclaw()  - OpenClaw analysis -> AIDecision | None
        3. create_incident()        - create the Approval (Incident) record
        4. build_telegram_message() - assemble the Telegram alert text

    leWOOOgo building-block principles:
        - Never access Redis/DB directly; go through the owning service.
        - One responsibility per method.
    """

    # ------------------------------------------------------------------
    # Small coercion helpers: webhook / LLM JSON may contain null or
    # wrongly-typed values, and dict.get(key, default) does NOT apply the
    # default when the key is present with a null value.
    # ------------------------------------------------------------------

    @staticmethod
    def _as_dict(value: object) -> dict:
        """Return *value* when it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _to_int(value: object, default: int) -> int:
        """Convert *value* with int(); return *default* when that is impossible."""
        try:
            return int(value)  # type: ignore[call-overload]
        except (TypeError, ValueError, OverflowError):
            return default

    @staticmethod
    def _to_float(value: object, default: float) -> float:
        """Convert *value* with float(); return *default* when that is impossible."""
        try:
            return float(value)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return default

    def parse_sentry_issue(self, payload: dict) -> SentryIssueContext | None:
        """
        Parse a Sentry issue-alert payload into a SentryIssueContext.

        Args:
            payload: Raw JSON dict of the Sentry webhook.

        Returns:
            A SentryIssueContext on success. None when the issue id is missing
            or when parsing fails unexpectedly (the failure is logged).

        Payload structure read by this method:
            {
                "action": "triggered",
                "data": {
                    "issue": { id, title, culprit, level, firstSeen, count, project },
                    "event": { message, platform, tags, exception }
                }
            }

        Missing, null or wrongly-typed optional fields fall back to their
        defaults instead of discarding the whole issue.
        """
        try:
            data = self._as_dict(payload.get("data"))
            issue_data = self._as_dict(data.get("issue"))
            event_data = self._as_dict(data.get("event"))

            issue_id = issue_data.get("id")
            if not issue_id:
                logger.warning("sentry_parse_missing_issue_id")
                return None

            project = self._as_dict(issue_data.get("project"))

            # `or` (rather than a .get default) also covers explicit nulls;
            # str() guarantees the .upper()/slicing done later cannot fail.
            return SentryIssueContext(
                issue_id=str(issue_id),
                title=str(issue_data.get("title") or "Unknown Error"),
                culprit=str(issue_data.get("culprit") or "unknown"),
                level=str(issue_data.get("level") or "error"),
                project=str(project.get("slug") or "unknown"),
                first_seen=issue_data.get("firstSeen"),
                count=self._to_int(issue_data.get("count"), 1),
                message=event_data.get("message"),
                platform=event_data.get("platform"),
                tags=event_data.get("tags") or [],
                stacktrace=self._extract_stacktrace(event_data),
            )

        except Exception as e:
            # Boundary handler: a malformed payload must never crash the webhook.
            logger.exception("sentry_parse_failed", error=str(e))
            return None

    def _extract_stacktrace(self, event_data: dict) -> list[dict]:
        """
        Extract the last 5 stack frames of the first exception value.

        Args:
            event_data: The "event" object of the webhook payload.

        Returns:
            A list of dicts with the keys filename, function, lineno and
            context_line. Empty when the payload carries no usable frames.
            Entries of the frame list that are not dicts are skipped.
        """
        exception = self._as_dict(self._as_dict(event_data).get("exception"))
        values = exception.get("values")
        if not isinstance(values, list) or not values:
            return []

        stacktrace = self._as_dict(self._as_dict(values[0]).get("stacktrace"))
        frames = stacktrace.get("frames")
        if not isinstance(frames, list):
            return []

        return [
            {
                "filename": frame.get("filename"),
                "function": frame.get("function"),
                "lineno": frame.get("lineno"),
                "context_line": frame.get("context_line"),
            }
            for frame in frames[-5:]
            if isinstance(frame, dict)
        ]

    async def analyze_with_openclaw(
        self,
        issue: SentryIssueContext,
    ) -> AIDecision | None:
        """
        Analyze a Sentry issue through the OpenClaw HTTP service.

        Args:
            issue: The parsed SentryIssueContext.

        Returns:
            An AIDecision on success, otherwise None. None is a degraded mode:
            callers continue the flow without AI analysis. Any exception raised
            by the service is logged and converted to None.
        """
        try:
            service = get_openclaw_http_service()
            data = await service.analyze_error(
                error_context=issue.to_dict(),
                prefer_local=True,
                timeout=60.0,
            )
            if not data:
                return None

            # Null / non-numeric fields fall back to the defaults instead of
            # discarding an otherwise usable analysis.
            return AIDecision(
                root_cause=str(data.get("root_cause") or "無法判斷根本原因"),
                impact=str(data.get("impact") or "影響範圍未知"),
                fix_suggestion=str(data.get("fix_suggestion") or "請人工排查"),
                prevention=str(data.get("prevention") or "待補充"),
                confidence=self._to_float(data.get("confidence"), 0.0),
                analyzed_by=str(data.get("analyzed_by") or "unknown"),
            )

        except Exception as e:
            logger.exception("sentry_openclaw_analyze_failed", error=str(e))
            return None

    async def create_incident(
        self,
        issue: SentryIssueContext,
        decision: AIDecision | None,
        anomaly_frequency: dict | None = None,
    ) -> str:
        """
        Create the Approval (Incident) record.

        Args:
            issue: Sentry issue context.
            decision: AI analysis result (may be None; placeholders are used).
            anomaly_frequency: Frequency statistics dict (may be None). Its
                "escalation_level" value can raise the risk level.

        Returns:
            The approval ID as a string. When creation fails for any reason the
            error is logged and a placeholder ID of the form "temp-<8 hex chars>"
            is returned instead of raising.
        """
        import uuid  # only needed for the fallback ID below

        try:
            approval_service = get_approval_service()

            # Base risk level; levels missing from the table default to MEDIUM.
            risk_level = SENTRY_LEVEL_TO_RISK.get(issue.level, RiskLevel.MEDIUM)

            # Frequency-based escalation (ADR-037).
            if anomaly_frequency:
                escalation = anomaly_frequency.get("escalation_level")
                if escalation == "PERMANENT_FIX":
                    risk_level = RiskLevel.CRITICAL
                elif escalation == "ESCALATE" and risk_level != RiskLevel.CRITICAL:
                    risk_level = RiskLevel.HIGH

            metadata: dict = {
                "source": "sentry",
                "alert_type": f"sentry_{issue.level}",
                "sentry_issue_id": issue.issue_id,
                "sentry_project": issue.project,
                "culprit": issue.culprit,
                "error_count": issue.count,
                "first_seen": issue.first_seen,
                "stacktrace": issue.stacktrace,
                "llm_provider": decision.analyzed_by if decision else "pending",
                "llm_confidence": decision.confidence if decision else 0.0,
            }
            if anomaly_frequency:
                metadata["anomaly_frequency"] = anomaly_frequency

            approval_request = ApprovalRequestCreate(
                action=f"Sentry {issue.level.upper()} Alert: {issue.culprit}",
                description=(
                    f"{issue.title}\n\n"
                    f"Root Cause: {decision.root_cause if decision else '待分析'}\n"
                    f"Suggestion: {decision.fix_suggestion if decision else '待 AI 分析'}"
                ),
                risk_level=risk_level,
                blast_radius=BlastRadius(
                    affected_pods=1,
                    estimated_downtime="0",
                    related_services=[issue.project],
                    data_impact=DataImpact.READ_ONLY,
                ),
                dry_run_checks=[],
                requested_by="sentry-webhook",
                metadata=metadata,
            )

            approval = await approval_service.create_approval(request=approval_request)
            approval_id = str(approval.id)

            logger.info(
                "sentry_incident_created",
                approval_id=approval_id,
                issue_id=issue.issue_id,
                risk_level=risk_level.value,
            )
            return approval_id

        except Exception as e:
            # Deliberate best-effort: the alert flow continues with a temp ID.
            logger.exception("sentry_incident_creation_failed", error=str(e))
            return f"temp-{uuid.uuid4().hex[:8]}"

    def build_telegram_message(
        self,
        issue: SentryIssueContext,
        decision: AIDecision | None,
        approval_id: str,
        anomaly_frequency: dict | None = None,
    ) -> str:
        """
        Assemble the Telegram alert message (plain-text summary).

        Per the original author's note, the format follows
        feedback_telegram_alert_format.md and real delivery goes through
        TelegramGateway.send_approval_card(); this method provides the
        plain-text version for logging and tests.

        Args:
            issue: Sentry issue context.
            decision: AI analysis result; the analysis section is omitted when None.
            approval_id: Approval ID shown on the last line.
            anomaly_frequency: Frequency statistics; the frequency line is only
                shown when its "count_24h" is greater than 1. Missing or null
                counters are treated as 0.

        Returns:
            The formatted message text.
        """
        level_emoji = {"fatal": "💀", "error": "❌", "warning": "⚠️"}.get(issue.level, "🐛")

        freq_text = ""
        if anomaly_frequency:
            # `or 0` also covers counters that are present but null.
            count_1h = anomaly_frequency.get("count_1h") or 0
            count_24h = anomaly_frequency.get("count_24h") or 0
            count_7d = anomaly_frequency.get("count_7d") or 0
            if count_24h > 1:
                freq_text = (
                    f"\n📊 頻率: "
                    f"1h:{count_1h} / "
                    f"24h:{count_24h} / "
                    f"7d:{count_7d}"
                )

        analysis_text = ""
        if decision:
            analysis_text = (
                f"\n───────────────────────────\n"
                f"🧠 OpenClaw 分析 (信心: {decision.confidence:.0%}):\n"
                f"「{decision.root_cause[:120]}」\n"
                f"💡 建議: {decision.fix_suggestion[:100]}"
            )

        return (
            f"═══════════════════════════\n"
            f"{level_emoji} SENTRY {issue.level.upper()} 告警\n"
            f"═══════════════════════════\n"
            f"📦 專案: {issue.project}\n"
            f"📍 {issue.culprit}\n"
            f"🔖 {issue.title[:100]}"
            f"{freq_text}"
            f"{analysis_text}\n"
            f"───────────────────────────\n"
            f"🆔 Approval: {approval_id}"
        )
|
||
|
||
|
||
# =============================================================================
# Webhook secret verification (ADR-048)
# =============================================================================
|
||
|
||
class SentrySignatureError(Exception):
    """Raised when verification of a Sentry webhook signature fails."""
|
||
|
||
|
||
def verify_sentry_signature(body: bytes, signature_header: str | None) -> bool:
    """
    Verify the HMAC-SHA256 signature of a Sentry webhook request.

    According to the original author's note, Sentry sends the signature in the
    `sentry-hook-signature` header as an HMAC-SHA256 hex digest of the body.

    Fail-closed policy (aligned with the GitHub webhook ADR):
        - Production: secret not configured -> reject.
        - Other environments: secret not configured -> allow (local testing only).

    Args:
        body: Raw request body bytes.
        signature_header: Value of the sentry-hook-signature header.

    Returns:
        True when verification passed (or was skipped outside production).

    Raises:
        SentrySignatureError: The secret is missing in production, the header
            is missing, or the signature does not match.
    """
    secret = settings.SENTRY_WEBHOOK_SECRET

    if not secret:
        # NOTE(review): only the exact value "prod" fails closed here; any other
        # ENVIRONMENT value (e.g. "production", "staging", a typo) skips
        # verification. Confirm the set of values settings.ENVIRONMENT can take.
        if settings.ENVIRONMENT == "prod":
            logger.critical(
                "sentry_webhook_secret_missing_in_production",
                message="CRITICAL: SENTRY_WEBHOOK_SECRET missing in production!",
            )
            raise SentrySignatureError(
                "SENTRY_WEBHOOK_SECRET missing in production environment"
            )
        # Non-production environments are allowed to skip verification.
        logger.warning(
            "sentry_signature_skipped_dev_only",
            reason="SENTRY_WEBHOOK_SECRET not configured (dev mode only)",
        )
        return True

    if not signature_header:
        raise SentrySignatureError("Missing sentry-hook-signature header")

    expected = hmac.new(
        secret.encode(),
        body,
        hashlib.sha256,
    ).hexdigest()

    # hmac.compare_digest() raises TypeError for str arguments containing
    # non-ASCII characters. The header is attacker-controlled, so compare bytes
    # and turn an un-encodable header into a normal signature failure instead
    # of letting an unexpected exception type escape. A hex digest is pure
    # ASCII, so a non-ASCII header can never be a valid signature.
    try:
        provided = signature_header.encode("ascii")
    except UnicodeEncodeError:
        raise SentrySignatureError("Invalid sentry-hook-signature") from None

    if not hmac.compare_digest(provided, expected.encode("ascii")):
        raise SentrySignatureError("Invalid sentry-hook-signature")

    return True
|
||
|
||
|
||
# =============================================================================
# Singleton
# =============================================================================
|
||
|
||
# Module-level cache for the shared service instance; created on first use.
_sentry_webhook_service: SentryWebhookService | None = None


def get_sentry_webhook_service() -> SentryWebhookService:
    """Return the shared SentryWebhookService, creating it on the first call."""
    global _sentry_webhook_service
    instance = _sentry_webhook_service
    if instance is None:
        instance = SentryWebhookService()
        _sentry_webhook_service = instance
    return instance
|