# awoooi/apps/api/src/services/error_analyzer_service.py

"""
Error Analyzer Service - #39 Sentry 錯誤 AI 分析
=================================================
Phase 10: Sentry + OpenClaw + UI 整合

功能:
1. 接收 Sentry Issue + Stacktrace 數據
2. 使用 OpenClaw LLM 進行根因分析
3. 生成修復建議與預防措施

遵循 leWOOOgo 積木化原則:
- Service 層負責業務邏輯
- 不直接存取 Redis/DB
- 使用 DI 支援測試

版本: v1.0
建立: 2026-03-26 18:45 (台北時區)
建立者: Claude Code (#39 Error Analyzer Agent)
"""

import json
from typing import Protocol, runtime_checkable

from pydantic import BaseModel, Field

from src.core.logging import get_logger
from src.utils.timezone import now_taipei_iso

# Module-level logger; the service below emits events on it as an event name
# plus keyword fields (issue_id, provider, ...).
logger = get_logger("awoooi.error_analyzer")


# =============================================================================
# Error Analysis Prompt
# =============================================================================

# System prompt sent to the LLM for every analysis. ErrorAnalyzerService appends
# the JSON-encoded error context (wrapped in a ```json fence) directly after this
# text. The JSON schema described in the prompt uses the same field names as
# ErrorAnalysisResult / FixRecommendation / PreventionMeasure, so changes to the
# models and to this prompt need to be made together.
ERROR_ANALYZER_SYSTEM_PROMPT = """# OpenClaw Error Analyzer - AWOOOI 錯誤分析專家

You are a senior Software Engineer specialized in debugging and error analysis.

## 🌐 Language Requirement (CRITICAL)
- You MUST respond in **Traditional Chinese (繁體中文/正體中文)** for all text fields
- FORBIDDEN: Simplified Chinese characters (简体字)
- Use Taiwan locale conventions (台灣用語)

## 🎯 Your Mission
Analyze the given error from Sentry and provide:
1. **Root Cause Analysis** - Why did this error occur?
2. **Impact Assessment** - How serious is this error?
3. **Fix Recommendations** - How to fix this error?
4. **Prevention Suggestions** - How to prevent recurrence?

## 📊 Analysis Categories
- **CODE_BUG**: Logic error, null pointer, type error
- **DEPENDENCY**: Third-party library issue, version conflict
- **CONFIGURATION**: Missing env var, wrong config
- **INFRASTRUCTURE**: Network, timeout, resource exhaustion
- **DATA_INTEGRITY**: Corrupt data, schema mismatch
- **EXTERNAL_SERVICE**: API failure, rate limit
- **UNKNOWN**: Cannot determine from available information

## ⚠️ Output Rules
- Respond with ONLY valid JSON
- confidence MUST be between 0.0 and 1.0
- severity MUST be one of: LOW, MEDIUM, HIGH, CRITICAL
- All text fields in Traditional Chinese

## 📋 JSON Schema (REQUIRED)
```json
{
  "root_cause": "string - 根因分析 (繁體中文)",
  "category": "CODE_BUG|DEPENDENCY|CONFIGURATION|INFRASTRUCTURE|DATA_INTEGRITY|EXTERNAL_SERVICE|UNKNOWN",
  "severity": "LOW|MEDIUM|HIGH|CRITICAL",
  "impact_assessment": "string - 影響評估 (繁體中文)",
  "fix_recommendation": {
    "summary": "string - 修復摘要",
    "steps": ["array - 修復步驟"],
    "code_suggestion": "string | null - 建議的代碼修改"
  },
  "prevention": [
    {
      "type": "CODE_REVIEW|UNIT_TEST|MONITORING|VALIDATION|ERROR_HANDLING",
      "description": "string - 預防措施描述"
    }
  ],
  "related_files": ["array - 可能相關的檔案路徑"],
  "confidence": "number - 0.0 to 1.0",
  "reasoning": "string - 分析推理過程 (繁體中文)"
}
```

Now analyze the following error:
"""


# =============================================================================
# Response Models
# =============================================================================


class FixRecommendation(BaseModel):
    """Fix recommendation for an analysed error.

    Corresponds to the ``fix_recommendation`` object of the JSON schema in
    ``ERROR_ANALYZER_SYSTEM_PROMPT``.
    """

    # Short summary of the fix (required, no default).
    summary: str = Field(description="修復摘要")
    # Remediation steps; an empty list when none are supplied.
    steps: list[str] = Field(default_factory=list, description="修復步驟")
    # Suggested code change; None when no suggestion is supplied.
    code_suggestion: str | None = Field(None, description="建議的代碼修改")


class PreventionMeasure(BaseModel):
    """A single prevention measure suggested alongside an error analysis."""

    # Kind of measure. ERROR_ANALYZER_SYSTEM_PROMPT lists CODE_REVIEW, UNIT_TEST,
    # MONITORING, VALIDATION and ERROR_HANDLING, but this field is a plain str
    # and does not restrict the value.
    type: str = Field(description="類型 (CODE_REVIEW, UNIT_TEST, etc.)")
    # Free-text description of the measure.
    description: str = Field(description="描述")


class ErrorAnalysisResult(BaseModel):
    """Structured result of an error analysis.

    Field names match the top-level keys of the JSON schema in
    ``ERROR_ANALYZER_SYSTEM_PROMPT``.
    """

    # Root-cause explanation.
    root_cause: str = Field(description="根因分析")
    # Error category (the prompt lists CODE_BUG, DEPENDENCY, ..., UNKNOWN);
    # stored as a plain str, not validated against that list here.
    category: str = Field(description="分類")
    # Severity (the prompt asks for LOW / MEDIUM / HIGH / CRITICAL); plain str.
    severity: str = Field(description="嚴重度")
    # Description of the error's impact.
    impact_assessment: str = Field(description="影響評估")
    # Nested fix recommendation (summary, steps, optional code suggestion).
    fix_recommendation: FixRecommendation = Field(description="修復建議")
    # Prevention measures; an empty list when none are supplied.
    prevention: list[PreventionMeasure] = Field(
        default_factory=list, description="預防措施"
    )
    # File paths possibly related to the error; an empty list when none are supplied.
    related_files: list[str] = Field(default_factory=list, description="相關檔案")
    # Confidence score. The prompt asks for 0.0-1.0, but no range constraint is
    # declared on this field.
    confidence: float = Field(description="信心度")
    # The model's reasoning behind the analysis.
    reasoning: str = Field(description="分析推理過程")


# =============================================================================
# Protocol Interface
# =============================================================================


@runtime_checkable
class ILLMProvider(Protocol):
    """Structural interface of the LLM backend used by ErrorAnalyzerService.

    Any object with a compatible async ``call`` method satisfies this protocol;
    no inheritance is required, which lets tests inject a stand-in provider.
    """

    async def call(self, prompt: str) -> tuple[str, str, bool]:
        """
        Send ``prompt`` to the LLM.

        Args:
            prompt: Full prompt text.

        Returns:
            A ``(response, provider_name, success)`` tuple: the raw response
            text, the name of the provider that handled the call, and whether
            the call succeeded.
        """
        ...


# =============================================================================
# Error Analyzer Service
# =============================================================================


class ErrorAnalyzerService:
    """
    Error Analyzer Service - AI analysis of Sentry errors.

    Responsibilities:
    1. Assemble the analysis prompt.
    2. Call the LLM provider (OpenClaw unless another one is injected).
    3. Parse the reply into an ``ErrorAnalysisResult``.
    """

    def __init__(self, llm_provider: ILLMProvider | None = None) -> None:
        """
        Initialise the service.

        Args:
            llm_provider: LLM backend to use. When omitted, the OpenClaw
                provider is resolved lazily on first use.
        """
        self._llm_provider = llm_provider

    async def _get_llm_provider(self) -> ILLMProvider:
        """Return the configured LLM provider, resolving OpenClaw on first use."""
        if self._llm_provider is None:
            # Imported on demand so the OpenClaw module is only loaded when no
            # provider was injected.
            from src.services.openclaw import get_openclaw

            self._llm_provider = get_openclaw()
        return self._llm_provider

    async def analyze_error(
        self,
        issue_id: str,
        title: str,
        level: str,
        culprit: str | None,
        count: int,
        stacktrace: str,
        context: dict | None = None,
    ) -> tuple[ErrorAnalysisResult | None, str, bool]:
        """
        Analyse a Sentry error with the LLM.

        Args:
            issue_id: Sentry issue ID.
            title: Error title.
            level: Sentry level (error, warning, ...).
            culprit: Origin of the error (function/file), if known.
            count: Number of occurrences.
            stacktrace: Stack trace text.
            context: Extra context (browser, os, tags, ...). Values that are
                not JSON-serialisable are stringified when building the prompt.

        Returns:
            ``(analysis_result, provider, success)``.

            * LLM call reported failure: ``(None, provider, False)``.
            * Any exception (including while building the prompt):
              ``(None, "error", False)``.
            * LLM call succeeded: ``(result, provider, True)``; ``result`` is
              ``None`` when the reply could not be parsed, so callers must
              check it even when ``success`` is true.
        """
        logger.info(
            "error_analysis_start",
            issue_id=issue_id,
            title=title,
            level=level,
        )

        try:
            # Built inside the try so that a prompt-assembly failure is
            # reported through the return tuple like every other failure.
            prompt = self._build_prompt(
                {
                    "issue_id": issue_id,
                    "title": title,
                    "level": level,
                    "culprit": culprit,
                    "occurrence_count": count,
                    "stacktrace": stacktrace,
                    "context": context or {},
                    "analyzed_at": now_taipei_iso(),
                }
            )

            llm = await self._get_llm_provider()
            response, provider, success = await llm.call(prompt)

            if not success:
                logger.error(
                    "error_analysis_llm_failed",
                    issue_id=issue_id,
                    provider=provider,
                )
                return None, provider, False

            logger.info(
                "error_analysis_llm_response",
                issue_id=issue_id,
                provider=provider,
                response_length=len(response),
            )

            result = self._parse_analysis_result(response)

            if result:
                logger.info(
                    "error_analysis_complete",
                    issue_id=issue_id,
                    category=result.category,
                    severity=result.severity,
                    confidence=result.confidence,
                )
            else:
                logger.warning(
                    "error_analysis_parse_failed",
                    issue_id=issue_id,
                    raw_response=response[:300],
                )

            return result, provider, True

        except Exception as e:
            logger.exception(
                "error_analysis_failed",
                issue_id=issue_id,
                error=str(e),
            )
            return None, "error", False

    @staticmethod
    def _build_prompt(error_context: dict) -> str:
        """Return the system prompt followed by the fenced JSON error context."""
        # default=str is deliberate: the context is free-form metadata that is
        # only read by the LLM, so a stringified datetime/UUID/etc. is better
        # than failing the whole analysis with a TypeError.
        payload = json.dumps(error_context, ensure_ascii=False, indent=2, default=str)
        return f"{ERROR_ANALYZER_SYSTEM_PROMPT}\n```json\n{payload}\n```"

    @staticmethod
    def _extract_json_object(raw_response: object) -> dict | None:
        """
        Locate and decode the JSON object contained in an LLM reply.

        Candidates are tried in this order and the first one that decodes to a
        JSON object wins:

        1. The whole reply (plain JSON; also covers JSON whose string values
           contain ``` fences).
        2. The content of each ``` fenced block, with an optional
           case-insensitive ``json`` tag removed.
        3. The span from the first ``{`` to the last ``}`` (prose around the
           JSON, unterminated fence, fence markers inside string values).

        Returns:
            The decoded dict, or None when no candidate decodes to an object.
        """
        if not isinstance(raw_response, str):
            return None

        candidates = [raw_response]

        cursor = 0
        while True:
            fence_open = raw_response.find("```", cursor)
            if fence_open == -1:
                break
            body_start = fence_open + 3
            fence_close = raw_response.find("```", body_start)
            if fence_close == -1:
                break
            fenced = raw_response[body_start:fence_close].lstrip()
            if fenced[:4].lower() == "json":
                fenced = fenced[4:]
            candidates.append(fenced)
            cursor = fence_close + 3

        brace_open = raw_response.find("{")
        brace_close = raw_response.rfind("}")
        if 0 <= brace_open < brace_close:
            candidates.append(raw_response[brace_open : brace_close + 1])

        for candidate in candidates:
            try:
                parsed = json.loads(candidate.strip())
            except ValueError:  # json.JSONDecodeError is a ValueError
                continue
            if isinstance(parsed, dict):
                return parsed
        return None

    @staticmethod
    def _as_text(value: object, default: str) -> str:
        """Return ``value`` as a string; ``default`` when it is missing/null."""
        if value is None:
            return default
        return value if isinstance(value, str) else str(value)

    @staticmethod
    def _as_label(value: object, default: str) -> str:
        """Return ``value`` as a trimmed upper-case label, or ``default`` when empty."""
        if value is None:
            return default
        # The prompt requires upper-case labels; models occasionally answer
        # "high" or " High ".
        label = str(value).strip().upper()
        return label or default

    @staticmethod
    def _as_str_list(value: object) -> list[str]:
        """Coerce ``value`` into a list of strings (null -> [], scalar -> [scalar])."""
        if value is None:
            return []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value if item is not None]
        return [str(value)]

    @staticmethod
    def _as_confidence(value: object) -> float:
        """Return ``value`` as a float clamped to [0.0, 1.0]; 0.0 when unusable."""
        try:
            number = float(value)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return 0.0
        if number != number:  # NaN (json.loads accepts the NaN literal)
            return 0.0
        return min(1.0, max(0.0, number))

    @staticmethod
    def _build_prevention(items: object) -> list[PreventionMeasure]:
        """Build PreventionMeasure objects, skipping entries of unusable shape."""
        if not isinstance(items, (list, tuple)):
            return []

        measures = []
        for item in items:
            if isinstance(item, dict):
                measures.append(
                    PreventionMeasure(
                        type=ErrorAnalyzerService._as_text(item.get("type"), "UNKNOWN"),
                        description=ErrorAnalyzerService._as_text(
                            item.get("description"), ""
                        ),
                    )
                )
            elif isinstance(item, str):
                # A bare string is kept as the description of an untyped measure.
                measures.append(PreventionMeasure(type="UNKNOWN", description=item))
        return measures

    def _parse_analysis_result(self, raw_response: str) -> ErrorAnalysisResult | None:
        """
        Parse the raw LLM reply into a structured result.

        Missing, null or mistyped fields are replaced by defaults or coerced
        instead of discarding the whole analysis. ``category`` and ``severity``
        are upper-cased, and ``confidence`` is clamped to [0.0, 1.0] with 0.0
        used when it is missing or not numeric.

        Args:
            raw_response: Raw text returned by the LLM.

        Returns:
            The parsed ``ErrorAnalysisResult``, or None when no JSON object can
            be found in the reply or the result cannot be built.
        """
        try:
            data = self._extract_json_object(raw_response)
            if data is None:
                logger.warning(
                    "error_analysis_json_decode_failed",
                    error="no JSON object found in LLM response",
                    raw_response=str(raw_response)[:200],
                )
                return None

            fix_data = data.get("fix_recommendation")
            if isinstance(fix_data, str):
                # A bare string is treated as the summary.
                fix_data = {"summary": fix_data}
            elif not isinstance(fix_data, dict):
                fix_data = {}

            code_suggestion = fix_data.get("code_suggestion")
            fix_recommendation = FixRecommendation(
                summary=self._as_text(fix_data.get("summary"), "無建議"),
                steps=self._as_str_list(fix_data.get("steps")),
                code_suggestion=(
                    None if code_suggestion is None else str(code_suggestion)
                ),
            )

            return ErrorAnalysisResult(
                root_cause=self._as_text(data.get("root_cause"), "無法判斷根因"),
                category=self._as_label(data.get("category"), "UNKNOWN"),
                severity=self._as_label(data.get("severity"), "MEDIUM"),
                impact_assessment=self._as_text(
                    data.get("impact_assessment"), "影響評估中"
                ),
                fix_recommendation=fix_recommendation,
                prevention=self._build_prevention(data.get("prevention")),
                related_files=self._as_str_list(data.get("related_files")),
                confidence=self._as_confidence(data.get("confidence")),
                reasoning=self._as_text(data.get("reasoning"), ""),
            )

        except Exception as e:
            # Boundary handler: a malformed reply must never raise to the caller.
            logger.warning(
                "error_analysis_parse_error",
                error=str(e),
            )
            return None


# =============================================================================
# Singleton
# =============================================================================

# Module-level cached instance. Stays None until get_error_analyzer_service()
# creates one or set_error_analyzer_service() installs one.
_error_analyzer_service: ErrorAnalyzerService | None = None


def get_error_analyzer_service() -> ErrorAnalyzerService:
    """Return the shared Error Analyzer Service, creating it on first call."""
    global _error_analyzer_service
    cached = _error_analyzer_service
    if cached is not None:
        return cached
    # First call: build the default service and remember it for later calls.
    _error_analyzer_service = created = ErrorAnalyzerService()
    return created


def set_error_analyzer_service(service: ErrorAnalyzerService) -> None:
    """Replace the shared Error Analyzer Service instance (intended for tests).

    Args:
        service: Instance that subsequent get_error_analyzer_service() calls
            will return.
    """
    global _error_analyzer_service
    _error_analyzer_service = service