# awoooi/apps/api/tests/llm_testing/schema_validators.py

"""
Schema Validators - Tier 1 測試
================================
驗證 LLM 輸出符合預定義 Schema

版本: v1.0
建立: 2026-03-26 (台北時區)
"""

import json
import re
from typing import Any, Literal

from pydantic import BaseModel, Field, ValidationError


class LLMProposalOutput(BaseModel):
    """Schema for a proposal emitted by the LLM.

    Per the original module notes, OpenClaw must output JSON in this shape.
    Field ``description`` values are runtime metadata and are kept verbatim.
    """

    # Risk classification; restricted to the four literal levels below.
    risk_level: Literal["LOW", "MEDIUM", "HIGH", "CRITICAL"] = Field(description="風險等級")

    # Optional kubectl command; defaults to None when the key is absent.
    kubectl_command: str | None = Field(default=None, description="kubectl 命令 (可選)")

    # Required free-text description of the proposed action.
    action_description: str = Field(description="行動描述")

    # Required free-text reasoning behind the proposal.
    reasoning: str = Field(description="推理過程")

    # Confidence score, constrained to the closed interval [0.0, 1.0].
    confidence: float = Field(description="信心度 0-1", ge=0.0, le=1.0)


class LLMAnalysisOutput(BaseModel):
    """Schema for an analysis emitted by the LLM.

    Per the original module notes, this is used for alert-analysis responses.
    Field ``description`` values are runtime metadata and are kept verbatim.
    """

    # Required root-cause text.
    root_cause: str = Field(description="根因分析")

    # Severity; restricted to the four literal priority levels below.
    severity: Literal["P0", "P1", "P2", "P3"] = Field(description="嚴重度")

    # Names of affected services; a fresh empty list when omitted.
    affected_services: list[str] = Field(description="受影響服務", default_factory=list)

    # Recommended follow-up actions; a fresh empty list when omitted.
    recommended_actions: list[str] = Field(description="建議行動", default_factory=list)


def validate_proposal_schema(response: str) -> tuple[bool, str, LLMProposalOutput | None]:
    """
    Check whether an LLM response satisfies the proposal schema.

    The response goes through three stages: JSON text extraction, JSON
    decoding, and validation against ``LLMProposalOutput``. The first stage
    that fails short-circuits with an explanatory message.

    Args:
        response: Raw LLM response text.

    Returns:
        A ``(is_valid, error_message, parsed_output)`` tuple. On success the
        message is empty and ``parsed_output`` is the validated model; on
        failure ``parsed_output`` is None.
    """
    # Stage 1: locate the JSON text inside the response.
    payload = extract_json_from_response(response)
    if not payload:
        return False, "無法從回應中提取 JSON", None

    # Stage 2: decode the JSON text.
    try:
        decoded = json.loads(payload)
    except json.JSONDecodeError as parse_error:
        return False, f"JSON 解析失敗: {parse_error}", None

    # Stage 3: validate the decoded data against the schema.
    try:
        proposal = LLMProposalOutput.model_validate(decoded)
    except ValidationError as schema_error:
        details = "; ".join(
            f"{issue['loc']}: {issue['msg']}" for issue in schema_error.errors()
        )
        return False, f"Schema 驗證失敗: {details}", None

    return True, "", proposal


def validate_analysis_schema(response: str) -> tuple[bool, str, LLMAnalysisOutput | None]:
    """
    Check whether an LLM response satisfies the analysis schema.

    The response goes through three stages: JSON text extraction, JSON
    decoding, and validation against ``LLMAnalysisOutput``. The first stage
    that fails short-circuits with an explanatory message.

    Args:
        response: Raw LLM response text.

    Returns:
        A ``(is_valid, error_message, parsed_output)`` tuple. On success the
        message is empty and ``parsed_output`` is the validated model; on
        failure ``parsed_output`` is None.
    """
    raw_json = extract_json_from_response(response)
    if not raw_json:
        # Nothing JSON-like could be located in the response.
        return False, "無法從回應中提取 JSON", None

    try:
        parsed = json.loads(raw_json)
    except json.JSONDecodeError as decode_failure:
        return False, f"JSON 解析失敗: {decode_failure}", None

    try:
        analysis = LLMAnalysisOutput.model_validate(parsed)
    except ValidationError as validation_failure:
        # Flatten every reported problem into one "loc: msg" summary line.
        problems = [
            f"{problem['loc']}: {problem['msg']}"
            for problem in validation_failure.errors()
        ]
        summary = "; ".join(problems)
        return False, f"Schema 驗證失敗: {summary}", None

    return True, "", analysis


def extract_json_from_response(response: str) -> str | None:
    """
    從 LLM 回應中提取 JSON

    支援:
    - 純 JSON
    - ```json ... ``` 包裹
    - 混合文字中的 JSON
    """
    if not response:
        return None

    response = response.strip()

    # Case 1: 純 JSON
    if response.startswith("{") and response.endswith("}"):
        return response

    # Case 2: ```json 包裹
    json_block_pattern = r"```(?:json)?\s*([\s\S]*?)```"
    match = re.search(json_block_pattern, response)
    if match:
        return match.group(1).strip()

    # Case 3: 尋找 { ... } 區塊
    brace_pattern = r"\{[\s\S]*\}"
    match = re.search(brace_pattern, response)
    if match:
        return match.group(0)

    return None