Files
awoooi/apps/api/tests/llm_testing/schema_validators.py
OG T fe7fd7a3e0 feat(tests): ADR-018 LLM 測試策略三層架構
問題: LLM 測試因模型波動導致 CI 失敗

解決方案: 三層測試策略
- Tier 1 (CI): Schema 驗證 + Golden Responses
- Tier 2 (Nightly): 屬性測試 + Live LLM
- Tier 3 (Weekly): 語意相似度測試

新增檔案:
- ADR-018-llm-testing-strategy.md
- tests/llm_testing/ 框架
  - schema_validators.py: Pydantic Schema 驗證
  - property_validators.py: kubectl/風險等級驗證
  - golden_responses.py: 預錄回應管理
- tests/test_llm_tier1_schema.py: 35 個 Tier 1 測試

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-26 11:17:00 +08:00

155 lines
3.7 KiB
Python

"""
Schema Validators - Tier 1 測試
================================
驗證 LLM 輸出符合預定義 Schema
版本: v1.0
建立: 2026-03-26 (台北時區)
"""
import json
import re
from typing import Any, Literal
from pydantic import BaseModel, Field, ValidationError
class LLMProposalOutput(BaseModel):
    """
    Schema for a proposal emitted by the LLM.

    OpenClaw must output JSON in this format.
    """

    # Risk level; restricted to the four literals listed in the annotation.
    risk_level: Literal["LOW", "MEDIUM", "HIGH", "CRITICAL"] = Field(description="風險等級")
    # Optional kubectl command; None when the proposal carries no command.
    kubectl_command: str | None = Field(default=None, description="kubectl 命令 (可選)")
    # Description of the proposed action (required).
    action_description: str = Field(description="行動描述")
    # The reasoning behind the proposal (required).
    reasoning: str = Field(description="推理過程")
    # Confidence score, constrained to the closed interval [0.0, 1.0].
    confidence: float = Field(description="信心度 0-1", ge=0.0, le=1.0)
class LLMAnalysisOutput(BaseModel):
    """
    Schema for an analysis emitted by the LLM.

    Used for alert-analysis responses.
    """

    # Root-cause analysis text (required).
    root_cause: str = Field(description="根因分析")
    # Severity; restricted to the four literals listed in the annotation.
    severity: Literal["P0", "P1", "P2", "P3"] = Field(description="嚴重度")
    # Affected services; a fresh empty list is created when omitted.
    affected_services: list[str] = Field(description="受影響服務", default_factory=list)
    # Recommended actions; a fresh empty list is created when omitted.
    recommended_actions: list[str] = Field(description="建議行動", default_factory=list)
def validate_proposal_schema(response: str) -> tuple[bool, str, LLMProposalOutput | None]:
    """
    Check whether an LLM response conforms to the proposal schema.

    Args:
        response: Raw LLM response text.

    Returns:
        A ``(is_valid, error_message, parsed_output)`` tuple. On success the
        error message is an empty string and ``parsed_output`` is the
        validated model; on any failure ``parsed_output`` is ``None``.
    """
    # Step 1: locate the JSON text inside the response.
    raw_json = extract_json_from_response(response)
    if not raw_json:
        return False, "無法從回應中提取 JSON", None

    # Step 2: decode the JSON text.
    try:
        payload = json.loads(raw_json)
    except json.JSONDecodeError as exc:
        return False, f"JSON 解析失敗: {exc}", None

    # Step 3: validate the decoded payload against the schema.
    try:
        parsed = LLMProposalOutput.model_validate(payload)
    except ValidationError as exc:
        details = "; ".join(
            f"{item['loc']}: {item['msg']}" for item in exc.errors()
        )
        return False, f"Schema 驗證失敗: {details}", None

    return True, "", parsed
def validate_analysis_schema(response: str) -> tuple[bool, str, LLMAnalysisOutput | None]:
    """
    Check whether an LLM response conforms to the analysis schema.

    Args:
        response: Raw LLM response text.

    Returns:
        A ``(is_valid, error_message, parsed_output)`` tuple. On success the
        error message is an empty string and ``parsed_output`` is the
        validated model; on any failure ``parsed_output`` is ``None``.
    """
    # Step 1: locate the JSON text inside the response.
    raw_json = extract_json_from_response(response)
    if not raw_json:
        return False, "無法從回應中提取 JSON", None

    # Step 2: decode the JSON text.
    try:
        payload = json.loads(raw_json)
    except json.JSONDecodeError as exc:
        return False, f"JSON 解析失敗: {exc}", None

    # Step 3: validate the decoded payload against the schema.
    try:
        parsed = LLMAnalysisOutput.model_validate(payload)
    except ValidationError as exc:
        details = "; ".join(
            f"{item['loc']}: {item['msg']}" for item in exc.errors()
        )
        return False, f"Schema 驗證失敗: {details}", None

    return True, "", parsed
def extract_json_from_response(response: str) -> str | None:
"""
從 LLM 回應中提取 JSON
支援:
- 純 JSON
- ```json ... ``` 包裹
- 混合文字中的 JSON
"""
if not response:
return None
response = response.strip()
# Case 1: 純 JSON
if response.startswith("{") and response.endswith("}"):
return response
# Case 2: ```json 包裹
json_block_pattern = r"```(?:json)?\s*([\s\S]*?)```"
match = re.search(json_block_pattern, response)
if match:
return match.group(1).strip()
# Case 3: 尋找 { ... } 區塊
brace_pattern = r"\{[\s\S]*\}"
match = re.search(brace_pattern, response)
if match:
return match.group(0)
return None