Files
awoooi/apps/api/tests/llm_testing/property_validators.py
OG T fe7fd7a3e0 feat(tests): ADR-018 LLM 測試策略三層架構
問題: LLM 測試因模型波動導致 CI 失敗

解決方案: 三層測試策略
- Tier 1 (CI): Schema 驗證 + Golden Responses
- Tier 2 (Nightly): 屬性測試 + Live LLM
- Tier 3 (Weekly): 語意相似度測試

新增檔案:
- ADR-018-llm-testing-strategy.md
- tests/llm_testing/ 框架
  - schema_validators.py: Pydantic Schema 驗證
  - property_validators.py: kubectl/風險等級驗證
  - golden_responses.py: 預錄回應管理
- tests/test_llm_tier1_schema.py: 35 個 Tier 1 測試

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-26 11:17:00 +08:00

273 lines
6.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Property Validators - Tier 2 測試
==================================
驗證 LLM 輸出的不變量屬性
版本: v1.0
建立: 2026-03-26 (台北時區)
"""
import re
import shlex
from dataclasses import dataclass
@dataclass
class ValidationResult:
"""驗證結果"""
is_valid: bool
message: str
details: dict | None = None
def validate_kubectl_syntax(command: str) -> ValidationResult:
    """Check that a string looks like a well-formed kubectl invocation.

    Checks performed, in order:
      1. The command can be tokenized by ``shlex.split``.
      2. The first token is exactly ``kubectl``.
      3. The first token after any leading global flags is a known kubectl
         sub-command, so ``kubectl -n prod get pods`` is accepted.

    A leading ``"$ "`` shell prompt is stripped before parsing.

    Args:
        command: The kubectl command string.

    Returns:
        A ValidationResult. On success ``details`` holds the detected
        ``verb`` and the parsed ``tokens``; for an unknown verb it holds
        the sorted list of ``valid_verbs``.
    """
    if not command:
        return ValidationResult(False, "命令為空")

    command = command.strip()
    # Drop a copied shell prompt such as "$ kubectl get pods".
    if command.startswith("$ "):
        command = command[2:]

    # shlex raises ValueError on unbalanced quotes or a dangling escape.
    try:
        tokens = shlex.split(command)
    except ValueError as e:
        return ValidationResult(False, f"命令無法解析: {e}")

    if not tokens:
        return ValidationResult(False, "解析後無 token")

    if tokens[0] != "kubectl":
        return ValidationResult(
            False,
            f"第一個 token 應為 kubectl，實際為 {tokens[0]}",
        )

    # Sub-commands listed in the kubectl command reference.
    valid_verbs = {
        "get", "describe", "create", "apply", "delete",
        "edit", "scale", "rollout", "logs", "exec",
        "port-forward", "top", "patch", "label", "annotate",
        "cordon", "uncordon", "drain", "taint",
        "run", "expose", "set", "explain", "autoscale",
        "replace", "wait", "attach", "cp", "auth",
        "debug", "diff", "kustomize", "config", "cluster-info",
        "api-resources", "api-versions", "certificate", "events",
        "proxy", "version", "plugin", "completion", "options",
    }

    # Global flags that consume the following token as their value when
    # written without "=" (e.g. "-n prod"). Other flags are treated as
    # self-contained ("--namespace=prod", boolean switches).
    value_flags = {
        "-n", "--namespace", "--context", "--cluster", "--user",
        "--kubeconfig", "-s", "--server", "--token", "--as",
        "--as-group", "--request-timeout", "-v", "--v", "--cache-dir",
        "--certificate-authority", "--client-certificate",
        "--client-key", "--tls-server-name",
    }

    # kubectl accepts global flags before the sub-command, so skip them
    # instead of assuming the verb is always the second token.
    index = 1
    while index < len(tokens) and tokens[index].startswith("-"):
        index += 2 if tokens[index] in value_flags else 1

    if index >= len(tokens):
        return ValidationResult(False, "缺少 kubectl 動詞")

    verb = tokens[index]
    if verb not in valid_verbs:
        return ValidationResult(
            False,
            f"無效的 kubectl 動詞: {verb}",
            # Sorted so the reported list is stable between runs.
            details={"valid_verbs": sorted(valid_verbs)},
        )

    return ValidationResult(
        True,
        "kubectl 語法有效",
        details={"verb": verb, "tokens": tokens},
    )
def validate_risk_level(risk_level: str) -> ValidationResult:
    """Check whether a string names a recognized risk level.

    The value is accepted when, after stripping and upper-casing, it is one
    of LOW / MEDIUM / HIGH / CRITICAL. Otherwise a fuzzy pass looks for a
    Chinese severity word inside the raw value.

    Args:
        risk_level: The risk level string to check.

    Returns:
        A ValidationResult. A fuzzy match reports the ``original`` value and
        the ``normalized`` level in ``details``; a rejection reports the
        sorted ``valid_levels``.
    """
    valid_levels = {"LOW", "MEDIUM", "HIGH", "CRITICAL"}

    if not risk_level:
        return ValidationResult(False, "風險等級為空")

    normalized = risk_level.strip().upper()
    if normalized in valid_levels:
        return ValidationResult(True, f"有效風險等級: {normalized}")

    # Fuzzy matching on Chinese severity words. Keys must be non-empty:
    # an empty key is a substring of every string and would turn every
    # invalid value into a match. Ordered most severe first so that a
    # compound such as "極高" is not captured by the shorter "高".
    # NOTE(review): key vocabulary chosen to mirror the four levels;
    # confirm against the wording the LLM prompts actually produce.
    fuzzy_mappings = (
        ("極高", "CRITICAL"),
        ("嚴重", "CRITICAL"),
        ("危", "CRITICAL"),
        ("高", "HIGH"),
        ("中", "MEDIUM"),
        ("低", "LOW"),
    )
    for key, value in fuzzy_mappings:
        if key in risk_level:
            return ValidationResult(
                True,
                f"模糊匹配: {risk_level} -> {value}",
                details={"original": risk_level, "normalized": value},
            )

    return ValidationResult(
        False,
        f"無效風險等級: {risk_level}",
        # Sorted so the reported list is stable between runs.
        details={"valid_levels": sorted(valid_levels)},
    )
def validate_chinese_ratio(text: str, min_ratio: float = 0.3) -> ValidationResult:
    """Check that enough of the text consists of Chinese characters.

    A character counts as Chinese when it lies in U+4E00..U+9FFF. That
    range does not distinguish Traditional from Simplified characters.
    The ratio is taken over all non-whitespace characters.

    Args:
        text: The text to check.
        min_ratio: Minimum required share of Chinese characters
            (default 0.3, i.e. 30%).

    Returns:
        A ValidationResult whose ``details`` carry the computed ``ratio``
        and ``chinese_count`` whenever a ratio could be computed.
    """
    if not text:
        return ValidationResult(False, "文字為空")

    chinese_count = len(re.findall(r"[\u4e00-\u9fff]", text))

    # Exclude every kind of whitespace (tabs, CR, full-width space, ...),
    # not only " " and "\n", so layout does not dilute the ratio.
    non_space_chars = sum(1 for ch in text if not ch.isspace())
    if non_space_chars == 0:
        return ValidationResult(False, "無有效字元")

    ratio = chinese_count / non_space_chars
    details = {"ratio": ratio, "chinese_count": chinese_count}

    if ratio >= min_ratio:
        return ValidationResult(
            True,
            f"中文比例 {ratio:.1%} >= {min_ratio:.0%}",
            details=details,
        )

    return ValidationResult(
        False,
        f"中文比例 {ratio:.1%} < {min_ratio:.0%}",
        details=details,
    )
def validate_response_length(
    text: str,
    min_length: int = 10,
    max_length: int = 500,
) -> ValidationResult:
    """Check that the stripped text length lies within inclusive bounds.

    Args:
        text: The text to check.
        min_length: Smallest acceptable length.
        max_length: Largest acceptable length.

    Returns:
        A ValidationResult. Except for empty input, ``details`` carries
        the measured ``length``.
    """
    if not text:
        return ValidationResult(False, "文字為空")

    # Surrounding whitespace does not count towards the length.
    length = len(text.strip())

    if length < min_length:
        ok = False
        message = f"回應過短: {length} < {min_length}"
    elif length > max_length:
        ok = False
        message = f"回應過長: {length} > {max_length}"
    else:
        ok = True
        message = f"回應長度 {length} 在 [{min_length}, {max_length}] 範圍內"

    return ValidationResult(ok, message, details={"length": length})
def extract_kubectl_from_text(text: str) -> str | None:
"""
從文字中提取 kubectl 命令
Args:
text: 包含 kubectl 的文字
Returns:
提取的 kubectl 命令或 None
"""
if not text:
return None
# 匹配 kubectl 開頭的整行
patterns = [
r"```(?:bash|shell|sh)?\s*(kubectl[^\n`]+)", # code block
r"^\$?\s*(kubectl[^\n]+)", # 以 $ 或直接開頭
r"(kubectl\s+\S+(?:\s+\S+)*)", # 一般匹配
]
for pattern in patterns:
match = re.search(pattern, text, re.MULTILINE | re.IGNORECASE)
if match:
return match.group(1).strip()
return None
def extract_risk_level_from_text(text: str) -> str | None:
"""
從文字中提取風險等級
Args:
text: 包含風險等級的文字
Returns:
提取的風險等級或 None
"""
if not text:
return None
# 直接匹配
for level in ["CRITICAL", "HIGH", "MEDIUM", "LOW"]:
if level in text.upper():
return level
# 中文匹配
chinese_mappings = [
(r"極高|危險|嚴重|緊急", "CRITICAL"),
(r"高風險|高度", "HIGH"),
(r"中等|中度|一般", "MEDIUM"),
(r"低風險|低度|輕微", "LOW"),
]
for pattern, level in chinese_mappings:
if re.search(pattern, text):
return level
return None