""" Property Validators - Tier 2 測試 ================================== 驗證 LLM 輸出的不變量屬性 版本: v1.0 建立: 2026-03-26 (台北時區) """ import re import shlex from dataclasses import dataclass @dataclass class ValidationResult: """驗證結果""" is_valid: bool message: str details: dict | None = None def validate_kubectl_syntax(command: str) -> ValidationResult: """ 驗證 kubectl 命令語法有效性 檢查: 1. 命令可被 shell 解析 2. 第一個 token 是 kubectl 3. 包含有效的 kubectl 動詞 Args: command: kubectl 命令字串 Returns: ValidationResult """ if not command: return ValidationResult(False, "命令為空") command = command.strip() # 移除可能的 $ 前綴 if command.startswith("$ "): command = command[2:] # 嘗試 shell 解析 try: tokens = shlex.split(command) except ValueError as e: return ValidationResult(False, f"命令無法解析: {e}") if not tokens: return ValidationResult(False, "解析後無 token") # 檢查第一個 token if tokens[0] != "kubectl": return ValidationResult( False, f"第一個 token 應為 kubectl,實際為 {tokens[0]}" ) # 有效的 kubectl 動詞 valid_verbs = { "get", "describe", "create", "apply", "delete", "edit", "scale", "rollout", "logs", "exec", "port-forward", "top", "patch", "label", "annotate", "cordon", "uncordon", "drain", "taint", } if len(tokens) < 2: return ValidationResult(False, "缺少 kubectl 動詞") verb = tokens[1] if verb not in valid_verbs: return ValidationResult( False, f"無效的 kubectl 動詞: {verb}", details={"valid_verbs": list(valid_verbs)} ) return ValidationResult( True, "kubectl 語法有效", details={"verb": verb, "tokens": tokens} ) def validate_risk_level(risk_level: str) -> ValidationResult: """ 驗證風險等級是否為有效值 Args: risk_level: 風險等級字串 Returns: ValidationResult """ valid_levels = {"LOW", "MEDIUM", "HIGH", "CRITICAL"} if not risk_level: return ValidationResult(False, "風險等級為空") normalized = risk_level.strip().upper() if normalized in valid_levels: return ValidationResult(True, f"有效風險等級: {normalized}") # 嘗試模糊匹配 fuzzy_mappings = { "低": "LOW", "中": "MEDIUM", "高": "HIGH", "危": "CRITICAL", "嚴重": "CRITICAL", } for key, value in fuzzy_mappings.items(): if key in risk_level: return ValidationResult( True, f"模糊匹配: {risk_level} -> {value}", details={"original": risk_level, "normalized": value} ) return ValidationResult( False, f"無效風險等級: {risk_level}", details={"valid_levels": list(valid_levels)} ) def validate_chinese_ratio(text: str, min_ratio: float = 0.3) -> ValidationResult: """ 驗證繁體中文比例 Args: text: 待驗證文字 min_ratio: 最低中文比例 (預設 30%) Returns: ValidationResult """ if not text: return ValidationResult(False, "文字為空") # 中文字元 Unicode 範圍 chinese_pattern = re.compile(r"[\u4e00-\u9fff]") chinese_chars = chinese_pattern.findall(text) # 計算比例 (排除空白) non_space_chars = len(text.replace(" ", "").replace("\n", "")) if non_space_chars == 0: return ValidationResult(False, "無有效字元") ratio = len(chinese_chars) / non_space_chars if ratio >= min_ratio: return ValidationResult( True, f"中文比例 {ratio:.1%} >= {min_ratio:.0%}", details={"ratio": ratio, "chinese_count": len(chinese_chars)} ) return ValidationResult( False, f"中文比例 {ratio:.1%} < {min_ratio:.0%}", details={"ratio": ratio, "chinese_count": len(chinese_chars)} ) def validate_response_length( text: str, min_length: int = 10, max_length: int = 500, ) -> ValidationResult: """ 驗證回應長度 Args: text: 待驗證文字 min_length: 最小長度 max_length: 最大長度 Returns: ValidationResult """ if not text: return ValidationResult(False, "文字為空") length = len(text.strip()) if length < min_length: return ValidationResult( False, f"回應過短: {length} < {min_length}", details={"length": length} ) if length > max_length: return ValidationResult( False, f"回應過長: {length} > {max_length}", details={"length": length} ) return ValidationResult( True, f"回應長度 {length} 在 [{min_length}, {max_length}] 範圍內", details={"length": length} ) def extract_kubectl_from_text(text: str) -> str | None: """ 從文字中提取 kubectl 命令 Args: text: 包含 kubectl 的文字 Returns: 提取的 kubectl 命令或 None """ if not text: return None # 匹配 kubectl 開頭的整行 patterns = [ r"```(?:bash|shell|sh)?\s*(kubectl[^\n`]+)", # code block r"^\$?\s*(kubectl[^\n]+)", # 以 $ 或直接開頭 r"(kubectl\s+\S+(?:\s+\S+)*)", # 一般匹配 ] for pattern in patterns: match = re.search(pattern, text, re.MULTILINE | re.IGNORECASE) if match: return match.group(1).strip() return None def extract_risk_level_from_text(text: str) -> str | None: """ 從文字中提取風險等級 Args: text: 包含風險等級的文字 Returns: 提取的風險等級或 None """ if not text: return None # 直接匹配 for level in ["CRITICAL", "HIGH", "MEDIUM", "LOW"]: if level in text.upper(): return level # 中文匹配 chinese_mappings = [ (r"極高|危險|嚴重|緊急", "CRITICAL"), (r"高風險|高度", "HIGH"), (r"中等|中度|一般", "MEDIUM"), (r"低風險|低度|輕微", "LOW"), ] for pattern, level in chinese_mappings: if re.search(pattern, text): return level return None