根本原因: NemoTron 輸出 "investigate" → Pydantic 只接受 4 個值 → 爆炸 → openclaw_analysis_parse_failed → analysis_result=None → 全部 fallback 卡片顯示「待分析」 修復: 1. SuggestedAction enum 新增 INVESTIGATE/OBSERVE/APPLY_HPA/TUNE_RESOURCES (prompt.py 列了 6 個,enum 只有 4 個,prompt/model 不同步是根源) 2. normalize_suggested_action validator: uppercase + 別名映射 + 未知值 fallback NO_ACTION 確保任何 LLM 輸出都不會讓 Pydantic 爆炸導致 analysis_result = None Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
261 lines
7.8 KiB
Python
261 lines
7.8 KiB
Python
"""
AI Decision Models - Phase 2 Structured Output
===============================================
CAI-101: OpenClaw AI structured output models

Defensive engineering rules:
- The LLM must never be allowed to emit free text that cannot be parsed
- JSON format + Pydantic validation is mandatory
- blast_radius is a REQUIRED field and must not be omitted
"""
|
||
|
||
from enum import Enum
|
||
|
||
from pydantic import BaseModel, Field, field_validator
|
||
|
||
|
||
class SuggestedAction(str, Enum):
    """Operation types the AI is allowed to suggest.

    Must stay in correspondence with executor.OperationType.

    2026-04-17 ogt + Claude Sonnet 4.6: added INVESTIGATE/APPLY_HPA/TUNE_RESOURCES.
    Root cause: prompts.py listed 6 values while this enum only had 4, so
    NemoTron emitting "investigate" made Pydantic validation fail, which left
    analysis_result = None and sent everything down the fallback path.

    Lookup by value tolerates letter case and surrounding whitespace
    (``SuggestedAction(" investigate ")`` resolves to ``INVESTIGATE``);
    values that still do not match raise ``ValueError`` as before.
    """

    RESTART_DEPLOYMENT = "RESTART_DEPLOYMENT"
    DELETE_POD = "DELETE_POD"
    SCALE_DEPLOYMENT = "SCALE_DEPLOYMENT"
    APPLY_HPA = "APPLY_HPA"
    TUNE_RESOURCES = "TUNE_RESOURCES"
    INVESTIGATE = "INVESTIGATE"  # investigate / diagnose; issues no execution command
    OBSERVE = "OBSERVE"  # observe and wait
    NO_ACTION = "NO_ACTION"  # nothing needs to be done

    @classmethod
    def _missing_(cls, value: object) -> "SuggestedAction | None":
        """Resolve a value that failed exact lookup, ignoring case and padding.

        Enum calls this hook only after the exact-value lookup has failed.
        Returning ``None`` makes Enum raise its usual ``ValueError``.
        """
        if isinstance(value, str):
            candidate = value.strip().upper()
            # Iterate instead of calling cls(candidate) to avoid re-entering
            # this hook when the candidate is also unknown.
            for member in cls:
                if member.value == candidate:
                    return member
        return None
|
||
|
||
|
||
class AIRiskLevel(str, Enum):
    """Risk level assigned by the AI assessment.

    Member values are lowercase. Lookup by value tolerates letter case and
    surrounding whitespace (``AIRiskLevel("CRITICAL")`` resolves to
    ``CRITICAL``); values that still do not match raise ``ValueError``.
    """

    LOW = "low"
    MEDIUM = "medium"
    CRITICAL = "critical"

    @classmethod
    def _missing_(cls, value: object) -> "AIRiskLevel | None":
        """Resolve a value that failed exact lookup, ignoring case and padding.

        Returning ``None`` makes Enum raise its usual ``ValueError``.
        """
        if isinstance(value, str):
            candidate = value.strip().lower()
            for member in cls:
                if member.value == candidate:
                    return member
        return None
|
||
|
||
|
||
class AIDataImpact(str, Enum):
    """Data impact assessed by the AI.

    Member values are uppercase. Lookup by value tolerates letter case and
    surrounding whitespace (``AIDataImpact("write")`` resolves to ``WRITE``);
    values that still do not match raise ``ValueError``.
    """

    NONE = "NONE"
    READ_ONLY = "READ_ONLY"
    WRITE = "WRITE"
    DESTRUCTIVE = "DESTRUCTIVE"

    @classmethod
    def _missing_(cls, value: object) -> "AIDataImpact | None":
        """Resolve a value that failed exact lookup, ignoring case and padding.

        Returning ``None`` makes Enum raise its usual ``ValueError``.
        """
        if isinstance(value, str):
            candidate = value.strip().upper()
            for member in cls:
                if member.value == candidate:
                    return member
        return None
|
||
|
||
|
||
class AIBlastRadius(BaseModel):
    """Blast radius analysis (REQUIRED - per the API contract).

    This object is mandatory; the LLM output must contain the full structure.
    ``affected_pods`` and ``estimated_downtime`` have no default and must be
    supplied; ``related_services`` and ``data_impact`` fall back to an empty
    list and ``AIDataImpact.NONE`` respectively.
    """

    affected_pods: int = Field(
        ...,
        ge=0,
        description="受影響的 Pod 數量",
    )
    estimated_downtime: str = Field(
        ...,
        description="預估停機時間 (例如: '~30s', '~2 min', '0')",
    )
    related_services: list[str] = Field(
        default_factory=list,
        description="相關受影響服務",
    )
    data_impact: AIDataImpact = Field(
        default=AIDataImpact.NONE,
        description="資料影響程度",
    )

    @field_validator("estimated_downtime", mode="before")
    @classmethod
    def normalize_estimated_downtime(cls, v):
        """Turn a numeric downtime into its string form.

        The field description lists '0' as a valid example, which an LLM may
        emit as the JSON number 0. Pydantic v2 does not coerce numbers to
        ``str`` by default, so without this step such output would fail
        validation. Non-numeric input is returned unchanged.
        """
        # bool is a subclass of int; leave it alone so it is still rejected.
        if isinstance(v, (int, float)) and not isinstance(v, bool):
            return str(v)
        return v

    @field_validator("data_impact", mode="before")
    @classmethod
    def normalize_data_impact(cls, v):
        """Normalize data_impact before enum validation.

        The LLM may emit lowercase, padded, or hyphen/space separated
        spellings such as "read-only"; they are mapped onto the enum's
        UPPER_SNAKE_CASE form. Non-string input is returned unchanged.
        """
        if isinstance(v, str):
            return v.strip().upper().replace("-", "_").replace(" ", "_")
        return v
|
||
|
||
|
||
class OpenClawDecision(BaseModel):
    """OpenClaw AI decision output (strictly structured).

    The LLM must emit JSON in this shape; anything else counts as a parse
    failure. blast_radius is a REQUIRED field!

    Required fields (no default): suggested_action, target_resource,
    risk_level, blast_radius, confidence. The "before" validators below
    normalize common LLM spelling variations so that they do not fail
    validation.
    """

    # === Basic operation fields ===
    suggested_action: SuggestedAction = Field(
        ...,
        description="建議執行的操作類型",
    )

    target_resource: str = Field(
        ...,
        description="目標資源名稱 (e.g., 'harbor', 'grafana')",
    )
    namespace: str = Field(
        default="default",
        description="Kubernetes namespace",
    )
    # 2026-03-29 ogt: None is allowed because the LLM may return null
    kubectl_command: str | None = Field(
        default="",
        description="具體的 kubectl 指令",
    )

    @field_validator("kubectl_command", mode="before")
    @classmethod
    def normalize_kubectl_command(cls, v):
        """Convert null to an empty string; any other value passes through."""
        return "" if v is None else v

    # === Risk assessment fields ===
    risk_level: AIRiskLevel = Field(
        ...,
        description="風險等級評估",
    )

    # === REQUIRED: blast radius (per the API contract) ===
    blast_radius: AIBlastRadius = Field(
        ...,
        description="爆炸半徑分析 - REQUIRED",
    )

    # === Analysis / explanation fields ===
    action_title: str = Field(
        default="",
        description="操作標題 (繁體中文)",
    )
    description: str = Field(
        default="",
        description="根本原因分析說明 (繁體中文)",
    )
    reasoning: str = Field(
        default="",
        description="給人類主管看的決策理由 (繁體中文)",
    )
    deviation_analysis: str = Field(
        default="",
        description="基準線偏差分析 (例如:CPU 85% 超出基準線 45% 達 +4σ)",
    )

    # === Confidence and impact scope ===
    # 2026-03-29 ogt: default removed to force the LLM to output a real
    # confidence score.
    # Per the original note, when the LLM omits confidence the parsing step
    # fills in 0.5 and marks the decision as COLLAB (that logic is not in
    # this class).
    confidence: float = Field(
        ...,  # REQUIRED - no default allowed
        ge=0.0,
        le=1.0,
        description="決策信心度 (0-1) - LLM 必須計算",
    )
    affected_services: list[str] = Field(
        default_factory=list,
        description="可能受影響的相關服務",
    )

    # === v6.0 AI arbitration fields ===
    primary_responsibility: str = Field(
        default="COLLAB",
        description="主要責任團隊 (FE/BE/INFRA/DB/COLLAB)",
    )
    responsibility_reasoning: str = Field(
        default="",
        description="責任判定理由",
    )
    secondary_teams: list[str] = Field(
        default_factory=list,
        description="需協助的其他團隊",
    )

    # === v7.0 tuning suggestions and SignOz integration ===
    optimization_suggestions: list[dict] = Field(
        default_factory=list,
        description="預防性調優建議 (含 kubectl 指令)",
    )
    signoz_correlation: str = Field(
        default="",
        description="SignOz 指標與告警的關聯分析",
    )

    @field_validator("risk_level", mode="before")
    @classmethod
    def normalize_risk_level(cls, v):
        """Normalize risk_level (handles non-standard values from the LLM).

        Strings are stripped and lowercased, then known synonyms are folded
        onto the three supported levels. Unrecognised strings are returned
        lowercased and left for enum validation to reject. Non-string input
        is returned unchanged.
        """
        if not isinstance(v, str):
            return v
        synonyms = {
            "high": "critical",
            "severe": "critical",
            "warning": "medium",
            "normal": "low",
            "safe": "low",
        }
        # strip() so padded output such as " High " still matches.
        lowered = v.strip().lower()
        return synonyms.get(lowered, lowered)

    @field_validator("suggested_action", mode="before")
    @classmethod
    def normalize_suggested_action(cls, v):
        """Normalize suggested_action: case + alias mapping + fallback.

        2026-04-17 ogt + Claude Sonnet 4.6 (APAC):
        Root cause: NemoTron emitted "investigate" (lowercase), Pydantic
        rejected it, and analysis_result became None. The earlier version
        only uppercased, so unknown values still failed. The fix is:
        uppercase first, then map aliases, and finally fall back to
        NO_ACTION.

        Any value that cannot be resolved to a SuggestedAction, including
        non-string input such as null, becomes "NO_ACTION", so this field
        never fails validation once a value is present.
        """
        if not isinstance(v, str):
            # null / numbers / objects from the LLM would otherwise reach the
            # enum validator and fail the whole decision.
            return SuggestedAction.NO_ACTION.value

        # strip() first so padding is not turned into leading/trailing "_".
        normalized = v.strip().upper().replace("-", "_").replace(" ", "_")

        # Alias mapping (the LLM may use informal names)
        alias_map = {
            "DIAGNOSE": "INVESTIGATE",
            "DEBUG": "INVESTIGATE",
            "MONITOR": "OBSERVE",
            "WATCH": "OBSERVE",
            "TUNE": "TUNE_RESOURCES",
            "HPA": "APPLY_HPA",
        }
        normalized = alias_map.get(normalized, normalized)

        # Unknown values fall back to NO_ACTION so that Pydantic never fails
        # here and leaves analysis_result = None.
        try:
            SuggestedAction(normalized)
        except ValueError:
            return SuggestedAction.NO_ACTION.value
        return normalized
|
||
|
||
|
||
class OpenClawAnalysisRequest(BaseModel):
    """Request payload for an analysis run."""

    # When true, monitoring data is fetched again (per the field description).
    force_refresh: bool = Field(description="強制重新抓取監控數據", default=False)
|
||
|
||
|
||
class OpenClawAnalysisResponse(BaseModel):
    """Response payload for an analysis run.

    ``success`` and ``message`` are required; every other field is optional
    and carries the default shown below.
    """

    success: bool
    message: str
    # Parsed decision; None when no decision is attached.
    decision: OpenClawDecision | None = None
    # Whether a pending-approval card was created.
    approval_created: bool = Field(description="是否已建立待簽核卡片", default=False)
    # ID of the ApprovalRecord that was created, if any.
    approval_id: str | None = Field(description="建立的 ApprovalRecord ID", default=None)
    # Which AI provider produced the result.
    ai_provider: str = Field(
        description="使用的 AI 提供者 (ollama/gemini/claude)",
        default="unknown",
    )
    # Raw LLM response, kept for debugging.
    raw_llm_response: str | None = Field(description="LLM 原始回應 (debug 用)", default=None)
|