Files
awoooi/apps/api/src/models/ai.py
OG T fe77e6d297
Some checks failed
CD Pipeline / build-and-deploy (push) Successful in 10m48s
Type Sync Check / check-type-sync (push) Failing after 2m52s
fix(ai): SuggestedAction enum 擴充 + Pydantic fallback 防護
根本原因: NemoTron 輸出 "investigate" → Pydantic 只接受 4 個值 → 爆炸
→ openclaw_analysis_parse_failed → analysis_result=None → 全部 fallback 卡片顯示「待分析」

修復:
1. SuggestedAction enum 新增 INVESTIGATE/OBSERVE/APPLY_HPA/TUNE_RESOURCES
   (prompt.py 列了 6 個,enum 只有 4 個,prompt/model 不同步是根源)
2. normalize_suggested_action validator: uppercase + 別名映射 + 未知值 fallback NO_ACTION
   確保任何 LLM 輸出都不會讓 Pydantic 爆炸導致 analysis_result = None

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-17 21:36:36 +08:00

261 lines
7.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
AI Decision Models - Phase 2 Structured Output
===============================================
CAI-101: OpenClaw AI 結構化輸出模型
防禦性工程鐵律:
- 絕對禁止 LLM 輸出無法解析的自由文本
- 必須強制 JSON 格式 + Pydantic 驗證
- blast_radius 為 REQUIRED 欄位,不可遺漏
"""
from enum import Enum
from pydantic import BaseModel, Field, field_validator
class SuggestedAction(str, Enum):
    """
    Kinds of operation the AI may suggest.

    Each member's value is its own name as an upper-case string, and members
    are also ``str`` instances.

    Per the original author's note this enum is meant to stay aligned with
    ``executor.OperationType`` (defined outside this module - verify there).

    History (2026-04-17): INVESTIGATE / OBSERVE / APPLY_HPA / TUNE_RESOURCES
    were added after the prompt listed more values than this enum accepted,
    which made a lower-case "investigate" from the LLM fail validation.
    """

    # Operations that act on the cluster.
    RESTART_DEPLOYMENT = "RESTART_DEPLOYMENT"
    DELETE_POD = "DELETE_POD"
    SCALE_DEPLOYMENT = "SCALE_DEPLOYMENT"
    APPLY_HPA = "APPLY_HPA"
    TUNE_RESOURCES = "TUNE_RESOURCES"

    # Non-executing outcomes.
    INVESTIGATE = "INVESTIGATE"  # diagnose only, no command is issued
    OBSERVE = "OBSERVE"  # wait and watch
    NO_ACTION = "NO_ACTION"  # nothing needs to be done
class AIRiskLevel(str, Enum):
    """
    Risk level assigned by the AI to a decision.

    Values are lower-case strings; there are exactly three levels.
    """

    LOW = "low"
    MEDIUM = "medium"
    CRITICAL = "critical"
class AIDataImpact(str, Enum):
    """
    Data-impact classification assigned by the AI.

    Values are upper-case strings identical to the member names.
    """

    NONE = "NONE"
    READ_ONLY = "READ_ONLY"
    WRITE = "WRITE"
    DESTRUCTIVE = "DESTRUCTIVE"
class AIBlastRadius(BaseModel):
    """
    Blast-radius analysis carried by every AI decision.

    ``affected_pods`` and ``estimated_downtime`` are required.
    ``related_services`` defaults to an empty list and ``data_impact``
    defaults to ``AIDataImpact.NONE`` when omitted.
    """

    # Number of pods affected; must be >= 0.
    affected_pods: int = Field(
        ...,
        ge=0,
        description="受影響的 Pod 數量",
    )
    # Free-form downtime estimate (the description gives '~30s', '~2 min', '0').
    estimated_downtime: str = Field(
        ...,
        description="預估停機時間 (例如: '~30s', '~2 min', '0')",
    )
    # Other services that are affected.
    related_services: list[str] = Field(
        default_factory=list,
        description="相關受影響服務",
    )
    # Degree of data impact.
    data_impact: AIDataImpact = Field(
        default=AIDataImpact.NONE,
        description="資料影響程度",
    )

    @field_validator("data_impact", mode="before")
    @classmethod
    def normalize_data_impact(cls, v):
        """
        Canonicalise a raw ``data_impact`` value before enum validation.

        For string input, surrounding whitespace is removed, the text is
        upper-cased, and hyphens/spaces are turned into underscores, so LLM
        spellings such as "read-only" or " Write " resolve to enum values.
        This mirrors the separator handling of
        ``OpenClawDecision.normalize_suggested_action``.

        Unrecognised strings are deliberately NOT mapped to a fallback:
        they are returned as-is and rejected by Pydantic, so the data impact
        is never silently understated. Non-string input is returned
        unchanged.
        """
        if isinstance(v, str):
            return v.strip().upper().replace("-", "_").replace(" ", "_")
        return v
class OpenClawDecision(BaseModel):
    """
    Structured decision produced by the OpenClaw AI.

    The LLM is expected to emit JSON matching this model; anything that does
    not validate is treated as a parse failure by the caller.

    Required fields: ``suggested_action``, ``target_resource``,
    ``risk_level``, ``blast_radius`` and ``confidence``. Every other field
    has a default.

    Three "before" validators make the model tolerant of common LLM output
    variations (null command, non-standard risk words, loosely spelled
    action names).
    """

    # === Basic operation fields ===
    suggested_action: SuggestedAction = Field(
        ...,
        description="建議執行的操作類型",
    )
    target_resource: str = Field(
        ...,
        description="目標資源名稱 (e.g., 'harbor', 'grafana')",
    )
    namespace: str = Field(
        default="default",
        description="Kubernetes namespace",
    )
    # None is accepted in the annotation because the LLM may return null;
    # normalize_kubectl_command turns it into "" before validation.
    kubectl_command: str | None = Field(
        default="",
        description="具體的 kubectl 指令",
    )

    # === Risk assessment ===
    risk_level: AIRiskLevel = Field(
        ...,
        description="風險等級評估",
    )

    # === REQUIRED: blast radius (per the API contract) ===
    blast_radius: AIBlastRadius = Field(
        ...,
        description="爆炸半徑分析 - REQUIRED",
    )

    # === Explanatory text ===
    action_title: str = Field(
        default="",
        description="操作標題 (繁體中文)",
    )
    description: str = Field(
        default="",
        description="根本原因分析說明 (繁體中文)",
    )
    reasoning: str = Field(
        default="",
        description="給人類主管看的決策理由 (繁體中文)",
    )
    deviation_analysis: str = Field(
        default="",
        description="基準線偏差分析 (例如CPU 85% 超出基準線 45% 達 +4σ)",
    )

    # === Confidence and affected scope ===
    # No default on purpose: the LLM must supply a real score in [0, 1].
    # NOTE(review): the original comment says the parsing layer (outside this
    # module) substitutes 0.5 and marks the result COLLAB when confidence is
    # missing - confirm against the caller.
    confidence: float = Field(
        ...,  # REQUIRED - no default allowed
        ge=0.0,
        le=1.0,
        description="決策信心度 (0-1) - LLM 必須計算",
    )
    affected_services: list[str] = Field(
        default_factory=list,
        description="可能受影響的相關服務",
    )

    # === v6.0 AI arbitration fields ===
    primary_responsibility: str = Field(
        default="COLLAB",
        description="主要責任團隊 (FE/BE/INFRA/DB/COLLAB)",
    )
    responsibility_reasoning: str = Field(
        default="",
        description="責任判定理由",
    )
    secondary_teams: list[str] = Field(
        default_factory=list,
        description="需協助的其他團隊",
    )

    # === v7.0 tuning suggestions and SignOz integration ===
    optimization_suggestions: list[dict] = Field(
        default_factory=list,
        description="預防性調優建議 (含 kubectl 指令)",
    )
    signoz_correlation: str = Field(
        default="",
        description="SignOz 指標與告警的關聯分析",
    )

    @field_validator("kubectl_command", mode="before")
    @classmethod
    def normalize_kubectl_command(cls, v):
        """Replace a null ``kubectl_command`` with an empty string."""
        return v if v is not None else ""

    @field_validator("risk_level", mode="before")
    @classmethod
    def normalize_risk_level(cls, v):
        """
        Canonicalise a raw ``risk_level`` string before enum validation.

        The string is stripped of surrounding whitespace and lower-cased,
        then a few non-standard words are mapped onto the three supported
        levels ("high"/"severe" -> "critical", "warning" -> "medium",
        "normal"/"safe" -> "low"). Any other string is returned lower-cased
        for Pydantic to accept or reject. Non-string input is returned
        unchanged.
        """
        if isinstance(v, str):
            mapping = {
                "high": "critical",
                "severe": "critical",
                "warning": "medium",
                "normal": "low",
                "safe": "low",
            }
            # strip() so padded output such as " high " is not rejected.
            key = v.strip().lower()
            return mapping.get(key, key)
        return v

    @field_validator("suggested_action", mode="before")
    @classmethod
    def normalize_suggested_action(cls, v):
        """
        Canonicalise a raw ``suggested_action`` string before enum validation.

        Steps for string input:
          1. strip surrounding whitespace, upper-case, and convert hyphens
             and spaces to underscores;
          2. translate informal aliases (e.g. DIAGNOSE -> INVESTIGATE);
          3. if the result is still not a ``SuggestedAction`` value, fall
             back to "NO_ACTION" instead of letting validation fail.

        Non-string input is returned unchanged and validated by Pydantic.

        History (2026-04-17): the LLM emitted lower-case "investigate",
        validation failed and the whole analysis result was lost; the alias
        map and the NO_ACTION fallback were added to prevent that.
        """
        if not isinstance(v, str):
            return v

        # strip() first: otherwise " investigate " would become
        # "_INVESTIGATE_" and be silently downgraded to NO_ACTION.
        normalized = v.strip().upper().replace("-", "_").replace(" ", "_")

        # Informal names the LLM may produce.
        alias_map = {
            "DIAGNOSE": "INVESTIGATE",
            "DEBUG": "INVESTIGATE",
            "MONITOR": "OBSERVE",
            "WATCH": "OBSERVE",
            "TUNE": "TUNE_RESOURCES",
            "HPA": "APPLY_HPA",
        }
        normalized = alias_map.get(normalized, normalized)

        # Unknown value -> NO_ACTION, so one odd action name never discards
        # the rest of the decision.
        try:
            SuggestedAction(normalized)
        except ValueError:
            return "NO_ACTION"
        return normalized
class OpenClawAnalysisRequest(BaseModel):
    """
    Request body for an OpenClaw analysis.

    Carries a single optional flag, ``force_refresh``, which defaults to
    ``False``.
    """

    # When true, monitoring data is to be fetched again (per the description).
    force_refresh: bool = Field(False, description="強制重新抓取監控數據")
class OpenClawAnalysisResponse(BaseModel):
    """
    Response body for an OpenClaw analysis.

    ``success`` and ``message`` are required; every other field is optional
    and has a default.
    """

    # Required outcome flag and human-readable message.
    success: bool
    message: str

    # The validated decision, or None when there is none to report.
    decision: OpenClawDecision | None = None

    # Whether a pending-approval card was created, and its record ID if so.
    approval_created: bool = Field(False, description="是否已建立待簽核卡片")
    approval_id: str | None = Field(None, description="建立的 ApprovalRecord ID")

    # Name of the AI provider that was used.
    ai_provider: str = Field(
        "unknown",
        description="使用的 AI 提供者 (ollama/gemini/claude)",
    )

    # Raw LLM response text, kept for debugging.
    raw_llm_response: str | None = Field(None, description="LLM 原始回應 (debug 用)")