🔴 違規修正: 規則匹配/Expert System 不是 AI 分析,confidence 必須 = 0.0 修正檔案: - agents/action_planner.py: 0.9 → 0.0 - agents/blast_radius.py: 0.85/0.5/0.9 → 0.0 - agents/security.py: 計算公式 → 0.0 - signoz_webhook.py: 0.7 → 0.0 - auto_approve.py: default 0.5 → 0.0 - ci_auto_repair.py: 整個計算函數 → return 0.0 - error_analyzer_service.py: default 0.5 → 0.0 - intent_classifier.py: 計算公式 → 0.0 - openclaw.py: default 0.5 → 0.0 - resource_resolver.py: 0.8 → 0.0 - k8s_naming.py: 0.9/0.7 → 0.0 只有 LLM 真實分析返回的 confidence 才能 > 0 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
571 lines
18 KiB
Python
571 lines
18 KiB
Python
"""
|
|
Action Planner Agent - 執行計畫生成專家
|
|
========================================
|
|
|
|
職責:
|
|
- 生成結構化執行計畫
|
|
- 定義 rollback 策略
|
|
- 設定驗證步驟
|
|
- 回傳完整 ActionPlan
|
|
|
|
符合 ADR-009 ActionPlannerAgent 規範
|
|
"""
|
|
|
|
import time
|
|
from dataclasses import dataclass, field
|
|
from enum import Enum
|
|
from typing import Any
|
|
|
|
import structlog
|
|
|
|
from src.agents.base import AgentResult, AgentStatus, BaseAgent
|
|
|
|
logger = structlog.get_logger(__name__)
|
|
|
|
|
|
# =============================================================================
|
|
# Action Plan Types
|
|
# =============================================================================
|
|
|
|
|
|
class ActionType(str, Enum):
    """Kinds of remediation action an execution plan can carry out.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase string value.
    """

    # Restart a service.
    RESTART = "restart"
    # Scale replicas up or down.
    SCALE = "scale"
    # Roll back to an earlier version.
    ROLLBACK = "rollback"
    # Delete a resource.
    DELETE = "delete"
    # Patch configuration.
    PATCH = "patch"
    # Execute a command.
    EXEC = "exec"
    # Apply a change.
    APPLY = "apply"
    # Anything that does not fit the categories above.
    CUSTOM = "custom"
|
|
|
|
|
|
class ActionPhase(str, Enum):
    """Phase of a plan that a step belongs to.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase string value.
    """

    # Checks run before anything is changed.
    PRE_CHECK = "pre_check"
    # The main action.
    EXECUTE = "execute"
    # Verification of the outcome.
    VERIFY = "verify"
    # Rollback, used if the action fails.
    ROLLBACK = "rollback"
|
|
|
|
|
|
@dataclass
class ActionStep:
    """One command inside an execution plan, plus its execution metadata."""

    # Command line to run.
    command: str
    # Human-readable explanation of the step.
    description: str
    # Phase of the plan this step belongs to.
    phase: ActionPhase
    # Timeout for the step, in seconds.
    timeout_sec: int = 60
    # Whether the step is allowed to fail.
    can_fail: bool = False
    # Sort key used by ActionPlan.get_all_steps.
    order: int = 0

    def to_dict(self) -> dict[str, Any]:
        """Return the step as a plain dict, with ``phase`` reduced to its string value."""
        return dict(
            command=self.command,
            description=self.description,
            phase=self.phase.value,
            timeout_sec=self.timeout_sec,
            can_fail=self.can_fail,
            order=self.order,
        )
|
|
|
|
|
|
@dataclass
class ActionPlan(AgentResult):
    """Result produced by ActionPlannerAgent: a complete execution plan.

    Extends ``AgentResult`` with the plan itself, split into one step list
    per phase, plus summary fields.
    """

    # Kind of action the plan performs.
    action_type: ActionType = ActionType.CUSTOM
    # Checks to run before the main action.
    pre_check_steps: list[ActionStep] = field(default_factory=list)
    # The main action steps.
    execute_steps: list[ActionStep] = field(default_factory=list)
    # Steps that verify the outcome.
    verify_steps: list[ActionStep] = field(default_factory=list)
    # Steps that undo the action.
    rollback_steps: list[ActionStep] = field(default_factory=list)
    # Estimated run time in seconds.
    estimated_duration_sec: int = 0
    # Whether the plan needs approval; defaults to True.
    requires_approval: bool = True
    # kubectl command lines associated with the plan.
    kubectl_commands: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the base result dict extended with the plan fields.

        Steps are serialised through ``ActionStep.to_dict`` and the action
        type is reduced to its string value.
        """
        base = super().to_dict()
        base.update({
            "action_type": self.action_type.value,
            "pre_check_steps": [s.to_dict() for s in self.pre_check_steps],
            "execute_steps": [s.to_dict() for s in self.execute_steps],
            "verify_steps": [s.to_dict() for s in self.verify_steps],
            "rollback_steps": [s.to_dict() for s in self.rollback_steps],
            "estimated_duration_sec": self.estimated_duration_sec,
            "requires_approval": self.requires_approval,
            "kubectl_commands": self.kubectl_commands,
        })
        return base

    def get_all_steps(self) -> list[ActionStep]:
        """Return the pre-check, execute and verify steps in execution order.

        Rollback steps are not included.

        ``ActionStep.order`` restarts at 0 inside every phase (it is the index
        assigned by ``ActionPlannerAgent._generate_steps``), so sorting the
        merged list by ``order`` alone would interleave the phases and could
        place a pre-check after the execute step. Each phase is therefore
        sorted on its own and the phases are concatenated in their fixed
        sequence.
        """
        ordered: list[ActionStep] = []
        for phase_steps in (
            self.pre_check_steps,
            self.execute_steps,
            self.verify_steps,
        ):
            ordered.extend(sorted(phase_steps, key=lambda s: s.order))
        return ordered

    def get_primary_command(self) -> str | None:
        """Return the command of the first execute step, or None if there is none."""
        if self.execute_steps:
            return self.execute_steps[0].command
        return None
|
|
|
|
|
|
# =============================================================================
|
|
# Action Templates
|
|
# =============================================================================
|
|
|
|
|
|
# Predefined execution-plan templates, keyed by action name.
#
# Every "command" and "description" string is rendered with
# ``str.format(**params)`` in ActionPlannerAgent._generate_steps, so the
# placeholders {target}, {namespace}, {replicas} and {original_replicas} are
# substituted. Any literal brace that must survive formatting (for example in
# a kubectl JSONPath expression) has to be doubled: ``{{`` / ``}}``.
ACTION_TEMPLATES: dict[str, dict[str, Any]] = {
    "restart": {
        "action_type": ActionType.RESTART,
        "requires_approval": False,  # a restart is comparatively safe
        "pre_check": [
            {
                "command": "kubectl get deployment {target} -n {namespace} -o wide",
                "description": "確認目標 Deployment 存在且健康",
            },
            {
                "command": "kubectl get pods -l app={target} -n {namespace} --no-headers | wc -l",
                "description": "確認目前 Pod 數量",
            },
        ],
        "execute": [
            {
                "command": "kubectl rollout restart deployment/{target} -n {namespace}",
                "description": "執行滾動重啟",
            },
        ],
        "verify": [
            {
                "command": "kubectl rollout status deployment/{target} -n {namespace} --timeout=120s",
                "description": "等待滾動更新完成",
                "timeout_sec": 120,
            },
            {
                "command": "kubectl get pods -l app={target} -n {namespace} -o wide",
                "description": "確認新 Pod 狀態",
            },
        ],
        "rollback": [
            {
                "command": "kubectl rollout undo deployment/{target} -n {namespace}",
                "description": "回滾到上一個版本",
            },
        ],
    },

    "scale": {
        "action_type": ActionType.SCALE,
        "requires_approval": False,
        "pre_check": [
            {
                # The JSONPath braces are doubled so that str.format emits a
                # literal '{.spec.replicas}'. Left single, format() treats
                # them as a positional replacement field and raises IndexError.
                "command": "kubectl get deployment {target} -n {namespace} -o jsonpath='{{.spec.replicas}}'",
                "description": "記錄目前副本數",
            },
        ],
        "execute": [
            {
                "command": "kubectl scale deployment/{target} --replicas={replicas} -n {namespace}",
                "description": "調整副本數至 {replicas}",
            },
        ],
        "verify": [
            {
                "command": "kubectl rollout status deployment/{target} -n {namespace} --timeout=60s",
                "description": "等待擴縮容完成",
                "timeout_sec": 60,
            },
        ],
        "rollback": [
            {
                "command": "kubectl scale deployment/{target} --replicas={original_replicas} -n {namespace}",
                "description": "恢復原始副本數",
            },
        ],
    },

    "rollback": {
        "action_type": ActionType.ROLLBACK,
        "requires_approval": True,  # a rollback needs review
        "pre_check": [
            {
                "command": "kubectl rollout history deployment/{target} -n {namespace}",
                "description": "查看版本歷史",
            },
        ],
        "execute": [
            {
                "command": "kubectl rollout undo deployment/{target} -n {namespace}",
                "description": "回滾到上一個版本",
            },
        ],
        "verify": [
            {
                "command": "kubectl rollout status deployment/{target} -n {namespace} --timeout=120s",
                "description": "等待回滾完成",
                "timeout_sec": 120,
            },
            {
                "command": "kubectl get pods -l app={target} -n {namespace} -o wide",
                "description": "確認 Pod 狀態",
            },
        ],
        "rollback": [
            {
                "command": "kubectl rollout undo deployment/{target} -n {namespace}",
                "description": "再次回滾 (恢復原版本)",
            },
        ],
    },

    "delete_pod": {
        "action_type": ActionType.DELETE,
        "requires_approval": True,  # a deletion needs review
        "pre_check": [
            {
                "command": "kubectl get pod {target} -n {namespace} -o wide",
                "description": "確認目標 Pod 存在",
            },
        ],
        "execute": [
            {
                "command": "kubectl delete pod {target} -n {namespace}",
                "description": "刪除異常 Pod (觸發重建)",
            },
        ],
        "verify": [
            {
                "command": "kubectl get pods -n {namespace} | grep -v Completed | grep -v Terminating",
                "description": "確認新 Pod 已建立",
                "can_fail": True,
            },
        ],
        # Deleting a Pod cannot be rolled back; the Deployment is expected to
        # recreate it.
        "rollback": [],
    },
}
|
|
|
|
|
|
class ActionPlannerAgent(BaseAgent[ActionPlan]):
    """Agent that turns a problem description into a structured execution plan.

    Flow implemented by :meth:`analyze`:
      1. Decide the action from the context (explicit suggestion or keywords).
      2. Look up the matching entry in ``ACTION_TEMPLATES``.
      3. Fill the template with parameters to produce concrete steps.
      4. Estimate the total run time.

    The plan is produced by rule/template matching, not by an LLM, so the
    returned ``confidence`` is always 0.0.

    Example::

        agent = ActionPlannerAgent()
        result = await agent.analyze({
            "problem": "Pod keeps restarting",
            "target_service": "api",
            "namespace": "awoooi-prod",
        })
        print(result.execute_steps)  # [ActionStep(...), ...]
    """

    AGENT_NAME = "action-planner"
    AGENT_DESCRIPTION = "行動規劃師,制定修復步驟與回滾方案"
    AGENT_TOOLS = ["Read", "Glob"]

    # Keyword rules for inferring the action from the problem text. They are
    # evaluated top to bottom and the first rule with a matching keyword wins,
    # so the order is significant.
    _KEYWORD_RULES = (
        ("restart", ("crash", "restart", "oom", "killed")),
        ("scale", ("slow", "latency", "capacity", "scale")),
        ("rollback", ("error", "failed", "rollback", "undo")),
        ("delete_pod", ("stuck", "pending", "delete pod")),
    )

    def __init__(
        self,
        timeout_sec: float = 30.0,
        default_namespace: str = "awoooi-prod",
    ):
        """Initialise the agent.

        Args:
            timeout_sec: Execution timeout, passed to the base class.
            default_namespace: Namespace used when the context does not
                provide one.
        """
        super().__init__(timeout_sec)
        self.default_namespace = default_namespace

    async def analyze(self, context: dict[str, Any]) -> ActionPlan:
        """Generate an execution plan for the given context.

        Args:
            context: Analysis context. Keys read here or by the helpers:
                - problem: problem description
                - suggested_action: explicit action (a key of
                  ``ACTION_TEMPLATES``)
                - target_service: target service
                - namespace: namespace
                - replicas: replica count (used by the scale template)
                - original_replicas: replica count to restore on rollback

        Returns:
            An ActionPlan with status SUCCESS and the generated steps, or an
            ActionPlan with status FAILED (and ``requires_approval=True``) if
            any exception is raised while planning.
        """
        # A monotonic clock keeps the latency correct across wall-clock jumps.
        start_time = time.perf_counter()

        try:
            # Logged inside the try so that a malformed context produces a
            # FAILED plan instead of an exception escaping analyze().
            self.logger.info(
                "action_planning_start",
                problem=str(context.get("problem") or "")[:100],
                target=context.get("target_service"),
            )

            # 1. Decide the action type.
            action_type = self._determine_action_type(context)

            # 2. Fetch the template (restart is the fallback).
            template = ACTION_TEMPLATES.get(action_type, ACTION_TEMPLATES["restart"])

            # 3. Prepare template parameters.
            params = self._prepare_params(context)

            # 4. Generate the steps of every phase.
            pre_check_steps = self._generate_steps(
                template.get("pre_check", []),
                params,
                ActionPhase.PRE_CHECK,
            )
            execute_steps = self._generate_steps(
                template.get("execute", []),
                params,
                ActionPhase.EXECUTE,
            )
            verify_steps = self._generate_steps(
                template.get("verify", []),
                params,
                ActionPhase.VERIFY,
            )
            rollback_steps = self._generate_steps(
                template.get("rollback", []),
                params,
                ActionPhase.ROLLBACK,
            )

            # 5. Estimate the duration (rollback steps are not counted).
            estimated_duration = self._estimate_duration(
                pre_check_steps + execute_steps + verify_steps
            )

            # 6. Collect the kubectl commands of the execute phase.
            kubectl_commands = [
                step.command
                for step in execute_steps
                if step.command.startswith("kubectl")
            ]

            latency_ms = int((time.perf_counter() - start_time) * 1000)

            # 7. Build the one-line summary.
            analysis = self._generate_analysis(
                template["action_type"],
                params.get("target", "unknown"),
                len(execute_steps),
            )

            result = ActionPlan(
                agent_name=self.AGENT_NAME,
                status=AgentStatus.SUCCESS,
                confidence=0.0,  # rule/template matching, not AI analysis
                analysis=analysis,
                latency_ms=latency_ms,
                action_type=template["action_type"],
                pre_check_steps=pre_check_steps,
                execute_steps=execute_steps,
                verify_steps=verify_steps,
                rollback_steps=rollback_steps,
                estimated_duration_sec=estimated_duration,
                requires_approval=template.get("requires_approval", True),
                kubectl_commands=kubectl_commands,
            )

            self.logger.info(
                "action_planning_complete",
                action_type=result.action_type.value,
                step_count=len(execute_steps),
                latency_ms=latency_ms,
            )

            return result

        except Exception as e:
            # Top-level boundary: any planning failure is reported as a FAILED
            # plan rather than propagated to the caller.
            latency_ms = int((time.perf_counter() - start_time) * 1000)

            self.logger.exception(
                "action_planning_error",
                error=str(e),
            )

            return ActionPlan(
                agent_name=self.AGENT_NAME,
                status=AgentStatus.FAILED,
                confidence=0.0,
                analysis=f"計畫生成失敗: {str(e)}",
                latency_ms=latency_ms,
                error=str(e),
                requires_approval=True,
            )

    def _determine_action_type(self, context: dict[str, Any]) -> str:
        """Choose the template key for this context.

        An explicit ``suggested_action`` that names a template wins.
        Otherwise the ``problem`` text is matched against ``_KEYWORD_RULES``.
        If nothing matches, "restart" is returned.

        Missing or ``None`` values are treated as empty strings.
        """
        suggested = str(context.get("suggested_action") or "").strip().lower()
        if suggested in ACTION_TEMPLATES:
            return suggested

        problem = str(context.get("problem") or "").lower()
        for action, keywords in self._KEYWORD_RULES:
            if any(kw in problem for kw in keywords):
                return action

        # Default: restart.
        return "restart"

    def _prepare_params(self, context: dict[str, Any]) -> dict[str, str]:
        """Build the string parameters used to fill a template.

        Returns a dict with the keys ``target``, ``namespace``, ``replicas``
        and ``original_replicas``. Missing, ``None`` or empty targets become
        "unknown", a missing namespace falls back to ``default_namespace``,
        and replica counts default to 3 and 1 respectively.
        """
        target = context.get("target_service")

        # target_service may be a sequence; only the first entry is used.
        if isinstance(target, (list, tuple)):
            target = target[0] if target else None

        namespace = context.get("namespace") or self.default_namespace

        # 0 is a meaningful replica count, so only None triggers the default.
        replicas = context.get("replicas")
        original_replicas = context.get("original_replicas")

        return {
            "target": str(target) if target else "unknown",
            "namespace": str(namespace),
            "replicas": str(3 if replicas is None else replicas),
            "original_replicas": str(
                1 if original_replicas is None else original_replicas
            ),
        }

    def _generate_steps(
        self,
        template_steps: list[dict[str, Any]],
        params: dict[str, str],
        phase: ActionPhase,
    ) -> list[ActionStep]:
        """Render template entries into concrete ActionStep objects.

        Args:
            template_steps: Template entries with "command" and "description"
                format strings and optional "timeout_sec" / "can_fail".
            params: Values substituted into the format strings.
            phase: Phase assigned to every generated step.

        Returns:
            One ActionStep per template entry; ``order`` is the entry's index
            within this phase.
        """
        # Local import so this block does not depend on a file-level import.
        import shlex

        # The templates are shell command lines (some contain pipes), so the
        # values are shell-quoted before substitution. shlex.quote returns
        # plain alphanumeric/dash/dot names unchanged; only values with shell
        # metacharacters are altered.
        shell_params = {
            key: shlex.quote(str(value)) for key, value in params.items()
        }

        return [
            ActionStep(
                command=tmpl["command"].format(**shell_params),
                description=tmpl["description"].format(**params),
                phase=phase,
                timeout_sec=tmpl.get("timeout_sec", 60),
                can_fail=tmpl.get("can_fail", False),
                order=index,
            )
            for index, tmpl in enumerate(template_steps)
        ]

    def _estimate_duration(self, steps: list[ActionStep]) -> int:
        """Estimate the run time in seconds.

        Each step is assumed to take a third of its timeout; the result is
        never below 30 seconds.
        """
        total = sum(step.timeout_sec // 3 for step in steps)
        return max(total, 30)

    def _generate_analysis(
        self,
        action_type: ActionType,
        target: str,
        step_count: int,
    ) -> str:
        """Build the one-line summary naming the action, target and step count."""
        action_desc = {
            ActionType.RESTART: "滾動重啟",
            ActionType.SCALE: "擴縮容",
            ActionType.ROLLBACK: "版本回滾",
            ActionType.DELETE: "資源清理",
            ActionType.PATCH: "配置修補",
            ActionType.APPLY: "配置應用",
            ActionType.EXEC: "指令執行",
            ActionType.CUSTOM: "自訂操作",
        }

        return (
            f"建議執行 {action_desc.get(action_type, '操作')} "
            f"於 {target},共 {step_count} 個步驟"
        )

    def _build_prompt(self, context: dict[str, Any]) -> str:
        """Build the LLM prompt (Phase 9.4 extension).

        The namespace falls back to ``self.default_namespace`` so the prompt
        agrees with the namespace used by the template path.
        """
        problem = context.get("problem", "N/A")
        target_service = context.get("target_service", "N/A")
        namespace = context.get("namespace") or self.default_namespace

        # Doubled braces below are literal braces in the rendered prompt.
        return f"""你是 AWOOOI 的行動規劃師。
根據以下問題制定修復計畫:

問題描述: {problem}
目標服務: {target_service}
命名空間: {namespace}

注意:
- 所有 kubectl 必須帶 -n {{namespace}}
- 必須包含前置檢查、執行步驟、驗證步驟、回滾方案

輸出 JSON:
```json
{{
    "action_type": "restart|scale|rollback|delete",
    "pre_check_steps": [
        {{"command": "kubectl ...", "description": "..."}}
    ],
    "execute_steps": [
        {{"command": "kubectl ...", "description": "..."}}
    ],
    "verify_steps": [
        {{"command": "kubectl ...", "description": "..."}}
    ],
    "rollback_steps": [
        {{"command": "kubectl ...", "description": "..."}}
    ],
    "estimated_duration_sec": 60,
    "analysis": "一句話摘要",
    "confidence": 0-1
}}
```"""

    def _parse_response(self, response: str) -> dict[str, Any]:
        """Parse the LLM response by delegating to ``self._extract_json``."""
        return self._extract_json(response)
|