Files
awoooi/apps/api/src/agents/action_planner.py
OG T 938df7f291 fix(api): 全面清除假信心分數 - 遵循 feedback_confidence_truthfulness.md
🔴 違規修正: 規則匹配/Expert System 不是 AI 分析,confidence 必須 = 0.0

修正檔案:
- agents/action_planner.py: 0.9 → 0.0
- agents/blast_radius.py: 0.85/0.5/0.9 → 0.0
- agents/security.py: 計算公式 → 0.0
- signoz_webhook.py: 0.7 → 0.0
- auto_approve.py: default 0.5 → 0.0
- ci_auto_repair.py: 整個計算函數 → return 0.0
- error_analyzer_service.py: default 0.5 → 0.0
- intent_classifier.py: 計算公式 → 0.0
- openclaw.py: default 0.5 → 0.0
- resource_resolver.py: 0.8 → 0.0
- k8s_naming.py: 0.9/0.7 → 0.0

只有 LLM 真實分析返回的 confidence 才能 > 0

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-29 16:00:46 +08:00

571 lines
18 KiB
Python

"""
Action Planner Agent - 執行計畫生成專家
========================================
職責:
- 生成結構化執行計畫
- 定義 rollback 策略
- 設定驗證步驟
- 回傳完整 ActionPlan
符合 ADR-009 ActionPlannerAgent 規範
"""
import time
from dataclasses import dataclass, field
from enum import Enum
from typing import Any
import structlog
from src.agents.base import AgentResult, AgentStatus, BaseAgent
logger = structlog.get_logger(__name__)
# =============================================================================
# Action Plan Types
# =============================================================================
class ActionType(str, Enum):
    """Kinds of remediation action an execution plan can carry out."""

    # Restart a service
    RESTART = "restart"
    # Scale replicas up or down
    SCALE = "scale"
    # Roll back to a previous version
    ROLLBACK = "rollback"
    # Delete a resource
    DELETE = "delete"
    # Patch configuration
    PATCH = "patch"
    # Execute a command
    EXEC = "exec"
    # Apply a change
    APPLY = "apply"
    # Anything not covered above
    CUSTOM = "custom"
class ActionPhase(str, Enum):
    """Phase of a plan that a single step belongs to."""

    # Checks performed before anything is changed
    PRE_CHECK = "pre_check"
    # The main change itself
    EXECUTE = "execute"
    # Confirmation that the change had the desired effect
    VERIFY = "verify"
    # Undo steps, used when the plan fails
    ROLLBACK = "rollback"
@dataclass
class ActionStep:
    """
    One step of an execution plan.

    Fields:
    - command: the command line to run
    - description: human-readable explanation of the step
    - phase: the ActionPhase this step belongs to
    - timeout_sec: how long the step may run, in seconds
    - can_fail: whether a failure of this step is tolerated
    - order: position of the step (defaults to 0)
    """

    command: str
    description: str
    phase: ActionPhase
    timeout_sec: int = 60
    can_fail: bool = False
    order: int = 0

    def to_dict(self) -> dict[str, Any]:
        """Return the step as a plain dict, with the phase reduced to its string value."""
        serialized: dict[str, Any] = dict(
            command=self.command,
            description=self.description,
            phase=self.phase.value,
            timeout_sec=self.timeout_sec,
            can_fail=self.can_fail,
            order=self.order,
        )
        return serialized
@dataclass
class ActionPlan(AgentResult):
    """
    Result produced by ActionPlannerAgent: a complete execution plan.

    Extends AgentResult (defined in src.agents.base) with:
    - action_type: kind of action the plan performs
    - pre_check_steps: checks run before the change
    - execute_steps: the main change
    - verify_steps: checks run after the change
    - rollback_steps: undo steps, used when the plan fails
    - estimated_duration_sec: estimated run time in seconds
    - requires_approval: whether the plan must be approved before it runs
    - kubectl_commands: kubectl command lines taken from the execute steps
    """

    action_type: ActionType = ActionType.CUSTOM
    pre_check_steps: list[ActionStep] = field(default_factory=list)
    execute_steps: list[ActionStep] = field(default_factory=list)
    verify_steps: list[ActionStep] = field(default_factory=list)
    rollback_steps: list[ActionStep] = field(default_factory=list)
    estimated_duration_sec: int = 0
    requires_approval: bool = True
    kubectl_commands: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the base result dict extended with the plan-specific fields."""
        base = super().to_dict()
        base.update({
            "action_type": self.action_type.value,
            "pre_check_steps": [s.to_dict() for s in self.pre_check_steps],
            "execute_steps": [s.to_dict() for s in self.execute_steps],
            "verify_steps": [s.to_dict() for s in self.verify_steps],
            "rollback_steps": [s.to_dict() for s in self.rollback_steps],
            "estimated_duration_sec": self.estimated_duration_sec,
            "requires_approval": self.requires_approval,
            "kubectl_commands": self.kubectl_commands,
        })
        return base

    def get_all_steps(self) -> list[ActionStep]:
        """
        Return the forward-path steps in execution order.

        Steps are ordered phase by phase (pre-check, execute, verify) and by
        ``order`` within each phase. Rollback steps are not included.
        """
        # ``order`` is a per-phase index (each phase starts again at 0), so a
        # single sort over the concatenated list would interleave the phases
        # and place later pre-checks after the first execute step.
        ordered: list[ActionStep] = []
        for phase_steps in (
            self.pre_check_steps,
            self.execute_steps,
            self.verify_steps,
        ):
            ordered.extend(sorted(phase_steps, key=lambda s: s.order))
        return ordered

    def get_primary_command(self) -> str | None:
        """Return the command of the first execute step, or None if there is none."""
        if self.execute_steps:
            return self.execute_steps[0].command
        return None
# =============================================================================
# Action Templates
# =============================================================================
# Predefined execution-plan templates, keyed by template name.
#
# Every "command" and "description" string is rendered with str.format(), so
# {target}, {namespace}, {replicas} and {original_replicas} are placeholders.
# Any literal brace that must survive into the final command (for example a
# kubectl jsonpath expression) has to be doubled: "{{" / "}}".
ACTION_TEMPLATES: dict[str, dict[str, Any]] = {
    "restart": {
        "action_type": ActionType.RESTART,
        "requires_approval": False,  # a rolling restart is considered relatively safe
        "pre_check": [
            {
                "command": "kubectl get deployment {target} -n {namespace} -o wide",
                "description": "確認目標 Deployment 存在且健康",
            },
            {
                "command": "kubectl get pods -l app={target} -n {namespace} --no-headers | wc -l",
                "description": "確認目前 Pod 數量",
            },
        ],
        "execute": [
            {
                "command": "kubectl rollout restart deployment/{target} -n {namespace}",
                "description": "執行滾動重啟",
            },
        ],
        "verify": [
            {
                "command": "kubectl rollout status deployment/{target} -n {namespace} --timeout=120s",
                "description": "等待滾動更新完成",
                "timeout_sec": 120,
            },
            {
                "command": "kubectl get pods -l app={target} -n {namespace} -o wide",
                "description": "確認新 Pod 狀態",
            },
        ],
        "rollback": [
            {
                "command": "kubectl rollout undo deployment/{target} -n {namespace}",
                "description": "回滾到上一個版本",
            },
        ],
    },
    "scale": {
        "action_type": ActionType.SCALE,
        "requires_approval": False,
        "pre_check": [
            {
                # Braces are doubled so str.format() emits the literal jsonpath
                # expression '{.spec.replicas}'. Unescaped, format() treats it
                # as a positional field and raises IndexError.
                "command": "kubectl get deployment {target} -n {namespace} -o jsonpath='{{.spec.replicas}}'",
                "description": "記錄目前副本數",
            },
        ],
        "execute": [
            {
                "command": "kubectl scale deployment/{target} --replicas={replicas} -n {namespace}",
                "description": "調整副本數至 {replicas}",
            },
        ],
        "verify": [
            {
                "command": "kubectl rollout status deployment/{target} -n {namespace} --timeout=60s",
                "description": "等待擴縮容完成",
                "timeout_sec": 60,
            },
        ],
        "rollback": [
            {
                "command": "kubectl scale deployment/{target} --replicas={original_replicas} -n {namespace}",
                "description": "恢復原始副本數",
            },
        ],
    },
    "rollback": {
        "action_type": ActionType.ROLLBACK,
        "requires_approval": True,  # rollbacks must be approved
        "pre_check": [
            {
                "command": "kubectl rollout history deployment/{target} -n {namespace}",
                "description": "查看版本歷史",
            },
        ],
        "execute": [
            {
                "command": "kubectl rollout undo deployment/{target} -n {namespace}",
                "description": "回滾到上一個版本",
            },
        ],
        "verify": [
            {
                "command": "kubectl rollout status deployment/{target} -n {namespace} --timeout=120s",
                "description": "等待回滾完成",
                "timeout_sec": 120,
            },
            {
                "command": "kubectl get pods -l app={target} -n {namespace} -o wide",
                "description": "確認 Pod 狀態",
            },
        ],
        "rollback": [
            {
                "command": "kubectl rollout undo deployment/{target} -n {namespace}",
                "description": "再次回滾 (恢復原版本)",
            },
        ],
    },
    "delete_pod": {
        "action_type": ActionType.DELETE,
        "requires_approval": True,  # deletions must be approved
        "pre_check": [
            {
                "command": "kubectl get pod {target} -n {namespace} -o wide",
                "description": "確認目標 Pod 存在",
            },
        ],
        "execute": [
            {
                "command": "kubectl delete pod {target} -n {namespace}",
                "description": "刪除異常 Pod (觸發重建)",
            },
        ],
        "verify": [
            {
                "command": "kubectl get pods -n {namespace} | grep -v Completed | grep -v Terminating",
                "description": "確認新 Pod 已建立",
                "can_fail": True,
            },
        ],
        # A deleted Pod cannot be restored; the template relies on the owning
        # Deployment recreating it.
        "rollback": [],
    },
}
class ActionPlannerAgent(BaseAgent[ActionPlan]):
    """
    Agent that turns a problem description into a structured execution plan.

    Flow:
    1. Pick an action type from the explicit suggestion or from problem keywords
    2. Look up the matching entry in ACTION_TEMPLATES
    3. Fill the template placeholders to produce concrete steps
    4. Estimate how long the plan takes to run

    Usage:
    ```python
    agent = ActionPlannerAgent()
    result = await agent.analyze({
        "problem": "Pod keeps restarting",
        "target_service": "api",
        "namespace": "awoooi-prod",
    })
    print(result.execute_steps)  # [ActionStep(...), ...]
    ```
    """

    AGENT_NAME = "action-planner"
    AGENT_DESCRIPTION = "行動規劃師,制定修復步驟與回滾方案"
    AGENT_TOOLS = ["Read", "Glob"]

    def __init__(
        self,
        timeout_sec: float = 30.0,
        default_namespace: str = "awoooi-prod",
    ):
        """
        Initialise the agent.

        Args:
            timeout_sec: timeout forwarded to the BaseAgent constructor
            default_namespace: namespace used when the context supplies none
        """
        super().__init__(timeout_sec)
        self.default_namespace = default_namespace

    async def analyze(self, context: dict[str, Any]) -> ActionPlan:
        """
        Build an execution plan for the given context.

        Args:
            context: analysis context
                - problem: problem description
                - suggested_action: template name to use (restart/scale/rollback/delete_pod)
                - target_service: target service (a string, or a list whose first item is used)
                - namespace: Kubernetes namespace
                - replicas: desired replica count (scale only)
                - original_replicas: replica count restored by the scale rollback

        Returns:
            An ActionPlan. On any exception during planning, a plan with
            status FAILED, the error text, and requires_approval=True.
        """
        start_time = time.time()
        # Coerce before slicing: a None or non-string problem must not raise
        # here, because this call sits outside the try block below.
        self.logger.info(
            "action_planning_start",
            problem=str(context.get("problem") or "")[:100],
            target=context.get("target_service"),
        )
        try:
            # 1. Decide which template to use (unknown names fall back to restart)
            action_type = self._determine_action_type(context)
            template = ACTION_TEMPLATES.get(action_type, ACTION_TEMPLATES["restart"])

            # 2. Resolve the placeholder values
            params = self._prepare_params(context)

            # 3. Render each phase of the template
            pre_check_steps = self._generate_steps(
                template.get("pre_check", []),
                params,
                ActionPhase.PRE_CHECK,
            )
            execute_steps = self._generate_steps(
                template.get("execute", []),
                params,
                ActionPhase.EXECUTE,
            )
            verify_steps = self._generate_steps(
                template.get("verify", []),
                params,
                ActionPhase.VERIFY,
            )
            rollback_steps = self._generate_steps(
                template.get("rollback", []),
                params,
                ActionPhase.ROLLBACK,
            )

            # 4. Estimate the forward-path duration (rollback is excluded)
            estimated_duration = self._estimate_duration(
                pre_check_steps + execute_steps + verify_steps
            )

            # 5. Collect the kubectl command lines of the execute phase
            kubectl_commands = [
                step.command
                for step in execute_steps
                if step.command.startswith("kubectl")
            ]

            latency_ms = int((time.time() - start_time) * 1000)

            # 6. One-line summary of the plan
            analysis = self._generate_analysis(
                template["action_type"],
                params.get("target", "unknown"),
                len(execute_steps),
            )

            result = ActionPlan(
                agent_name=self.AGENT_NAME,
                status=AgentStatus.SUCCESS,
                # Rule/template matching is not AI analysis, so no confidence is claimed.
                confidence=0.0,
                analysis=analysis,
                latency_ms=latency_ms,
                action_type=template["action_type"],
                pre_check_steps=pre_check_steps,
                execute_steps=execute_steps,
                verify_steps=verify_steps,
                rollback_steps=rollback_steps,
                estimated_duration_sec=estimated_duration,
                requires_approval=template.get("requires_approval", True),
                kubectl_commands=kubectl_commands,
            )
            self.logger.info(
                "action_planning_complete",
                action_type=result.action_type.value,
                step_count=len(execute_steps),
                latency_ms=latency_ms,
            )
            return result
        except Exception as e:
            # Top-level boundary: report the failure as a FAILED plan instead
            # of propagating, and force approval for safety.
            latency_ms = int((time.time() - start_time) * 1000)
            error_text = str(e)
            self.logger.exception(
                "action_planning_error",
                error=error_text,
            )
            return ActionPlan(
                agent_name=self.AGENT_NAME,
                status=AgentStatus.FAILED,
                confidence=0.0,
                analysis=f"計畫生成失敗: {error_text}",
                latency_ms=latency_ms,
                error=error_text,
                requires_approval=True,
            )

    def _determine_action_type(self, context: dict[str, Any]) -> str:
        """
        Choose the ACTION_TEMPLATES key for this context.

        An explicit ``suggested_action`` naming a known template wins.
        Otherwise the problem text is scanned for keywords, and "restart" is
        the fallback. Missing or None values are treated as empty text.
        """
        suggested = str(context.get("suggested_action") or "").strip().lower()
        if suggested in ACTION_TEMPLATES:
            return suggested

        problem = str(context.get("problem") or "").lower()
        # Checked in order; the first rule with a matching keyword wins.
        keyword_rules: tuple[tuple[str, tuple[str, ...]], ...] = (
            ("restart", ("crash", "restart", "oom", "killed")),
            ("scale", ("slow", "latency", "capacity", "scale")),
            ("rollback", ("error", "failed", "rollback", "undo")),
            ("delete_pod", ("stuck", "pending", "delete pod")),
        )
        for template_name, keywords in keyword_rules:
            if any(kw in problem for kw in keywords):
                return template_name

        # Default: restart (regarded as the safest action)
        return "restart"

    def _prepare_params(self, context: dict[str, Any]) -> dict[str, str]:
        """
        Build the placeholder values used to render a template.

        Missing, None or empty values fall back to defaults: target "unknown",
        namespace ``self.default_namespace``, replicas 3, original_replicas 1.
        A replica count of 0 is kept as given.
        """
        # NOTE(review): these values are interpolated verbatim into command
        # strings that contain shell pipes; confirm they are validated before
        # the commands are executed.
        target = context.get("target_service")
        # target may arrive as a list; use its first entry
        if isinstance(target, (list, tuple)):
            target = target[0] if target else None
        if target is None or target == "":
            target = "unknown"

        namespace = context.get("namespace") or self.default_namespace

        replicas = context.get("replicas")
        original_replicas = context.get("original_replicas")
        return {
            "target": str(target),
            "namespace": str(namespace),
            "replicas": str(3 if replicas is None else replicas),
            "original_replicas": str(
                1 if original_replicas is None else original_replicas
            ),
        }

    def _generate_steps(
        self,
        template_steps: list[dict[str, Any]],
        params: dict[str, str],
        phase: ActionPhase,
    ) -> list[ActionStep]:
        """
        Render template entries into ActionStep objects for one phase.

        ``command`` and ``description`` are filled with str.format(**params);
        ``order`` is the entry's index within this phase.
        """
        return [
            ActionStep(
                command=tmpl["command"].format(**params),
                description=tmpl["description"].format(**params),
                phase=phase,
                timeout_sec=tmpl.get("timeout_sec", 60),
                can_fail=tmpl.get("can_fail", False),
                order=index,
            )
            for index, tmpl in enumerate(template_steps)
        ]

    def _estimate_duration(self, steps: list[ActionStep]) -> int:
        """Estimate run time in seconds: a third of each timeout, at least 30."""
        # Assumes a step takes on average one third of its timeout.
        total = sum(step.timeout_sec // 3 for step in steps)
        return max(total, 30)

    def _generate_analysis(
        self,
        action_type: ActionType,
        target: str,
        step_count: int,
    ) -> str:
        """Return the one-line summary naming the action, target and step count."""
        action_desc = {
            ActionType.RESTART: "滾動重啟",
            ActionType.SCALE: "擴縮容",
            ActionType.ROLLBACK: "版本回滾",
            ActionType.DELETE: "資源清理",
            ActionType.PATCH: "配置修補",
            ActionType.APPLY: "配置應用",
            ActionType.EXEC: "指令執行",
            ActionType.CUSTOM: "自訂操作",
        }
        return (
            f"建議執行 {action_desc.get(action_type, '操作')} "
            f"{target},共 {step_count} 個步驟"
        )

    def _build_prompt(self, context: dict[str, Any]) -> str:
        """
        Build the LLM prompt (Phase 9.4 extension).

        The namespace shown in the prompt falls back to
        ``self.default_namespace`` when the context supplies none.
        """
        problem = context.get("problem", "N/A")
        target_service = context.get("target_service", "N/A")
        namespace = context.get("namespace") or self.default_namespace
        # NOTE(review): the doubled braces below emit the literal text
        # "{namespace}" to the model; confirm that is intended rather than the
        # resolved namespace value.
        return f"""你是 AWOOOI 的行動規劃師。
根據以下問題制定修復計畫:
問題描述: {problem}
目標服務: {target_service}
命名空間: {namespace}
注意:
- 所有 kubectl 必須帶 -n {{namespace}}
- 必須包含前置檢查、執行步驟、驗證步驟、回滾方案
輸出 JSON:
```json
{{
"action_type": "restart|scale|rollback|delete",
"pre_check_steps": [
{{"command": "kubectl ...", "description": "..."}}
],
"execute_steps": [
{{"command": "kubectl ...", "description": "..."}}
],
"verify_steps": [
{{"command": "kubectl ...", "description": "..."}}
],
"rollback_steps": [
{{"command": "kubectl ...", "description": "..."}}
],
"estimated_duration_sec": 60,
"analysis": "一句話摘要",
"confidence": 0-1
}}
```"""

    def _parse_response(self, response: str) -> dict[str, Any]:
        """Parse an LLM response by delegating to ``self._extract_json``."""
        # _extract_json is not defined in this class; presumably inherited
        # from BaseAgent.
        return self._extract_json(response)