- Import sorting (I001) - Unused imports (F401) - f-string without placeholders (F541) - Loop variable unused (B007) - zip() strict parameter (B905) - Exception chaining (B904) - collections.abc imports (UP035) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
665 lines
19 KiB
Python
665 lines
19 KiB
Python
"""
|
|
Agent Service - Phase 17 技術債修復
|
|
===================================
|
|
Phase 17 R4: 從 agents.py Router 抽離 Redis 操作
|
|
|
|
設計原則:
|
|
- Router 層只做 HTTP 路由,不直接存取 Redis
|
|
- Service 層封裝所有業務邏輯與 Redis 操作
|
|
- 使用 Protocol 定義介面,支援依賴注入
|
|
|
|
版本: v1.0
|
|
建立: 2026-03-26 (台北時區)
|
|
建立者: Claude Code (Phase 17 技術債修復)
|
|
"""
|
|
|
|
import json
|
|
from enum import Enum
|
|
from typing import Any, Protocol, runtime_checkable
|
|
from uuid import uuid4
|
|
|
|
from src.core.logging import get_logger
|
|
from src.core.redis_client import get_redis
|
|
from src.core.sse import EventType, SSEEvent, get_publisher
|
|
from src.models.incident import Incident, IncidentStatus, Severity, Signal
|
|
from src.services.consensus_engine import get_consensus_engine
|
|
from src.utils.timezone import now_taipei, now_taipei_iso
|
|
|
|
logger = get_logger("awoooi.agent_service")
|
|
|
|
|
|
# =============================================================================
|
|
# Constants
|
|
# =============================================================================
|
|
|
|
TASK_PREFIX = "agent_task:"
|
|
INCIDENT_PREFIX = "incident:"
|
|
TASK_TTL = 604800 # 7 天
|
|
|
|
|
|
# =============================================================================
|
|
# Task States
|
|
# =============================================================================
|
|
|
|
class TaskState(str, Enum):
|
|
"""分析任務狀態"""
|
|
PENDING = "pending" # 等待中
|
|
ANALYZING = "analyzing" # 分析中
|
|
CONSENSUS = "consensus" # 共識計算中
|
|
COMPLETED = "completed" # 已完成
|
|
FAILED = "failed" # 失敗
|
|
|
|
|
|
# =============================================================================
|
|
# Protocol Interface
|
|
# =============================================================================
|
|
|
|
@runtime_checkable
|
|
class IAgentTaskRepository(Protocol):
|
|
"""
|
|
Agent Task Repository Protocol
|
|
|
|
職責: Agent Task CRUD 操作 (Redis Working Memory)
|
|
"""
|
|
|
|
async def create_task(
|
|
self,
|
|
task_id: str,
|
|
incident_id: str,
|
|
trigger: str = "manual",
|
|
) -> dict[str, Any]:
|
|
"""建立新的分析任務"""
|
|
...
|
|
|
|
async def get_task(self, task_id: str) -> dict[str, Any] | None:
|
|
"""取得任務資料"""
|
|
...
|
|
|
|
async def update_task_state(
|
|
self,
|
|
task_id: str,
|
|
state: TaskState,
|
|
progress: int = 0,
|
|
current_step: str | None = None,
|
|
agents_completed: int = 0,
|
|
**extra_data: Any,
|
|
) -> bool:
|
|
"""更新任務狀態"""
|
|
...
|
|
|
|
async def save_task_result(
|
|
self,
|
|
task_id: str,
|
|
result_data: dict[str, Any],
|
|
) -> bool:
|
|
"""儲存任務結果"""
|
|
...
|
|
|
|
async def get_incident(self, incident_id: str) -> Incident | None:
|
|
"""從 Redis 取得 Incident"""
|
|
...
|
|
|
|
|
|
# =============================================================================
|
|
# Repository Implementation
|
|
# =============================================================================
|
|
|
|
class AgentTaskRedisRepository:
|
|
"""
|
|
Agent Task Redis Repository
|
|
|
|
實作 IAgentTaskRepository Protocol
|
|
所有 Redis 操作都封裝在此
|
|
"""
|
|
|
|
async def create_task(
|
|
self,
|
|
task_id: str,
|
|
incident_id: str,
|
|
trigger: str = "manual",
|
|
) -> dict[str, Any]:
|
|
"""
|
|
建立新的分析任務
|
|
|
|
Args:
|
|
task_id: 任務 ID
|
|
incident_id: 關聯的 Incident ID
|
|
trigger: 觸發來源 (manual/auto)
|
|
|
|
Returns:
|
|
建立的任務資料
|
|
"""
|
|
redis_client = get_redis()
|
|
task_key = f"{TASK_PREFIX}{task_id}"
|
|
|
|
task_data = {
|
|
"task_id": task_id,
|
|
"state": TaskState.PENDING.value,
|
|
"progress": 0,
|
|
"current_step": "任務已建立",
|
|
"agents_completed": 0,
|
|
"total_agents": 4,
|
|
"incident_id": incident_id,
|
|
"started_at": now_taipei_iso(),
|
|
"trigger": trigger,
|
|
}
|
|
|
|
await redis_client.set(
|
|
task_key,
|
|
json.dumps(task_data),
|
|
ex=TASK_TTL,
|
|
)
|
|
|
|
logger.info(
|
|
"task_created",
|
|
task_id=task_id,
|
|
incident_id=incident_id,
|
|
trigger=trigger,
|
|
)
|
|
|
|
return task_data
|
|
|
|
async def get_task(self, task_id: str) -> dict[str, Any] | None:
|
|
"""
|
|
取得任務資料
|
|
|
|
Args:
|
|
task_id: 任務 ID
|
|
|
|
Returns:
|
|
任務資料字典,不存在則返回 None
|
|
"""
|
|
redis_client = get_redis()
|
|
task_key = f"{TASK_PREFIX}{task_id}"
|
|
|
|
data = await redis_client.get(task_key)
|
|
if data is None:
|
|
return None
|
|
|
|
return json.loads(data)
|
|
|
|
async def update_task_state(
|
|
self,
|
|
task_id: str,
|
|
state: TaskState,
|
|
progress: int = 0,
|
|
current_step: str | None = None,
|
|
agents_completed: int = 0,
|
|
**extra_data: Any,
|
|
) -> bool:
|
|
"""
|
|
更新任務狀態
|
|
|
|
Args:
|
|
task_id: 任務 ID
|
|
state: 新狀態
|
|
progress: 進度百分比 (0-100)
|
|
current_step: 目前步驟描述
|
|
agents_completed: 已完成的 Agent 數量
|
|
**extra_data: 額外資料
|
|
|
|
Returns:
|
|
是否更新成功
|
|
"""
|
|
redis_client = get_redis()
|
|
task_key = f"{TASK_PREFIX}{task_id}"
|
|
|
|
try:
|
|
# 讀取現有資料
|
|
existing = await redis_client.get(task_key)
|
|
if existing:
|
|
task_data = json.loads(existing)
|
|
else:
|
|
task_data = {"task_id": task_id}
|
|
|
|
# 更新欄位
|
|
task_data.update({
|
|
"state": state.value,
|
|
"progress": progress,
|
|
"current_step": current_step,
|
|
"agents_completed": agents_completed,
|
|
**extra_data,
|
|
})
|
|
|
|
await redis_client.set(
|
|
task_key,
|
|
json.dumps(task_data),
|
|
ex=TASK_TTL,
|
|
)
|
|
|
|
return True
|
|
|
|
except Exception as e:
|
|
logger.exception(
|
|
"update_task_state_error",
|
|
task_id=task_id,
|
|
error=str(e),
|
|
)
|
|
return False
|
|
|
|
async def save_task_result(
|
|
self,
|
|
task_id: str,
|
|
result_data: dict[str, Any],
|
|
) -> bool:
|
|
"""
|
|
儲存任務結果
|
|
|
|
Args:
|
|
task_id: 任務 ID
|
|
result_data: 完整結果資料
|
|
|
|
Returns:
|
|
是否儲存成功
|
|
"""
|
|
redis_client = get_redis()
|
|
task_key = f"{TASK_PREFIX}{task_id}"
|
|
|
|
try:
|
|
await redis_client.set(
|
|
task_key,
|
|
json.dumps(result_data),
|
|
ex=TASK_TTL,
|
|
)
|
|
|
|
logger.info(
|
|
"task_result_saved",
|
|
task_id=task_id,
|
|
state=result_data.get("state"),
|
|
)
|
|
return True
|
|
|
|
except Exception as e:
|
|
logger.exception(
|
|
"save_task_result_error",
|
|
task_id=task_id,
|
|
error=str(e),
|
|
)
|
|
return False
|
|
|
|
async def get_incident(self, incident_id: str) -> Incident | None:
|
|
"""
|
|
從 Redis 取得 Incident
|
|
|
|
Args:
|
|
incident_id: Incident ID
|
|
|
|
Returns:
|
|
Incident 物件,不存在則返回 None
|
|
"""
|
|
redis_client = get_redis()
|
|
key = f"{INCIDENT_PREFIX}{incident_id}"
|
|
|
|
data = await redis_client.get(key)
|
|
if data is None:
|
|
return None
|
|
|
|
return Incident.model_validate_json(data)
|
|
|
|
|
|
# =============================================================================
|
|
# Agent Service
|
|
# =============================================================================
|
|
|
|
class AgentService:
|
|
"""
|
|
Agent Service - Agent Teams 業務邏輯
|
|
|
|
職責:
|
|
1. 任務生命週期管理
|
|
2. 協調 ConsensusEngine 執行分析
|
|
3. 推送 SSE 進度通知
|
|
|
|
使用方式:
|
|
service = get_agent_service()
|
|
task_id = await service.create_analysis_task(incident_id)
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
repository: IAgentTaskRepository | None = None,
|
|
) -> None:
|
|
"""
|
|
初始化 Agent Service
|
|
|
|
Args:
|
|
repository: Task Repository (預設使用 Redis 實作)
|
|
"""
|
|
self._repository = repository or AgentTaskRedisRepository()
|
|
|
|
# =========================================================================
|
|
# Task Management
|
|
# =========================================================================
|
|
|
|
def generate_task_id(self) -> str:
|
|
"""產生新的 Task ID"""
|
|
return f"TASK-{now_taipei().strftime('%Y%m%d')}-{uuid4().hex[:8].upper()}"
|
|
|
|
async def create_analysis_task(
|
|
self,
|
|
incident: Incident,
|
|
trigger: str = "manual",
|
|
) -> str:
|
|
"""
|
|
建立分析任務
|
|
|
|
Args:
|
|
incident: 要分析的 Incident
|
|
trigger: 觸發來源 (manual/auto)
|
|
|
|
Returns:
|
|
task_id
|
|
"""
|
|
task_id = self.generate_task_id()
|
|
|
|
await self._repository.create_task(
|
|
task_id=task_id,
|
|
incident_id=incident.incident_id,
|
|
trigger=trigger,
|
|
)
|
|
|
|
logger.info(
|
|
"analysis_task_created",
|
|
task_id=task_id,
|
|
incident_id=incident.incident_id,
|
|
severity=incident.severity.value,
|
|
)
|
|
|
|
return task_id
|
|
|
|
async def get_task_status(self, task_id: str) -> dict[str, Any] | None:
|
|
"""取得任務狀態"""
|
|
return await self._repository.get_task(task_id)
|
|
|
|
async def get_task_result(self, task_id: str) -> dict[str, Any] | None:
|
|
"""取得任務結果"""
|
|
return await self._repository.get_task(task_id)
|
|
|
|
async def get_incident(self, incident_id: str) -> Incident | None:
|
|
"""取得 Incident"""
|
|
return await self._repository.get_incident(incident_id)
|
|
|
|
# =========================================================================
|
|
# Analysis Execution
|
|
# =========================================================================
|
|
|
|
async def run_analysis(
|
|
self,
|
|
task_id: str,
|
|
incident: Incident,
|
|
) -> None:
|
|
"""
|
|
執行 Agent Teams 分析
|
|
|
|
流程:
|
|
1. 更新狀態為 ANALYZING
|
|
2. 收集各專家意見
|
|
3. 計算共識
|
|
4. 儲存結果
|
|
5. 推送 SSE 通知
|
|
|
|
Args:
|
|
task_id: 任務 ID
|
|
incident: 要分析的 Incident
|
|
"""
|
|
consensus_engine = get_consensus_engine()
|
|
|
|
try:
|
|
# Step 1: 更新狀態
|
|
await self._repository.update_task_state(
|
|
task_id,
|
|
TaskState.ANALYZING,
|
|
progress=10,
|
|
current_step="正在收集專家意見...",
|
|
)
|
|
|
|
# 推送 SSE 進度
|
|
publisher = await get_publisher()
|
|
await publisher.publish(SSEEvent(
|
|
type=EventType.AI_THINKING,
|
|
data={
|
|
"task_id": task_id,
|
|
"state": TaskState.ANALYZING.value,
|
|
"progress": 10,
|
|
"message": "Agent Teams 分析開始",
|
|
},
|
|
))
|
|
|
|
# Step 2: 收集意見
|
|
opinions = await consensus_engine.gather_opinions(
|
|
incident, timeout_sec=25.0
|
|
)
|
|
|
|
await self._repository.update_task_state(
|
|
task_id,
|
|
TaskState.CONSENSUS,
|
|
progress=60,
|
|
current_step="正在計算共識...",
|
|
agents_completed=len(opinions),
|
|
)
|
|
|
|
await publisher.publish(SSEEvent(
|
|
type=EventType.AI_THINKING,
|
|
data={
|
|
"task_id": task_id,
|
|
"state": TaskState.CONSENSUS.value,
|
|
"progress": 60,
|
|
"message": f"已收集 {len(opinions)} 位專家意見",
|
|
},
|
|
))
|
|
|
|
# Step 3: 計算共識
|
|
(
|
|
consensus_score,
|
|
recommended_action,
|
|
dissenting,
|
|
) = consensus_engine.calculate_consensus(opinions)
|
|
|
|
await self._repository.update_task_state(
|
|
task_id,
|
|
TaskState.CONSENSUS,
|
|
progress=80,
|
|
current_step="正在產生最終決策...",
|
|
)
|
|
|
|
# Step 4: 產生最終決策
|
|
result = await consensus_engine.generate_final_decision(
|
|
incident=incident,
|
|
opinions=opinions,
|
|
consensus_score=consensus_score,
|
|
recommended_action_type=recommended_action,
|
|
dissenting=dissenting,
|
|
)
|
|
|
|
# Step 5: 儲存完整結果
|
|
task_data = {
|
|
"task_id": task_id,
|
|
"state": TaskState.COMPLETED.value,
|
|
"progress": 100,
|
|
"current_step": "分析完成",
|
|
"agents_completed": len(opinions),
|
|
"total_agents": 4,
|
|
"consensus_id": result.consensus_id,
|
|
"incident_id": incident.incident_id,
|
|
"consensus_score": result.consensus_score,
|
|
"recommended_action": result.recommended_action,
|
|
"recommended_kubectl": result.recommended_kubectl,
|
|
"risk_level": result.risk_level,
|
|
"final_reasoning": result.final_reasoning,
|
|
"opinions": [op.to_dict() for op in result.opinions],
|
|
"dissenting_opinions": result.dissenting_opinions,
|
|
"completed_at": now_taipei_iso(),
|
|
}
|
|
|
|
await self._repository.save_task_result(task_id, task_data)
|
|
|
|
# 推送完成通知
|
|
await publisher.publish(SSEEvent(
|
|
type=EventType.AI_THINKING,
|
|
data={
|
|
"task_id": task_id,
|
|
"state": TaskState.COMPLETED.value,
|
|
"progress": 100,
|
|
"message": "分析完成",
|
|
"consensus_score": result.consensus_score,
|
|
"recommended_action": result.recommended_action,
|
|
},
|
|
))
|
|
|
|
logger.info(
|
|
"analysis_completed",
|
|
task_id=task_id,
|
|
consensus_id=result.consensus_id,
|
|
consensus_score=result.consensus_score,
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.exception(
|
|
"analysis_failed",
|
|
task_id=task_id,
|
|
error=str(e),
|
|
)
|
|
|
|
# 更新為失敗狀態
|
|
task_data = {
|
|
"task_id": task_id,
|
|
"state": TaskState.FAILED.value,
|
|
"progress": 0,
|
|
"error": str(e),
|
|
"completed_at": now_taipei_iso(),
|
|
}
|
|
|
|
await self._repository.save_task_result(task_id, task_data)
|
|
|
|
# 推送失敗通知
|
|
publisher = await get_publisher()
|
|
await publisher.publish(SSEEvent(
|
|
type=EventType.ERROR,
|
|
data={
|
|
"task_id": task_id,
|
|
"state": TaskState.FAILED.value,
|
|
"error": str(e),
|
|
},
|
|
))
|
|
|
|
# =========================================================================
|
|
# Incident Integration
|
|
# =========================================================================
|
|
|
|
def should_trigger_agent_analysis(self, incident: Incident) -> bool:
|
|
"""
|
|
判斷是否需要觸發 Agent Teams 分析
|
|
|
|
條件 (任一符合):
|
|
- P0/P1 緊急事件
|
|
- 多個服務受影響 (>2)
|
|
- 多個告警 (>3)
|
|
|
|
Args:
|
|
incident: 要判斷的 Incident
|
|
|
|
Returns:
|
|
是否應觸發分析
|
|
"""
|
|
return (
|
|
# P0/P1 緊急事件
|
|
incident.severity in (Severity.P0, Severity.P1)
|
|
# 或多個服務受影響
|
|
or len(incident.affected_services) > 2
|
|
# 或多個告警
|
|
or len(incident.signals) > 3
|
|
)
|
|
|
|
async def trigger_for_incident(
|
|
self,
|
|
incident_id: str,
|
|
) -> tuple[str | None, Incident | None]:
|
|
"""
|
|
為 Incident 觸發 Agent Teams 分析 (如果符合條件)
|
|
|
|
Args:
|
|
incident_id: Incident ID
|
|
|
|
Returns:
|
|
(task_id, incident) - task_id 為 None 表示未觸發
|
|
"""
|
|
# 讀取 Incident
|
|
incident = await self._repository.get_incident(incident_id)
|
|
if incident is None:
|
|
logger.warning("trigger_skipped_not_found", incident_id=incident_id)
|
|
return None, None
|
|
|
|
# 判斷是否需要 Agent Teams
|
|
if not self.should_trigger_agent_analysis(incident):
|
|
logger.debug(
|
|
"trigger_skipped_simple_case",
|
|
incident_id=incident_id,
|
|
severity=incident.severity.value,
|
|
)
|
|
return None, incident
|
|
|
|
# 建立任務
|
|
task_id = await self.create_analysis_task(incident, trigger="auto")
|
|
|
|
logger.info(
|
|
"auto_trigger_success",
|
|
task_id=task_id,
|
|
incident_id=incident_id,
|
|
severity=incident.severity.value,
|
|
)
|
|
|
|
return task_id, incident
|
|
|
|
# =========================================================================
|
|
# Incident Factory
|
|
# =========================================================================
|
|
|
|
def create_adhoc_incident(
|
|
self,
|
|
severity: str,
|
|
affected_services: list[str],
|
|
alert_names: list[str] | None = None,
|
|
) -> Incident:
|
|
"""
|
|
建立臨時 Incident (用於直接分析請求)
|
|
|
|
Args:
|
|
severity: 嚴重度 (P0/P1/P2/P3)
|
|
affected_services: 受影響服務列表
|
|
alert_names: 告警名稱列表
|
|
|
|
Returns:
|
|
建立的 Incident
|
|
"""
|
|
signals = []
|
|
if alert_names:
|
|
for alert_name in alert_names:
|
|
signals.append(Signal(
|
|
alert_name=alert_name,
|
|
severity=Severity(severity),
|
|
source="manual",
|
|
fired_at=now_taipei(),
|
|
))
|
|
|
|
return Incident(
|
|
severity=Severity(severity),
|
|
status=IncidentStatus.INVESTIGATING,
|
|
signals=signals,
|
|
affected_services=affected_services,
|
|
)
|
|
|
|
|
|
# =============================================================================
|
|
# Singleton
|
|
# =============================================================================
|
|
|
|
_agent_service: AgentService | None = None
|
|
|
|
|
|
def get_agent_service() -> AgentService:
|
|
"""取得 Agent Service 實例 (Singleton)"""
|
|
global _agent_service
|
|
if _agent_service is None:
|
|
_agent_service = AgentService()
|
|
return _agent_service
|