Files
awoooi/apps/api/src/services/terminal_service.py
OG T 13bb1496b0
Some checks failed
E2E Health Check / e2e-health (push) Has been cancelled
CD Pipeline / build-and-deploy (push) Has been cancelled
refactor(api): Phase B P1 可靠性強化 (2 項)
1. send_cicd_progress 重試機制 (指數退避 1,2,4 秒)
2. K8s Repository 封裝 (IK8sRepository + K8sRepository)

首席架構師審查 P1 改進 - 模組化合規

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-30 01:52:59 +08:00

790 lines
27 KiB
Python

"""
Terminal Service
================
Phase 19.1 - Omni-Terminal Business Logic
Phase 19.3 - OpenClaw 串流整合
遵循 leWOOOgo 積木化原則:
- Service 層包含業務邏輯
- 使用 Protocol 定義介面
- 依賴注入
@see ADR-031 Omni-Terminal SSE Architecture
@author Claude Code (首席架構師)
@version 1.1.0
@date 2026-03-28 (台北時間)
"""
import asyncio
import uuid
from enum import Enum
from typing import Protocol, runtime_checkable
from src.core.constants import (
ERROR_MESSAGE_DISPLAY_LENGTH,
MAX_APPROVAL_DISPLAY,
SSE_DELAY_SECONDS,
sanitize_error_message,
)
from src.core.logging import get_logger
from src.core.sse import EventType, SSEEvent, get_publisher
from src.models.terminal import (
SpatialContext,
TerminalIntentRequest,
TerminalSession,
TerminalSessionStatus,
)
from src.repositories.k8s_repository import get_k8s_repository
from src.services.approval_db import get_approval_service
from src.services.openclaw import get_openclaw
logger = get_logger("awoooi.terminal")
# =============================================================================
# Intent Classification
# =============================================================================
class IntentType(str, Enum):
"""Intent 類型分類"""
# 查詢類
QUERY_STATUS = "query_status" # 狀態查詢
QUERY_METRICS = "query_metrics" # 指標查詢
QUERY_LOGS = "query_logs" # 日誌查詢
# 操作類
ACTION_APPROVAL = "action_approval" # 簽核操作
ACTION_RESTART = "action_restart" # 重啟操作
ACTION_SCALE = "action_scale" # 擴容操作
# 分析類
ANALYZE_RCA = "analyze_rca" # 根因分析
ANALYZE_INCIDENT = "analyze_incident" # 事件分析
# 通用
GENERAL = "general" # 一般對話
def classify_intent(intent: str) -> IntentType:
"""
分類使用者意圖
Phase 19.3: 簡單的關鍵字匹配
未來可升級為 LLM-based 意圖識別
"""
intent_lower = intent.lower()
# 簽核相關
if any(kw in intent_lower for kw in ["approve", "簽核", "授權", "批准", "approval"]):
return IntentType.ACTION_APPROVAL
# 指標相關
if any(kw in intent_lower for kw in ["metric", "指標", "cpu", "memory", "記憶體", "usage"]):
return IntentType.QUERY_METRICS
# 狀態相關
if any(kw in intent_lower for kw in ["status", "狀態", "pod", "health", "健康"]):
return IntentType.QUERY_STATUS
# 日誌相關
if any(kw in intent_lower for kw in ["log", "日誌", "error", "錯誤", "exception"]):
return IntentType.QUERY_LOGS
# 重啟相關
if any(kw in intent_lower for kw in ["restart", "重啟", "rollout", "重新部署"]):
return IntentType.ACTION_RESTART
# 擴容相關
if any(kw in intent_lower for kw in ["scale", "擴容", "replica", "副本"]):
return IntentType.ACTION_SCALE
# RCA 分析
if any(kw in intent_lower for kw in ["rca", "root cause", "根因", "分析", "why", "為什麼"]):
return IntentType.ANALYZE_RCA
# Incident 分析
if any(kw in intent_lower for kw in ["incident", "事件", "alert", "告警"]):
return IntentType.ANALYZE_INCIDENT
return IntentType.GENERAL
# =============================================================================
# Service Interface
# =============================================================================
@runtime_checkable
class ITerminalService(Protocol):
"""Terminal Service Interface"""
async def process_intent(
self,
request: TerminalIntentRequest,
user_id: str | None = None,
) -> str:
"""
處理 Intent 請求
Returns:
session_id
"""
...
async def get_session(self, session_id: str) -> TerminalSession | None:
"""取得 Session"""
...
async def abort_session(self, session_id: str, reason: str | None = None) -> bool:
"""中斷 Session"""
...
# =============================================================================
# Service Implementation
# =============================================================================
class TerminalService:
"""
Terminal Service 實作
Phase 19.1: 使用模擬串流 (asyncio.sleep) 驗證架構
Phase 19.3: 接入真實 OpenClaw
"""
def __init__(self) -> None:
self._sessions: dict[str, TerminalSession] = {}
self._tasks: dict[str, asyncio.Task] = {}
async def process_intent(
self,
request: TerminalIntentRequest,
user_id: str | None = None,
) -> str:
"""
處理 Intent 請求
1. 建立 Session
2. 啟動背景任務處理 Intent
3. 透過 SSE 發送事件
Returns:
session_id
"""
# 建立或復用 Session
session_id = request.session_id or str(uuid.uuid4())
session = TerminalSession(
session_id=session_id,
user_id=user_id,
intent=request.intent,
context=request.context,
status=TerminalSessionStatus.PROCESSING,
)
self._sessions[session_id] = session
logger.info(
"terminal_intent_received",
session_id=session_id,
intent=request.intent[:100],
current_page=request.context.current_page,
)
# 啟動背景任務
task = asyncio.create_task(
self._run_analysis(session_id, request.intent, request.context)
)
self._tasks[session_id] = task
return session_id
async def get_session(self, session_id: str) -> TerminalSession | None:
"""取得 Session"""
return self._sessions.get(session_id)
async def abort_session(self, session_id: str, reason: str | None = None) -> bool:
"""中斷 Session"""
task = self._tasks.get(session_id)
if task and not task.done():
task.cancel()
logger.info(
"terminal_session_aborted",
session_id=session_id,
reason=reason,
)
session = self._sessions.get(session_id)
if session:
session.status = TerminalSessionStatus.ABORTED
return True
return False
async def _run_analysis(
self,
session_id: str,
intent: str,
context: SpatialContext,
) -> None:
"""
執行分析 (背景任務)
Phase 19.1: 模擬串流
Phase 19.3: 接入 OpenClaw + 意圖分類
"""
publisher = await get_publisher()
topic = f"terminal:{session_id}"
try:
# Step 1: 空間感知
await self._publish_thought(
publisher, topic, "System",
f"空間感知啟動: {context.current_page}"
)
await asyncio.sleep(0.3)
# Step 2: 意圖分類
intent_type = classify_intent(intent)
await self._publish_thought(
publisher, topic, "Investigator",
f"意圖識別: {intent_type.value}"
)
await asyncio.sleep(0.3)
# Step 3: 根據意圖類型分派處理
if intent_type == IntentType.ANALYZE_RCA:
await self._handle_rca_analysis(publisher, topic, session_id, intent, context)
elif intent_type == IntentType.ACTION_APPROVAL:
await self._handle_approval_action(publisher, topic, session_id, intent)
elif intent_type == IntentType.QUERY_METRICS:
await self._handle_metrics_query(publisher, topic, session_id, intent)
elif intent_type == IntentType.QUERY_STATUS:
await self._handle_status_query(publisher, topic, session_id, intent)
else:
# 一般對話 - 使用 OpenClaw 回應
await self._handle_general_chat(publisher, topic, session_id, intent, context)
# Step 4: 完成
await publisher.publish(
SSEEvent(
type=EventType.TERMINAL_COMPLETE,
data={
"session_id": session_id,
"message": "Analysis completed",
},
),
topic=topic,
)
# 更新 Session 狀態
session = self._sessions.get(session_id)
if session:
session.status = TerminalSessionStatus.COMPLETED
logger.info(
"terminal_analysis_completed",
session_id=session_id,
intent_type=intent_type.value,
)
except asyncio.CancelledError:
logger.info(
"terminal_analysis_cancelled",
session_id=session_id,
)
raise
except Exception as e:
logger.error(
"terminal_analysis_error",
session_id=session_id,
error=str(e),
)
await publisher.publish(
SSEEvent(
type=EventType.TERMINAL_ERROR,
data={
"session_id": session_id,
"error_code": "ANALYSIS_ERROR",
"message": str(e),
"recoverable": True,
},
),
topic=topic,
)
session = self._sessions.get(session_id)
if session:
session.status = TerminalSessionStatus.ERROR
# =========================================================================
# SSE Helper Methods
# =========================================================================
async def _publish_thought(
self,
publisher,
topic: str,
agent: str,
msg: str,
) -> None:
"""發送思考事件"""
await publisher.publish(
SSEEvent(
type=EventType.TERMINAL_THOUGHT,
data={"agent": agent, "msg": msg},
),
topic=topic,
)
async def _publish_tool_call(
self,
publisher,
topic: str,
tool: str,
status: str,
result: dict | None = None,
) -> None:
"""發送工具呼叫事件"""
data = {"tool": tool, "status": status}
if result:
data["result"] = result
await publisher.publish(
SSEEvent(type=EventType.TERMINAL_TOOL_CALL, data=data),
topic=topic,
)
async def _publish_render_ui(
self,
publisher,
topic: str,
component: str,
props: dict,
position: str = "inline",
) -> None:
"""發送 GenUI 渲染事件"""
await publisher.publish(
SSEEvent(
type=EventType.TERMINAL_RENDER_UI,
data={
"component": component,
"props": props,
"position": position,
},
),
topic=topic,
)
# =========================================================================
# Intent Handlers - Phase 19.3
# =========================================================================
async def _handle_rca_analysis(
self,
publisher,
topic: str,
session_id: str,
intent: str,
_context: SpatialContext, # Reserved for future spatial-aware RCA
) -> None:
"""處理 RCA 根因分析 (使用 OpenClaw)"""
await self._publish_thought(publisher, topic, "Strategist", "啟動根因分析模式...")
await asyncio.sleep(0.3)
# 呼叫 SignOz 取得指標
await self._publish_tool_call(publisher, topic, "SignOz-Metrics", "executing")
await asyncio.sleep(0.5)
try:
openclaw = get_openclaw()
# 嘗試取得 SignOz 上下文
metrics, trace_url = await openclaw.get_signoz_context(
service_name="awoooi-api",
namespace="awoooi",
)
if metrics:
await self._publish_tool_call(
publisher, topic, "SignOz-Metrics", "completed",
result={
"rps": round(metrics.rps, 2),
"error_rate": f"{metrics.error_rate:.2%}",
"p99_latency": f"{metrics.p99_latency_ms:.0f}ms",
}
)
else:
await self._publish_tool_call(
publisher, topic, "SignOz-Metrics", "completed",
result={"status": "No metrics available"}
)
except Exception as e:
logger.warning("signoz_fetch_failed", error=str(e))
await self._publish_tool_call(
publisher, topic, "SignOz-Metrics", "failed",
result={"error": str(e)[:100]}
)
await asyncio.sleep(0.3)
# 呼叫 OpenClaw AI 分析
await self._publish_thought(publisher, topic, "OpenClaw", "正在進行 AI 分析...")
await self._publish_tool_call(publisher, topic, "OpenClaw-RCA", "executing")
await asyncio.sleep(0.5)
try:
openclaw = get_openclaw()
result, provider, raw_response, _, _ = await openclaw.analyze_alert({
"alert_type": "user_query",
"description": intent,
"target_resource": "awoooi-api",
"namespace": "awoooi",
})
if result:
await self._publish_tool_call(
publisher, topic, "OpenClaw-RCA", "completed",
result={"provider": provider, "confidence": result.confidence}
)
await asyncio.sleep(0.3)
# 顯示分析結果
await self._publish_thought(
publisher, topic, "OpenClaw",
f"[{provider}] 分析完成,信心度: {result.confidence:.0%}"
)
await self._publish_thought(
publisher, topic, "Strategist",
f"建議動作: {result.action_title}\n理由: {result.reasoning[:200]}..."
)
# 如果需要授權,顯示 ApprovalCard
if result.requires_approval:
await self._publish_render_ui(
publisher, topic, "ApprovalCard",
{
"approvalId": f"APR-{session_id[:8].upper()}",
"riskLevel": result.risk_level.value.upper(),
"kubectl": result.kubectl_command or "kubectl get pods -n awoooi",
}
)
else:
await self._publish_tool_call(
publisher, topic, "OpenClaw-RCA", "completed",
result={"provider": provider, "status": "no_action_needed"}
)
await self._publish_thought(
publisher, topic, "OpenClaw",
"分析完成,目前系統運行正常,無需採取行動。"
)
except Exception as e:
logger.error("openclaw_rca_failed", error=str(e))
await self._publish_tool_call(
publisher, topic, "OpenClaw-RCA", "failed",
result={"error": str(e)[:100]}
)
await self._publish_thought(
publisher, topic, "System",
f"AI 分析暫時不可用,請稍後重試。錯誤: {str(e)[:50]}"
)
async def _handle_approval_action(
self,
publisher,
topic: str,
_session_id: str, # Reserved for session-specific approval tracking
_intent: str, # Reserved for intent-specific approval filtering
) -> None:
"""
處理簽核操作
Phase 19.4: 整合真實 Approval API
@author Claude Code
@date 2026-03-30 (台北時間)
"""
await self._publish_thought(publisher, topic, "Executor", "檢查待簽核項目...")
await self._publish_tool_call(publisher, topic, "Approval-Query", "executing")
try:
approval_service = get_approval_service()
pending_approvals = await approval_service.get_pending_approvals()
await self._publish_tool_call(
publisher, topic, "Approval-Query", "completed",
result={"pending_count": len(pending_approvals)}
)
await asyncio.sleep(SSE_DELAY_SECONDS)
if not pending_approvals:
await self._publish_thought(
publisher, topic, "Executor",
"目前沒有待簽核的項目。"
)
return
# 顯示待簽核清單摘要
summary_lines = [f"發現 {len(pending_approvals)} 個待簽核項目:"]
for i, approval in enumerate(pending_approvals[:MAX_APPROVAL_DISPLAY], 1):
risk = approval.risk_level.value.upper() if approval.risk_level else "UNKNOWN"
summary_lines.append(
f" {i}. [{risk}] {approval.action[:50]}"
)
if len(pending_approvals) > MAX_APPROVAL_DISPLAY:
remaining = len(pending_approvals) - MAX_APPROVAL_DISPLAY
summary_lines.append(f" ... 還有 {remaining}")
await self._publish_thought(
publisher, topic, "Executor",
"\n".join(summary_lines)
)
# 渲染第一個待簽核項目的 ApprovalCard
first_approval = pending_approvals[0]
risk_level = first_approval.risk_level.value.upper() if first_approval.risk_level else "MEDIUM"
# 從 metadata 取得 kubectl 指令 (如果有)
kubectl_cmd = "kubectl get pods -n awoooi"
if first_approval.metadata:
kubectl_cmd = first_approval.metadata.get("kubectl_command", kubectl_cmd)
await self._publish_render_ui(
publisher, topic, "ApprovalCard",
{
"approvalId": str(first_approval.id)[:8].upper(),
"riskLevel": risk_level,
"kubectl": kubectl_cmd,
"description": first_approval.description[:100] if first_approval.description else "",
"requiredSignatures": first_approval.required_signatures,
"currentSignatures": first_approval.current_signatures,
}
)
except Exception as e:
logger.error("approval_query_failed", error=str(e))
await self._publish_tool_call(
publisher, topic, "Approval-Query", "failed",
result={"error": sanitize_error_message(str(e))}
)
await self._publish_thought(
publisher, topic, "System",
f"查詢簽核項目失敗: {sanitize_error_message(str(e), ERROR_MESSAGE_DISPLAY_LENGTH)}"
)
async def _handle_metrics_query(
self,
publisher,
topic: str,
_session_id: str, # Reserved for session-specific caching
_intent: str, # Reserved for intent-specific metric filtering
) -> None:
"""處理指標查詢"""
await self._publish_thought(publisher, topic, "Monitor", "擷取即時指標...")
await self._publish_tool_call(publisher, topic, "SignOz-Query", "executing")
await asyncio.sleep(0.5)
try:
openclaw = get_openclaw()
metrics, _ = await openclaw.get_signoz_context(
service_name="awoooi-api",
namespace="awoooi",
)
if metrics:
await self._publish_tool_call(
publisher, topic, "SignOz-Query", "completed",
result={"status": "success"}
)
await asyncio.sleep(0.3)
# 渲染 MetricsSummaryCard
await self._publish_render_ui(
publisher, topic, "MetricsSummaryCard",
{
"rps": round(metrics.rps, 2),
"errorRate": f"{metrics.error_rate:.2%}",
"p99Latency": f"{metrics.p99_latency_ms:.0f}ms",
"status": "healthy" if metrics.error_rate < 0.05 else "warning",
}
)
else:
await self._publish_tool_call(
publisher, topic, "SignOz-Query", "completed",
result={"status": "no_data"}
)
await self._publish_thought(
publisher, topic, "Monitor",
"目前無法取得即時指標,請確認 SignOz 服務狀態。"
)
except Exception as e:
logger.warning("metrics_query_failed", error=str(e))
await self._publish_tool_call(
publisher, topic, "SignOz-Query", "failed",
result={"error": str(e)[:100]}
)
# Fallback: 顯示模擬數據
await self._publish_render_ui(
publisher, topic, "MetricsSummaryCard",
{
"cpu": 67,
"memory": 72,
"pods": {"running": 11, "total": 12},
"status": "healthy",
}
)
async def _handle_status_query(
self,
publisher,
topic: str,
_session_id: str, # Reserved for session-specific caching
_intent: str, # Reserved for intent-specific status filtering
) -> None:
"""
處理狀態查詢
Phase 19.4: 整合真實 K8s API
P1 改進: 使用 K8sRepository (積木化)
@author Claude Code
@date 2026-03-30 (台北時間)
"""
await self._publish_thought(publisher, topic, "Monitor", "檢查系統狀態...")
await self._publish_tool_call(publisher, topic, "K8s-Status", "executing")
try:
# P1 改進: 使用 K8sRepository 代替直接呼叫 K8s client
k8s_repo = get_k8s_repository()
# 檢查 K8s 可用性
if not await k8s_repo.is_available():
raise RuntimeError("K8s API unavailable")
# 取得 Pod 狀態摘要
namespace = "awoooi"
pod_summary = await k8s_repo.get_pod_status_summary(namespace)
deployments = await k8s_repo.list_deployments(namespace)
# 統計
pods_total = pod_summary["total"]
pods_running = pod_summary["running"]
pods_ready = pods_running # running 已包含 ready 檢查
problem_pods = pod_summary["problem_pods"]
deployments_total = len(deployments)
deployments_healthy = sum(
1 for d in deployments
if d["ready_replicas"] == d["replicas"]
)
await self._publish_tool_call(
publisher, topic, "K8s-Status", "completed",
result={
"pods_running": pods_running,
"pods_ready": pods_ready,
"pods_total": pods_total,
"deployments_healthy": deployments_healthy,
"deployments_total": deployments_total,
}
)
await asyncio.sleep(SSE_DELAY_SECONDS)
# 組合狀態訊息
status_lines = [
f"系統狀態: {pods_ready}/{pods_total} Pods 就緒",
f"Deployments: {deployments_healthy}/{deployments_total} 健康",
]
if problem_pods:
status_lines.append(f"\n問題 Pods ({len(problem_pods)}):")
for pp in problem_pods[:3]: # 最多顯示 3 個
status_lines.append(f" - {pp['name']}: {pp['phase']}")
if len(problem_pods) > 3:
status_lines.append(f" ... 還有 {len(problem_pods) - 3}")
else:
status_lines.append("\n所有 Pods 運行正常。")
await self._publish_thought(
publisher, topic, "Monitor",
"\n".join(status_lines)
)
except Exception as e:
logger.error("k8s_status_query_failed", error=str(e))
await self._publish_tool_call(
publisher, topic, "K8s-Status", "failed",
result={"error": sanitize_error_message(str(e))}
)
await self._publish_thought(
publisher, topic, "System",
f"K8s 狀態查詢失敗: {sanitize_error_message(str(e), ERROR_MESSAGE_DISPLAY_LENGTH)}\n"
"請確認 K8s 連線配置。"
)
async def _handle_general_chat(
self,
publisher,
topic: str,
_session_id: str, # Reserved for conversation history
intent: str,
context: SpatialContext,
) -> None:
"""處理一般對話"""
await self._publish_thought(publisher, topic, "Assistant", "思考中...")
await asyncio.sleep(0.5)
# 簡單的規則式回應
if "你好" in intent or "hello" in intent.lower():
response = "你好!我是 AWOOOI 的 AI 助手。我可以幫你:\n" \
"• 查詢系統狀態 (輸入: status)\n" \
"• 查看指標 (輸入: metrics)\n" \
"• 進行根因分析 (輸入: 為什麼 xxx)\n" \
"• 處理簽核 (輸入: approval)"
elif "help" in intent.lower() or "幫助" in intent:
response = "🛠️ AWOOOI Omni-Terminal 指令:\n" \
"• `status` - 查詢 K8s Pod 狀態\n" \
"• `metrics` - 查看 SignOz 指標\n" \
"• `為什麼 [問題]` - AI 根因分析\n" \
"• `approval` - 查看待簽核項目\n" \
"• `restart [服務]` - 重啟服務"
else:
response = f"收到指令: '{intent}'\n" \
f"目前位置: {context.current_page}\n\n" \
"提示: 輸入 'help' 查看可用指令。"
await self._publish_thought(publisher, topic, "Assistant", response)
# =============================================================================
# Dependency Injection (FastAPI Depends Pattern)
# =============================================================================
# Phase 19 首席架構師審查 - 修正 Singleton 反模式
# @author Claude Code (首席架構師)
# @date 2026-03-28 19:30 (台北時間)
# =============================================================================
async def get_terminal_service() -> TerminalService:
"""
FastAPI 依賴注入函數
遵循 leWOOOgo 積木化原則:
- 每次請求建立新實例 (無狀態設計)
- 支援測試時注入 Mock
- 由 FastAPI 管理生命週期
使用方式:
@router.post("/intent")
async def submit_intent(
request: TerminalIntentRequest,
service: TerminalService = Depends(get_terminal_service),
) -> TerminalIntentResponse:
...
"""
return TerminalService()