""" Agent (OpenClaw) Endpoints ADR-005: BFF 架構 - 所有 AI 調用經過 BFF Phase 1.2: 真實 Ollama 串接 """ import json import logging from datetime import datetime from typing import Literal from uuid import UUID, uuid4 import httpx from fastapi import APIRouter, Query from fastapi.responses import StreamingResponse from pydantic import BaseModel router = APIRouter() logger = logging.getLogger(__name__) # ==================== Ollama Config ==================== # 2026-05-19 Codex: agent thinking stream follows GCP-A → GCP-B → 111. def _get_ollama_endpoints(): from src.services.ollama_endpoint_resolver import resolve_ollama_order return resolve_ollama_order("interactive") OLLAMA_MODEL = "llama3.2:latest" # 可根據實際部署調整 OLLAMA_TIMEOUT = 120.0 # 串流超時 class ChatRequest(BaseModel): message: str conversation_id: UUID | None = None context: dict | None = None class SuggestedAction(BaseModel): id: str label: str description: str | None = None risk_level: Literal["low", "medium", "high", "critical"] class ChatResponse(BaseModel): message: str conversation_id: UUID actions: list[SuggestedAction] | None = None requires_approval: bool = False approval_id: UUID | None = None class AgentStatus(BaseModel): status: Literal["idle", "thinking", "executing", "waiting_approval"] active_conversations: int current_task: str | None = None last_activity: datetime | None = None @router.post("/chat", response_model=ChatResponse) async def chat_with_agent(request: ChatRequest) -> ChatResponse: """與 OpenClaw 對話""" conversation_id = request.conversation_id or uuid4() # TODO: 實際調用 OpenClaw return ChatResponse( message=f"收到訊息: {request.message}", conversation_id=conversation_id, requires_approval=False, ) @router.post("/chat/stream") async def chat_with_agent_stream(request: ChatRequest) -> StreamingResponse: """與 OpenClaw 對話 (SSE 串流)""" async def generate(): # TODO: 實際串流 yield "data: Hello from OpenClaw\n\n" yield "data: [DONE]\n\n" return StreamingResponse( generate(), media_type="text/event-stream", ) 


@router.get("/status", response_model=AgentStatus)
async def get_agent_status() -> AgentStatus:
    """Return the OpenClaw agent status.

    Placeholder implementation: always reports ``idle`` with zero active
    conversations, no current task, and "now" as the last activity time.
    """
    return AgentStatus(
        status="idle",
        active_conversations=0,
        current_task=None,
        # NOTE(review): utcnow() yields a naive datetime and is deprecated
        # since Python 3.12; switching to an aware datetime would change the
        # serialized value, so it is left as-is here.
        last_activity=datetime.utcnow(),
    )


# Buffered tokens are flushed once the buffer reaches this many characters...
_THINKING_FLUSH_LENGTH = 10
# ...or as soon as it contains any of these punctuation / newline characters.
_THINKING_FLUSH_MARKERS = "。!?,、\n"


def _sse_event(event_type: str, content: str) -> str:
    """Format one SSE ``data:`` frame carrying a JSON ``{type, content}`` payload."""
    payload = json.dumps({"type": event_type, "content": content}, ensure_ascii=False)
    return f"data: {payload}\n\n"


@router.get("/thinking")
async def get_agent_thinking(
    prompt: str = Query(
        default="你是 AWOOOI 智能運維助手。請簡短分析一下目前系統的健康狀態,用中文回答。",
        description="發送給 AI 的提示詞",
    ),
    model: str = Query(default=OLLAMA_MODEL, description="Ollama 模型名稱"),
) -> StreamingResponse:
    """
    OpenClaw thinking trace (SSE stream).

    Phase 1.2: streams from the configured Ollama provider pool.

    Each endpoint returned by ``_get_ollama_endpoints()`` is tried in order by
    POSTing to ``{url}/api/generate`` with ``stream=True``. Frames are JSON
    objects with a ``type`` of ``thinking`` (progress and generated text),
    ``result`` (successful completion) or ``error`` (every endpoint failed),
    and the stream always ends with a literal ``data: [DONE]`` frame.

    Args:
        prompt: Prompt text forwarded to the model.
        model: Ollama model name to request.

    Returns:
        A ``text/event-stream`` response with caching and proxy buffering
        disabled via response headers.
    """

    async def generate_thinking_stream():
        """Call Ollama and convert its line-delimited JSON into SSE frames."""
        # 1. Start thinking
        yield _sse_event("thinking", "正在連接 AI 模型...")

        last_error = ""
        async with httpx.AsyncClient(timeout=OLLAMA_TIMEOUT) as client:
            # 2. Send the request to Ollama
            yield _sse_event("thinking", f"模型: {model}")

            for endpoint in _get_ollama_endpoints():
                if not endpoint.url:
                    continue
                try:
                    async with client.stream(
                        "POST",
                        f"{endpoint.url}/api/generate",
                        json={
                            "model": model,
                            "prompt": prompt,
                            "stream": True,
                        },
                    ) as response:
                        if response.status_code != 200:
                            last_error = f"HTTP {response.status_code}"
                            # `logger` is a stdlib logger: structured fields
                            # must go through %-args, not arbitrary keywords
                            # (which raise TypeError and would kill the stream).
                            logger.warning(
                                "agent_thinking_ollama_http_error provider=%s status=%s",
                                endpoint.provider_name,
                                response.status_code,
                            )
                            continue

                        yield _sse_event("thinking", "開始接收 AI 回應...")

                        # 3. Stream the Ollama response
                        buffer = ""
                        async for line in response.aiter_lines():
                            if not line:
                                continue
                            try:
                                chunk = json.loads(line)
                            except json.JSONDecodeError as e:
                                # Skip the malformed line and keep streaming.
                                logger.warning("JSON 解析失敗: %s... - %s", line[:100], e)
                                continue

                            token = chunk.get("response", "")
                            done = chunk.get("done", False)

                            if token:
                                # Accumulate tokens; flush every N characters
                                # or as soon as punctuation/newline shows up.
                                buffer += token
                                if len(buffer) >= _THINKING_FLUSH_LENGTH or any(
                                    marker in buffer for marker in _THINKING_FLUSH_MARKERS
                                ):
                                    yield _sse_event("thinking", buffer)
                                    buffer = ""

                            if done:
                                # Flush whatever is still buffered
                                if buffer:
                                    yield _sse_event("thinking", buffer)
                                # Completion message
                                yield _sse_event("result", "分析完成")
                                yield "data: [DONE]\n\n"
                                return

                        # The body ended without a `done` chunk: record why
                        # this endpoint is treated as failed instead of
                        # leaving the final error message as "unknown".
                        last_error = "stream ended without done marker"
                        logger.warning(
                            "agent_thinking_ollama_stream_truncated provider=%s",
                            endpoint.provider_name,
                        )

                except (httpx.ConnectError, httpx.TimeoutException) as e:
                    last_error = type(e).__name__
                    logger.error(
                        "agent_thinking_ollama_endpoint_failed provider=%s error=%s",
                        endpoint.provider_name,
                        e,
                    )
                except Exception as e:
                    # Boundary handler: any other failure moves on to the
                    # next endpoint; the traceback is kept in the log.
                    last_error = str(e)
                    logger.exception(
                        "agent_thinking_unknown_error provider=%s error=%s",
                        endpoint.provider_name,
                        e,
                    )

        error_content = f"Ollama 全端點不可用: {last_error or 'unknown'}"
        yield _sse_event("error", error_content)

        # 4. End marker
        yield "data: [DONE]\n\n"

    return StreamingResponse(
        generate_thinking_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",  # disable Nginx buffering
        },
    )