Files
awoooi/apps/api/src/routes/agent.py
Your Name 35fe37c82a
All checks were successful
Code Review / ai-code-review (push) Successful in 23s
CD Pipeline / tests (push) Successful in 5m51s
CD Pipeline / build-and-deploy (push) Successful in 3m29s
CD Pipeline / post-deploy-checks (push) Successful in 1m14s
fix(api): route direct ollama callers through ordered fallback
2026-05-19 12:56:13 +08:00

205 lines
7.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Agent (OpenClaw) Endpoints
ADR-005: BFF 架構 - 所有 AI 調用經過 BFF
Phase 1.2: 真實 Ollama 串接
"""
import json
import logging
from datetime import datetime
from typing import Literal
from uuid import UUID, uuid4
import httpx
from fastapi import APIRouter, Query
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
# Router on which the agent endpoints defined in this module are registered.
router = APIRouter()
# Module-level standard-library logger, named after this module.
logger = logging.getLogger(__name__)
# ==================== Ollama Config ====================
# 2026-05-19 Codex: agent thinking stream follows GCP-A → GCP-B → 111.
def _get_ollama_endpoints():
    """Return the Ollama endpoints for the "interactive" profile.

    The result is whatever ``resolve_ollama_order("interactive")`` produces;
    callers in this module iterate it and try each endpoint in turn. The
    resolver is imported at call time rather than at module import time.
    """
    from src.services.ollama_endpoint_resolver import resolve_ollama_order

    ordered_endpoints = resolve_ollama_order("interactive")
    return ordered_endpoints
OLLAMA_MODEL = "llama3.2:latest"  # default model name; adjust to match the actual deployment
OLLAMA_TIMEOUT = 120.0  # timeout handed to the httpx client used for the streaming request
# Request body accepted by the chat endpoints.
class ChatRequest(BaseModel):
    # Message text sent by the caller.
    message: str
    # Conversation to continue; chat_with_agent generates a fresh id when this is omitted.
    conversation_id: UUID | None = None
    # Optional free-form context; not read by the handlers visible in this file.
    context: dict | None = None
# One action entry that can be attached to a ChatResponse via its `actions` list.
class SuggestedAction(BaseModel):
    # Identifier of the action.
    id: str
    # Short label for the action.
    label: str
    # Optional longer description.
    description: str | None = None
    # Risk classification; restricted to the four literal values below.
    risk_level: Literal["low", "medium", "high", "critical"]
# Response body returned by the non-streaming chat endpoint.
class ChatResponse(BaseModel):
    # Reply text.
    message: str
    # Conversation this reply belongs to (echoed or newly generated by the handler).
    conversation_id: UUID
    # Optional list of suggested actions; defaults to None.
    actions: list[SuggestedAction] | None = None
    # Approval flag; the placeholder handler in this file always sends False.
    requires_approval: bool = False
    # Optional approval identifier; never set by the handlers visible in this file.
    approval_id: UUID | None = None
# Response body of the /status endpoint.
class AgentStatus(BaseModel):
    # Current agent state; restricted to the four literal values below.
    status: Literal["idle", "thinking", "executing", "waiting_approval"]
    # Number of active conversations.
    active_conversations: int
    # Description of the current task, if any.
    current_task: str | None = None
    # Timestamp of the last activity, if any.
    last_activity: datetime | None = None
@router.post("/chat", response_model=ChatResponse)
async def chat_with_agent(request: ChatRequest) -> ChatResponse:
    """Chat with OpenClaw.

    Placeholder implementation: echoes the incoming message back and never
    requests approval.
    """
    # Continue the caller's conversation when an id was supplied; otherwise start a new one.
    if request.conversation_id:
        conversation_id = request.conversation_id
    else:
        conversation_id = uuid4()

    # TODO: actually call OpenClaw
    reply_text = f"收到訊息: {request.message}"
    return ChatResponse(
        message=reply_text,
        conversation_id=conversation_id,
        requires_approval=False,
    )
@router.post("/chat/stream")
async def chat_with_agent_stream(request: ChatRequest) -> StreamingResponse:
    """Chat with OpenClaw over a Server-Sent Events stream.

    Placeholder implementation: emits one greeting frame followed by the
    ``[DONE]`` terminator.
    """

    async def placeholder_frames():
        # TODO: real streaming
        for frame in ("data: Hello from OpenClaw\n\n", "data: [DONE]\n\n"):
            yield frame

    return StreamingResponse(placeholder_frames(), media_type="text/event-stream")
@router.get("/status", response_model=AgentStatus)
async def get_agent_status() -> AgentStatus:
    """Report the OpenClaw agent status.

    Placeholder implementation: always reports an idle agent with no active
    conversations and no current task, stamped with the current time.

    Returns:
        AgentStatus whose ``last_activity`` is a timezone-aware UTC datetime.
    """
    from datetime import timezone

    return AgentStatus(
        status="idle",
        active_conversations=0,
        current_task=None,
        # Aware UTC timestamp: a naive utcnow() value serializes without an
        # offset, so clients would interpret it as local time.
        last_activity=datetime.now(timezone.utc),
    )
def _sse_event(event_type: str, content: str) -> str:
    """Format one ``{"type", "content"}`` payload as an SSE ``data:`` frame."""
    payload = json.dumps({"type": event_type, "content": content}, ensure_ascii=False)
    return f"data: {payload}\n\n"


@router.get("/thinking")
async def get_agent_thinking(
    prompt: str = Query(
        default="你是 AWOOOI 智能運維助手。請簡短分析一下目前系統的健康狀態,用中文回答。",
        description="發送給 AI 的提示詞",
    ),
    model: str = Query(default=OLLAMA_MODEL, description="Ollama 模型名稱"),
) -> StreamingResponse:
    """Stream the OpenClaw "thinking" trace as Server-Sent Events.

    Phase 1.2: talks to the configured Ollama provider pool. Endpoints from
    ``_get_ollama_endpoints()`` are tried in order; the first one that answers
    with HTTP 200 is streamed to the client.

    Args:
        prompt: Prompt text forwarded to the model.
        model: Ollama model name.

    Returns:
        A ``text/event-stream`` response. Each frame is a JSON object with
        ``type`` (``thinking``, ``result`` or ``error``) and ``content``; the
        stream always ends with a literal ``data: [DONE]`` frame.
    """

    async def generate_thinking_stream():
        """Call Ollama and translate its line-delimited JSON into SSE frames."""
        # 1. Announce that work has started.
        yield _sse_event("thinking", "正在連接 AI 模型...")
        last_error = ""

        async with httpx.AsyncClient(timeout=OLLAMA_TIMEOUT) as client:
            # 2. Send the request to Ollama, one endpoint at a time.
            yield _sse_event("thinking", f"模型: {model}")

            for endpoint in _get_ollama_endpoints():
                if not endpoint.url:
                    continue
                try:
                    async with client.stream(
                        "POST",
                        f"{endpoint.url}/api/generate",
                        json={
                            "model": model,
                            "prompt": prompt,
                            "stream": True,
                        },
                    ) as response:
                        if response.status_code != 200:
                            last_error = f"HTTP {response.status_code}"
                            # Standard-library Logger methods reject arbitrary
                            # keyword arguments, so the fields are passed as
                            # lazy %-style arguments.
                            logger.warning(
                                "agent_thinking_ollama_http_error provider=%s status=%s",
                                endpoint.provider_name,
                                response.status_code,
                            )
                            continue

                        yield _sse_event("thinking", "開始接收 AI 回應...")

                        # 3. Read the Ollama response line by line.
                        # NOTE(review): if the stream ends or fails after some
                        # tokens were forwarded, the loop moves on to the next
                        # endpoint and the client sees the answer restart;
                        # confirm whether that is the intended behavior.
                        buffer = ""
                        async for line in response.aiter_lines():
                            if not line:
                                continue
                            try:
                                chunk = json.loads(line)
                            except json.JSONDecodeError as e:
                                logger.warning("JSON 解析失敗: %s... - %s", line[:100], e)
                                continue

                            token = chunk.get("response", "")
                            done = chunk.get("done", False)

                            if token:
                                # Accumulate tokens and flush every 10 characters
                                # or as soon as a punctuation mark shows up.
                                buffer += token
                                if len(buffer) >= 10 or any(p in buffer for p in "。!?,、\n"):
                                    yield _sse_event("thinking", buffer)
                                    buffer = ""

                            if done:
                                # Flush whatever is still buffered.
                                if buffer:
                                    yield _sse_event("thinking", buffer)
                                # Emit the completion message and terminator.
                                yield _sse_event("result", "分析完成")
                                yield "data: [DONE]\n\n"
                                return
                except (httpx.ConnectError, httpx.TimeoutException) as e:
                    # Endpoint unreachable or too slow: record it and fall
                    # through to the next endpoint.
                    last_error = type(e).__name__
                    logger.error(
                        "agent_thinking_ollama_endpoint_failed provider=%s error=%s",
                        endpoint.provider_name,
                        e,
                    )
                except Exception as e:
                    # Best-effort boundary: any other failure also falls
                    # through to the next endpoint; the traceback is logged.
                    last_error = str(e)
                    logger.exception(
                        "agent_thinking_unknown_error provider=%s error=%s",
                        endpoint.provider_name,
                        e,
                    )

        # Every endpoint was skipped or failed.
        error_content = f"Ollama 全端點不可用: {last_error or 'unknown'}"
        yield _sse_event("error", error_content)
        # 4. End-of-stream marker.
        yield "data: [DONE]\n\n"

    return StreamingResponse(
        generate_thinking_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",  # disable Nginx buffering
        },
    )