Files
awoooi/apps/api/src/services/image_analysis_service.py
Your Name 35fe37c82a
All checks were successful
Code Review / ai-code-review (push) Successful in 23s
CD Pipeline / tests (push) Successful in 5m51s
CD Pipeline / build-and-deploy (push) Successful in 3m29s
CD Pipeline / post-deploy-checks (push) Successful in 1m14s
fix(api): route direct ollama callers through ordered fallback
2026-05-19 12:56:13 +08:00

251 lines
8.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
AWOOOI — Image Analysis Service (Phase 34, ADR-067)
===================================================
使用 llava:latest 分析 Telegram 傳入的圖片
觸發:
- Telegram 傳圖 (photo attachment) 自動觸發
- /screenshot 指令
限制:
- 圖片 < 5MB
- MIME 驗證 (image/jpeg, image/png, image/webp)
- 每 chat_id 每分鐘最多 3 次 (Redis rate limit)
安全:
- 暫存到 /tmp/tg_photo_{file_id}.jpg
- finally 清理暫存檔
2026-04-10 Claude Sonnet 4.6 Asia/Taipei
"""
from __future__ import annotations
import base64
import time
from pathlib import Path
from typing import TYPE_CHECKING
import httpx
import structlog
from src.services.model_registry import get_model
from src.services.ollama_endpoint_resolver import resolve_ollama_order
if TYPE_CHECKING:
pass
logger = structlog.get_logger(__name__)
# D1 集中化 2026-04-11: 從 models.json providers.ollama.models.image_analysis 讀取
_MODEL = get_model("ollama", "image_analysis")
_TIMEOUT_S = 120.0
_MAX_SIZE_BYTES = 5 * 1024 * 1024 # 5MB
_ALLOWED_MIME = {"image/jpeg", "image/png", "image/webp", "image/gif"}
_RATE_LIMIT = 3 # 每 chat_id 每分鐘最多 3 次
_RATE_WINDOW = 60 # 秒
class ImageAnalysisService:
"""Telegram 圖片分析服務"""
def __init__(self) -> None:
self._http: httpx.AsyncClient | None = None
async def _get_http(self) -> httpx.AsyncClient:
if self._http is None or self._http.is_closed:
self._http = httpx.AsyncClient(timeout=httpx.Timeout(_TIMEOUT_S, connect=10.0))
return self._http
async def analyze(
self,
chat_id: str,
file_id: str,
file_bytes: bytes,
mime_type: str = "image/jpeg",
question: str = "請用繁體中文描述這張圖片,如果是截圖或介面請分析其內容和問題",
) -> str | None:
"""
分析圖片,回傳繁中說明
"""
# Rate limit 檢查
if not await self._check_rate_limit(chat_id):
return "⚠️ 圖片分析頻率超限(每分鐘最多 3 次),請稍後再試"
# 大小檢查
if len(file_bytes) > _MAX_SIZE_BYTES:
return f"⚠️ 圖片過大({len(file_bytes)//1024}KB限制 5MB"
# MIME 檢查
if mime_type not in _ALLOWED_MIME:
return f"⚠️ 不支援的圖片格式:{mime_type}"
tmp_path = Path(f"/tmp/tg_photo_{file_id}.jpg")
try:
tmp_path.write_bytes(file_bytes)
result = await self._call_llava(tmp_path, question)
if result:
logger.info("image_analysis_done", chat_id=chat_id, file_id=file_id[:8])
return result
finally:
try:
tmp_path.unlink(missing_ok=True)
except Exception:
pass
async def _check_rate_limit(self, chat_id: str) -> bool:
"""Redis sliding window rate limit"""
try:
from src.core.redis_client import get_redis
redis = await get_redis()
if redis is None:
return True # Redis 不可用時放行
key = f"img_ratelimit:{chat_id}"
now = int(time.time())
window_start = now - _RATE_WINDOW
pipe = redis.pipeline()
pipe.zremrangebyscore(key, 0, window_start)
pipe.zcard(key)
pipe.zadd(key, {str(now): now})
pipe.expire(key, _RATE_WINDOW * 2)
results = await pipe.execute()
count = results[1]
return count < _RATE_LIMIT
except Exception:
return True # 失敗時放行
async def _call_llava(self, image_path: Path, question: str) -> str | None:
"""呼叫 llava:latest via Ollama /api/generate with base64 image"""
timed_out = False
try:
image_b64 = base64.b64encode(image_path.read_bytes()).decode()
http = await self._get_http()
for endpoint in resolve_ollama_order("image_analysis"):
if not endpoint.url:
continue
try:
resp = await http.post(
f"{endpoint.url}/api/generate",
json={
"model": _MODEL,
"prompt": question,
"images": [image_b64],
"stream": False,
"options": {
"num_predict": 512,
"temperature": 0.3,
},
},
)
if resp.status_code == 200:
text = resp.json().get("response", "").strip()
return text or None
logger.warning(
"image_analysis_ollama_error",
provider=endpoint.provider_name,
status=resp.status_code,
)
except httpx.TimeoutException:
timed_out = True
logger.warning(
"image_analysis_timeout",
provider=endpoint.provider_name,
path=str(image_path),
)
except Exception as e:
logger.error(
"image_analysis_failed",
provider=endpoint.provider_name,
error=str(e),
)
if timed_out:
return "⚠️ 圖片分析超時llava 處理中),請稍後重試"
return None
except Exception as e:
logger.error("image_analysis_failed", error=str(e))
return None
async def download_and_analyze(
self,
chat_id: str,
file_id: str,
question: str = "請用繁體中文描述這張圖片,如果是截圖或介面請分析其內容和問題",
) -> None:
"""
從 Telegram 下載圖片後分析並發送結果
供 telegram webhook handler 呼叫(含 download + analyze + send
"""
try:
file_bytes = await self._download_telegram_file(file_id)
if not file_bytes:
return
result = await self.analyze(
chat_id=chat_id,
file_id=file_id,
file_bytes=file_bytes,
question=question,
)
if result:
# Phase 34: OpenClaw 在 SRE 群組回覆圖片分析結果llava vision
from src.services.telegram_gateway import get_telegram_gateway
tg = get_telegram_gateway()
await tg.initialize()
await tg.send_as_openclaw(f"🖼️ <b>圖片分析</b>\n{result}")
except Exception as e:
logger.warning("download_and_analyze_failed", error=str(e))
async def _download_telegram_file(self, file_id: str) -> bytes | None:
"""透過 Telegram Bot API 下載圖片"""
try:
from src.core.config import get_settings
cfg = get_settings()
# Phase 34: polling 用 OPENCLAW_TG_BOT_TOKEN 下載圖片
bot_token = cfg.OPENCLAW_TG_BOT_TOKEN or cfg.TELEGRAM_BOT_TOKEN
http = await self._get_http()
# Step 1: getFile → file_path
get_file_resp = await http.get(
f"https://api.telegram.org/bot{bot_token}/getFile",
params={"file_id": file_id},
timeout=httpx.Timeout(15.0, connect=5.0),
)
if get_file_resp.status_code != 200:
logger.warning("tg_getfile_failed", status=get_file_resp.status_code)
return None
file_path = get_file_resp.json().get("result", {}).get("file_path")
if not file_path:
return None
# Step 2: 下載檔案
dl_resp = await http.get(
f"https://api.telegram.org/file/bot{bot_token}/{file_path}",
timeout=httpx.Timeout(30.0, connect=5.0),
)
if dl_resp.status_code == 200:
return dl_resp.content
except Exception as e:
logger.warning("tg_download_failed", file_id=file_id[:8], error=str(e))
return None
async def close(self) -> None:
if self._http and not self._http.is_closed:
await self._http.aclose()
_instance: ImageAnalysisService | None = None
def get_image_analysis_service() -> ImageAnalysisService:
global _instance
if _instance is None:
_instance = ImageAnalysisService()
return _instance
def set_image_analysis_service(svc: ImageAnalysisService) -> None:
global _instance
_instance = svc