447 lines
17 KiB
Python
447 lines
17 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
services/ppt_vision_service.py
|
||
Operation Ollama-First v5.0 / Phase 14 — PPT 視覺自審
|
||
|
||
設計原則:
|
||
- 用 minicpm-v(GCP Primary 已拉,5.5GB)對 PPT 截圖做品質檢查
|
||
- 替代 qwen2-vl:7b(Ollama registry 暫無)
|
||
- 用途:PPT 生成後自動跑視覺檢查,找:
|
||
1. 圖表 layout 異常(被切掉、重疊)
|
||
2. 文字溢出框
|
||
3. 空白區塊(資料未填滿)
|
||
4. 配色衝突
|
||
- feature flag PPT_VISION_ENABLED 預設 OFF
|
||
- 失敗自動 skip(不阻擋 PPT 生成主流程)
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
import os
|
||
import time
|
||
import base64
|
||
import logging
|
||
from dataclasses import dataclass, field
|
||
from typing import Optional, Dict, Any, List
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Feature flag + 配置
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Ollama model tag used for slide screenshots; override with PPT_VISION_MODEL.
PPT_VISION_MODEL = os.getenv('PPT_VISION_MODEL', 'minicpm-v:latest')

# Timeout forwarded to the Ollama call (presumably seconds -- confirm against
# OllamaService.generate). A malformed or empty PPT_VISION_TIMEOUT must not
# crash the import, because this module is meant to be skippable.
try:
    PPT_VISION_TIMEOUT = int(os.getenv('PPT_VISION_TIMEOUT', '60'))
except ValueError:
    logging.getLogger(__name__).warning(
        "[PPTVision] invalid PPT_VISION_TIMEOUT=%r, falling back to 60",
        os.getenv('PPT_VISION_TIMEOUT'),
    )
    PPT_VISION_TIMEOUT = 60
|
||
|
||
|
||
def is_ppt_vision_enabled() -> bool:
    """Report whether the PPT_VISION_ENABLED feature flag is switched on.

    The environment is read on every call rather than once at import time,
    so the flag can be flipped without re-importing the module. The flag
    counts as on for ``true``, ``1``, ``yes`` or ``on`` (case-insensitive,
    surrounding whitespace ignored); anything else, including unset, is off.
    """
    raw_flag = os.getenv('PPT_VISION_ENABLED', 'false')
    normalized = raw_flag.strip().lower()
    return normalized in ('true', '1', 'yes', 'on')
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# 結果容器
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
@dataclass
class VisionResult:
    """Outcome of a vision check on one slide image.

    Attributes:
        success: True when the model call completed and its reply was parsed.
        issues_found: One entry per reported visual problem; empty when none.
        confidence: Score in the 0-1 range. ``check_image`` fills it with
            fixed heuristic values rather than a model-reported number.
        raw_response: The model reply the issues were parsed from.
        duration_ms: Elapsed time of the check in milliseconds.
        error: Failure description, or None when the check succeeded.
    """

    success: bool
    issues_found: List[str] = field(default_factory=list)
    confidence: float = 0.0
    raw_response: str = ''
    duration_ms: int = 0
    error: Optional[str] = None
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Vision 檢查 prompt(繁中強制)
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
PPT_VISION_SYSTEM_PROMPT = """你是 momo 電商 PPT 排版品質審核員。
|
||
|
||
【任務】檢查截圖找出視覺異常,回繁中清單格式:
|
||
- 圖表被切掉 / 元素重疊 / 文字溢出框 / 空白區塊(資料未填滿)/ 配色衝突
|
||
- 商品名稱顯示不完整 / 數字單位錯誤 / 標題遮擋
|
||
|
||
【輸出格式】
|
||
若無問題:回「✅ 無視覺異常」
|
||
若有問題:每行一個問題,格式「⚠️ <問題類型>:<具體描述>」
|
||
|
||
【限制】
|
||
- 只檢查視覺,不評估內容對錯
|
||
- 用繁體中文(台灣用語),絕對禁止簡體字
|
||
- 不要寫過多解釋,每個問題一行精簡描述
|
||
"""
|
||
|
||
|
||
class PPTVisionService:
    """Audit rendered PPT slides with an Ollama vision model.

    The service is designed to report problems through the objects it
    returns instead of raising, so that a broken vision stack does not block
    the main PPT generation flow.
    """

    # Seconds allowed for the LibreOffice headless conversion.
    _CONVERT_TIMEOUT_S = 60

    def __init__(self, model: str = PPT_VISION_MODEL):
        """Remember which Ollama model tag to query."""
        self.model = model

    def is_available(self) -> bool:
        """Return True when the PPT_VISION_ENABLED flag is on (checked per call)."""
        return is_ppt_vision_enabled()

    @staticmethod
    def _natural_sort_key(path: str) -> List[Any]:
        """Sort key ordering embedded numbers numerically (slide2 before slide10)."""
        import re

        # re.split with a capture group alternates text / digit-run tokens, so
        # odd positions are always the digit runs and types never mix.
        tokens = re.split(r'(\d+)', os.path.basename(path))
        return [int(tok) if pos % 2 else tok for pos, tok in enumerate(tokens)]

    @staticmethod
    def _extract_issues(raw: str) -> List[str]:
        """Collect the lines of a model reply that report a visual problem.

        A line counts as an issue when, after trimming whitespace and list
        bullets, it starts with the warning sign (with or without the emoji
        variation selector), with ``警告``, or with ``warning:`` in any case.
        """
        issues: List[str] = []
        for line in raw.splitlines():
            # Models often echo the prompt's "- " list style; drop the bullet
            # so such lines are not silently ignored.
            candidate = line.strip().lstrip('-*• \t')
            if (candidate.startswith(('\u26a0', '警告'))
                    or candidate.lower().startswith('warning:')):
                issues.append(candidate)
        return issues

    def check_ppt_file(self, pptx_path: str, max_slides: int = 5) -> Dict[str, Any]:
        """Render a .pptx to PNG with LibreOffice and audit the rendered slides.

        Steps:
            1. Convert with ``libreoffice --headless --convert-to png``.
            2. Run :meth:`check_image` on at most ``max_slides`` PNGs.
            3. Aggregate issues per slide and average the confidences.
            4. Record the outcome via :meth:`_persist_audit_result`; a failure
               there is logged and ignored.

        The early exits (flag off, file missing, conversion failure, no PNG
        produced) return immediately without persisting anything.
        NOTE(review): ``_persist_audit_result`` has a 'skipped' branch for
        those errors, so persisting them may have been intended -- confirm.

        Args:
            pptx_path: Path of the deck to audit.
            max_slides: Upper bound on rendered images to inspect; values
                below 1 inspect nothing.

        Returns:
            A dict with ``success`` (at least one slide was checked),
            ``slides_checked``, ``total_issues``, ``issues_by_slide`` (a list
            of ``(slide_num, [issues...])`` for slides that have issues) and
            ``error`` (str or None). When no slide could be checked, ``error``
            carries the first per-slide failure.
        """
        import subprocess
        import tempfile

        result: Dict[str, Any] = {
            'success': False, 'slides_checked': 0, 'total_issues': 0,
            'issues_by_slide': [], 'error': None,
        }

        if not self.is_available():
            result['error'] = 'PPT_VISION_ENABLED=false'
            return result

        if not os.path.isfile(pptx_path):
            result['error'] = f'pptx not found: {pptx_path}'
            return result

        with tempfile.TemporaryDirectory() as tmpdir:
            # 1. LibreOffice -> PNG
            try:
                proc = subprocess.run(
                    ['libreoffice', '--headless', '--convert-to', 'png',
                     '--outdir', tmpdir, pptx_path],
                    capture_output=True, timeout=self._CONVERT_TIMEOUT_S,
                )
            except FileNotFoundError:
                result['error'] = 'libreoffice not installed (skip vision check)'
                return result
            except subprocess.TimeoutExpired:
                result['error'] = f'libreoffice convert timeout ({self._CONVERT_TIMEOUT_S}s)'
                return result
            except Exception as e:
                result['error'] = f'{type(e).__name__}: {str(e)[:200]}'
                return result

            if proc.returncode != 0:
                # errors='replace': stderr in a non-UTF-8 locale must not turn
                # the real failure into a UnicodeDecodeError.
                stderr_text = proc.stderr.decode('utf-8', errors='replace')
                result['error'] = f'libreoffice convert failed: {stderr_text[:200]}'
                return result

            # Per the original author's note, LibreOffice only exports the
            # first page of a .pptx by default; multi-page output needs
            # --convert-to png:impress_png_Export.
            png_files = sorted(
                (os.path.join(tmpdir, name) for name in os.listdir(tmpdir)
                 if name.lower().endswith('.png')),
                key=self._natural_sort_key,
            )

            if not png_files:
                result['error'] = 'libreoffice 未產出 png (可能需要 --convert-to png:impress_png_Export)'
                return result

            # 2. Check the first N rendered images.
            t0 = time.monotonic()
            confidences: List[float] = []
            first_failure: Optional[str] = None
            limit = max(0, max_slides)  # a negative slice bound would cut from the tail
            for slide_no, png in enumerate(png_files[:limit], start=1):
                try:
                    vr = self.check_image(png)
                except Exception as exc:
                    logger.warning("[PPTVision] slide %s check failed: %s", slide_no, exc)
                    if first_failure is None:
                        first_failure = f'slide {slide_no}: {type(exc).__name__}: {str(exc)[:200]}'
                    continue

                if not vr.success:
                    if first_failure is None and vr.error:
                        first_failure = f'slide {slide_no}: {vr.error}'
                    continue

                result['slides_checked'] += 1
                confidences.append(vr.confidence)
                if vr.issues_found:
                    result['total_issues'] += len(vr.issues_found)
                    result['issues_by_slide'].append((slide_no, vr.issues_found))

            result['success'] = result['slides_checked'] > 0
            if not result['success'] and first_failure:
                # Otherwise callers only see success=False with no reason.
                result['error'] = first_failure
            duration_ms = int((time.monotonic() - t0) * 1000)

            # Phase 38: keep a history row in ppt_audit_results (best effort).
            try:
                self._persist_audit_result(
                    pptx_path=pptx_path,
                    result=result,
                    avg_confidence=(sum(confidences) / len(confidences)) if confidences else 0.0,
                    duration_ms=duration_ms,
                )
            except Exception as e:
                logger.warning("[PPTVision] persist audit result failed: %s", e)

            return result

    def _persist_audit_result(self, pptx_path: str, result: Dict[str, Any],
                              avg_confidence: float, duration_ms: int) -> None:
        """Insert one audit outcome into the ``ppt_audit_results`` table (Phase 38).

        Args:
            pptx_path: Audited deck; its base name, size and mtime are stored.
            result: The dict built by :meth:`check_ppt_file`.
            avg_confidence: Mean confidence over checked slides, stored
                rounded to 3 decimals.
            duration_ms: Time spent checking slides, in milliseconds.

        Raises:
            Any import or database error propagates; :meth:`check_ppt_file`
            logs it as a warning and carries on.
        """
        import json as _json
        from datetime import datetime as _dt
        from sqlalchemy import text as _sa_text
        from database.manager import get_session

        # Derive audit_status from the result.
        error_text = result.get('error')
        if error_text:
            if 'libreoffice not installed' in error_text or 'PPT_VISION_ENABLED' in error_text:
                status = 'skipped'
            else:
                status = 'error'
        elif result.get('total_issues', 0) > 0:
            status = 'failed'
        elif result.get('success'):
            status = 'passed'
        else:
            status = 'error'

        # issues_found is sent as JSON text and cast to JSONB by the INSERT.
        issues_json = _json.dumps([
            {'slide': slide_num, 'issues': issues}
            for slide_num, issues in result.get('issues_by_slide', [])
        ], ensure_ascii=False)

        # One stat call for both values; both stay None unless both succeed.
        size_kb = None
        mtime = None
        try:
            if os.path.isfile(pptx_path):
                file_stat = os.stat(pptx_path)
                stat_size = round(file_stat.st_size / 1024, 1)
                stat_mtime = _dt.fromtimestamp(file_stat.st_mtime)
                size_kb, mtime = stat_size, stat_mtime
        except OSError:
            pass

        session = get_session()
        try:
            session.execute(
                _sa_text("""
                    INSERT INTO ppt_audit_results
                    (pptx_filename, pptx_size_kb, pptx_mtime, vision_enabled,
                     audit_status, issues_count, issues_found, confidence,
                     duration_ms, error_msg)
                    VALUES
                    (:fname, :sz, :mt, :ve, :st, :ic, CAST(:if AS JSONB),
                     :cf, :du, :em)
                """),
                {
                    'fname': os.path.basename(pptx_path),
                    'sz': size_kb,
                    'mt': mtime,
                    've': True,  # this is only reached when vision is enabled
                    'st': status,
                    'ic': result.get('total_issues', 0),
                    'if': issues_json,
                    'cf': round(avg_confidence, 3),
                    'du': duration_ms,
                    'em': result.get('error'),
                },
            )
            session.commit()
        finally:
            session.close()

    def check_image(self, image_path: str) -> VisionResult:
        """Ask the vision model to review a single slide screenshot.

        Args:
            image_path: Local path of the image (jpg/png).

        Returns:
            A :class:`VisionResult`. On success ``issues_found`` lists the
            reported problems and ``confidence`` is a fixed heuristic: 1.0 for
            an explicit "no visual anomaly" reply, 0.85 when issues were
            parsed, 0.5 when the reply matched neither pattern. On failure
            (flag off, file missing or unreadable, model error) ``success``
            is False and ``error`` explains why; nothing is raised.
        """
        start = time.monotonic()

        if not self.is_available():
            return VisionResult(
                success=False,
                error='PPT_VISION_ENABLED=false (Phase 14 預設 OFF)',
            )

        if not os.path.isfile(image_path):
            return VisionResult(
                success=False,
                error=f'image not found: {image_path}',
            )

        # Read the file and base64-encode it for the model call.
        try:
            with open(image_path, 'rb') as f:
                img_b64 = base64.b64encode(f.read()).decode('ascii')
        except Exception as e:
            return VisionResult(
                success=False,
                error=f'read image failed: {type(e).__name__}: {str(e)[:200]}',
            )

        try:
            # Imported lazily so a missing Ollama stack is reported as a
            # failed result instead of breaking the module import.
            from services.ollama_service import OllamaService

            ollama = OllamaService(model=self.model)
            resp = ollama.generate(
                prompt='請檢查這張 momo 電商 PPT 截圖,找出視覺異常。',
                model=self.model,
                system_prompt=PPT_VISION_SYSTEM_PROMPT,
                temperature=0.2,
                timeout=PPT_VISION_TIMEOUT,
                options={'num_predict': 512},
                images=[img_b64],
            )
            duration_ms = int((time.monotonic() - start) * 1000)

            if not resp.success:
                return VisionResult(
                    success=False, duration_ms=duration_ms,
                    error=resp.error or 'ollama vision failed',
                )

            raw = (resp.content or '').strip()
            issues = self._extract_issues(raw)

            explicit_ok = '✅' in raw and '無視覺異常' in raw and not issues
            if explicit_ok:
                confidence = 1.0
            elif issues:
                confidence = 0.85
            else:
                confidence = 0.5  # reply matched neither expected pattern

            return VisionResult(
                success=True, issues_found=issues,
                confidence=confidence, raw_response=raw,
                duration_ms=duration_ms,
            )

        except Exception as e:
            duration_ms = int((time.monotonic() - start) * 1000)
            return VisionResult(
                success=False, duration_ms=duration_ms,
                error=f'{type(e).__name__}: {str(e)[:200]}',
            )
|
||
|
||
|
||
# Module-level singleton, built with the default model.
ppt_vision_service = PPTVisionService()
|
||
|
||
|
||
def audit_recent_ppts(reports_dir: Optional[str] = None, hours: int = 24,
                      max_files: int = 10) -> Dict[str, Any]:
    """Run the vision audit on recently modified .pptx files (Phase 26 hook).

    Per the original note this is meant for a daily 22:00 cron job. Files in
    ``reports_dir`` modified within the last ``hours`` hours are audited,
    newest first.

    Args:
        reports_dir: Directory holding generated decks. When None, the
            REPORTS_DIR environment variable is used, defaulting to
            ``/app/data/reports``.
        hours: Look-back window in hours, measured on file mtime.
        max_files: Cap on files audited per run; values below 1 audit nothing.

    Returns:
        A dict with ``audited_files`` (one entry per file: path,
        slides_checked, issues, issues_by_slide, error), ``total_issues``
        (int) and ``errors`` (list of str). Problems are collected in
        ``errors`` rather than raised.
    """
    summary: Dict[str, Any] = {'audited_files': [], 'total_issues': 0, 'errors': []}

    if reports_dir is None:
        reports_dir = os.environ.get('REPORTS_DIR', '/app/data/reports')

    if not is_ppt_vision_enabled():
        summary['errors'].append('PPT_VISION_ENABLED=false')
        return summary

    if not os.path.isdir(reports_dir):
        summary['errors'].append(f'{reports_dir} not found')
        return summary

    try:
        names = os.listdir(reports_dir)
    except OSError as exc:
        # e.g. permission denied: report it like every other failure.
        summary['errors'].append(f'{reports_dir}: {type(exc).__name__}: {str(exc)[:150]}')
        return summary

    # Collect .pptx files touched inside the window.
    cutoff = time.time() - hours * 3600
    recent = []
    for name in names:
        if not name.lower().endswith('.pptx'):
            continue
        full = os.path.join(reports_dir, name)
        try:
            mtime = os.path.getmtime(full)  # stat once; the file may vanish
        except OSError:
            continue
        if mtime >= cutoff:
            recent.append((mtime, full))

    recent.sort(reverse=True)
    recent = recent[:max(0, max_files)]  # a negative bound would slice from the tail

    svc = PPTVisionService()
    for _mtime, path in recent:
        try:
            result = svc.check_ppt_file(path)
        except Exception as exc:
            summary['errors'].append(f'{path}: {type(exc).__name__}: {str(exc)[:150]}')
            continue

        entry = {
            'path': path,
            'slides_checked': result.get('slides_checked', 0),
            'issues': result.get('total_issues', 0),
            'issues_by_slide': result.get('issues_by_slide', []),
            'error': result.get('error'),
        }
        summary['audited_files'].append(entry)
        summary['total_issues'] += entry['issues']
        if entry['error']:
            summary['errors'].append(f"{path}: {entry['error']}")

    return summary
|
||
|
||
|
||
def _format_ppt_audit_message(summary: Dict[str, Any]) -> str:
    """Build the Telegram text listing each audited file that has issues."""
    import html

    lines = [
        f"🔍 <b>PPT 視覺審核({len(summary['audited_files'])} 份)</b>",
        '━' * 18,
    ]
    for entry in summary['audited_files']:
        if entry['issues'] <= 0:
            continue
        # The message carries HTML tags, so file names and model text are
        # escaped; the prompt's own output format contains "<...>".
        # NOTE(review): assumes _send_telegram_raw sends with HTML parse mode.
        fname = html.escape(os.path.basename(entry['path']), quote=False)
        lines.append(f"\n📊 <code>{fname}</code> ({entry['slides_checked']} slides, "
                     f"<b>{entry['issues']} issues</b>)")
        for slide_num, issues in entry['issues_by_slide'][:3]:  # at most 3 slides per file
            for iss in issues[:2]:  # at most 2 issues per slide
                # Truncate before escaping so an entity is never cut in half.
                lines.append(f" Slide {slide_num}: {html.escape(iss[:120], quote=False)}")
    return '\n'.join(lines)


def push_ppt_audit_to_telegram(summary: Dict[str, Any]) -> bool:
    """Send the audit summary to Telegram, but only when issues were found.

    Nothing is sent for a clean run, so the channel is not flooded with
    "no problems" messages.

    Args:
        summary: The dict returned by ``audit_recent_ppts``.

    Returns:
        True when the message was handed to ``_send_telegram_raw`` without an
        exception. False when there are no issues, the Telegram helper cannot
        be imported, or sending raised.
    """
    if summary.get('total_issues', 0) <= 0:
        return False

    try:
        from services.telegram_templates import _send_telegram_raw
    except Exception as exc:
        logger.warning("[PPTVision] telegram helper unavailable: %s", exc)
        return False

    msg = _format_ppt_audit_message(summary)
    try:
        _send_telegram_raw(msg)
    except Exception as exc:
        logger.warning("[PPTVision] telegram push failed: %s", exc)
        return False
    return True
|
||
|
||
|
||
# Names exported by a star-import of this module.
__all__ = [
    'PPTVisionService', 'VisionResult', 'ppt_vision_service',
    'is_ppt_vision_enabled', 'PPT_VISION_SYSTEM_PROMPT',
    'audit_recent_ppts', 'push_ppt_audit_to_telegram',
]
|