Files
ewoooc/services/ppt_vision_service.py
OoO 72a7c385d5
All checks were successful
CD Pipeline / deploy (push) Successful in 2m54s
feat(p26): PPT 視覺審核 daily 22:00 cron — minicpm-v 自動掃當天新生 .pptx
Operation Ollama-First v5.0 / Phase 26 — PPT 自我審視整合

services/ppt_vision_service.py 擴充:
- check_ppt_file(pptx_path, max_slides=5) — 整檔視覺檢查
  • LibreOffice headless 轉每張 slide 為 png
  • 對前 N 張跑 check_image
  • 彙總 issues + 平均 confidence
  • fail-safe:LibreOffice 不在 / 轉檔失敗 → 回 skip 不阻擋
- audit_recent_ppts(reports_dir, hours=24, max_files=10)
  • 掃 reports/ 過去 24h 新生 .pptx(getmtime filter)
  • 對每個檔跑 check_ppt_file
  • 彙總總 issues
- push_ppt_audit_to_telegram(summary)
  • 有 issues 才推 Telegram(避免「無問題」洗版)
  • 每檔最多 3 張 slide / 每張 2 個 issue 列出

run_scheduler.py — 每日 22:00 cron
- run_ppt_vision_audit task wrapper
- PPT_VISION_ENABLED=false 時 service 內部 skip(不打 LLM)

設計哲學:
不動既有 5 個 prs.save() 呼叫點(risk 高)→ 改寫獨立 daily cron 集中處理
零侵入 PPT 生成主流程 / 零 risk regression / feature flag OFF 預設

部署需求:
LibreOffice headless(apt install libreoffice)— 不在則 cron task 自動 skip + log

tests/test_ppt_vision_audit.py (9 tests 全綠)
- flag OFF skip / 目錄不存在 / 無 .pptx
- 舊檔(>hours)filter / LibreOffice 不在 fail-safe
- check_ppt_file flag/missing 容錯
- Telegram 推播:無 issues 不推 / 有 issues 推

regression: ppt_vision_service 既有 6 tests 全綠

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 11:16:11 +08:00

391 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
services/ppt_vision_service.py
Operation Ollama-First v5.0 / Phase 14 — PPT 視覺自審
設計原則:
- 用 minicpm-v(GCP Primary 已拉,5.5GB)對 PPT 截圖做品質檢查
- 替代 qwen2-vl:7b(Ollama registry 暫無)
- 用途:PPT 生成後自動跑視覺檢查,找:
1. 圖表 layout 異常(被切掉、重疊)
2. 文字溢出框
3. 空白區塊(資料未填滿)
4. 配色衝突
- feature flag PPT_VISION_ENABLED 預設 OFF
- 失敗自動 skip(不阻擋 PPT 生成主流程)
"""
from __future__ import annotations
import os
import time
import base64
import logging
from dataclasses import dataclass, field
from typing import Optional, Dict, Any, List
import requests
logger = logging.getLogger(__name__)
# ─────────────────────────────────────────────────────────────────────────────
# Feature flag + configuration
# ─────────────────────────────────────────────────────────────────────────────
# Default model name for PPTVisionService; it is sent as the 'model' field of
# the Ollama generate request. Overridable through the environment.
PPT_VISION_MODEL = os.getenv('PPT_VISION_MODEL', 'minicpm-v:latest')
# Timeout handed to requests.post for the vision call. Read once at import
# time; a non-integer value in the environment raises ValueError right here.
PPT_VISION_TIMEOUT = int(os.getenv('PPT_VISION_TIMEOUT', '60'))
def is_ppt_vision_enabled() -> bool:
    """Return True when the PPT_VISION_ENABLED environment flag is switched on.

    The variable is read on every call (not cached at import time), so the
    flag can be toggled while the process is running. Accepted "on" values,
    compared case-insensitively after trimming whitespace, are
    ``true``, ``1``, ``yes`` and ``on``; anything else, or an unset
    variable, counts as off.
    """
    raw_flag = os.getenv('PPT_VISION_ENABLED', 'false')
    normalized = raw_flag.strip().lower()
    return normalized in ('true', '1', 'yes', 'on')
# ─────────────────────────────────────────────────────────────────────────────
# Result container
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class VisionResult:
    """Outcome of one vision check on a single slide image.

    Only ``success`` is required; every other field has a neutral default so
    failure paths can be built with just ``success=False`` and ``error``.
    """

    # True when the model was reached and its answer was parsed.
    success: bool
    # One entry per problem line reported by the model; empty when none.
    issues_found: List[str] = field(default_factory=list)
    # Score in the 0-1 range attached to the verdict by the checker.
    confidence: float = 0.0
    # Model response text, stripped of surrounding whitespace.
    raw_response: str = ''
    # Elapsed time of the check in milliseconds.
    duration_ms: int = 0
    # Human-readable failure reason; None when nothing went wrong.
    error: Optional[str] = None
# ─────────────────────────────────────────────────────────────────────────────
# Vision-check prompt (Traditional Chinese output enforced)
# ─────────────────────────────────────────────────────────────────────────────
# Sent as the 'system' field of the Ollama request. The output format it asks
# for is what the response parser in check_image relies on: one issue per line
# with a warning prefix, or the "無視覺異常" phrase when the slide is clean.
PPT_VISION_SYSTEM_PROMPT = """你是 momo 電商 PPT 排版品質審核員。
【任務】檢查截圖找出視覺異常,回繁中清單格式:
- 圖表被切掉 / 元素重疊 / 文字溢出框 / 空白區塊(資料未填滿)/ 配色衝突
- 商品名稱顯示不完整 / 數字單位錯誤 / 標題遮擋
【輸出格式】
若無問題:回「✅ 無視覺異常」
若有問題:每行一個問題,格式「⚠️ <問題類型><具體描述>」
【限制】
- 只檢查視覺,不評估內容對錯
- 用繁體中文(台灣用語),絕對禁止簡體字
- 不要寫過多解釋,每個問題一行精簡描述
"""
class PPTVisionService:
    """Visual QA for generated PPT decks using an Ollama-hosted vision model.

    Every public method is fail-safe: problems are reported through the
    ``error`` field of the returned value instead of being raised, so a
    failed check never blocks the PPT generation flow.
    """

    # A response line counts as an issue when it starts with one of these.
    # U+26A0 (warning sign) is matched WITHOUT the optional U+FE0F variation
    # selector so both the emoji form and the plain glyph are recognised.
    _ISSUE_PREFIXES = ('\u26a0', '警告')
    # Matched case-insensitively ("warning:", "Warning:", "WARNING:").
    _ISSUE_PREFIX_ANYCASE = 'warning:'
    # Phrase the system prompt asks the model to emit for a clean slide.
    _NO_ISSUE_MARKER = '無視覺異常'

    def __init__(self, model: str = PPT_VISION_MODEL):
        """Store the model name sent to Ollama with every request."""
        self.model = model

    def is_available(self) -> bool:
        """Return True when the PPT_VISION_ENABLED feature flag is on."""
        return is_ppt_vision_enabled()

    def check_ppt_file(self, pptx_path: str, max_slides: int = 5) -> Dict[str, Any]:
        """Run the vision check over the slides of one .pptx file.

        Steps:
            1. Convert the deck to PNG with LibreOffice in headless mode.
            2. Run ``check_image`` on the first ``max_slides`` PNGs.
            3. Aggregate the issues per slide.

        NOTE: per the original author's note, this LibreOffice conversion
        emits only the first slide by default, so ``max_slides`` only has an
        effect when the conversion actually produces several PNG files.

        Args:
            pptx_path: Path of the .pptx file to inspect.
            max_slides: Upper bound on the number of slide images checked.

        Returns:
            A dict with the keys:
                'success': True when at least one slide was checked.
                'slides_checked': number of slides the model answered for.
                'total_issues': total number of issue lines found.
                'issues_by_slide': list of ``(slide_num, [issues...])``
                    tuples, 1-based, only for slides that have issues.
                'error': failure reason, or None. Also set when no slide
                    could be checked at all, carrying the first failure.
        """
        import subprocess
        import tempfile

        result: Dict[str, Any] = {
            'success': False, 'slides_checked': 0, 'total_issues': 0,
            'issues_by_slide': [], 'error': None,
        }
        if not self.is_available():
            result['error'] = 'PPT_VISION_ENABLED=false'
            return result
        if not os.path.isfile(pptx_path):
            result['error'] = f'pptx not found: {pptx_path}'
            return result

        # The temp dir (and every PNG in it) is removed when the block exits,
        # so all image checks must happen inside the ``with``.
        with tempfile.TemporaryDirectory() as tmpdir:
            # 1. LibreOffice -> PNG
            try:
                proc = subprocess.run(
                    ['libreoffice', '--headless', '--convert-to', 'png',
                     '--outdir', tmpdir, pptx_path],
                    capture_output=True, timeout=60,
                )
                if proc.returncode != 0:
                    # errors='replace': undecodable bytes in stderr must not
                    # hide the real conversion failure behind a decode error.
                    stderr_text = proc.stderr.decode('utf-8', errors='replace')
                    result['error'] = f'libreoffice convert failed: {stderr_text[:200]}'
                    return result
            except FileNotFoundError:
                result['error'] = 'libreoffice not installed (skip vision check)'
                return result
            except subprocess.TimeoutExpired:
                result['error'] = 'libreoffice convert timeout (60s)'
                return result
            except Exception as e:
                result['error'] = f'{type(e).__name__}: {str(e)[:200]}'
                return result

            png_files = sorted(
                os.path.join(tmpdir, name)
                for name in os.listdir(tmpdir)
                if name.lower().endswith('.png')
            )
            if not png_files:
                result['error'] = 'libreoffice 未產出 png (可能需要 --convert-to png:impress_png_Export)'
                return result

            # 2. Check the first N slide images. max(0, ...) keeps a negative
            # max_slides from silently meaning "all but the last".
            first_failure: Optional[str] = None
            for slide_num, png in enumerate(png_files[:max(0, max_slides)], start=1):
                try:
                    vr = self.check_image(png)
                except Exception as exc:
                    logger.warning("[PPTVision] slide %s check failed: %s", slide_num, exc)
                    if first_failure is None:
                        first_failure = f'{type(exc).__name__}: {str(exc)[:200]}'
                    continue
                if not vr.success:
                    if first_failure is None:
                        first_failure = vr.error
                    continue
                result['slides_checked'] += 1
                if vr.issues_found:
                    result['total_issues'] += len(vr.issues_found)
                    result['issues_by_slide'].append((slide_num, vr.issues_found))

        result['success'] = result['slides_checked'] > 0
        if not result['success'] and first_failure:
            # Without this the caller would see success=False and error=None
            # (e.g. when the vision host is down) and could not report why.
            result['error'] = f'no slide could be checked: {first_failure}'
        return result

    @classmethod
    def _parse_issues(cls, raw: str) -> List[str]:
        """Return the stripped response lines that look like issue reports."""
        issues: List[str] = []
        for line in raw.split('\n'):
            line = line.strip()
            if (line.startswith(cls._ISSUE_PREFIXES)
                    or line.lower().startswith(cls._ISSUE_PREFIX_ANYCASE)):
                issues.append(line)
        return issues

    def check_image(self, image_path: str) -> VisionResult:
        """Check a single slide screenshot with the vision model.

        Args:
            image_path: Local path of the image file (jpg/png).

        Returns:
            A ``VisionResult``. On success ``issues_found`` holds one entry
            per reported problem; ``confidence`` is 0.85 when issues were
            found, 1.0 when the model explicitly reported a clean slide, and
            0.5 when the answer contained neither. On failure ``success`` is
            False and ``error`` describes the reason; nothing is raised.
        """
        start = time.monotonic()

        def elapsed_ms() -> int:
            return int((time.monotonic() - start) * 1000)

        if not self.is_available():
            return VisionResult(
                success=False,
                error='PPT_VISION_ENABLED=false (Phase 14 預設 OFF)',
            )
        if not os.path.isfile(image_path):
            return VisionResult(
                success=False,
                error=f'image not found: {image_path}',
            )

        # Read the image and base64-encode it for the request body.
        try:
            with open(image_path, 'rb') as f:
                img_b64 = base64.b64encode(f.read()).decode('ascii')
        except Exception as e:
            return VisionResult(
                success=False,
                error=f'read image failed: {type(e).__name__}: {str(e)[:200]}',
            )

        # Pick the host through the shared resolver so host selection stays
        # consistent with the rest of the Ollama integration.
        try:
            from services.ollama_service import resolve_ollama_host, mark_unhealthy
            host = resolve_ollama_host()
        except Exception as e:
            return VisionResult(
                success=False,
                error=f'resolve host failed: {e}',
            )

        def flag_unhealthy() -> None:
            # Best-effort: a failure to flag the host must never replace the
            # error that is actually being reported to the caller.
            try:
                mark_unhealthy(host)
            except Exception as mark_exc:
                logger.debug("[PPTVision] mark_unhealthy failed: %s", mark_exc)

        # /api/generate takes the images as a list of base64 strings.
        payload = {
            'model': self.model,
            'system': PPT_VISION_SYSTEM_PROMPT,
            'prompt': '請檢查這張 momo 電商 PPT 截圖,找出視覺異常。',
            'images': [img_b64],
            'stream': False,
            'options': {'temperature': 0.2, 'num_predict': 512},
        }
        try:
            resp = requests.post(
                f"{host.rstrip('/')}/api/generate",
                json=payload,
                timeout=PPT_VISION_TIMEOUT,
            )
            duration_ms = elapsed_ms()
            if resp.status_code != 200:
                # Flag the host so the next resolve can pick another one.
                flag_unhealthy()
                return VisionResult(
                    success=False, duration_ms=duration_ms,
                    error=f'HTTP {resp.status_code}: {resp.text[:200]}',
                )
            data = resp.json()
            raw = (data.get('response') or '').strip()
        except requests.Timeout:
            flag_unhealthy()
            return VisionResult(
                success=False, duration_ms=elapsed_ms(),
                error=f'timeout ({PPT_VISION_TIMEOUT}s)',
            )
        except Exception as e:
            flag_unhealthy()
            return VisionResult(
                success=False, duration_ms=elapsed_ms(),
                error=f'{type(e).__name__}: {str(e)[:200]}',
            )

        issues = self._parse_issues(raw)
        if issues:
            confidence = 0.85
        elif self._NO_ISSUE_MARKER in raw:
            # Explicit "no visual anomaly" answer and no issue lines.
            confidence = 1.0
        else:
            # Neither issues nor the clean marker: the answer is ambiguous.
            confidence = 0.5
        return VisionResult(
            success=True, issues_found=issues,
            confidence=confidence, raw_response=raw,
            duration_ms=duration_ms,
        )
# Module-level singleton, created at import time with the default model.
ppt_vision_service = PPTVisionService()
def audit_recent_ppts(reports_dir: str = 'reports', hours: int = 24,
                      max_files: int = 10) -> Dict[str, Any]:
    """Run the vision check on recently modified .pptx files in a directory.

    Intended as the body of a scheduled audit task: it never raises, every
    problem is collected into the returned summary instead.

    Args:
        reports_dir: Directory that holds the generated decks. Only its
            direct children are scanned.
        hours: Only files modified within the last ``hours`` hours are
            audited (based on the file modification time).
        max_files: Upper bound on the number of files audited per call;
            the most recently modified files are taken first.

    Returns:
        A dict with the keys:
            'audited_files': one entry per audited file with 'path',
                'slides_checked', 'issues', 'issues_by_slide', 'error'.
            'total_issues': sum of 'issues' over all audited files.
            'errors': list of error strings (flag off, directory problems,
                per-file failures prefixed with the file path).
    """
    summary: Dict[str, Any] = {'audited_files': [], 'total_issues': 0, 'errors': []}
    if not is_ppt_vision_enabled():
        summary['errors'].append('PPT_VISION_ENABLED=false')
        return summary
    if not os.path.isdir(reports_dir):
        summary['errors'].append(f'{reports_dir} not found')
        return summary

    try:
        names = os.listdir(reports_dir)
    except OSError as exc:
        # E.g. permission denied, or the directory vanished after the check
        # above; report it rather than crash the scheduled task.
        summary['errors'].append(
            f'{reports_dir}: {type(exc).__name__}: {str(exc)[:150]}')
        return summary

    # Collect (mtime, path) for decks touched inside the time window.
    cutoff = time.time() - hours * 3600
    recent = []
    for name in names:
        if not name.lower().endswith('.pptx'):
            continue
        full = os.path.join(reports_dir, name)
        try:
            mtime = os.path.getmtime(full)  # stat once; reused for sorting
        except OSError:
            continue  # file disappeared or is unreadable: skip quietly
        if mtime >= cutoff:
            recent.append((mtime, full))

    # Newest first, then cap the batch so one run cannot take too long.
    recent.sort(reverse=True)
    svc = PPTVisionService()
    for _mtime, path in recent[:max(0, max_files)]:
        try:
            result = svc.check_ppt_file(path)
            entry = {
                'path': path,
                'slides_checked': result.get('slides_checked', 0),
                'issues': result.get('total_issues', 0),
                'issues_by_slide': result.get('issues_by_slide', []),
                'error': result.get('error'),
            }
            summary['audited_files'].append(entry)
            summary['total_issues'] += entry['issues']
            if entry['error']:
                summary['errors'].append(f"{path}: {entry['error']}")
        except Exception as exc:
            summary['errors'].append(f'{path}: {type(exc).__name__}: {str(exc)[:150]}')
    return summary
def _format_ppt_audit_message(summary: Dict[str, Any]) -> str:
    """Build the HTML-tagged Telegram text for an audit summary."""
    import html

    audited = summary.get('audited_files', [])
    lines = [
        f"🔍 <b>PPT 視覺審核({len(audited)} 份)</b>",
        '─' * 18,  # visual separator under the header
    ]
    for entry in audited:
        if entry.get('issues', 0) <= 0:
            continue
        # The message carries HTML tags, so the file name and the
        # model-generated text are escaped; a stray '<' or '&' would
        # otherwise corrupt the markup.
        fname = html.escape(os.path.basename(entry['path']), quote=False)
        lines.append(f"\n📊 <code>{fname}</code> ({entry.get('slides_checked', 0)} slides, "
                     f"<b>{entry['issues']} issues</b>)")
        # Keep the message short: at most 3 slides per file, 2 issues each.
        for slide_num, issues in entry.get('issues_by_slide', [])[:3]:
            for iss in issues[:2]:
                # Truncate first, escape second, so an entity is never cut.
                lines.append(f" Slide {slide_num}: {html.escape(str(iss)[:120], quote=False)}")
    return '\n'.join(lines)


def push_ppt_audit_to_telegram(summary: Dict[str, Any]) -> bool:
    """Push an audit summary to Telegram, but only when issues were found.

    Clean audits are not pushed, so the chat is not flooded with "no
    problem" messages.

    Args:
        summary: Result of ``audit_recent_ppts``. A missing 'total_issues'
            key is treated as zero issues.

    Returns:
        True when the message was handed to the Telegram sender without an
        exception; False when there was nothing to report, the sender could
        not be imported, or sending failed.
    """
    if summary.get('total_issues', 0) <= 0:
        return False
    try:
        from services.telegram_templates import _send_telegram_raw
    except Exception as exc:
        logger.warning("[PPTVision] telegram sender unavailable: %s", exc)
        return False

    try:
        _send_telegram_raw(_format_ppt_audit_message(summary))
    except Exception as exc:
        logger.warning("[PPTVision] telegram push failed: %s", exc)
        return False
    return True
# Names exported by ``from services.ppt_vision_service import *``.
__all__ = [
'PPTVisionService',
'VisionResult',
'ppt_vision_service',
'is_ppt_vision_enabled',
'PPT_VISION_SYSTEM_PROMPT',
'audit_recent_ppts',
'push_ppt_audit_to_telegram',
]