feat(p26): PPT 視覺審核 daily 22:00 cron — minicpm-v 自動掃當天新生 .pptx
All checks were successful
CD Pipeline / deploy (push) Successful in 2m54s

Operation Ollama-First v5.0 / Phase 26 — PPT 自我審視整合

services/ppt_vision_service.py 擴充:
- check_ppt_file(pptx_path, max_slides=5) — 整檔視覺檢查
  • LibreOffice headless 將 .pptx 轉為 png(注意:預設 --convert-to png 只輸出第一頁,多頁需 png:impress_png_Export)
  • 對前 N 張跑 check_image
  • 彙總 issues(回傳 slides_checked / total_issues / issues_by_slide;目前未計算平均 confidence)
  • fail-safe:LibreOffice 不在 / 轉檔失敗 → 回 skip 不阻擋
- audit_recent_ppts(reports_dir, hours=24, max_files=10)
  • 掃 reports/ 過去 24h 新生 .pptx(getmtime filter)
  • 對每個檔跑 check_ppt_file
  • 彙總總 issues
- push_ppt_audit_to_telegram(summary)
  • 有 issues 才推 Telegram(避免「無問題」洗版)
  • 每檔最多 3 張 slide / 每張 2 個 issue 列出

run_scheduler.py — 每日 22:00 cron
- run_ppt_vision_audit task wrapper
- PPT_VISION_ENABLED=false 時 service 內部 skip(不打 LLM)

設計哲學:
不動既有 5 個 prs.save() 呼叫點(risk 高)→ 改寫獨立 daily cron 集中處理
零侵入 PPT 生成主流程 / 零 risk regression / feature flag OFF 預設

部署需求:
LibreOffice headless(apt install libreoffice)— 不在則 cron task 自動 skip + log

tests/test_ppt_vision_audit.py (9 tests 全綠)
- flag OFF skip / 目錄不存在 / 無 .pptx
- 舊檔(>hours)filter / LibreOffice 不在 fail-safe
- check_ppt_file flag/missing 容錯
- Telegram 推播:無 issues 不推 / 有 issues 推

regression: ppt_vision_service 既有 6 tests 全綠

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
OoO
2026-05-04 11:16:11 +08:00
parent bd32e04dad
commit 72a7c385d5
3 changed files with 353 additions and 0 deletions

View File

@@ -131,6 +131,10 @@ def _register_schedules():
schedule.every().day.at("09:00").do(run_roi_monthly_report_if_new_month)
logger.info("📅 每日 09:00roi_monthly_report月初第 1 日才送)")
# Phase 26: PPT 視覺審核(每日 22:00 掃當天新生 .pptx,有 issues 才推 Telegram)
schedule.every().day.at("22:00").do(run_ppt_vision_audit)
logger.info("📅 每日 22:00ppt_vision_auditPPT_VISION_ENABLED=true 才生效)")
schedule.every().day.at("03:00").do(run_db_backup_task)
logger.info("📅 每日 03:00db_backup")
@@ -274,6 +278,28 @@ def run_cost_throttle_reset_if_new_month():
logger.error(f"[CostThrottle] reset failed: {e}", exc_info=True)
def run_ppt_vision_audit():
    """Daily 22:00 task — Phase 26 PPT visual audit.

    Calls ``audit_recent_ppts`` on the ``reports`` directory (last 24 hours,
    at most 10 files) and pushes a Telegram summary when issues were found.

    Errors collected by the audit (feature flag off, missing directory,
    per-file conversion failures) are logged, so a skipped or failed run is
    not reported as "no issues found".

    The task never raises: any exception is logged with its traceback.
    """
    try:
        from services.ppt_vision_service import audit_recent_ppts, push_ppt_audit_to_telegram

        summary = audit_recent_ppts(reports_dir='reports', hours=24, max_files=10)

        errors = summary.get('errors') or []
        if errors == ['PPT_VISION_ENABLED=false']:
            # Feature flag is off: this is the expected default, keep it quiet.
            logger.debug("[PPTVisionAudit] skipped: PPT_VISION_ENABLED=false")
            return
        if errors:
            # Surface audit failures instead of hiding them behind a zero
            # issue count; cap the list so one bad run cannot flood the log.
            logger.warning(
                "[PPTVisionAudit] %d error(s): %s",
                len(errors), "; ".join(str(err) for err in errors[:5]),
            )

        if summary['total_issues'] > 0:
            pushed = push_ppt_audit_to_telegram(summary)
            logger.info(
                "[PPTVisionAudit] %d files, %d issues, telegram=%s",
                len(summary['audited_files']), summary['total_issues'], pushed,
            )
        else:
            logger.debug("[PPTVisionAudit] no issues found")
    except Exception as e:
        logger.error(f"[PPTVisionAudit] task failed: {e}", exc_info=True)
def run_roi_monthly_report_if_new_month():
"""每日 09:00 — Phase 24 ROI 月報(內部判斷月初第 1 日才送)

View File

@@ -82,6 +82,87 @@ class PPTVisionService:
def is_available(self) -> bool:
    """Return the current result of ``is_ppt_vision_enabled()``."""
    enabled = is_ppt_vision_enabled()
    return enabled
def check_ppt_file(self, pptx_path: str, max_slides: int = 5) -> Dict[str, Any]:
    """Run the vision check over a whole .pptx file (Phase 26).

    Steps:
      1. Convert the deck to PNG with LibreOffice headless into a temp dir.
      2. Run ``check_image`` on the first ``max_slides`` PNG files.
      3. Aggregate the issues found per slide.

    Fail-safe: a missing LibreOffice binary, a failed or timed-out
    conversion, or a failing slide check never raises; the problem is
    reported through ``result['error']`` (or a warning log) instead.

    Args:
        pptx_path: Path of the .pptx file to inspect.
        max_slides: Upper bound on the number of PNG files that are checked.

    Returns:
        A dict with the keys:
          'success'         bool, True when at least one slide was checked
          'slides_checked'  int
          'total_issues'    int
          'issues_by_slide' list of (slide_num, [issues...]) tuples
          'error'           str or None
    """
    import os
    import re
    import subprocess
    import tempfile

    def _natural_key(path: str):
        # Sort "slide2.png" before "slide10.png"; plain string order would
        # swap them and the reported slide numbers would be wrong.
        name = os.path.basename(path)
        return [int(tok) if tok.isdigit() else tok.lower()
                for tok in re.split(r'(\d+)', name)]

    result: Dict[str, Any] = {
        'success': False, 'slides_checked': 0, 'total_issues': 0,
        'issues_by_slide': [], 'error': None,
    }
    if not self.is_available():
        result['error'] = 'PPT_VISION_ENABLED=false'
        return result
    if not os.path.isfile(pptx_path):
        result['error'] = f'pptx not found: {pptx_path}'
        return result

    # The PNG files only exist while the temp dir is open, so both the
    # conversion and the per-slide checks stay inside this ``with``.
    with tempfile.TemporaryDirectory() as tmpdir:
        # 1. LibreOffice -> png
        try:
            proc = subprocess.run(
                ['libreoffice', '--headless', '--convert-to', 'png',
                 '--outdir', tmpdir, pptx_path],
                capture_output=True, timeout=60,
            )
        except FileNotFoundError:
            result['error'] = 'libreoffice not installed (skip vision check)'
            return result
        except subprocess.TimeoutExpired:
            result['error'] = 'libreoffice convert timeout (60s)'
            return result
        except Exception as e:
            result['error'] = f'{type(e).__name__}: {str(e)[:200]}'
            return result

        if proc.returncode != 0:
            # errors='replace': stderr may not be valid UTF-8; a decode
            # error must not replace the real converter message.
            stderr_text = (proc.stderr or b'').decode(errors='replace')
            result['error'] = f'libreoffice convert failed: {stderr_text[:200]}'
            return result

        # NOTE: for .pptx LibreOffice emits only the first page by default;
        # multi-page output needs --convert-to png:impress_png_Export.
        png_files = sorted(
            (os.path.join(tmpdir, f) for f in os.listdir(tmpdir)
             if f.lower().endswith('.png')),
            key=_natural_key,
        )
        if not png_files:
            result['error'] = 'libreoffice 未產出 png (可能需要 --convert-to png:impress_png_Export)'
            return result

        # 2. Run check_image on the first N pictures. A negative max_slides
        # is clamped to 0 so the slice cannot silently drop trailing files.
        for idx, png in enumerate(png_files[:max(0, max_slides)]):
            try:
                vr = self.check_image(png)
                if vr.success:
                    result['slides_checked'] += 1
                    if vr.issues_found:
                        result['total_issues'] += len(vr.issues_found)
                        result['issues_by_slide'].append((idx + 1, vr.issues_found))
            except Exception as exc:
                # One bad slide must not abort the rest of the deck.
                logger.warning(f"[PPTVision] slide {idx+1} check failed: {exc}")

    result['success'] = result['slides_checked'] > 0
    if not result['success']:
        # Make the "nothing could be checked" outcome visible to callers
        # instead of returning success=False with error=None.
        result['error'] = 'no slide passed the vision check'
    return result
def check_image(self, image_path: str) -> VisionResult:
"""檢查單張 PPT 截圖。
@@ -203,10 +284,107 @@ class PPTVisionService:
ppt_vision_service = PPTVisionService()
def audit_recent_ppts(reports_dir: str = 'reports', hours: int = 24,
                      max_files: int = 10) -> Dict[str, Any]:
    """Phase 26 hook for the daily cron: vision-check recently written .pptx files.

    Lists ``reports_dir`` (non-recursive), keeps the ``*.pptx`` entries whose
    modification time falls within the last ``hours`` hours, and runs
    ``PPTVisionService.check_ppt_file`` on the newest ``max_files`` of them.

    Args:
        reports_dir: Directory the PPT files are written to.
        hours: Look-back window in hours.
        max_files: Maximum number of files checked in one run (newest first),
            to keep a single run short.

    Returns:
        A dict with the keys:
          'audited_files'  list of per-file dicts ('path', 'slides_checked',
                           'issues', 'issues_by_slide', 'error')
          'total_issues'   int, sum of 'issues' over the audited files
          'errors'         list of error strings (flag off, missing
                           directory, per-file failures)
    """
    import os
    import time

    summary: Dict[str, Any] = {'audited_files': [], 'total_issues': 0, 'errors': []}
    if not is_ppt_vision_enabled():
        summary['errors'].append('PPT_VISION_ENABLED=false')
        return summary
    if not os.path.isdir(reports_dir):
        summary['errors'].append(f'{reports_dir} not found')
        return summary

    # Collect .pptx files modified inside the look-back window.
    cutoff = time.time() - hours * 3600
    recent = []
    for name in os.listdir(reports_dir):
        if not name.lower().endswith('.pptx'):
            continue
        full = os.path.join(reports_dir, name)
        try:
            # Stat once: the same value is used for the filter and the sort.
            mtime = os.path.getmtime(full)
        except OSError:
            # File vanished or is unreadable; skip it.
            continue
        if mtime >= cutoff:
            recent.append((mtime, full))

    # Newest first. A negative max_files is clamped to 0 so the slice cannot
    # silently drop the oldest entry instead of limiting the count.
    recent.sort(reverse=True)
    recent = recent[:max(0, max_files)]

    svc = PPTVisionService()
    for _mtime, path in recent:
        try:
            result = svc.check_ppt_file(path)
            entry = {
                'path': path,
                'slides_checked': result.get('slides_checked', 0),
                'issues': result.get('total_issues', 0),
                'issues_by_slide': result.get('issues_by_slide', []),
                'error': result.get('error'),
            }
            summary['audited_files'].append(entry)
            summary['total_issues'] += entry['issues']
            if entry['error']:
                summary['errors'].append(f"{path}: {entry['error']}")
        except Exception as exc:
            # One failing file must not stop the remaining files.
            summary['errors'].append(f'{path}: {type(exc).__name__}: {str(exc)[:150]}')
    return summary
def push_ppt_audit_to_telegram(summary: Dict[str, Any]) -> bool:
    """Send the audit summary to Telegram, but only when issues were found.

    Nothing is sent for a clean run, to avoid flooding the chat with
    "no problems" messages. Per file at most 3 slides are listed, and per
    slide at most 2 issues (each truncated to 120 characters).

    Args:
        summary: Result of ``audit_recent_ppts``; reads 'total_issues' and,
            for each entry of 'audited_files', the keys 'path', 'issues',
            'slides_checked' and 'issues_by_slide'.

    Returns:
        True when the message was handed to ``_send_telegram_raw`` without an
        exception; False when there is nothing to report, the Telegram
        helper cannot be imported, or sending raised.
    """
    import html
    import os as _os

    if summary['total_issues'] <= 0:
        return False
    try:
        from services.telegram_templates import _send_telegram_raw
    except Exception:
        return False

    lines = [f"🔍 <b>PPT 視覺審核({len(summary['audited_files'])} 份)</b>"]
    # NOTE(review): the separator character is empty in this source, so this
    # appends an empty line; confirm the intended rule character.
    lines.append('' * 18)
    for entry in summary['audited_files']:
        if entry['issues'] <= 0:
            continue
        # The message is built with HTML tags (<b>, <code>), so file names
        # and model-produced issue text are escaped to keep '<', '>' and '&'
        # from breaking the markup.
        fname = html.escape(_os.path.basename(entry['path']), quote=False)
        lines.append(f"\n📊 <code>{fname}</code> ({entry['slides_checked']} slides, "
                     f"<b>{entry['issues']} issues</b>)")
        for slide_num, issues in entry['issues_by_slide'][:3]:  # at most 3 slides per file
            for iss in issues[:2]:  # at most 2 issues per slide
                # Truncate before escaping so an entity is never cut in half.
                lines.append(f" Slide {slide_num}: {html.escape(str(iss)[:120], quote=False)}")
    msg = '\n'.join(lines)
    try:
        _send_telegram_raw(msg)
        return True
    except Exception as exc:
        # Best effort: report the failure instead of dropping it silently.
        logger.warning(f"[PPTVision] telegram push failed: {exc}")
        return False
__all__ = [
'PPTVisionService',
'VisionResult',
'ppt_vision_service',
'is_ppt_vision_enabled',
'PPT_VISION_SYSTEM_PROMPT',
'audit_recent_ppts',
'push_ppt_audit_to_telegram',
]

View File

@@ -0,0 +1,149 @@
"""
tests/test_ppt_vision_audit.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 26 — PPT 視覺審核整合驗證
"""
import os
import tempfile
from unittest.mock import patch, MagicMock
import pytest
@pytest.fixture(autouse=True)
def _reset_env(monkeypatch):
    """Remove PPT_VISION_ENABLED from the environment before every test."""
    monkeypatch.delenv('PPT_VISION_ENABLED', raising=False)
def test_audit_disabled_by_default():
    """With the flag unset the audit skips and records the disabled marker."""
    from services.ppt_vision_service import audit_recent_ppts

    outcome = audit_recent_ppts(reports_dir='/tmp')

    assert outcome['total_issues'] == 0
    disabled_markers = [err for err in outcome['errors']
                        if 'PPT_VISION_ENABLED=false' in err]
    assert disabled_markers
def test_audit_missing_dir(monkeypatch):
    """A reports directory that does not exist yields a 'not found' error and no issues."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import audit_recent_ppts

    outcome = audit_recent_ppts(reports_dir='/tmp/nonexistent_xyz_ppts')

    assert outcome['total_issues'] == 0
    not_found = [err for err in outcome['errors'] if 'not found' in err]
    assert not_found
def test_audit_no_recent_ppts(monkeypatch):
    """An existing directory without any .pptx results in zero audited files."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import audit_recent_ppts

    with tempfile.TemporaryDirectory() as workdir:
        # A non-.pptx decoy that the scan must ignore.
        decoy = os.path.join(workdir, 'not_ppt.txt')
        with open(decoy, 'w') as handle:
            handle.write('hello')

        outcome = audit_recent_ppts(reports_dir=workdir)

        assert outcome['audited_files'] == []
        assert outcome['total_issues'] == 0
def test_audit_filter_old_files(monkeypatch):
    """Files older than the ``hours`` window must not be audited."""
    import time

    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import audit_recent_ppts

    with tempfile.TemporaryDirectory() as workdir:
        stale = os.path.join(workdir, 'old.pptx')
        with open(stale, 'w') as handle:
            handle.write('fake')

        # Push both atime and mtime two days into the past.
        two_days_ago = time.time() - 2 * 86400
        os.utime(stale, (two_days_ago, two_days_ago))

        outcome = audit_recent_ppts(reports_dir=workdir, hours=24)

        assert outcome['audited_files'] == []
def test_check_ppt_file_libreoffice_not_installed(monkeypatch):
    """A missing LibreOffice binary is reported as an error, not raised."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import PPTVisionService

    with tempfile.NamedTemporaryFile(suffix='.pptx', delete=False) as handle:
        handle.write(b'fake pptx')
        fake_deck = handle.name

    try:
        # Simulate the binary being absent from PATH.
        missing_binary = patch(
            'services.ppt_vision_service.subprocess.run',
            side_effect=FileNotFoundError('libreoffice'),
        )
        with missing_binary:
            outcome = PPTVisionService().check_ppt_file(fake_deck)
            assert outcome['success'] is False
            assert 'libreoffice not installed' in (outcome['error'] or '')
    finally:
        os.unlink(fake_deck)
def test_check_ppt_file_disabled():
    """With the flag unset ``check_ppt_file`` skips and reports the disabled marker."""
    from services.ppt_vision_service import PPTVisionService

    with tempfile.NamedTemporaryFile(suffix='.pptx', delete=False) as handle:
        handle.write(b'fake pptx')
        fake_deck = handle.name

    try:
        outcome = PPTVisionService().check_ppt_file(fake_deck)
        assert outcome['success'] is False
        assert 'PPT_VISION_ENABLED=false' in (outcome['error'] or '')
    finally:
        os.unlink(fake_deck)
def test_check_ppt_file_missing(monkeypatch):
    """A path that does not exist is reported with a 'not found' error."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import PPTVisionService

    outcome = PPTVisionService().check_ppt_file('/tmp/this_pptx_does_not_exist_xyz.pptx')

    assert outcome['success'] is False
    assert 'not found' in (outcome['error'] or '')
def test_push_telegram_skips_when_no_issues():
    """A summary with zero issues is not pushed (no "all clear" spam)."""
    from services.ppt_vision_service import push_ppt_audit_to_telegram

    clean_summary = {
        'total_issues': 0,
        'audited_files': [{'path': 'a.pptx', 'issues': 0}],
    }

    pushed = push_ppt_audit_to_telegram(clean_summary)

    assert pushed is False
def test_push_telegram_with_issues():
    """A summary that contains issues is pushed once, with file and issue count."""
    from services.ppt_vision_service import push_ppt_audit_to_telegram

    summary = {
        'total_issues': 3,
        'audited_files': [
            {
                'path': '/tmp/test.pptx',
                'slides_checked': 2,
                'issues': 3,
                'issues_by_slide': [(1, ['⚠️ 圖表被切', '⚠️ 文字溢出']), (2, ['⚠️ 配色衝突'])],
            },
        ],
    }
    with patch('services.telegram_templates._send_telegram_raw') as mock_send:
        result = push_ppt_audit_to_telegram(summary)

    assert result is True
    mock_send.assert_called_once()
    msg = mock_send.call_args[0][0]
    assert 'PPT 視覺審核' in msg
    # The former "'3 issues' in msg or '3' in msg" passed for almost any
    # message; pin the actual count text, the file name and the slide lines.
    assert '3 issues' in msg
    assert 'test.pptx' in msg
    assert 'Slide 1' in msg
    assert 'Slide 2' in msg