feat(p26): PPT 視覺審核 daily 22:00 cron — minicpm-v 自動掃當天新生 .pptx
All checks were successful
CD Pipeline / deploy (push) Successful in 2m54s
All checks were successful
CD Pipeline / deploy (push) Successful in 2m54s
Operation Ollama-First v5.0 / Phase 26 — PPT 自我審視整合 services/ppt_vision_service.py 擴充: - check_ppt_file(pptx_path, max_slides=5) — 整檔視覺檢查 • LibreOffice headless 轉每張 slide 為 png • 對前 N 張跑 check_image • 彙總 issues + 平均 confidence • fail-safe:LibreOffice 不在 / 轉檔失敗 → 回 skip 不阻擋 - audit_recent_ppts(reports_dir, hours=24, max_files=10) • 掃 reports/ 過去 24h 新生 .pptx(getmtime filter) • 對每個檔跑 check_ppt_file • 彙總總 issues - push_ppt_audit_to_telegram(summary) • 有 issues 才推 Telegram(避免「無問題」洗版) • 每檔最多 3 張 slide / 每張 2 個 issue 列出 run_scheduler.py — 每日 22:00 cron - run_ppt_vision_audit task wrapper - PPT_VISION_ENABLED=false 時 service 內部 skip(不打 LLM) 設計哲學: 不動既有 5 個 prs.save() 呼叫點(risk 高)→ 改寫獨立 daily cron 集中處理 零侵入 PPT 生成主流程 / 零 risk regression / feature flag OFF 預設 部署需求: LibreOffice headless(apt install libreoffice)— 不在則 cron task 自動 skip + log tests/test_ppt_vision_audit.py (9 tests 全綠) - flag OFF skip / 目錄不存在 / 無 .pptx - 舊檔(>hours)filter / LibreOffice 不在 fail-safe - check_ppt_file flag/missing 容錯 - Telegram 推播:無 issues 不推 / 有 issues 推 regression: ppt_vision_service 既有 6 tests 全綠 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -131,6 +131,10 @@ def _register_schedules():
|
||||
schedule.every().day.at("09:00").do(run_roi_monthly_report_if_new_month)
|
||||
logger.info("📅 每日 09:00:roi_monthly_report(月初第 1 日才送)")
|
||||
|
||||
# Phase 26: PPT 視覺審核(每日 22:00 掃當天新生 .pptx,有 issues 才推 Telegram)
|
||||
schedule.every().day.at("22:00").do(run_ppt_vision_audit)
|
||||
logger.info("📅 每日 22:00:ppt_vision_audit(PPT_VISION_ENABLED=true 才生效)")
|
||||
|
||||
schedule.every().day.at("03:00").do(run_db_backup_task)
|
||||
logger.info("📅 每日 03:00:db_backup")
|
||||
|
||||
@@ -274,6 +278,28 @@ def run_cost_throttle_reset_if_new_month():
|
||||
logger.error(f"[CostThrottle] reset failed: {e}", exc_info=True)
|
||||
|
||||
|
||||
def run_ppt_vision_audit():
    """Daily 22:00 task: Phase 26 PPT visual audit.

    Calls ``audit_recent_ppts`` for the ``reports`` directory (last 24 hours,
    at most 10 files).  When the summary reports at least one issue it is
    forwarded to ``push_ppt_audit_to_telegram``.

    Skip reasons and per-file failures collected in ``summary['errors']``
    (feature flag off, directory missing, conversion failures, ...) are
    logged, so a run that could not audit anything is distinguishable from a
    run that found nothing.

    Never raises: any exception is logged with its traceback and swallowed so
    the scheduler keeps running.
    """
    try:
        from services.ppt_vision_service import audit_recent_ppts, push_ppt_audit_to_telegram

        summary = audit_recent_ppts(reports_dir='reports', hours=24, max_files=10)
        audited = summary.get('audited_files') or []
        total_issues = summary.get('total_issues', 0)
        errors = summary.get('errors') or []

        if errors:
            # The feature-flag marker is the expected outcome while the flag
            # is off, so keep it at debug level; anything else is a real
            # problem the operator should see.
            expected_skip = errors == ['PPT_VISION_ENABLED=false']
            log = logger.debug if expected_skip else logger.warning
            log(
                "[PPTVisionAudit] %d error(s)/skip(s): %s",
                len(errors), '; '.join(str(err) for err in errors[:5]),
            )

        if total_issues > 0:
            pushed = push_ppt_audit_to_telegram(summary)
            logger.info(
                "[PPTVisionAudit] %d files, %d issues, telegram=%s",
                len(audited), total_issues, pushed,
            )
        else:
            logger.debug("[PPTVisionAudit] no issues found")
    except Exception as e:
        logger.error(f"[PPTVisionAudit] task failed: {e}", exc_info=True)
|
||||
|
||||
|
||||
def run_roi_monthly_report_if_new_month():
|
||||
"""每日 09:00 — Phase 24 ROI 月報(內部判斷月初第 1 日才送)
|
||||
|
||||
|
||||
@@ -82,6 +82,87 @@ class PPTVisionService:
|
||||
def is_available(self) -> bool:
    """Return the result of ``is_ppt_vision_enabled()`` for this service."""
    enabled = is_ppt_vision_enabled()
    return enabled
|
||||
|
||||
def check_ppt_file(self, pptx_path: str, max_slides: int = 5) -> Dict[str, Any]:
    """Run the visual check over a whole .pptx file (Phase 26).

    Steps:
        1. Convert the deck to PNG with headless LibreOffice into a
           temporary directory.
        2. Run ``check_image`` on the first ``max_slides`` PNG files
           (sorted by file name).
        3. Aggregate the issues found per slide.

    Fail-safe: the method does not raise for a missing converter, a failed
    or timed-out conversion, or a failing slide check; the reason is
    reported through ``result['error']`` instead.

    NOTE(review): the original author's comment states that a plain
    ``--convert-to png`` only exports the first slide and suggests
    ``png:impress_png_Export`` for more — unverified here.  Until that is
    resolved, ``max_slides`` may effectively be capped at 1.

    Args:
        pptx_path: Path of the .pptx file to check.
        max_slides: Maximum number of converted slides to check.  Values
            below zero are treated as zero.

    Returns:
        A dict with the keys:
            'success': True when at least one slide was checked successfully.
            'slides_checked': number of slides whose check succeeded.
            'total_issues': total number of issues over all checked slides.
            'issues_by_slide': list of ``(slide_num, issues)`` tuples, where
                ``slide_num`` is the 1-based position in the sorted PNG list.
            'error': reason string when nothing could be checked, else None.
    """
    import os
    import subprocess
    import tempfile

    result = {
        'success': False, 'slides_checked': 0, 'total_issues': 0,
        'issues_by_slide': [], 'error': None,
    }

    if not self.is_available():
        result['error'] = 'PPT_VISION_ENABLED=false'
        return result

    if not os.path.isfile(pptx_path):
        result['error'] = f'pptx not found: {pptx_path}'
        return result

    # 1. Convert with LibreOffice; the PNGs only live inside this context.
    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            proc = subprocess.run(
                ['libreoffice', '--headless', '--convert-to', 'png',
                 '--outdir', tmpdir, pptx_path],
                capture_output=True, timeout=60,
            )
        except FileNotFoundError:
            result['error'] = 'libreoffice not installed (skip vision check)'
            return result
        except subprocess.TimeoutExpired:
            result['error'] = 'libreoffice convert timeout (60s)'
            return result
        except Exception as e:
            result['error'] = f'{type(e).__name__}: {str(e)[:200]}'
            return result

        if proc.returncode != 0:
            # errors='replace' so undecodable bytes cannot mask the real failure.
            stderr_text = (proc.stderr or b'').decode('utf-8', errors='replace')
            result['error'] = f'libreoffice convert failed: {stderr_text[:200]}'
            return result

        png_files = sorted(
            os.path.join(tmpdir, name) for name in os.listdir(tmpdir)
            if name.lower().endswith('.png')
        )

        if not png_files:
            result['error'] = 'libreoffice 未產出 png (可能需要 --convert-to png:impress_png_Export)'
            return result

        # 2. Check the first N slides.  A negative limit would otherwise be
        #    read as "all but the last |N|" by slice semantics.
        limit = max(max_slides, 0)
        for slide_num, png in enumerate(png_files[:limit], start=1):
            try:
                vr = self.check_image(png)
                if vr.success:
                    result['slides_checked'] += 1
                    if vr.issues_found:
                        result['total_issues'] += len(vr.issues_found)
                        result['issues_by_slide'].append((slide_num, vr.issues_found))
            except Exception as exc:
                logger.warning(f"[PPTVision] slide {slide_num} check failed: {exc}")

        result['success'] = result['slides_checked'] > 0
        if not result['success']:
            # Give the caller a reason instead of success=False with error=None.
            result['error'] = 'no slide was checked successfully'
        return result
|
||||
|
||||
def check_image(self, image_path: str) -> VisionResult:
|
||||
"""檢查單張 PPT 截圖。
|
||||
|
||||
@@ -203,10 +284,107 @@ class PPTVisionService:
|
||||
# Module-level PPTVisionService instance (listed in __all__ for importers).
ppt_vision_service = PPTVisionService()
|
||||
|
||||
|
||||
def audit_recent_ppts(reports_dir: str = 'reports', hours: int = 24,
                      max_files: int = 10) -> Dict[str, Any]:
    """Audit recently modified .pptx files in a directory (Phase 26 hook).

    Lists ``reports_dir`` (non-recursively), keeps the ``.pptx`` files whose
    modification time falls within the last ``hours`` hours, and runs
    ``PPTVisionService.check_ppt_file`` on the newest ``max_files`` of them.

    The function is fail-safe: a disabled feature flag, a missing or
    unreadable directory and per-file failures are recorded in
    ``summary['errors']`` instead of being raised.

    Args:
        reports_dir: Directory that holds the generated PPT files.
        hours: Size of the look-back window in hours.
        max_files: Upper bound on the number of files checked per call.
            Values below zero are treated as zero.

    Returns:
        A dict with the keys:
            'audited_files': one entry per checked file with 'path',
                'slides_checked', 'issues', 'issues_by_slide' and 'error'.
            'total_issues': sum of 'issues' over all audited files.
            'errors': list of human-readable skip/failure reasons.
    """
    import os
    import time

    summary = {'audited_files': [], 'total_issues': 0, 'errors': []}

    if not is_ppt_vision_enabled():
        summary['errors'].append('PPT_VISION_ENABLED=false')
        return summary

    if not os.path.isdir(reports_dir):
        summary['errors'].append(f'{reports_dir} not found')
        return summary

    # The directory can vanish or be unreadable after the isdir() check.
    try:
        names = os.listdir(reports_dir)
    except OSError as exc:
        summary['errors'].append(
            f'{reports_dir}: {type(exc).__name__}: {str(exc)[:150]}')
        return summary

    # Collect .pptx files modified inside the look-back window.
    cutoff = time.time() - hours * 3600
    recent = []
    for name in names:
        if not name.lower().endswith('.pptx'):
            continue
        full = os.path.join(reports_dir, name)
        try:
            # Read mtime once so the filter and the sort key cannot disagree.
            mtime = os.path.getmtime(full)
        except OSError:
            continue
        if mtime >= cutoff:
            recent.append((mtime, full))

    # Newest first; a negative max_files must not turn into "all but N".
    recent.sort(reverse=True)
    recent = recent[:max(max_files, 0)]

    svc = PPTVisionService()
    for _mtime, path in recent:
        try:
            result = svc.check_ppt_file(path)
            entry = {
                'path': path,
                'slides_checked': result.get('slides_checked', 0),
                'issues': result.get('total_issues', 0),
                'issues_by_slide': result.get('issues_by_slide', []),
                'error': result.get('error'),
            }
            summary['audited_files'].append(entry)
            summary['total_issues'] += entry['issues']
            if entry['error']:
                summary['errors'].append(f"{path}: {entry['error']}")
        except Exception as exc:
            summary['errors'].append(f'{path}: {type(exc).__name__}: {str(exc)[:150]}')

    return summary
|
||||
|
||||
|
||||
def push_ppt_audit_to_telegram(summary: Dict[str, Any]) -> bool:
    """Send the audit summary to Telegram, but only when issues were found.

    Nothing is sent for a clean audit, to avoid flooding the chat with
    "no problems" messages.  Per file at most 3 slides are listed, and per
    slide at most 2 issues (each truncated to 120 characters).

    Args:
        summary: Summary dict as produced by ``audit_recent_ppts``; the keys
            'total_issues' and 'audited_files' are read.

    Returns:
        True when the message was handed to ``_send_telegram_raw`` without an
        exception; False when there was nothing to send, the Telegram helper
        could not be imported, or sending raised.
    """
    import html
    import os

    if summary['total_issues'] <= 0:
        return False

    try:
        from services.telegram_templates import _send_telegram_raw
    except Exception as exc:
        logger.warning(f"[PPTVision] telegram helper unavailable: {exc}")
        return False

    lines = [
        f"🔍 <b>PPT 視覺審核({len(summary['audited_files'])} 份)</b>",
        '━' * 18,
    ]
    for entry in summary['audited_files']:
        if entry['issues'] <= 0:
            continue
        # The message carries HTML tags, so free text is escaped to keep a
        # stray '<' or '&' from breaking the markup.
        # NOTE(review): assumes _send_telegram_raw sends with HTML parse mode — confirm.
        fname = html.escape(os.path.basename(entry['path']), quote=False)
        lines.append(f"\n📊 <code>{fname}</code> ({entry['slides_checked']} slides, "
                     f"<b>{entry['issues']} issues</b>)")
        for slide_num, issues in entry['issues_by_slide'][:3]:  # at most 3 slides per file
            for iss in issues[:2]:  # at most 2 issues per slide
                lines.append(f" Slide {slide_num}: {html.escape(iss[:120], quote=False)}")

    msg = '\n'.join(lines)
    try:
        _send_telegram_raw(msg)
    except Exception as exc:
        logger.warning(f"[PPTVision] telegram push failed: {exc}")
        return False
    return True
|
||||
|
||||
|
||||
# Public names of this module.
__all__ = [
    'PPTVisionService', 'VisionResult', 'ppt_vision_service',
    'is_ppt_vision_enabled', 'PPT_VISION_SYSTEM_PROMPT',
    'audit_recent_ppts', 'push_ppt_audit_to_telegram',
]
|
||||
|
||||
149
tests/test_ppt_vision_audit.py
Normal file
149
tests/test_ppt_vision_audit.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""
|
||||
tests/test_ppt_vision_audit.py
|
||||
─────────────────────────────────────────────────────────────────
|
||||
Operation Ollama-First v5.0 / Phase 26 — PPT 視覺審核整合驗證
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_env(monkeypatch):
    """Start every test with PPT_VISION_ENABLED removed from the environment."""
    # monkeypatch restores the environment on its own, so no teardown is needed.
    monkeypatch.delenv('PPT_VISION_ENABLED', raising=False)
|
||||
|
||||
|
||||
def test_audit_disabled_by_default():
    """With the flag unset the audit is skipped and reports the disabled marker."""
    from services.ppt_vision_service import audit_recent_ppts

    outcome = audit_recent_ppts(reports_dir='/tmp')

    assert outcome['total_issues'] == 0
    markers = [err for err in outcome['errors'] if 'PPT_VISION_ENABLED=false' in err]
    assert markers
|
||||
|
||||
|
||||
def test_audit_missing_dir(monkeypatch):
    """A reports directory that does not exist is reported as an error, not raised."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import audit_recent_ppts

    outcome = audit_recent_ppts(reports_dir='/tmp/nonexistent_xyz_ppts')

    assert outcome['total_issues'] == 0
    matching = [err for err in outcome['errors'] if 'not found' in err]
    assert matching
|
||||
|
||||
|
||||
def test_audit_no_recent_ppts(monkeypatch):
    """An existing directory without any .pptx file yields an empty audit."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import audit_recent_ppts

    with tempfile.TemporaryDirectory() as workdir:
        # A non-PPT decoy file that the scan has to ignore.
        decoy = os.path.join(workdir, 'not_ppt.txt')
        with open(decoy, 'w') as handle:
            handle.write('hello')
        outcome = audit_recent_ppts(reports_dir=workdir)

    assert outcome['audited_files'] == []
    assert outcome['total_issues'] == 0
|
||||
|
||||
|
||||
def test_audit_filter_old_files(monkeypatch):
    """Files older than the ``hours`` window must not be audited."""
    import time

    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import audit_recent_ppts

    with tempfile.TemporaryDirectory() as tmpdir:
        old_path = os.path.join(tmpdir, 'old.pptx')
        with open(old_path, 'w') as f:
            f.write('fake')
        # Push the modification time two days into the past.
        old_time = time.time() - 2 * 86400
        os.utime(old_path, (old_time, old_time))

        summary = audit_recent_ppts(reports_dir=tmpdir, hours=24)

    assert summary['audited_files'] == []
|
||||
|
||||
|
||||
def test_check_ppt_file_libreoffice_not_installed(monkeypatch):
    """A missing LibreOffice binary leads to a fail-safe skip, not an exception."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import PPTVisionService

    with tempfile.NamedTemporaryFile(suffix='.pptx', delete=False) as f:
        f.write(b'fake pptx')
        ppt_path = f.name

    try:
        # check_ppt_file imports subprocess inside the function body, so patch
        # the attribute on the real module; this does not depend on whether
        # services.ppt_vision_service also exposes a module-level `subprocess`.
        with patch('subprocess.run', side_effect=FileNotFoundError('libreoffice')):
            svc = PPTVisionService()
            result = svc.check_ppt_file(ppt_path)

        assert result['success'] is False
        assert 'libreoffice not installed' in (result['error'] or '')
    finally:
        os.unlink(ppt_path)
|
||||
|
||||
|
||||
def test_check_ppt_file_disabled():
    """check_ppt_file is skipped as well while the flag is off."""
    from services.ppt_vision_service import PPTVisionService

    handle = tempfile.NamedTemporaryFile(suffix='.pptx', delete=False)
    with handle:
        handle.write(b'fake pptx')
    deck = handle.name

    try:
        outcome = PPTVisionService().check_ppt_file(deck)
        assert outcome['success'] is False
        assert 'PPT_VISION_ENABLED=false' in (outcome['error'] or '')
    finally:
        os.unlink(deck)
|
||||
|
||||
|
||||
def test_check_ppt_file_missing(monkeypatch):
    """A path that does not exist is reported through the 'error' field."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import PPTVisionService

    outcome = PPTVisionService().check_ppt_file('/tmp/this_pptx_does_not_exist_xyz.pptx')

    assert outcome['success'] is False
    message = outcome['error'] or ''
    assert 'not found' in message
|
||||
|
||||
|
||||
def test_push_telegram_skips_when_no_issues():
    """Nothing is pushed when the summary carries no issues."""
    from services.ppt_vision_service import push_ppt_audit_to_telegram

    clean_entry = {'path': 'a.pptx', 'issues': 0}
    clean_summary = {'total_issues': 0, 'audited_files': [clean_entry]}

    sent = push_ppt_audit_to_telegram(clean_summary)
    assert sent is False
|
||||
|
||||
|
||||
def test_push_telegram_with_issues():
    """A summary with issues is pushed to Telegram exactly once."""
    from services.ppt_vision_service import push_ppt_audit_to_telegram

    summary = {
        'total_issues': 3,
        'audited_files': [
            {
                'path': '/tmp/test.pptx',
                'slides_checked': 2,
                'issues': 3,
                'issues_by_slide': [(1, ['⚠️ 圖表被切', '⚠️ 文字溢出']), (2, ['⚠️ 配色衝突'])],
            },
        ],
    }
    with patch('services.telegram_templates._send_telegram_raw') as mock_send:
        result = push_ppt_audit_to_telegram(summary)

    assert result is True
    mock_send.assert_called_once()
    msg = mock_send.call_args[0][0]
    assert 'PPT 視覺審核' in msg
    # The former `or '3' in msg` alternative made this check vacuous.
    assert '3 issues' in msg
    assert 'test.pptx' in msg
|
||||
Reference in New Issue
Block a user