def run_ppt_vision_audit():
    """Daily 22:00 job (Phase 26): visual audit of freshly generated PPT decks.

    Asks ``audit_recent_ppts`` to check the ``.pptx`` files in ``reports/``
    that were modified during the last 24 hours (at most 10 files) and pushes
    a Telegram digest only when at least one issue was reported.

    Behaviour worth knowing:
      * When the ``PPT_VISION_ENABLED`` flag is off the job returns right away.
      * Per-file failures collected in ``summary['errors']`` (for example a
        failed LibreOffice conversion) are logged as warnings.  Previously
        they were dropped whenever ``total_issues`` was 0, so a broken
        toolchain looked exactly like "no issues found".
      * The job never raises; any unexpected exception is logged so the
        scheduler loop keeps running.
    """
    try:
        # Imported lazily so the scheduler can start even if the vision
        # service (or one of its dependencies) is unavailable.
        from services.ppt_vision_service import (
            audit_recent_ppts,
            is_ppt_vision_enabled,
            push_ppt_audit_to_telegram,
        )

        if not is_ppt_vision_enabled():
            logger.debug("[PPTVisionAudit] PPT_VISION_ENABLED is off, skipping")
            return

        summary = audit_recent_ppts(reports_dir='reports', hours=24, max_files=10)

        # Surface conversion / check failures even when no issue was counted.
        for err in summary.get('errors', []):
            logger.warning("[PPTVisionAudit] %s", err)

        if summary['total_issues'] > 0:
            pushed = push_ppt_audit_to_telegram(summary)
            logger.info(
                "[PPTVisionAudit] %d files, %d issues, telegram=%s",
                len(summary['audited_files']), summary['total_issues'], pushed,
            )
        else:
            logger.debug("[PPTVisionAudit] no issues found")
    except Exception as e:
        logger.error(f"[PPTVisionAudit] task failed: {e}", exc_info=True)
def check_ppt_file(self, pptx_path: str, max_slides: int = 5,
                   convert_timeout: int = 60) -> Dict[str, Any]:
    """Run the vision check on a whole ``.pptx`` file (Phase 26 hook).

    Steps:
      1. Render the deck to PNG with headless LibreOffice into a temp dir.
      2. Run ``self.check_image`` on the first ``max_slides`` images.
      3. Aggregate the reported issues per slide.
      4. Fail safe: a missing binary, a failed or timed-out conversion, or a
         deck that yields no image is reported through ``error`` instead of
         raising, so the caller's main flow is never blocked.

    NOTE(review): per the original author's remark, plain ``--convert-to png``
    renders only the first slide of a deck, so in practice a single image is
    usually checked — confirm before relying on ``max_slides``.

    Args:
        pptx_path: Path of the deck to check.
        max_slides: Upper bound on the number of rendered images to check.
        convert_timeout: Seconds to wait for the LibreOffice conversion.

    Returns:
        {
            'success': bool,           # True when >= 1 slide was checked
            'slides_checked': int,
            'total_issues': int,
            'issues_by_slide': [(slide_num, [issues...]), ...],
            'error': str | None,       # always set when success is False
        }
    """
    import os
    import re
    import shutil
    import subprocess
    import tempfile

    result: Dict[str, Any] = {
        'success': False, 'slides_checked': 0, 'total_issues': 0,
        'issues_by_slide': [], 'error': None,
    }

    if not self.is_available():
        result['error'] = 'PPT_VISION_ENABLED=false'
        return result

    if not os.path.isfile(pptx_path):
        result['error'] = f'pptx not found: {pptx_path}'
        return result

    def _natural_key(path: str) -> list:
        # Split on digit runs so "slide2.png" sorts before "slide10.png".
        # re.split with a capturing group alternates text / digits, hence the
        # parity test (odd positions are always the digit runs).
        tokens = re.split(r'(\d+)', os.path.basename(path))
        return [int(tok) if i % 2 else tok.lower() for i, tok in enumerate(tokens)]

    # Prefer whatever is on PATH; macOS / Windows installs usually expose
    # "soffice".  Fall back to the bare name so a missing binary still goes
    # through the FileNotFoundError branch below.
    binary = shutil.which('libreoffice') or shutil.which('soffice') or 'libreoffice'

    # 1. LibreOffice -> png
    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            proc = subprocess.run(
                [binary, '--headless', '--convert-to', 'png',
                 '--outdir', tmpdir, pptx_path],
                capture_output=True, timeout=convert_timeout,
            )
        except FileNotFoundError:
            result['error'] = 'libreoffice not installed (skip vision check)'
            return result
        except subprocess.TimeoutExpired:
            result['error'] = f'libreoffice convert timeout ({convert_timeout}s)'
            return result
        except Exception as e:
            result['error'] = f'{type(e).__name__}: {str(e)[:200]}'
            return result

        if proc.returncode != 0:
            # stderr may not be valid UTF-8; never let decoding raise here.
            stderr = proc.stderr.decode('utf-8', errors='replace')
            result['error'] = f'libreoffice convert failed: {stderr[:200]}'
            return result

        png_files = sorted(
            (os.path.join(tmpdir, name) for name in os.listdir(tmpdir)
             if name.lower().endswith('.png')),
            key=_natural_key,
        )

        if not png_files:
            result['error'] = 'libreoffice 未產出 png (可能需要 --convert-to png:impress_png_Export)'
            return result

        # 2. Vision-check the first N rendered images (must happen while the
        #    temp dir still exists).
        for idx, png in enumerate(png_files[:max_slides]):
            try:
                vr = self.check_image(png)
                if vr.success:
                    result['slides_checked'] += 1
                    if vr.issues_found:
                        result['total_issues'] += len(vr.issues_found)
                        result['issues_by_slide'].append((idx + 1, vr.issues_found))
            except Exception as exc:
                logger.warning("[PPTVision] slide %d check failed: %s", idx + 1, exc)

        result['success'] = result['slides_checked'] > 0
        if not result['success']:
            # Without this the caller sees success=False and error=None and
            # cannot tell that every check failed.
            result['error'] = 'no slide passed the vision check'
        return result
def audit_recent_ppts(reports_dir: str = 'reports', hours: int = 24,
                      max_files: int = 10) -> Dict[str, Any]:
    """Phase 26 hook for the daily 22:00 job: vision-check recent decks.

    Looks at the ``.pptx`` files directly inside ``reports_dir`` (no
    recursion), keeps those modified within the last ``hours`` hours, and
    runs ``PPTVisionService.check_ppt_file`` on the newest ``max_files``.

    Args:
        reports_dir: Directory the PPT files are written to.
        hours: Size of the look-back window, in hours.
        max_files: Maximum number of files to check in one run (keeps a
            single run short); values below 0 are treated as 0.

    Returns:
        {
            'audited_files': [{'path', 'slides_checked', 'issues',
                               'issues_by_slide', 'error'}, ...],
            'total_issues': int,
            'errors': [str, ...],   # also carries the "disabled" marker
        }
    """
    import os
    import time

    summary: Dict[str, Any] = {'audited_files': [], 'total_issues': 0, 'errors': []}

    if not is_ppt_vision_enabled():
        summary['errors'].append('PPT_VISION_ENABLED=false')
        return summary

    if not os.path.isdir(reports_dir):
        summary['errors'].append(f'{reports_dir} not found')
        return summary

    try:
        names = os.listdir(reports_dir)
    except OSError as exc:
        # e.g. permission denied: report it instead of raising.
        summary['errors'].append(
            f'{reports_dir}: {type(exc).__name__}: {str(exc)[:150]}')
        return summary

    # Collect decks modified inside the look-back window.
    cutoff = time.time() - hours * 3600
    recent = []
    for name in names:
        if not name.lower().endswith('.pptx'):
            continue
        full = os.path.join(reports_dir, name)
        try:
            # Read the mtime once: the file may vanish between two calls.
            mtime = os.path.getmtime(full)
        except OSError:
            continue
        # Ignore directories that merely happen to be named *.pptx.
        if mtime >= cutoff and os.path.isfile(full):
            recent.append((mtime, full))

    recent.sort(reverse=True)            # newest first
    recent = recent[:max(0, max_files)]  # a negative cap must not slice from the end

    svc = PPTVisionService()
    for _, path in recent:
        try:
            result = svc.check_ppt_file(path)
            entry = {
                'path': path,
                'slides_checked': result.get('slides_checked', 0),
                'issues': result.get('total_issues', 0),
                'issues_by_slide': result.get('issues_by_slide', []),
                'error': result.get('error'),
            }
            summary['audited_files'].append(entry)
            summary['total_issues'] += entry['issues']
            if entry['error']:
                summary['errors'].append(f"{path}: {entry['error']}")
        except Exception as exc:
            summary['errors'].append(f'{path}: {type(exc).__name__}: {str(exc)[:150]}')

    return summary
def push_ppt_audit_to_telegram(summary: Dict[str, Any]) -> bool:
    """Send the audit digest to Telegram, but only when issues were found.

    Nothing is sent for a clean run, so the chat is not flooded with
    "no problems" messages.  Per file at most 3 slides are listed, and per
    slide at most 2 issues (each cut to 120 characters).

    Args:
        summary: The dict returned by ``audit_recent_ppts``.

    Returns:
        True when the message was handed to the Telegram sender; False when
        there was nothing to report, the sender could not be imported, or
        sending failed.  Failures are reported only through the return value.
    """
    # Local import: the original body used ``os`` before importing it here.
    import os

    if summary['total_issues'] <= 0:
        return False
    try:
        from services.telegram_templates import _send_telegram_raw
    except Exception:
        return False

    # Telegram's sendMessage rejects text longer than 4096 characters.
    max_len = 4096

    lines = [f"🔍 PPT 視覺審核({len(summary['audited_files'])} 份)"]
    lines.append('━' * 18)
    for entry in summary['audited_files']:
        if entry.get('issues', 0) <= 0:
            continue
        fname = os.path.basename(entry['path'])
        lines.append(f"\n📊 {fname} ({entry.get('slides_checked', 0)} slides, "
                     f"{entry['issues']} issues)")
        for slide_num, issues in entry.get('issues_by_slide', [])[:3]:
            for iss in issues[:2]:
                lines.append(f" Slide {slide_num}: {str(iss)[:120]}")

    msg = '\n'.join(lines)
    if len(msg) > max_len:
        # Trim rather than lose the whole digest to an API error.
        msg = msg[:max_len - 1] + '…'

    try:
        sent = _send_telegram_raw(msg)
    except Exception:
        return False
    # NOTE(review): the sender's return contract is not visible here; an
    # explicit False is treated as failure, anything else (including None)
    # as success — confirm against services.telegram_templates.
    return sent is not False
"""
tests/test_ppt_vision_audit.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 26 — checks for the PPT vision audit hook
"""

import os
import tempfile
from unittest.mock import patch, MagicMock

import pytest


@pytest.fixture(autouse=True)
def _reset_env(monkeypatch):
    """Start every test with the PPT_VISION_ENABLED flag unset."""
    monkeypatch.delenv('PPT_VISION_ENABLED', raising=False)
    yield


def test_audit_disabled_by_default():
    """With the flag off the audit is skipped and says so in ``errors``."""
    from services.ppt_vision_service import audit_recent_ppts

    summary = audit_recent_ppts(reports_dir='/tmp')
    assert summary['total_issues'] == 0
    assert any('PPT_VISION_ENABLED=false' in e for e in summary['errors'])


def test_audit_missing_dir(monkeypatch):
    """A missing reports directory is reported, not raised."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import audit_recent_ppts

    summary = audit_recent_ppts(reports_dir='/tmp/nonexistent_xyz_ppts')
    assert summary['total_issues'] == 0
    assert any('not found' in e for e in summary['errors'])


def test_audit_no_recent_ppts(monkeypatch):
    """The directory exists but holds no .pptx, so nothing is audited."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import audit_recent_ppts

    with tempfile.TemporaryDirectory() as tmpdir:
        # A non-pptx file that must be ignored.
        with open(os.path.join(tmpdir, 'not_ppt.txt'), 'w') as f:
            f.write('hello')
        summary = audit_recent_ppts(reports_dir=tmpdir)

    assert summary['audited_files'] == []
    assert summary['total_issues'] == 0


def test_audit_filter_old_files(monkeypatch):
    """Files older than the ``hours`` window must not be audited."""
    import time

    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import audit_recent_ppts

    with tempfile.TemporaryDirectory() as tmpdir:
        old_path = os.path.join(tmpdir, 'old.pptx')
        with open(old_path, 'w') as f:
            f.write('fake')
        # Push the mtime two days into the past.
        old_time = time.time() - 2 * 86400
        os.utime(old_path, (old_time, old_time))

        summary = audit_recent_ppts(reports_dir=tmpdir, hours=24)

    assert summary['audited_files'] == []


def test_check_ppt_file_libreoffice_not_installed(monkeypatch):
    """LibreOffice missing: the check is skipped fail-safe and never raises."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import PPTVisionService

    with tempfile.NamedTemporaryFile(suffix='.pptx', delete=False) as f:
        f.write(b'fake pptx')
        ppt_path = f.name

    try:
        # check_ppt_file imports subprocess inside the function, so the
        # service module has no ``subprocess`` attribute to patch; patch the
        # real module's ``run`` instead.
        with patch('subprocess.run',
                   side_effect=FileNotFoundError('libreoffice')):
            svc = PPTVisionService()
            result = svc.check_ppt_file(ppt_path)

        assert result['success'] is False
        assert 'libreoffice not installed' in (result['error'] or '')
    finally:
        os.unlink(ppt_path)


def test_check_ppt_file_disabled():
    """With the flag off ``check_ppt_file`` is skipped as well."""
    from services.ppt_vision_service import PPTVisionService

    with tempfile.NamedTemporaryFile(suffix='.pptx', delete=False) as f:
        f.write(b'fake pptx')
        ppt_path = f.name

    try:
        svc = PPTVisionService()
        result = svc.check_ppt_file(ppt_path)
        assert result['success'] is False
        assert 'PPT_VISION_ENABLED=false' in (result['error'] or '')
    finally:
        os.unlink(ppt_path)


def test_check_ppt_file_missing(monkeypatch):
    """A path that does not exist yields a 'not found' error."""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
    from services.ppt_vision_service import PPTVisionService

    svc = PPTVisionService()
    result = svc.check_ppt_file('/tmp/this_pptx_does_not_exist_xyz.pptx')
    assert result['success'] is False
    assert 'not found' in (result['error'] or '')


def test_push_telegram_skips_when_no_issues():
    """Nothing is pushed for a clean run (avoids 'all good' spam)."""
    from services.ppt_vision_service import push_ppt_audit_to_telegram

    summary = {'total_issues': 0, 'audited_files': [{'path': 'a.pptx', 'issues': 0}]}
    assert push_ppt_audit_to_telegram(summary) is False


def test_push_telegram_with_issues():
    """A run with issues is pushed and the digest names file and issues."""
    from services.ppt_vision_service import push_ppt_audit_to_telegram

    summary = {
        'total_issues': 3,
        'audited_files': [
            {
                'path': '/tmp/test.pptx',
                'slides_checked': 2,
                'issues': 3,
                'issues_by_slide': [(1, ['⚠️ 圖表被切', '⚠️ 文字溢出']), (2, ['⚠️ 配色衝突'])],
            },
        ],
    }
    with patch('services.telegram_templates._send_telegram_raw') as mock_send:
        result = push_ppt_audit_to_telegram(summary)

    assert result is True
    mock_send.assert_called_once()
    msg = mock_send.call_args[0][0]
    assert 'PPT 視覺審核' in msg
    # The old "'3 issues' in msg or '3' in msg" passed for almost any text.
    assert 'test.pptx' in msg
    assert '3 issues' in msg
    assert 'Slide 2: ⚠️ 配色衝突' in msg