# ewoooc/services/ppt_vision_service.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
services/ppt_vision_service.py
Operation Ollama-First v5.0 / Phase 14 — PPT 視覺自審

設計原則：
- 用 minicpm-v（GCP Primary 已拉，5.5GB）對 PPT 截圖做品質檢查
- 替代 qwen2-vl:7b（Ollama registry 暫無）
- 用途：PPT 生成後自動跑視覺檢查，找：
    1. 圖表 layout 異常（被切掉、重疊）
    2. 文字溢出框
    3. 空白區塊（資料未填滿）
    4. 配色衝突
- feature flag 由部署環境控制；正式 compose 預設 ON，程式本身仍 fail-safe
- 失敗自動 skip（不阻擋 PPT 生成主流程）
"""

from __future__ import annotations
import os
import time
import base64
import json
import logging
import shutil
import threading
from io import BytesIO
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional, Dict, Any, List, Sequence

logger = logging.getLogger(__name__)

# ─────────────────────────────────────────────────────────────────────────────
# Feature flag + configuration
# ─────────────────────────────────────────────────────────────────────────────
# NOTE: every value in this section is read from the environment once, when the
# module is imported; a non-integer value in any of the int(...) settings makes
# the import itself raise ValueError.
# Model tag handed to the Ollama service for slide inspection.
PPT_VISION_MODEL = os.getenv('PPT_VISION_MODEL', 'minicpm-v:latest')
# Forwarded as the `timeout` argument of each vision generate call
# (presumably seconds — confirm against OllamaService.generate).
PPT_VISION_TIMEOUT = int(os.getenv('PPT_VISION_TIMEOUT', '120'))
# Default number of converted slide images inspected per deck.
PPT_VISION_MAX_SLIDES = int(os.getenv('PPT_VISION_MAX_SLIDES', '1'))
# Bounding box (width and height) used when thumbnailing a slide image.
PPT_VISION_IMAGE_MAX_EDGE = int(os.getenv('PPT_VISION_IMAGE_MAX_EDGE', '1280'))
# JPEG quality for the compressed slide image; clamped to 50..95 where it is used.
PPT_VISION_IMAGE_QUALITY = int(os.getenv('PPT_VISION_IMAGE_QUALITY', '82'))
# Held by the background audit thread for the whole duration of a run.
_AUDIT_LOCK = threading.Lock()
# Most recent audit run recorded by this process (None until one is recorded).
_LAST_AUDIT_RUN: Dict[str, Any] | None = None
# A queued/running record older than this many seconds is no longer treated as active.
_ACTIVE_AUDIT_TTL_SECONDS = int(os.getenv('PPT_VISION_ACTIVE_TTL_SECONDS', '7200'))


def is_ppt_vision_enabled() -> bool:
    """Report whether the ``PPT_VISION_ENABLED`` feature flag is switched on.

    The environment is consulted on every call, so a flag changed after import
    is picked up. Surrounding whitespace and letter case are ignored; only
    ``true``, ``1``, ``yes`` and ``on`` count as enabled.
    """
    truthy_values = ('true', '1', 'yes', 'on')
    raw_flag = os.getenv('PPT_VISION_ENABLED', 'false')
    normalized = raw_flag.strip().lower()
    return normalized in truthy_values


def get_ppt_vision_runtime_status() -> Dict[str, Any]:
    """Describe whether the PPT vision pipeline can run, and why not if it cannot.

    Looks at the ``PPT_VISION_ENABLED`` flag, the configured model name and the
    presence of a LibreOffice binary on ``PATH``. No model call is made here.

    Returns:
        A dict carrying the overall ``ready`` verdict, the individual
        ``readiness_checks``, human-readable ``blockers`` / ``next_actions``
        and a ``checked_at`` timestamp.
    """
    raw_flag = os.getenv('PPT_VISION_ENABLED')
    flag_on = is_ppt_vision_enabled()
    model_name = os.getenv('PPT_VISION_MODEL', PPT_VISION_MODEL)
    converter_bin = shutil.which('libreoffice') or shutil.which('soffice')
    has_converter = bool(converter_bin)
    is_ready = flag_on and has_converter
    flag_display = raw_flag if raw_flag is not None else '未設定'

    # Blockers and follow-up actions are collected together, one condition at a time.
    blockers = []
    next_actions = []
    if not flag_on:
        blockers.append('PPT_VISION_ENABLED 未設定為 true')
        next_actions.append('在 momo-app / scheduler 環境設定 PPT_VISION_ENABLED=true，重新 recreate 相關 app 容器。')
    if not has_converter:
        blockers.append('容器內缺少 LibreOffice，無法轉換 PPT 做視覺審核')
        next_actions.append('確認映像已安裝 LibreOffice Impress，完成 rebuild 後再重啟 momo-app / scheduler。')
    if is_ready:
        next_actions.append('環境已就緒，可在本頁對最近 PPTX 立即補跑視覺 QA。')

    if flag_on:
        flag_status, flag_detail = 'ready', '已允許背景視覺 QA 排程與手動補跑。'
    else:
        flag_status, flag_detail = 'error', '目前會阻擋立即視覺 QA 按鈕。'
    feature_flag_check = {
        'key': 'feature_flag',
        'label': '功能開關',
        'value': f"PPT_VISION_ENABLED={flag_display}",
        'status': flag_status,
        'detail': flag_detail,
    }

    if has_converter:
        converter_status, converter_detail = 'ready', '可將 PPTX 轉成投影片截圖。'
    else:
        converter_status, converter_detail = 'error', '缺轉檔器時無法建立視覺模型輸入。'
    converter_check = {
        'key': 'converter',
        'label': '轉檔器',
        'value': converter_bin or 'not found',
        'status': converter_status,
        'detail': converter_detail,
    }

    model_check = {
        'key': 'vision_model',
        'label': '視覺模型',
        'value': model_name,
        'status': 'ready' if model_name else 'planned',
        'detail': '推理時走 Ollama-first 三主機 fallback；頁面載入不同步打模型。',
    }

    readiness_checks = [feature_flag_check, converter_check, model_check]
    passed_checks = [check for check in readiness_checks if check['status'] == 'ready']

    if is_ready:
        status_label = '可執行'
        summary_text = '視覺 QA runtime 已具備功能開關、轉檔器與模型設定。'
    else:
        status_label = '環境未就緒'
        summary_text = '視覺 QA runtime 仍有必要條件未通過。'

    return {
        'enabled': flag_on,
        'env_value': raw_flag if raw_flag is not None else '未設定（預設 false）',
        'model': model_name,
        'converter': converter_bin,
        'converter_ready': has_converter,
        'blockers': blockers,
        'ready': is_ready,
        'ready_count': len(passed_checks),
        'check_count': len(readiness_checks),
        'status_label': status_label,
        'summary': summary_text,
        'checked_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'readiness_checks': readiness_checks,
        'next_actions': next_actions,
    }


def _audit_state_path() -> str:
    """Return the path of the JSON file that stores the last audit run.

    ``PPT_VISION_STATE_PATH`` wins when set; otherwise the file lives in
    ``DATA_DIR`` (default: ``<cwd>/data``) as ``ppt_vision_audit_status.json``.
    """
    cwd_data_dir = os.path.join(os.getcwd(), 'data')
    data_root = os.getenv('DATA_DIR', cwd_data_dir)
    default_state_file = os.path.join(data_root, 'ppt_vision_audit_status.json')
    return os.getenv('PPT_VISION_STATE_PATH', default_state_file)


def _now_label() -> str:
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    stamp_format = '%Y-%m-%d %H:%M:%S'
    return time.strftime(stamp_format, time.localtime())


def _read_persisted_audit_run() -> Dict[str, Any] | None:
    """Load the persisted audit-run record from disk.

    Returns:
        The stored dict, or ``None`` when the state file is missing, does not
        hold a JSON object, or cannot be read (the failure is logged at debug
        level and otherwise ignored).
    """
    state_file = _audit_state_path()
    try:
        if os.path.isfile(state_file):
            with open(state_file, 'r', encoding='utf-8') as stream:
                loaded = json.load(stream)
            if isinstance(loaded, dict):
                return loaded
        return None
    except Exception:
        logger.debug("[PPTVision] read audit state failed", exc_info=True)
        return None


def _write_persisted_audit_run(run: Dict[str, Any]) -> None:
    """Persist an audit-run record to the state file (best effort).

    The record is written to a temporary file next to the target and then
    moved into place with ``os.replace`` so readers never see a half-written
    file. Any failure is logged at debug level and swallowed: losing the
    status snapshot must not break the audit itself.

    Args:
        run: JSON-serialisable audit-run record.
    """
    path = _audit_state_path()
    directory = os.path.dirname(path)
    # Per-process / per-thread temp name so concurrent writers cannot clobber
    # each other's partially written file.
    tmp_path = f"{path}.{os.getpid()}.{threading.get_ident()}.tmp"
    try:
        # os.makedirs('') raises; a bare filename means "current directory".
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(tmp_path, 'w', encoding='utf-8') as handle:
            json.dump(run, handle, ensure_ascii=False)
        os.replace(tmp_path, path)
    except Exception:
        logger.debug("[PPTVision] write audit state failed", exc_info=True)
        # Do not leave a stale temp file behind after a failed write.
        try:
            os.remove(tmp_path)
        except OSError:
            # Nothing to clean up (never created, or already moved/removed).
            pass


def _record_audit_run(run: Dict[str, Any]) -> Dict[str, Any]:
    """Store a copy of ``run`` as the latest audit run, in memory and on disk.

    ``updated_at`` and ``pid`` are filled in with the current time and the
    current process id when the caller left them empty.

    Returns:
        The stored copy (the caller's dict is not modified).
    """
    global _LAST_AUDIT_RUN
    snapshot = dict(run)
    if not snapshot.get('updated_at'):
        snapshot['updated_at'] = _now_label()
    if not snapshot.get('pid'):
        snapshot['pid'] = os.getpid()
    _LAST_AUDIT_RUN = snapshot
    _write_persisted_audit_run(snapshot)
    return snapshot


def _load_last_audit_run() -> Dict[str, Any] | None:
    """Return the freshest known audit run, from memory or from disk.

    When both copies exist, the one with the larger ``updated_at`` string
    wins; on a tie the persisted copy is preferred.
    """
    on_disk = _read_persisted_audit_run()
    in_memory = _LAST_AUDIT_RUN
    if not in_memory:
        return on_disk
    if not on_disk:
        return in_memory
    disk_stamp = str(on_disk.get('updated_at') or '')
    memory_stamp = str(in_memory.get('updated_at') or '')
    # The fixed-width timestamp format makes string comparison chronological.
    return on_disk if disk_stamp >= memory_stamp else in_memory


def _timestamp_age_seconds(value: str | None) -> float | None:
    """Return how many seconds ago a ``YYYY-MM-DD HH:MM:SS`` label was taken.

    Returns:
        The age in seconds (never negative; timestamps in the future yield
        ``0.0``), or ``None`` when ``value`` is empty or cannot be parsed.
    """
    if not value:
        return None
    try:
        taken_at = datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
        elapsed = (datetime.now() - taken_at).total_seconds()
        return max(0.0, elapsed)
    except Exception:
        return None


def _pid_exists(pid: Any) -> bool:
    """Return True when a process with the given id currently exists.

    Args:
        pid: Process id as an int or anything ``int()`` accepts. ``None``,
            zero, negative and unparsable values are reported as not existing.

    Returns:
        ``True`` if the process exists (even when it belongs to another user),
        ``False`` otherwise.
    """
    try:
        pid_int = int(pid or 0)
    except (TypeError, ValueError, OverflowError):
        return False
    if pid_int <= 0:
        return False
    try:
        # Signal 0 performs the existence/permission check without delivering
        # a signal. NOTE(review): this is POSIX behaviour; on Windows os.kill
        # with an arbitrary signal number terminates the target — confirm this
        # module only runs on POSIX hosts.
        os.kill(pid_int, 0)
    except PermissionError:
        # The process exists but is owned by someone else.
        return True
    except (OSError, OverflowError):
        # ProcessLookupError (no such pid), other OS errors, or a pid too large
        # to convert to the platform's pid type.
        return False
    return True


def _is_recent_active_audit_run(run: Dict[str, Any] | None) -> bool:
    """Decide whether a recorded run should still be considered in progress.

    A run counts as active when its status is ``queued`` or ``running``, its
    recorded pid (if any) still exists, and its latest timestamp is younger
    than ``_ACTIVE_AUDIT_TTL_SECONDS`` (or cannot be determined).
    """
    if not run:
        return False
    if run.get('status') not in {'queued', 'running'}:
        return False

    owner_pid = run.get('pid')
    if owner_pid and not _pid_exists(owner_pid):
        return False

    latest_stamp = run.get('updated_at') or run.get('started_at') or run.get('queued_at')
    age = _timestamp_age_seconds(latest_stamp)
    if age is None:
        return True
    return age < _ACTIVE_AUDIT_TTL_SECONDS


def _mark_stale_audit_run(run: Dict[str, Any]) -> Dict[str, Any]:
    """Record ``run`` as failed because its background worker is gone.

    Works on a copy of ``run``, flips it to an error state, keeps an existing
    ``finished_at`` (or stamps the current time) and stores the result via
    ``_record_audit_run``.

    Returns:
        The stored, updated record.
    """
    stale = dict(run)
    stale['ok'] = False
    stale['status'] = 'error'
    stale['finished_at'] = stale.get('finished_at') or _now_label()
    stale['error'] = 'background worker no longer running; audit state marked stale'
    return _record_audit_run(stale)


def _is_vision_infra_error(error: str | None) -> bool:
    """Tell whether an error message matches one of the infrastructure markers.

    The match is a case-insensitive substring search; callers use a positive
    answer to stop checking further slides.
    """
    infra_markers = (
        'all 3 hosts failed',
        'connection',
        'ollama vision failed',
        'timeout',
    )
    haystack = error.lower() if error else ''
    for marker in infra_markers:
        if marker in haystack:
            return True
    return False


def _public_audit_run_payload(run: Dict[str, Any] | None) -> Dict[str, Any] | None:
    """Reduce a stored audit run to the fields that are safe to expose.

    Paths are reduced to base names, at most three error messages (each cut
    to 160 characters) and at most five per-file rows are kept.

    Returns:
        The trimmed payload, or ``None`` when ``run`` is empty.
    """
    if not run:
        return None

    summary = run.get('summary') or {}
    raw_errors = summary.get('errors') or []

    def _file_row(item: Dict[str, Any]) -> Dict[str, Any]:
        source_path = item.get('path') or ''
        return {
            'filename': os.path.basename(source_path) if source_path else '',
            'slides_checked': int(item.get('slides_checked') or 0),
            'issues': int(item.get('issues') or 0),
            'error': item.get('error') or '',
        }

    file_rows = [_file_row(item) for item in (summary.get('audited_files') or [])]

    pptx_names = []
    for name in (run.get('filenames') or []):
        if str(name).lower().endswith('.pptx'):
            pptx_names.append(os.path.basename(str(name)))

    return {
        'ok': bool(run.get('ok')),
        'status': run.get('status') or 'unknown',
        'queued_at': run.get('queued_at') or '',
        'started_at': run.get('started_at') or '',
        'finished_at': run.get('finished_at') or '',
        'updated_at': run.get('updated_at') or '',
        'pid': run.get('pid') or None,
        'filenames': pptx_names,
        'max_files': run.get('max_files'),
        'error': run.get('error') or '',
        'summary': {
            'audited_count': len(file_rows),
            'total_issues': int(summary.get('total_issues') or 0),
            'error_count': len(raw_errors),
            'errors': [str(message)[:160] for message in raw_errors[:3]],
            'files': file_rows[:5],
        },
    }


def get_ppt_vision_audit_status() -> Dict[str, Any]:
    """Summarise the current or most recent background visual QA run.

    A record that still claims to be queued/running but no longer looks alive
    is first rewritten as a stale error. No database access happens here.

    Returns:
        A dict with ``running``, ``status``, ``status_label``, ``message`` and
        the trimmed ``last_run`` payload (``None`` when nothing was recorded).
    """
    status_labels = {
        'queued': '已排入',
        'running': '執行中',
        'completed': '已完成',
        'error': '錯誤',
    }

    raw_run = _load_last_audit_run()
    claims_active = bool(raw_run) and raw_run.get('status') in {'queued', 'running'}
    if claims_active and not _is_recent_active_audit_run(raw_run):
        raw_run = _mark_stale_audit_run(raw_run)

    running = _AUDIT_LOCK.locked() or _is_recent_active_audit_run(raw_run)
    last_run = _public_audit_run_payload(raw_run)

    if running:
        status, status_label = 'running', '執行中'
        message = '視覺 QA 正在背景審核簡報。'
    elif last_run:
        status = last_run.get('status') or 'unknown'
        status_label = status_labels.get(status, status)
        if status == 'completed':
            message = '最近一次視覺 QA 已完成。'
        else:
            message = '最近一次視覺 QA 狀態可查。'
    else:
        status, status_label = 'idle', '待命'
        message = '尚未有背景視覺 QA 執行紀錄。'

    return {
        'ok': True,
        'running': running,
        'status': status,
        'status_label': status_label,
        'message': message,
        'last_run': last_run,
    }


# ─────────────────────────────────────────────────────────────────────────────
# Result container
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class VisionResult:
    """Outcome of a single slide-image inspection."""
    # False when the check could not be carried out; see `error` for the reason.
    success: bool
    issues_found: List[str] = field(default_factory=list)  # list of detected issues
    confidence: float = 0.0                                 # 0-1 score attached to the result
    # Model reply text (stripped) kept for debugging; empty when no reply was obtained.
    raw_response: str = ''
    # Elapsed time of the check in milliseconds.
    duration_ms: int = 0
    # Failure description when `success` is False, otherwise None.
    error: Optional[str] = None


# ─────────────────────────────────────────────────────────────────────────────
# Vision inspection prompt (forces Traditional Chinese output)
# ─────────────────────────────────────────────────────────────────────────────
# System prompt sent with every slide image. It asks the model to answer
# either with a single "no anomaly" line (✅) or with one ⚠️-prefixed line per
# issue; the reply parser in this module relies on those markers.
PPT_VISION_SYSTEM_PROMPT = """你是 momo 電商 PPT 排版品質審核員。

【任務】檢查截圖找出視覺異常，回繁中清單格式：
- 圖表被切掉 / 元素重疊 / 文字溢出框 / 空白區塊（資料未填滿）/ 配色衝突
- 商品名稱顯示不完整 / 數字單位錯誤 / 標題遮擋

【輸出格式】
若無問題：回「✅ 無視覺異常」
若有問題：每行一個問題，格式「⚠️ <問題類型>：<具體描述>」

【限制】
- 只檢查視覺，不評估內容對錯
- 用繁體中文（台灣用語），絕對禁止簡體字
- 不要寫過多解釋，每個問題一行精簡描述
"""


class PPTVisionService:
    """Visual layout QA for generated decks using an Ollama vision model.

    A ``.pptx`` is converted to PNG with LibreOffice, the screenshot(s) are
    sent to the configured vision model and the reply is turned into a list
    of layout issues. Expected failures (feature flag off, converter missing,
    model errors) are reported through the returned object, not raised.
    """

    # Leading list markers a model may put in front of an issue line.
    _BULLET_CHARS = '-*•· \t'
    # Lower-cased prefixes that mark a line as an issue. U+26A0 is the warning
    # sign; matching it without the emoji variation selector also covers the
    # "⚠️" spelling requested in the system prompt.
    _ISSUE_PREFIXES = ('\u26a0', 'warning:', '警告')

    def __init__(self, model: str = PPT_VISION_MODEL):
        """Remember which vision model tag to use for every check."""
        self.model = model

    def is_available(self) -> bool:
        """Return True when the ``PPT_VISION_ENABLED`` feature flag is on."""
        return is_ppt_vision_enabled()

    def _encode_image_for_vision(self, image_path: str) -> str:
        """Return the image as base64 text, shrunk and JPEG-compressed if possible.

        With Pillow available the image is converted to RGB, thumbnailed to
        ``PPT_VISION_IMAGE_MAX_EDGE`` and saved as JPEG (quality clamped to
        50..95). If that fails for any reason the raw file bytes are encoded
        instead.

        Raises:
            OSError: when even the raw file cannot be read.
        """
        try:
            from PIL import Image

            with Image.open(image_path) as im:
                image = im.convert('RGB')
                image.thumbnail((PPT_VISION_IMAGE_MAX_EDGE, PPT_VISION_IMAGE_MAX_EDGE))
                buffer = BytesIO()
                image.save(
                    buffer,
                    format='JPEG',
                    quality=max(50, min(PPT_VISION_IMAGE_QUALITY, 95)),
                    optimize=True,
                )
                return base64.b64encode(buffer.getvalue()).decode('ascii')
        except Exception:
            # Pillow is an optimization, not a hard dependency for the vision path.
            logger.debug("[PPTVision] image compression skipped, sending raw file", exc_info=True)
            with open(image_path, 'rb') as f:
                return base64.b64encode(f.read()).decode('ascii')

    @classmethod
    def _extract_issue_lines(cls, raw: str) -> List[str]:
        """Pick the issue lines out of a model reply.

        A line counts as an issue when, after trimming whitespace and leading
        bullet characters, it starts with the warning sign, ``warning:`` (any
        letter case) or ``警告``. The returned text has the bullet removed.
        """
        issues = []
        for line in raw.split('\n'):
            candidate = line.strip().lstrip(cls._BULLET_CHARS)
            if candidate.lower().startswith(cls._ISSUE_PREFIXES):
                issues.append(candidate)
        return issues

    def check_ppt_file(self, pptx_path: str, max_slides: int | None = None) -> Dict[str, Any]:
        """Check a whole ``.pptx`` file (hook for the PPT generation flow).

        Steps:
          1. Convert the deck to PNG with headless LibreOffice.
          2. Run ``check_image`` on the first ``max_slides`` images.
          3. Aggregate the issues and the average confidence.
          4. Store the outcome via ``_persist_audit_result`` (failures there
             are only logged).

        Skips (flag off, file missing, LibreOffice not installed) return early
        without storing anything.

        Args:
            pptx_path: Path of the deck to inspect.
            max_slides: Upper bound on inspected images; defaults to
                ``PPT_VISION_MAX_SLIDES`` and is never less than 1.

        Returns:
            {
                'success': bool,
                'slides_checked': int,
                'total_issues': int,
                'issues_by_slide': [(slide_num, [issues...]), ...],
                'error': str | None,
                'slide_errors': [str, ...],   # only when a slide failed
            }
        """
        import subprocess
        import tempfile

        max_slides = max(1, int(max_slides or PPT_VISION_MAX_SLIDES))
        result: Dict[str, Any] = {
            'success': False, 'slides_checked': 0, 'total_issues': 0,
            'issues_by_slide': [], 'error': None,
        }

        if not self.is_available():
            result['error'] = 'PPT_VISION_ENABLED=false'
            return result

        if not os.path.isfile(pptx_path):
            result['error'] = f'pptx not found: {pptx_path}'
            return result

        converter = shutil.which('libreoffice') or shutil.which('soffice')
        if not converter:
            result['error'] = 'libreoffice not installed (skip vision check)'
            return result

        def _persist(avg_confidence: float, duration_ms: int) -> None:
            # Audit history is best effort: a storage failure must not fail the check.
            try:
                self._persist_audit_result(
                    pptx_path=pptx_path,
                    result=result,
                    avg_confidence=avg_confidence,
                    duration_ms=duration_ms,
                )
            except Exception as e:
                logger.warning("[PPTVision] persist audit result failed: %s", e)

        def _finish_with_error(message: str, duration_ms: int = 0) -> Dict[str, Any]:
            result['error'] = message
            _persist(0.0, duration_ms)
            return result

        # 1. LibreOffice -> png
        with tempfile.TemporaryDirectory() as tmpdir:
            convert_started = time.monotonic()

            def _convert_elapsed_ms() -> int:
                return int((time.monotonic() - convert_started) * 1000)

            try:
                proc = subprocess.run(
                    [converter, '--headless', '--convert-to', 'png',
                     '--outdir', tmpdir, pptx_path],
                    capture_output=True, timeout=60,
                )
            except FileNotFoundError:
                # Converter vanished between the PATH lookup and the call: treat as a skip.
                result['error'] = 'libreoffice not installed (skip vision check)'
                return result
            except subprocess.TimeoutExpired:
                return _finish_with_error('libreoffice convert timeout (60s)', _convert_elapsed_ms())
            except Exception as e:
                return _finish_with_error(
                    f'{type(e).__name__}: {str(e)[:200]}',
                    _convert_elapsed_ms(),
                )

            if proc.returncode != 0:
                # Decode leniently so undecodable converter output cannot hide the real failure.
                stderr_text = (proc.stderr or b'').decode('utf-8', errors='replace')
                return _finish_with_error(
                    f'libreoffice convert failed: {stderr_text[:200]}',
                    _convert_elapsed_ms(),
                )

            # By default LibreOffice exports only the first page of a .pptx;
            # multiple pages need --convert-to png:impress_png_Export.
            png_files = sorted(
                os.path.join(tmpdir, f) for f in os.listdir(tmpdir)
                if f.lower().endswith('.png')
            )

            if not png_files:
                return _finish_with_error(
                    'libreoffice 未產出 png (可能需要 --convert-to png:impress_png_Export)',
                    _convert_elapsed_ms(),
                )

            # 2. Run check_image on the first N images
            t0 = time.monotonic()
            confidences = []
            slide_errors = []
            for idx, png in enumerate(png_files[:max_slides]):
                try:
                    vr = self.check_image(png)
                    if vr.success:
                        result['slides_checked'] += 1
                        confidences.append(vr.confidence)
                        if vr.issues_found:
                            result['total_issues'] += len(vr.issues_found)
                            result['issues_by_slide'].append((idx + 1, vr.issues_found))
                    else:
                        message = f"slide {idx + 1}: {vr.error or 'vision model failed'}"
                        slide_errors.append(message)
                        # An infrastructure failure will repeat for every slide: stop early.
                        if _is_vision_infra_error(vr.error):
                            break
                except Exception as exc:
                    message = f"slide {idx + 1}: {type(exc).__name__}: {str(exc)[:160]}"
                    slide_errors.append(message)
                    logger.warning("[PPTVision] slide %s check failed: %s", idx + 1, exc)
                    if _is_vision_infra_error(message):
                        break

            result['success'] = result['slides_checked'] > 0
            if not result['success'] and slide_errors:
                result['error'] = '；'.join(slide_errors[:3])
            if slide_errors:
                result['slide_errors'] = slide_errors
            duration_ms = int((time.monotonic() - t0) * 1000)

            # 3. Keep a history row in ppt_audit_results (failure-safe)
            avg_confidence = (sum(confidences) / len(confidences)) if confidences else 0.0
            _persist(avg_confidence, duration_ms)

            return result

    def _persist_audit_result(self, pptx_path: str, result: Dict[str, Any],
                              avg_confidence: float, duration_ms: int) -> None:
        """Insert one row describing this audit into ``ppt_audit_results``.

        The stored status is ``skipped`` / ``error`` / ``failed`` / ``passed``,
        derived from the result dict. Exceptions (missing dependency, database
        error) propagate; ``check_ppt_file`` catches and logs them.

        Args:
            pptx_path: Audited deck; only its base name, size and mtime are stored.
            result: Result dict built by ``check_ppt_file``.
            avg_confidence: Mean confidence over the checked slides.
            duration_ms: Elapsed time to record, in milliseconds.
        """
        from sqlalchemy import text as _sa_text
        from database.manager import get_session

        # Derive audit_status
        if result.get('error'):
            err = result['error']
            if 'libreoffice not installed' in err or 'PPT_VISION_ENABLED' in err:
                status = 'skipped'
            else:
                status = 'error'
        elif result.get('total_issues', 0) > 0:
            status = 'failed'
        elif result.get('success'):
            status = 'passed'
        else:
            status = 'error'

        # Serialise issues for the JSONB column
        issues_json = json.dumps([
            {'slide': slide_num, 'issues': issues}
            for slide_num, issues in result.get('issues_by_slide', [])
        ], ensure_ascii=False)

        size_kb = None
        mtime = None
        try:
            if os.path.isfile(pptx_path):
                size_kb = round(os.path.getsize(pptx_path) / 1024, 1)
                mtime = datetime.fromtimestamp(os.path.getmtime(pptx_path))
        except OSError:
            # File disappeared or is unreadable: store the row without file metadata.
            size_kb = None
            mtime = None

        session = get_session()
        try:
            session.execute(
                _sa_text("""
                    INSERT INTO ppt_audit_results
                        (pptx_filename, pptx_size_kb, pptx_mtime, vision_enabled,
                         audit_status, issues_count, issues_found, confidence,
                         duration_ms, error_msg)
                    VALUES
                        (:fname, :sz, :mt, :ve, :st, :ic, CAST(:if AS JSONB),
                         :cf, :du, :em)
                """),
                {
                    'fname': os.path.basename(pptx_path),
                    'sz': size_kb,
                    'mt': mtime,
                    've': True,  # reaching this point means vision is enabled
                    'st': status,
                    'ic': result.get('total_issues', 0),
                    'if': issues_json,
                    'cf': round(avg_confidence, 3),
                    'du': duration_ms,
                    'em': result.get('error', None),
                },
            )
            session.commit()
        finally:
            session.close()

    def check_image(self, image_path: str) -> VisionResult:
        """Inspect a single slide screenshot.

        Args:
            image_path: Local file path of the image (jpg/png).

        Returns:
            A ``VisionResult``. On success ``issues_found`` holds the issue
            lines found in the reply; confidence is 1.0 for an explicit
            "no anomaly" reply, 0.85 when issues were listed and 0.5 when the
            reply contained neither. On failure ``success`` is False and
            ``error`` explains why.
        """
        start = time.monotonic()

        if not self.is_available():
            return VisionResult(
                success=False,
                error='PPT_VISION_ENABLED=false (Phase 14 預設 OFF)',
            )

        if not os.path.isfile(image_path):
            return VisionResult(
                success=False,
                error=f'image not found: {image_path}',
            )

        # Read and base64-encode the file; compress first when possible so a
        # large image does not slow the vision model down.
        try:
            img_b64 = self._encode_image_for_vision(image_path)
        except Exception as e:
            return VisionResult(
                success=False,
                error=f'read image failed: {type(e).__name__}: {str(e)[:200]}',
            )

        try:
            from services.ollama_service import OllamaService

            ollama = OllamaService(model=self.model)
            resp = ollama.generate(
                prompt='請檢查這張 momo 電商 PPT 截圖，找出視覺異常。',
                model=self.model,
                system_prompt=PPT_VISION_SYSTEM_PROMPT,
                temperature=0.2,
                timeout=PPT_VISION_TIMEOUT,
                keep_alive='5m',
                options={'num_predict': 256},
                images=[img_b64],
            )
            duration_ms = int((time.monotonic() - start) * 1000)

            if not resp.success:
                return VisionResult(
                    success=False, duration_ms=duration_ms,
                    error=resp.error or 'ollama vision failed',
                )

            raw = (resp.content or '').strip()
            issues = self._extract_issue_lines(raw)

            if '✅' in raw and '無視覺異常' in raw and not issues:
                # Explicit "all clear" reply
                return VisionResult(
                    success=True, issues_found=[],
                    confidence=1.0, raw_response=raw,
                    duration_ms=duration_ms,
                )

            return VisionResult(
                success=True, issues_found=issues,
                confidence=0.85 if issues else 0.5,
                raw_response=raw,
                duration_ms=duration_ms,
            )

        except Exception as e:
            duration_ms = int((time.monotonic() - start) * 1000)
            return VisionResult(
                success=False, duration_ms=duration_ms,
                error=f'{type(e).__name__}: {str(e)[:200]}',
            )


# Module-level shared instance, created at import time with the default model.
ppt_vision_service = PPTVisionService()


def audit_recent_ppts(reports_dir: str | None = None, hours: int = 24,
                      max_files: int = 10,
                      filenames: Sequence[str] | None = None) -> Dict[str, Any]:
    """Run the visual check on recently written ``.pptx`` files in a directory.

    Intended as the scheduled hook of the PPT generation flow.

    Args:
        reports_dir: Directory holding the decks; when omitted the
            ``REPORTS_DIR`` environment variable is used
            (default ``/app/data/reports``).
        hours: Only files modified within the last N hours are audited.
        max_files: Upper bound on audited files per call, newest first.
            Negative values are treated as 0; ``None`` means no limit.
        filenames: Optional explicit selection. Only base names ending in
            ``.pptx`` are honoured; when at least one is given, exactly those
            files are audited regardless of ``hours`` and names that are not
            present are reported in ``errors``.

    Returns:
        {
            'audited_files': [...],
            'total_issues': int,
            'errors': [...],
        }
    """
    summary: Dict[str, Any] = {'audited_files': [], 'total_issues': 0, 'errors': []}

    if reports_dir is None:
        reports_dir = os.environ.get('REPORTS_DIR', '/app/data/reports')

    if not is_ppt_vision_enabled():
        summary['errors'].append('PPT_VISION_ENABLED=false')
        return summary

    if not os.path.isdir(reports_dir):
        summary['errors'].append(f'{reports_dir} not found')
        return summary

    requested_names = {
        os.path.basename(str(name))
        for name in (filenames or [])
        if str(name).lower().endswith('.pptx')
    }

    # Collect candidates: either the explicitly requested files, or everything
    # modified inside the time window.
    cutoff = time.time() - hours * 3600
    pptx_files = []
    for entry_name in os.listdir(reports_dir):
        if not entry_name.lower().endswith('.pptx'):
            continue
        if requested_names and entry_name not in requested_names:
            continue
        full = os.path.join(reports_dir, entry_name)
        try:
            # One lookup per file: the value serves both the filter and the sort key.
            modified = os.path.getmtime(full)
        except OSError:
            continue
        if requested_names or modified >= cutoff:
            pptx_files.append((modified, full))

    if requested_names:
        found_names = {os.path.basename(path) for _mtime, path in pptx_files}
        for missing in sorted(requested_names - found_names):
            summary['errors'].append(f'{missing}: file not found')

    pptx_files.sort(reverse=True)
    # A negative bound would silently drop files from the end of the list.
    limit = None if max_files is None else max(0, int(max_files))
    pptx_files = pptx_files[:limit]

    svc = PPTVisionService()
    for _mtime, path in pptx_files:
        try:
            result = svc.check_ppt_file(path)
            entry = {
                'path': path,
                'slides_checked': result.get('slides_checked', 0),
                'issues': result.get('total_issues', 0),
                'issues_by_slide': result.get('issues_by_slide', []),
                'error': result.get('error'),
            }
            summary['audited_files'].append(entry)
            summary['total_issues'] += entry['issues']
            if entry['error']:
                summary['errors'].append(f"{path}: {entry['error']}")
        except Exception as exc:
            # One broken deck must not stop the remaining audits.
            summary['errors'].append(f'{path}: {type(exc).__name__}: {str(exc)[:150]}')

    return summary


def start_ppt_vision_audit_background(
    *,
    reports_dir: str | None = None,
    hours: int = 24,
    max_files: int = 10,
    filenames: Sequence[str] | None = None,
) -> Dict[str, Any]:
    """Start a PPT vision audit on a daemon thread and return immediately.

    When a run is already in progress nothing is started and the answer has
    status ``already_running``. Otherwise a ``queued`` record is stored, the
    worker thread is launched and it records ``running`` followed by
    ``completed`` or ``error``.

    Args:
        reports_dir, hours, max_files: Forwarded to ``audit_recent_ppts``.
        filenames: Optional selection; reduced to base names ending in ``.pptx``.

    Returns:
        A small status dict describing what was queued (or the active run).
    """
    active_run = _load_last_audit_run()
    if _AUDIT_LOCK.locked() or _is_recent_active_audit_run(active_run):
        return {
            'ok': True,
            'status': 'already_running',
            'message': 'PPT vision audit is already running.',
            'last_run': _public_audit_run_payload(active_run),
        }

    clean_filenames = []
    for name in (filenames or []):
        if str(name).lower().endswith('.pptx'):
            clean_filenames.append(os.path.basename(str(name)))
    queued_at = _now_label()

    def _snapshot(ok, status, started_at=None, finished_at=None, **tail):
        # Builds one state record; key order is kept stable across all states.
        record = {'ok': ok, 'status': status, 'queued_at': queued_at}
        if started_at is not None:
            record['started_at'] = started_at
        if finished_at is not None:
            record['finished_at'] = finished_at
        record['filenames'] = clean_filenames
        record['max_files'] = max_files
        record.update(tail)
        return record

    _record_audit_run(_snapshot(True, 'queued'))

    def _run():
        with _AUDIT_LOCK:
            started_at = _now_label()
            _record_audit_run(_snapshot(True, 'running', started_at=started_at))
            try:
                summary = audit_recent_ppts(
                    reports_dir=reports_dir,
                    hours=hours,
                    max_files=max_files,
                    filenames=clean_filenames or None,
                )
                _record_audit_run(_snapshot(
                    True, 'completed',
                    started_at=started_at,
                    finished_at=_now_label(),
                    summary=summary,
                ))
            except Exception as exc:
                _record_audit_run(_snapshot(
                    False, 'error',
                    started_at=started_at,
                    finished_at=_now_label(),
                    error=f'{type(exc).__name__}: {str(exc)[:200]}',
                ))
                logger.error("[PPTVision] background audit failed: %s", exc, exc_info=True)

    worker = threading.Thread(target=_run, name='ppt-vision-audit', daemon=True)
    worker.start()
    return {
        'ok': True,
        'status': 'queued',
        'message': 'PPT vision audit queued.',
        'filenames': clean_filenames,
        'max_files': max_files,
    }


def push_ppt_audit_to_telegram(summary: Dict[str, Any]) -> bool:
    """Send a Telegram digest of an audit summary, but only when issues exist.

    Staying silent for clean runs avoids flooding the channel with
    "no problems" messages.

    Args:
        summary: Dict shaped like the return value of ``audit_recent_ppts``
            (``total_issues``, ``audited_files``). Missing keys are treated
            as "nothing to report".

    Returns:
        True when the message was handed to the Telegram helper without an
        exception; False when there was nothing to send, the helper could not
        be imported, or sending raised.
    """
    import html

    if not summary or int(summary.get('total_issues') or 0) <= 0:
        return False
    try:
        from services.telegram_templates import _send_telegram_raw
    except Exception:
        return False

    def _safe(text: Any) -> str:
        # The message uses <b>/<code> markup, so free text (file names, model
        # output) must not contain raw '<', '>' or '&'.
        return html.escape(str(text), quote=False)

    audited_files = summary.get('audited_files') or []
    lines = [f"🔍 <b>PPT 視覺審核（{len(audited_files)} 份）</b>"]
    lines.append('━' * 18)
    for entry in audited_files:
        issue_count = entry.get('issues') or 0
        if issue_count <= 0:
            continue
        fname = _safe(os.path.basename(entry.get('path') or ''))
        lines.append(f"\n📊 <code>{fname}</code> ({entry.get('slides_checked', 0)} slides, "
                     f"<b>{issue_count} issues</b>)")
        # At most 3 slides per file and 2 issues per slide, to keep the message short.
        for slide_num, issues in (entry.get('issues_by_slide') or [])[:3]:
            for iss in issues[:2]:
                # Truncate first, then escape, so an entity is never cut in half.
                lines.append(f"  Slide {slide_num}: {_safe(str(iss)[:120])}")

    msg = '\n'.join(lines)
    try:
        _send_telegram_raw(msg)
        return True
    except Exception:
        return False


# Public API of this module: the names exported by a star-import.
__all__ = [
    'PPTVisionService',
    'VisionResult',
    'ppt_vision_service',
    'is_ppt_vision_enabled',
    'get_ppt_vision_runtime_status',
    'get_ppt_vision_audit_status',
    'PPT_VISION_SYSTEM_PROMPT',
    'audit_recent_ppts',
    'start_ppt_vision_audit_background',
    'push_ppt_audit_to_telegram',
]