ewoooc/services/ppt_vision_service.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
services/ppt_vision_service.py
Operation Ollama-First v5.0 / Phase 14 — PPT 視覺自審

設計原則：
- 用 minicpm-v（GCP Primary 已拉，5.5GB）對 PPT 截圖做品質檢查
- 替代 qwen2-vl:7b（Ollama registry 暫無）
- 用途：PPT 生成後自動跑視覺檢查，找：
    1. 圖表 layout 異常（被切掉、重疊）
    2. 文字溢出框
    3. 空白區塊（資料未填滿）
    4. 配色衝突
- feature flag PPT_VISION_ENABLED 預設 OFF
- 失敗自動 skip（不阻擋 PPT 生成主流程）
"""

from __future__ import annotations
import os
import time
import base64
import logging
from dataclasses import dataclass, field
from typing import Optional, Dict, Any, List

import requests

logger = logging.getLogger(__name__)

# ─────────────────────────────────────────────────────────────────────────────
# Feature flag + 配置
# ─────────────────────────────────────────────────────────────────────────────
# Ollama model tag used for vision checks; override with the PPT_VISION_MODEL
# environment variable.
PPT_VISION_MODEL = os.getenv('PPT_VISION_MODEL', 'minicpm-v:latest')
# Timeout handed to the HTTP request against Ollama, read from
# PPT_VISION_TIMEOUT (default '60').
# NOTE(review): int() runs at import time, so a non-numeric value raises
# ValueError while the module is being imported.
PPT_VISION_TIMEOUT = int(os.getenv('PPT_VISION_TIMEOUT', '60'))


def is_ppt_vision_enabled() -> bool:
    """Return True when the PPT_VISION_ENABLED env var holds a truthy value.

    The variable is read on every call instead of being captured at import
    time, so flipping it takes effect without reloading the module. Accepted
    truthy spellings (case-insensitive, surrounding whitespace ignored) are
    ``true``, ``1``, ``yes`` and ``on``; an unset variable counts as off.
    """
    raw_flag = os.getenv('PPT_VISION_ENABLED', 'false')
    normalized = raw_flag.strip().lower()
    return normalized in {'true', '1', 'yes', 'on'}


# ─────────────────────────────────────────────────────────────────────────────
# 結果容器
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class VisionResult:
    """Outcome of one vision check on a single image.

    Field order and defaults are part of the constructor signature; only
    ``success`` is required.
    """

    # True when the model answered and its reply was parsed.
    success: bool
    # One entry per issue line extracted from the reply; empty when none.
    issues_found: List[str] = field(default_factory=list)
    # Score in the 0-1 range attached to the result by the service.
    confidence: float = 0.0
    # Model reply text, stripped of surrounding whitespace.
    raw_response: str = ""
    # Wall-clock duration of the check in milliseconds.
    duration_ms: int = 0
    # Human-readable failure reason; None when nothing went wrong.
    error: Optional[str] = None


# ─────────────────────────────────────────────────────────────────────────────
# Vision 檢查 prompt（繁中強制）
# ─────────────────────────────────────────────────────────────────────────────
# System prompt sent with every vision request. It asks the model to reply in
# Traditional Chinese with either a single "no visual anomaly" line or one
# warning-prefixed line per issue. The reply parser in check_image depends on
# those markers, so keep prompt and parser in sync when editing either one.
PPT_VISION_SYSTEM_PROMPT = """你是 momo 電商 PPT 排版品質審核員。

【任務】檢查截圖找出視覺異常，回繁中清單格式：
- 圖表被切掉 / 元素重疊 / 文字溢出框 / 空白區塊（資料未填滿）/ 配色衝突
- 商品名稱顯示不完整 / 數字單位錯誤 / 標題遮擋

【輸出格式】
若無問題：回「✅ 無視覺異常」
若有問題：每行一個問題，格式「⚠️ <問題類型>：<具體描述>」

【限制】
- 只檢查視覺，不評估內容對錯
- 用繁體中文（台灣用語），絕對禁止簡體字
- 不要寫過多解釋，每個問題一行精簡描述
"""


class PPTVisionService:
    """Run visual quality checks on PPT screenshots through an Ollama vision model.

    Both check methods report expected failures through their return value
    (``VisionResult.error`` / the ``'error'`` key) instead of raising, so a
    failed check does not block the caller's PPT generation flow.
    """

    # Prefixes that mark a reply line as a reported issue. The bare U+26A0
    # warning sign also matches the emoji form that carries a trailing U+FE0F
    # variation selector, so either spelling is recognised.
    _ISSUE_PREFIXES = ('\u26a0', 'warning:', '警告')
    # Characters a model may put in front of an issue line when it formats the
    # reply as a bullet list; they are stripped before prefix matching.
    _LIST_MARKERS = '-*•· \t'

    def __init__(self, model: str = PPT_VISION_MODEL):
        """Remember the Ollama model tag used for every request."""
        self.model = model

    def is_available(self) -> bool:
        """Return True when the PPT_VISION_ENABLED feature flag is on."""
        return is_ppt_vision_enabled()

    @classmethod
    def _parse_issues(cls, raw: str) -> List[str]:
        """Extract the issue lines from a model reply.

        A line counts as an issue when, after trimming whitespace and any
        leading list marker, it starts with one of ``_ISSUE_PREFIXES``. Lines
        that already start with a prefix are returned exactly as before.
        """
        issues = []
        for line in raw.split('\n'):
            cleaned = line.strip().lstrip(cls._LIST_MARKERS)
            if cleaned.startswith(cls._ISSUE_PREFIXES):
                issues.append(cleaned)
        return issues

    def check_ppt_file(self, pptx_path: str, max_slides: int = 5) -> Dict[str, Any]:
        """Check a whole .pptx file (integrated into the PPT generation flow in Phase 26).

        Steps:
          1. Convert the deck to PNG with headless LibreOffice.
          2. Run ``check_image`` on the first ``max_slides`` images.
          3. Aggregate the issues found per slide.
          4. Fail safe: a missing LibreOffice binary or a failed conversion is
             reported in ``'error'`` and never raised.

        Args:
            pptx_path: Path of the .pptx file to check.
            max_slides: Upper bound on the number of images sent to the model.

        Returns:
            {
                'success': bool,          # True when at least one slide was checked
                'slides_checked': int,
                'total_issues': int,
                'issues_by_slide': [(slide_num, [issues...]), ...],
                'error': str | None,      # set whenever success is False
            }
        """
        import subprocess
        import tempfile

        result: Dict[str, Any] = {
            'success': False, 'slides_checked': 0, 'total_issues': 0,
            'issues_by_slide': [], 'error': None,
        }

        if not self.is_available():
            result['error'] = 'PPT_VISION_ENABLED=false'
            return result

        if not os.path.isfile(pptx_path):
            result['error'] = f'pptx not found: {pptx_path}'
            return result

        # 1. LibreOffice -> png, written into a directory that is removed on exit.
        with tempfile.TemporaryDirectory() as tmpdir:
            try:
                proc = subprocess.run(
                    ['libreoffice', '--headless', '--convert-to', 'png',
                     '--outdir', tmpdir, pptx_path],
                    capture_output=True, timeout=60,
                )
            except FileNotFoundError:
                result['error'] = 'libreoffice not installed (skip vision check)'
                return result
            except subprocess.TimeoutExpired:
                result['error'] = 'libreoffice convert timeout (60s)'
                return result
            except Exception as e:
                result['error'] = f'{type(e).__name__}: {str(e)[:200]}'
                return result

            if proc.returncode != 0:
                # Lenient decode: stderr in a non-UTF-8 locale must not replace
                # the real failure message with a UnicodeDecodeError.
                stderr_text = proc.stderr.decode('utf-8', errors='replace')
                result['error'] = f'libreoffice convert failed: {stderr_text[:200]}'
                return result

            # NOTE(review): per the original author's note, a plain
            # `--convert-to png` emits only the first slide of a .pptx and
            # multi-slide export may need `png:impress_png_Export` - unverified
            # here, so `max_slides` may effectively be capped at 1.
            png_files = sorted(
                os.path.join(tmpdir, name) for name in os.listdir(tmpdir)
                if name.lower().endswith('.png')
            )

            if not png_files:
                result['error'] = 'libreoffice 未產出 png (可能需要 --convert-to png:impress_png_Export)'
                return result

            # 2. Run check_image on the first N images; remember the most
            #    recent failure so a total failure can explain itself.
            last_error: Optional[str] = None
            for slide_num, png in enumerate(png_files[:max_slides], start=1):
                try:
                    vr = self.check_image(png)
                except Exception as exc:
                    logger.warning("[PPTVision] slide %s check failed: %s", slide_num, exc)
                    last_error = f'{type(exc).__name__}: {str(exc)[:200]}'
                    continue

                if not vr.success:
                    logger.warning("[PPTVision] slide %s check failed: %s", slide_num, vr.error)
                    last_error = vr.error or last_error
                    continue

                result['slides_checked'] += 1
                if vr.issues_found:
                    result['total_issues'] += len(vr.issues_found)
                    result['issues_by_slide'].append((slide_num, vr.issues_found))

            result['success'] = result['slides_checked'] > 0
            if not result['success']:
                # Previously this path returned success=False with error=None,
                # leaving the caller without any reason for the failure.
                result['error'] = last_error or f'no slides checked (max_slides={max_slides})'
            return result

    def check_image(self, image_path: str) -> VisionResult:
        """Check a single PPT screenshot.

        Args:
            image_path: Local file path of the image (jpg/png).

        Returns:
            A ``VisionResult``. On success ``issues_found`` lists the issue
            lines from the reply; a reply that explicitly confirms "no visual
            anomaly" yields an empty list with ``confidence=1.0``. Any failure
            (flag off, missing file, host resolution, HTTP error, timeout)
            yields ``success=False`` with ``error`` set.
        """
        start = time.monotonic()

        def _elapsed_ms() -> int:
            return int((time.monotonic() - start) * 1000)

        if not self.is_available():
            return VisionResult(
                success=False,
                error='PPT_VISION_ENABLED=false (Phase 14 預設 OFF)',
            )

        if not os.path.isfile(image_path):
            return VisionResult(
                success=False,
                error=f'image not found: {image_path}',
            )

        # Read the file and base64-encode it for the request payload.
        try:
            with open(image_path, 'rb') as f:
                img_b64 = base64.b64encode(f.read()).decode('ascii')
        except Exception as e:
            return VisionResult(
                success=False,
                error=f'read image failed: {type(e).__name__}: {str(e)[:200]}',
            )

        # Resolve the Ollama host through the shared helper. The import is
        # local so a missing/broken ollama_service only fails this check.
        try:
            from services.ollama_service import resolve_ollama_host, mark_unhealthy
            host = resolve_ollama_host()
        except Exception as e:
            return VisionResult(
                success=False,
                error=f'resolve host failed: {e}',
            )

        def _flag_host_unhealthy() -> None:
            # Best effort: a failure while flagging the host must not hide the
            # error that triggered it.
            try:
                mark_unhealthy(host)
            except Exception as mark_exc:
                logger.debug("[PPTVision] mark_unhealthy failed: %s", mark_exc)

        # /api/generate is sent the image as a base64 string in `images`.
        payload = {
            'model': self.model,
            'system': PPT_VISION_SYSTEM_PROMPT,
            'prompt': '請檢查這張 momo 電商 PPT 截圖，找出視覺異常。',
            'images': [img_b64],
            'stream': False,
            'options': {'temperature': 0.2, 'num_predict': 512},
        }

        try:
            resp = requests.post(
                f"{host.rstrip('/')}/api/generate",
                json=payload,
                timeout=PPT_VISION_TIMEOUT,
            )
            duration_ms = _elapsed_ms()

            if resp.status_code != 200:
                # Flag the host so the next resolution can pick another one.
                _flag_host_unhealthy()
                return VisionResult(
                    success=False, duration_ms=duration_ms,
                    error=f'HTTP {resp.status_code}: {resp.text[:200]}',
                )

            raw = (resp.json().get('response') or '').strip()
            issues = self._parse_issues(raw)

            if '✅' in raw and '無視覺異常' in raw and not issues:
                # The model explicitly confirmed that nothing is wrong.
                confidence = 1.0
            else:
                # Issues listed -> fairly confident; neither issues nor an
                # explicit OK marker -> the reply is ambiguous.
                confidence = 0.85 if issues else 0.5

            return VisionResult(
                success=True, issues_found=issues,
                confidence=confidence, raw_response=raw,
                duration_ms=duration_ms,
            )

        except requests.Timeout:
            _flag_host_unhealthy()
            return VisionResult(
                success=False, duration_ms=_elapsed_ms(),
                error=f'timeout ({PPT_VISION_TIMEOUT}s)',
            )
        except Exception as e:
            _flag_host_unhealthy()
            return VisionResult(
                success=False, duration_ms=_elapsed_ms(),
                error=f'{type(e).__name__}: {str(e)[:200]}',
            )


# Module-level shared instance, created at import time with the default model.
ppt_vision_service = PPTVisionService()


def audit_recent_ppts(reports_dir: str = 'reports', hours: int = 24,
                      max_files: int = 10) -> Dict[str, Any]:
    """Run the vision check on recently modified .pptx files in a directory.

    Phase 26 integration hook; per the original note it is meant to be run by
    a daily 22:00 cron job. Failures are collected in ``'errors'`` rather than
    raised.

    Args:
        reports_dir: Directory the PPT files are written to.
        hours: Only files modified within the last N hours are audited.
        max_files: Maximum number of files audited per run, newest first
            (keeps a single run short). Values below zero are treated as 0.

    Returns:
        {
            'audited_files': [...],   # one entry per audited file
            'total_issues': int,
            'errors': [...],
        }
    """
    summary: Dict[str, Any] = {'audited_files': [], 'total_issues': 0, 'errors': []}

    if not is_ppt_vision_enabled():
        summary['errors'].append('PPT_VISION_ENABLED=false')
        return summary

    if not os.path.isdir(reports_dir):
        summary['errors'].append(f'{reports_dir} not found')
        return summary

    try:
        names = os.listdir(reports_dir)
    except OSError as exc:
        # e.g. permission denied: report it instead of crashing the hook.
        summary['errors'].append(f'{reports_dir}: {type(exc).__name__}: {str(exc)[:150]}')
        return summary

    # Collect .pptx files modified inside the time window.
    cutoff = time.time() - hours * 3600
    recent = []
    for name in names:
        if not name.lower().endswith('.pptx'):
            continue
        full = os.path.join(reports_dir, name)
        try:
            # Stat once so the value tested and the value recorded are the same.
            mtime = os.path.getmtime(full)
        except OSError:
            continue
        if mtime >= cutoff:
            recent.append((mtime, full))

    # Newest first; clamp so a negative max_files cannot slice from the end.
    recent.sort(reverse=True)
    recent = recent[:max(max_files, 0)]

    svc = PPTVisionService()
    for _mtime, path in recent:
        try:
            result = svc.check_ppt_file(path)
            entry = {
                'path': path,
                'slides_checked': result.get('slides_checked', 0),
                'issues': result.get('total_issues', 0),
                'issues_by_slide': result.get('issues_by_slide', []),
                'error': result.get('error'),
            }
            summary['audited_files'].append(entry)
            summary['total_issues'] += entry['issues']
            if entry['error']:
                summary['errors'].append(f"{path}: {entry['error']}")
        except Exception as exc:
            summary['errors'].append(f'{path}: {type(exc).__name__}: {str(exc)[:150]}')

    return summary


def push_ppt_audit_to_telegram(summary: Dict[str, Any]) -> bool:
    """Push an audit summary to Telegram, but only when issues were found.

    Nothing is sent for a clean run, which avoids flooding the chat with
    "no problems" messages.

    Args:
        summary: Dict in the shape returned by ``audit_recent_ppts``
            (``'total_issues'`` and ``'audited_files'`` are read).

    Returns:
        True when the send call completed without raising; False when there
        was nothing to report, the Telegram helper could not be imported, or
        sending raised.
    """
    import html

    if summary.get('total_issues', 0) <= 0:
        return False
    try:
        from services.telegram_templates import _send_telegram_raw
    except Exception:
        # Best effort: a missing/broken Telegram helper must not break the audit.
        return False

    audited = summary.get('audited_files', [])
    lines = [f"🔍 <b>PPT 視覺審核（{len(audited)} 份）</b>", '━' * 18]
    for entry in audited:
        if entry['issues'] <= 0:
            continue
        # The message uses HTML markup (<b>, <code>), so file names and model
        # output are escaped to keep stray '<', '>' or '&' from breaking it.
        # Assumes _send_telegram_raw sends with HTML parse mode - TODO confirm.
        fname = html.escape(os.path.basename(entry['path']), quote=False)
        lines.append(f"\n📊 <code>{fname}</code> ({entry['slides_checked']} slides, "
                     f"<b>{entry['issues']} issues</b>)")
        for slide_num, issues in entry['issues_by_slide'][:3]:  # at most 3 slides per file
            for iss in issues[:2]:  # at most 2 issues per slide
                # Truncate first, then escape, so no entity is cut in half.
                lines.append(f"  Slide {slide_num}: {html.escape(iss[:120], quote=False)}")

    msg = '\n'.join(lines)
    try:
        _send_telegram_raw(msg)
        return True
    except Exception:
        return False


__all__ = [
    'PPTVisionService',
    'VisionResult',
    'ppt_vision_service',
    'is_ppt_vision_enabled',
    'PPT_VISION_SYSTEM_PROMPT',
    'audit_recent_ppts',
    'push_ppt_audit_to_telegram',
]