Files
ewoooc/services/ppt_preview_service.py
OoO 7e2f1ac671
All checks were successful
CD Pipeline / deploy (push) Successful in 1m5s
標示 PPT 預覽快取狀態
2026-05-18 19:48:14 +08:00

136 lines
4.7 KiB
Python

"""PPT online preview helpers.
Browsers cannot reliably render .pptx files inline, so the app converts a
deck to a cached PDF and embeds that PDF in the observability preview page.
"""
from __future__ import annotations
import hashlib
import os
import shutil
import subprocess
import tempfile
from dataclasses import dataclass
from pathlib import Path
@dataclass(frozen=True)
class PPTPreviewResult:
    """Immutable outcome of one attempt to build a PDF preview of a deck.

    Only ``ok`` is required; every other field defaults to "not set".
    """

    # True when a usable preview PDF is available.
    ok: bool
    # Location of the preview PDF, or None when none is available.
    pdf_path: str | None = None
    # True when an already-cached PDF was reused instead of converting.
    cache_hit: bool = False
    # Path of the converter executable that was located, if any.
    converter: str | None = None
    # Short description of the failure, or None when there was none.
    error: str | None = None
@dataclass(frozen=True)
class PPTPreviewCacheInfo:
    """Immutable snapshot describing the cached preview PDF for a deck.

    All fields are optional; a default-constructed instance means that no
    cache location could be determined.
    """

    # Expected location of the cached PDF, or None when it is unknown.
    pdf_path: str | None = None
    # True when a cached PDF is present at ``pdf_path``.
    cache_exists: bool = False
    # Size of the cached PDF in KiB, or None when there is no cache.
    cache_size_kb: float | None = None
    # Modification time of the cached PDF as a POSIX timestamp, or None.
    cache_mtime_ts: float | None = None
def find_libreoffice_binary() -> str | None:
    """Locate a LibreOffice executable on ``PATH``.

    ``libreoffice`` is tried first, then ``soffice``. Returns the path
    reported by ``shutil.which`` for the first name that is found, or
    ``None`` when neither is available.
    """
    for candidate in ("libreoffice", "soffice"):
        located = shutil.which(candidate)
        if located:
            return located
    return None
def _preview_cache_path(pptx_path: Path, cache_dir: Path) -> Path:
    """Return where the preview PDF for *pptx_path* lives inside *cache_dir*.

    The file name is ``<sanitised stem>_<key>.pdf``. The key is the first
    16 hex digits of a SHA-256 over the resolved source path, the file
    size and the modification time truncated to whole seconds, so an
    edited deck maps to a different cache file.

    The source file must exist: it is stat'ed to build the key.
    """
    info = pptx_path.stat()
    # NOTE: mtime is cut to whole seconds, so a same-size rewrite within the
    # same second maps to the same cache entry.
    fingerprint = f"{pptx_path.resolve()}:{info.st_size}:{int(info.st_mtime)}"
    digest = hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()

    # Keep letters, digits, "-" and "_"; replace everything else with "_".
    cleaned = []
    for ch in pptx_path.stem:
        keep = ch.isalnum() or ch in ("-", "_")
        cleaned.append(ch if keep else "_")
    stem_part = "".join(cleaned)[:96]

    file_name = f"{stem_part}_{digest[:16]}.pdf"
    return cache_dir / file_name
def get_ppt_preview_cache_info(
    pptx_path: str | os.PathLike[str],
    *,
    cache_dir: str | os.PathLike[str] | None = None,
) -> PPTPreviewCacheInfo:
    """Report the state of the expected cached PDF without converting.

    This is a read-only probe: it never launches LibreOffice and never
    creates the cache directory.

    Args:
        pptx_path: Path of the source deck. Must be an existing ``.pptx``
            file (suffix compared case-insensitively).
        cache_dir: Directory holding cached PDFs. When falsy, the
            ``PPT_PREVIEW_CACHE_DIR`` environment variable is used, falling
            back to ``/app/data/ppt_previews``.

    Returns:
        A default ``PPTPreviewCacheInfo`` when the source is not a usable
        ``.pptx`` file; otherwise an instance carrying the expected cache
        path, with ``cache_exists=True`` plus size and mtime only when a
        non-empty regular file is present there.
    """
    # Function-scope import: avoids touching the module import block and
    # keeps the module name from clashing with local stat results.
    import stat as stat_module

    source = Path(pptx_path)
    if not source.is_file() or source.suffix.lower() != ".pptx":
        return PPTPreviewCacheInfo()

    target_dir = Path(cache_dir or os.getenv("PPT_PREVIEW_CACHE_DIR", "/app/data/ppt_previews"))
    try:
        target_pdf = _preview_cache_path(source, target_dir)
    except OSError:
        # The source disappeared between is_file() and the stat() used to
        # build the cache key; report it like any other unusable source.
        return PPTPreviewCacheInfo()

    not_cached = PPTPreviewCacheInfo(pdf_path=str(target_pdf), cache_exists=False)
    try:
        # Single stat() call: the result is used for the existence check as
        # well as for size and mtime, so the file cannot vanish in between.
        pdf_stat = target_pdf.stat()
    except (OSError, ValueError):
        # ValueError (e.g. an embedded NUL in the path) is treated as "not a
        # file", the same way Path.is_file() treats it.
        return not_cached
    if not stat_module.S_ISREG(pdf_stat.st_mode) or pdf_stat.st_size <= 0:
        return not_cached

    return PPTPreviewCacheInfo(
        pdf_path=str(target_pdf),
        cache_exists=True,
        cache_size_kb=round(pdf_stat.st_size / 1024, 1),
        cache_mtime_ts=pdf_stat.st_mtime,
    )
def _publish_preview_pdf(generated: Path, target_pdf: Path, token: str) -> None:
    """Install *generated* as *target_pdf* without exposing a partial file.

    The PDF is first moved to a hidden ``.tmp`` staging file in the target
    directory (this may be a slow cross-filesystem copy) and then renamed
    onto the final name with ``os.replace``. *token* makes the staging name
    unique per conversion. Raises ``OSError`` on failure.
    """
    staging = target_pdf.with_name(f".{target_pdf.name}.{token}.tmp")
    try:
        shutil.move(str(generated), str(staging))
        os.replace(staging, target_pdf)
    finally:
        # Best-effort cleanup of a leftover staging file. After a successful
        # replace it no longer exists, which is the expected case.
        try:
            staging.unlink()
        except OSError:
            pass


def build_ppt_preview(
    pptx_path: str | os.PathLike[str],
    *,
    cache_dir: str | os.PathLike[str] | None = None,
    timeout_sec: int = 90,
) -> PPTPreviewResult:
    """Convert a ``.pptx`` deck to a cached PDF and return its location.

    Args:
        pptx_path: Path of the source deck. Must be an existing ``.pptx``
            file (suffix compared case-insensitively).
        cache_dir: Directory for cached PDFs. When falsy, the
            ``PPT_PREVIEW_CACHE_DIR`` environment variable is used, falling
            back to ``/app/data/ppt_previews``. Created if missing.
        timeout_sec: Maximum time allowed for the LibreOffice run.

    Returns:
        ``PPTPreviewResult`` with ``ok=True`` and ``pdf_path`` set when a
        PDF is available (``cache_hit`` tells whether it was reused), or
        ``ok=False`` with ``error`` describing what went wrong. Failures to
        create the cache directory or to store the PDF are reported as
        failed results as well.
    """
    source = Path(pptx_path)
    if not source.is_file():
        return PPTPreviewResult(ok=False, error="pptx not found")
    if source.suffix.lower() != ".pptx":
        return PPTPreviewResult(ok=False, error="unsupported file type")

    converter = find_libreoffice_binary()
    if not converter:
        return PPTPreviewResult(
            ok=False,
            error="LibreOffice is not installed in the app container.",
        )

    target_dir = Path(cache_dir or os.getenv("PPT_PREVIEW_CACHE_DIR", "/app/data/ppt_previews"))
    try:
        target_dir.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        return PPTPreviewResult(
            ok=False,
            converter=converter,
            error=f"{type(exc).__name__}: {str(exc)[:200]}",
        )

    target_pdf = _preview_cache_path(source, target_dir)
    if target_pdf.is_file() and target_pdf.stat().st_size > 0:
        return PPTPreviewResult(ok=True, pdf_path=str(target_pdf), cache_hit=True, converter=converter)

    # Convert into a throw-away directory so failed or partial output never
    # lands in the cache directory.
    with tempfile.TemporaryDirectory(prefix="ppt_preview_") as tmp:
        tmpdir = Path(tmp)
        try:
            proc = subprocess.run(
                [
                    converter,
                    "--headless",
                    "--convert-to",
                    "pdf",
                    "--outdir",
                    str(tmpdir),
                    str(source),
                ],
                capture_output=True,
                timeout=timeout_sec,
                check=False,
            )
        except subprocess.TimeoutExpired:
            return PPTPreviewResult(ok=False, converter=converter, error="LibreOffice conversion timed out.")
        except Exception as exc:
            return PPTPreviewResult(ok=False, converter=converter, error=f"{type(exc).__name__}: {str(exc)[:200]}")

        if proc.returncode != 0:
            stderr = proc.stderr.decode("utf-8", errors="ignore").strip()
            return PPTPreviewResult(
                ok=False,
                converter=converter,
                error=f"LibreOffice conversion failed: {stderr[:240] or proc.returncode}",
            )

        # Prefer the PDF named after the source stem; otherwise fall back to
        # the first PDF (sorted by name) found in the output directory.
        generated = tmpdir / f"{source.stem}.pdf"
        if not generated.is_file():
            candidates = sorted(tmpdir.glob("*.pdf"))
            generated = candidates[0] if candidates else generated
        if not generated.is_file() or generated.stat().st_size <= 0:
            return PPTPreviewResult(ok=False, converter=converter, error="LibreOffice did not produce a PDF.")

        try:
            # The temp dir may be on another filesystem than the cache, in
            # which case a plain move is a non-atomic copy. Publishing via a
            # staging file means the cache-hit check above can never pick up
            # a truncated PDF left by an interrupted copy.
            _publish_preview_pdf(generated, target_pdf, tmpdir.name)
        except OSError as exc:
            return PPTPreviewResult(
                ok=False,
                converter=converter,
                error=f"Failed to store preview PDF: {type(exc).__name__}: {str(exc)[:200]}",
            )

    return PPTPreviewResult(ok=True, pdf_path=str(target_pdf), cache_hit=False, converter=converter)