修正 Ollama fallback 與 PPT vision payload

2026-05-18 21:32:15 +08:00
parent aa8c2c7148
commit 1cf1fd01b1
6 changed files with 119 additions and 12 deletions
--- a/TODO_NEXT_STEPS.txt
+++ b/TODO_NEXT_STEPS.txt
@@ -4,6 +4,7 @@
 ================================================================================

 【已完成】
+   - V10.212 修正 PPT 視覺 QA 的 Ollama 三主機 fallback：當 Primary/Secondary request timeout 超過 unhealthy TTL 時，第三輪仍強制打 111 final fallback；PPT 截圖送模型前轉輕量 JPEG、縮小輸出 token，降低單檔審核耗時。
   - V10.211 補 `/observability/ppt_audit_history` 全類型視覺 QA：審核歷史不再限 daily，頁面新增「立即視覺 QA」非阻塞補跑，結果寫入 `ppt_audit_results`；模型失敗時也保留 slide error，避免產線狀態只剩空白。
   - V10.210 補 `/observability/ppt_audit_history` 審核歷史同頁回放：每筆 daily 視覺審核紀錄的動作欄新增「回放」按鈕，沿用 PDF 預覽抽屜並保留下載/開新頁，讓問題追查不必再回檔案表找簡報。
   - V10.208 修正 `/observability/ppt_audit_history` 同頁預覽抽屜 selector：Modal 標題改用獨立 `data-ppt-preview-modal-title`，避免與多個預覽連結的資料屬性衝突。
--- a/config.py
+++ b/config.py
@@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
 # ==========================================
 # 系統版本與路徑
 # ==========================================
-SYSTEM_VERSION = "V10.211"
+SYSTEM_VERSION = "V10.212"
 LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
 public_url = PUBLIC_URL  # 用於模板顯示

--- a/services/ollama_service.py
+++ b/services/ollama_service.py
@@ -90,17 +90,32 @@ def _mark_unhealthy_best_effort(host: str) -> None:
        logger.debug("[OllamaHost] mark_unhealthy failed for host=%s", host, exc_info=True)


+def _normalize_host(host: str) -> str:  # canonical form used for host comparisons and unhealthy-mark keys
+    return (host or '').rstrip('/')  # falsy host -> ''; trailing '/' characters removed
+
+
+def _canonical_host_chain() -> List[str]:
+    """Return the approved static fallback chain (primary, secondary, fallback order), normalized, without duplicates."""
+    chain: List[str] = []
+    for host in (OLLAMA_HOST_PRIMARY, OLLAMA_HOST_SECONDARY, OLLAMA_HOST_FALLBACK):
+        clean = _normalize_host(host)
+        if clean and clean not in chain:  # drop unset hosts and repeats; first occurrence wins
+            chain.append(clean)
+    return chain
+
+
 def _is_unhealthy(host: str) -> bool:
    """檢查 host 是否在 unhealthy TTL 內"""
    import time
    if not host:
        return False
-    ts = _unhealthy_marks.get(host.rstrip('/'))
+    clean_host = _normalize_host(host)
+    ts = _unhealthy_marks.get(clean_host)
    if ts is None:
        return False
    if time.time() - ts >= _UNHEALTHY_TTL:
        # TTL 過期，清除
-        _unhealthy_marks.pop(host.rstrip('/'), None)
+        _unhealthy_marks.pop(clean_host, None)
        return False
    return True

@@ -323,12 +338,27 @@ class OllamaService:
        # HOTFIX 三主機 retry 鏈
        attempted_hosts: List[str] = []
        last_error: Optional[str] = None
+        canonical_hosts = _canonical_host_chain()

        for attempt in range(3):
-            current_host = self.host  # property 每次 lazy resolve
+            current_host = _normalize_host(self.host)  # property 每次 lazy resolve
            if current_host in attempted_hosts:
-                # 已試過同主機（cache 還沒過期），跳出避免無限迴圈
-                break
+                # 已試過同主機時，若是標準三主機鏈且 caller 沒指定 host，
+                # 改走尚未嘗試的下一台。避免 request timeout(60s) 大於
+                # unhealthy TTL(30s) 時第三輪又 resolve 回 primary，導致 111
+                # final fallback 永遠沒被打到。
+                next_host = None
+                if self._explicit_host is None and current_host in canonical_hosts:
+                    next_host = next((host for host in canonical_hosts if host not in attempted_hosts), None)
+                if not next_host:
+                    # 非標準 host 或 explicit host 維持原行為：跳出避免無限迴圈。
+                    break
+                logger.info(
+                    "[Ollama] resolver returned previously attempted host=%s; forcing next fallback host=%s",
+                    current_host,
+                    next_host,
+                )
+                current_host = next_host
            attempted_hosts.append(current_host)

            logger.info(f"[Ollama] 嘗試 #{attempt+1}/3 host={current_host} model={model} timeout={request_timeout}s")
--- a/services/ppt_vision_service.py
+++ b/services/ppt_vision_service.py
@@ -23,6 +23,7 @@ import base64
 import logging
 import shutil
 import threading
+from io import BytesIO
 from dataclasses import dataclass, field
 from typing import Optional, Dict, Any, List, Sequence

@@ -32,7 +33,9 @@ logger = logging.getLogger(__name__)
 # Feature flag + 配置
 # ─────────────────────────────────────────────────────────────────────────────
 PPT_VISION_MODEL = os.getenv('PPT_VISION_MODEL', 'minicpm-v:latest')
-PPT_VISION_TIMEOUT = int(os.getenv('PPT_VISION_TIMEOUT', '60'))
+PPT_VISION_TIMEOUT = int(os.getenv('PPT_VISION_TIMEOUT', '45'))
+PPT_VISION_IMAGE_MAX_EDGE = int(os.getenv('PPT_VISION_IMAGE_MAX_EDGE', '1280'))
+PPT_VISION_IMAGE_QUALITY = int(os.getenv('PPT_VISION_IMAGE_QUALITY', '82'))
 _AUDIT_LOCK = threading.Lock()
 _LAST_AUDIT_RUN: Dict[str, Any] | None = None

@@ -105,6 +108,27 @@ class PPTVisionService:
    def is_available(self) -> bool:
        return is_ppt_vision_enabled()

+    def _encode_image_for_vision(self, image_path: str) -> str:
+        """Return base64 text for the slide image: a downscaled RGB JPEG when Pillow succeeds, else the raw file bytes."""
+        try:
+            from PIL import Image
+
+            with Image.open(image_path) as im:
+                image = im.convert('RGB')  # JPEG output below needs a mode without alpha/palette
+                image.thumbnail((PPT_VISION_IMAGE_MAX_EDGE, PPT_VISION_IMAGE_MAX_EDGE))  # resizes in place within the max-edge box
+                buffer = BytesIO()
+                image.save(
+                    buffer,
+                    format='JPEG',
+                    quality=max(50, min(PPT_VISION_IMAGE_QUALITY, 95)),  # clamp the configured quality into 50..95
+                    optimize=True,
+                )
+                return base64.b64encode(buffer.getvalue()).decode('ascii')
+        except Exception:  # covers a missing Pillow import as well as open/convert/save failures
+            # Pillow is an optimization, not a hard dependency for the vision path. NOTE(review): the failure is not logged.
+            with open(image_path, 'rb') as f:
+                return base64.b64encode(f.read()).decode('ascii')
+
    def check_ppt_file(self, pptx_path: str, max_slides: int = 5) -> Dict[str, Any]:
        """檢查整份 .pptx — Phase 26 整合到 PPT 生成流程。

@@ -330,11 +354,9 @@ class PPTVisionService:
                error=f'image not found: {image_path}',
            )

-        # 讀檔並 base64 編碼
+        # 讀檔並 base64 編碼；可用時先壓縮縮圖，避免 Ollama vision 被大圖拖慢。
        try:
-            with open(image_path, 'rb') as f:
-                img_bytes = f.read()
-            img_b64 = base64.b64encode(img_bytes).decode('ascii')
+            img_b64 = self._encode_image_for_vision(image_path)
        except Exception as e:
            return VisionResult(
                success=False,
@@ -351,7 +373,8 @@ class PPTVisionService:
                system_prompt=PPT_VISION_SYSTEM_PROMPT,
                temperature=0.2,
                timeout=PPT_VISION_TIMEOUT,
-                options={'num_predict': 512},
+                keep_alive='5m',
+                options={'num_predict': 256},
                images=[img_b64],
            )
            duration_ms = int((time.monotonic() - start) * 1000)
--- a/tests/test_ollama_retry_chain.py
+++ b/tests/test_ollama_retry_chain.py
@@ -138,6 +138,31 @@ def test_generate_same_host_breaks_loop():
    assert mock_post.call_count == 1


+def test_generate_forces_final_fallback_when_unhealthy_ttl_expires_mid_request():
+    """When the unhealthy TTL expires during a long timeout, the third attempt must still hit the 111 fallback."""
+    import requests
+    from services import ollama_service as oss
+    from services.ollama_service import OllamaService
+
+    svc = OllamaService()
+    hosts = [
+        oss.OLLAMA_HOST_PRIMARY,
+        oss.OLLAMA_HOST_SECONDARY,
+        oss.OLLAMA_HOST_PRIMARY,  # simulate the resolver picking primary again after its unhealthy mark expired
+    ]
+
+    with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \
+         patch('services.ollama_service.requests.post',
+               side_effect=requests.Timeout('all timeout')) as mock_post:
+        resp = svc.generate('test')
+
+    posted_hosts = [call.args[0].split('/api/generate')[0] for call in mock_post.call_args_list]  # URL prefix before the API path
+    assert resp.success is False
+    assert mock_post.call_count == 3
+    assert posted_hosts == [oss.OLLAMA_HOST_PRIMARY, oss.OLLAMA_HOST_SECONDARY, oss.OLLAMA_HOST_FALLBACK]
+    assert 'all 3 hosts failed' in (resp.error or '')
+
+
 def test_generate_token_parsing_phase13():
    """Phase 13 補強：OllamaResponse 解 prompt_eval_count + eval_count"""
    from services.ollama_service import OllamaService
--- a/tests/test_ppt_vision_service.py
+++ b/tests/test_ppt_vision_service.py
@@ -77,6 +77,34 @@ def test_no_issues_response(fake_image, monkeypatch):
    assert payload['images'] and isinstance(payload['images'][0], str)


+def test_check_image_compresses_valid_png_before_ollama(monkeypatch, tmp_path):
+    """A valid screenshot should be converted to a lighter JPEG payload before it is sent to Ollama."""
+    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
+    import base64
+    Image = pytest.importorskip("PIL.Image")  # skip this test when Pillow is not installed
+    from services.ppt_vision_service import PPTVisionService
+
+    image_path = tmp_path / 'slide.png'
+    Image.new('RGB', (1800, 1000), color=(245, 238, 226)).save(image_path)  # solid-colour 1800x1000 PNG fixture
+
+    fake_resp = MagicMock(status_code=200)
+    fake_resp.json.return_value = {'response': '✅ 無視覺異常'}
+
+    with patch('services.ollama_service.resolve_ollama_host',
+               return_value='http://test:11434'), \
+         patch('services.ollama_service.requests.post', return_value=fake_resp) as mock_post:
+        svc = PPTVisionService()
+        result = svc.check_image(str(image_path))
+
+    payload = mock_post.call_args.kwargs['json']
+    encoded = base64.b64decode(payload['images'][0])  # the image bytes that were actually posted
+    assert result.success is True
+    assert encoded.startswith(b'\xff\xd8')  # JPEG start-of-image marker
+    assert len(encoded) < image_path.stat().st_size  # posted bytes must be smaller than the PNG on disk
+    assert payload['options']['num_predict'] == 256
+    assert payload['keep_alive'] == '5m'
+
+
 def test_issues_detected(fake_image, monkeypatch):
    """minicpm-v 回多個 ⚠️ marker → issues_found 應含解析的問題"""
    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')