From 1cf1fd01b18c82b0f5d6619aa43befdf6153bc5c Mon Sep 17 00:00:00 2001
From: OoO <ooo@MacBook-Pro.local>
Date: Mon, 18 May 2026 21:32:15 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=20Ollama=20fallback=20?=
 =?UTF-8?q?=E8=88=87=20PPT=20vision=20payload?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 TODO_NEXT_STEPS.txt              |  1 +
 config.py                        |  2 +-
 services/ollama_service.py       | 40 ++++++++++++++++++++++++++++----
 services/ppt_vision_service.py   | 35 +++++++++++++++++++++++-----
 tests/test_ollama_retry_chain.py | 25 ++++++++++++++++++++
 tests/test_ppt_vision_service.py | 28 ++++++++++++++++++++++
 6 files changed, 119 insertions(+), 12 deletions(-)

diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt
index 0b08b5f..d543a5d 100644
--- a/TODO_NEXT_STEPS.txt
+++ b/TODO_NEXT_STEPS.txt
@@ -4,6 +4,7 @@
 ================================================================================
 
 【已完成】
+   - V10.212 修正 PPT 視覺 QA 的 Ollama 三主機 fallback：當 Primary/Secondary request timeout 超過 unhealthy TTL 時，第三輪仍強制打 111 final fallback；PPT 截圖送模型前轉輕量 JPEG、縮小輸出 token，降低單檔審核耗時。
    - V10.211 補 `/observability/ppt_audit_history` 全類型視覺 QA：審核歷史不再限 daily，頁面新增「立即視覺 QA」非阻塞補跑，結果寫入 `ppt_audit_results`；模型失敗時也保留 slide error，避免產線狀態只剩空白。
    - V10.210 補 `/observability/ppt_audit_history` 審核歷史同頁回放：每筆 daily 視覺審核紀錄的動作欄新增「回放」按鈕，沿用 PDF 預覽抽屜並保留下載/開新頁，讓問題追查不必再回檔案表找簡報。
    - V10.208 修正 `/observability/ppt_audit_history` 同頁預覽抽屜 selector：Modal 標題改用獨立 `data-ppt-preview-modal-title`，避免與多個預覽連結的資料屬性衝突。
diff --git a/config.py b/config.py
index 380b6a5..74592a2 100644
--- a/config.py
+++ b/config.py
@@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
 # ==========================================
 # 系統版本與路徑
 # ==========================================
-SYSTEM_VERSION = "V10.211"
+SYSTEM_VERSION = "V10.212"
 LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
 public_url = PUBLIC_URL  # 用於模板顯示
 
diff --git a/services/ollama_service.py b/services/ollama_service.py
index ca2c28b..302e166 100644
--- a/services/ollama_service.py
+++ b/services/ollama_service.py
@@ -90,17 +90,32 @@ def _mark_unhealthy_best_effort(host: str) -> None:
         logger.debug("[OllamaHost] mark_unhealthy failed for host=%s", host, exc_info=True)
 
 
+def _normalize_host(host: str) -> str:
+    return host.rstrip('/') if host else ''  # None/'' -> ''; trailing '/' dropped so equal hosts compare equal
+
+
+def _canonical_host_chain() -> List[str]:
+    """List the static primary, secondary and fallback hosts, normalized and de-duplicated in that order."""
+    configured = (OLLAMA_HOST_PRIMARY, OLLAMA_HOST_SECONDARY, OLLAMA_HOST_FALLBACK)
+    # dict.fromkeys keeps first-seen order, so the priority order survives de-duplication.
+    unique_hosts = dict.fromkeys(_normalize_host(candidate) for candidate in configured)
+    # Unset hosts normalize to '' and must never be offered as a retry target.
+    unique_hosts.pop('', None)
+    return list(unique_hosts)
+
+
 def _is_unhealthy(host: str) -> bool:
     """檢查 host 是否在 unhealthy TTL 內"""
     import time
     if not host:
         return False
-    ts = _unhealthy_marks.get(host.rstrip('/'))
+    clean_host = _normalize_host(host)  # look up and expire marks under the slash-stripped host
+    ts = _unhealthy_marks.get(clean_host)
     if ts is None:
         return False
     if time.time() - ts >= _UNHEALTHY_TTL:
         # TTL 過期，清除
-        _unhealthy_marks.pop(host.rstrip('/'), None)
+        _unhealthy_marks.pop(clean_host, None)
         return False
     return True
 
@@ -323,12 +338,27 @@ class OllamaService:
         # HOTFIX 三主機 retry 鏈
         attempted_hosts: List[str] = []
         last_error: Optional[str] = None
+        canonical_hosts = _canonical_host_chain()
 
         for attempt in range(3):
-            current_host = self.host  # property 每次 lazy resolve
+            current_host = _normalize_host(self.host)  # property 每次 lazy resolve
             if current_host in attempted_hosts:
-                # 已試過同主機（cache 還沒過期），跳出避免無限迴圈
-                break
+                # 已試過同主機時，若是標準三主機鏈且 caller 沒指定 host，
+                # 改走尚未嘗試的下一台。避免 request timeout(60s) 大於
+                # unhealthy TTL(30s) 時第三輪又 resolve 回 primary，導致 111
+                # final fallback 永遠沒被打到。
+                next_host = None
+                if self._explicit_host is None and current_host in canonical_hosts:
+                    next_host = next((host for host in canonical_hosts if host not in attempted_hosts), None)
+                if not next_host:
+                    # 非標準 host 或 explicit host 維持原行為：跳出避免無限迴圈。
+                    break
+                logger.info(
+                    "[Ollama] resolver returned previously attempted host=%s; forcing next fallback host=%s",
+                    current_host,
+                    next_host,
+                )
+                current_host = next_host
             attempted_hosts.append(current_host)
 
             logger.info(f"[Ollama] 嘗試 #{attempt+1}/3 host={current_host} model={model} timeout={request_timeout}s")
diff --git a/services/ppt_vision_service.py b/services/ppt_vision_service.py
index 4105f54..78dde1f 100644
--- a/services/ppt_vision_service.py
+++ b/services/ppt_vision_service.py
@@ -23,6 +23,7 @@ import base64
 import logging
 import shutil
 import threading
+from io import BytesIO
 from dataclasses import dataclass, field
 from typing import Optional, Dict, Any, List, Sequence
 
@@ -32,7 +33,9 @@ logger = logging.getLogger(__name__)
 # Feature flag + 配置
 # ─────────────────────────────────────────────────────────────────────────────
 PPT_VISION_MODEL = os.getenv('PPT_VISION_MODEL', 'minicpm-v:latest')
-PPT_VISION_TIMEOUT = int(os.getenv('PPT_VISION_TIMEOUT', '60'))
+PPT_VISION_TIMEOUT = int(os.getenv('PPT_VISION_TIMEOUT', '45'))  # passed as `timeout=` on the vision model request
+PPT_VISION_IMAGE_MAX_EDGE = int(os.getenv('PPT_VISION_IMAGE_MAX_EDGE', '1280'))  # thumbnail bounding box, used for both width and height
+PPT_VISION_IMAGE_QUALITY = int(os.getenv('PPT_VISION_IMAGE_QUALITY', '82'))  # JPEG quality; clamped to 50-95 when encoding
 _AUDIT_LOCK = threading.Lock()
 _LAST_AUDIT_RUN: Dict[str, Any] | None = None
 
@@ -105,6 +108,27 @@ class PPTVisionService:
     def is_available(self) -> bool:
         return is_ppt_vision_enabled()
 
+    def _encode_image_for_vision(self, image_path: str) -> str:
+        """Return the slide image as base64 text for the Ollama ``images`` payload.
+
+        Preferred path: re-encode as an RGB JPEG bounded by PPT_VISION_IMAGE_MAX_EDGE.
+        Fallback path: the untouched file bytes. Read errors propagate to the caller.
+        """
+        try:
+            from PIL import Image
+            jpeg_quality = max(50, min(PPT_VISION_IMAGE_QUALITY, 95))  # clamp the env override to 50-95
+            with Image.open(image_path) as im:
+                image = im.convert('RGB')  # JPEG cannot store alpha or palette modes
+                image.thumbnail((PPT_VISION_IMAGE_MAX_EDGE, PPT_VISION_IMAGE_MAX_EDGE))  # keeps aspect ratio
+                buffer = BytesIO()
+                image.save(buffer, format='JPEG', quality=jpeg_quality, optimize=True)
+                return base64.b64encode(buffer.getvalue()).decode('ascii')
+        except Exception:
+            # Pillow is an optimization, not a hard dependency: record why it was skipped, then send raw bytes.
+            logger.debug("[PPTVision] JPEG compression skipped for %s", image_path, exc_info=True)
+        with open(image_path, 'rb') as f:
+            return base64.b64encode(f.read()).decode('ascii')
+
     def check_ppt_file(self, pptx_path: str, max_slides: int = 5) -> Dict[str, Any]:
         """檢查整份 .pptx — Phase 26 整合到 PPT 生成流程。
 
@@ -330,11 +354,9 @@ class PPTVisionService:
                 error=f'image not found: {image_path}',
             )
 
-        # 讀檔並 base64 編碼
+        # 讀檔並 base64 編碼；可用時先壓縮縮圖，避免 Ollama vision 被大圖拖慢。
         try:
-            with open(image_path, 'rb') as f:
-                img_bytes = f.read()
-            img_b64 = base64.b64encode(img_bytes).decode('ascii')
+            img_b64 = self._encode_image_for_vision(image_path)
         except Exception as e:
             return VisionResult(
                 success=False,
@@ -351,7 +373,8 @@ class PPTVisionService:
                 system_prompt=PPT_VISION_SYSTEM_PROMPT,
                 temperature=0.2,
                 timeout=PPT_VISION_TIMEOUT,
-                options={'num_predict': 512},
+                keep_alive='5m',
+                options={'num_predict': 256},
                 images=[img_b64],
             )
             duration_ms = int((time.monotonic() - start) * 1000)
diff --git a/tests/test_ollama_retry_chain.py b/tests/test_ollama_retry_chain.py
index ee97a9e..740e5b9 100644
--- a/tests/test_ollama_retry_chain.py
+++ b/tests/test_ollama_retry_chain.py
@@ -138,6 +138,31 @@ def test_generate_same_host_breaks_loop():
     assert mock_post.call_count == 1
 
 
+def test_generate_forces_final_fallback_when_unhealthy_ttl_expires_mid_request():
+    """If the unhealthy TTL lapses during a long timeout, attempt 3 must still hit the final (111) fallback."""
+    import requests
+    from services import ollama_service as oss
+    from services.ollama_service import OllamaService
+
+    svc = OllamaService()
+    hosts = [
+        oss.OLLAMA_HOST_PRIMARY,
+        oss.OLLAMA_HOST_SECONDARY,
+        oss.OLLAMA_HOST_PRIMARY,  # primary's unhealthy mark expired, so the resolver picks primary again
+    ]
+
+    with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \
+         patch('services.ollama_service.requests.post',
+               side_effect=requests.Timeout('all timeout')) as mock_post:
+        resp = svc.generate('test')
+
+    posted_hosts = [call.args[0].split('/api/generate')[0] for call in mock_post.call_args_list]
+    assert resp.success is False
+    assert mock_post.call_count == 3
+    assert posted_hosts == [h.rstrip('/') for h in (oss.OLLAMA_HOST_PRIMARY, oss.OLLAMA_HOST_SECONDARY, oss.OLLAMA_HOST_FALLBACK)]
+    assert 'all 3 hosts failed' in (resp.error or '')
+
+
 def test_generate_token_parsing_phase13():
     """Phase 13 補強：OllamaResponse 解 prompt_eval_count + eval_count"""
     from services.ollama_service import OllamaService
diff --git a/tests/test_ppt_vision_service.py b/tests/test_ppt_vision_service.py
index e755e5e..a58d07e 100644
--- a/tests/test_ppt_vision_service.py
+++ b/tests/test_ppt_vision_service.py
@@ -77,6 +77,34 @@ def test_no_issues_response(fake_image, monkeypatch):
     assert payload['images'] and isinstance(payload['images'][0], str)
 
 
+def test_check_image_compresses_valid_png_before_ollama(monkeypatch, tmp_path):
+    """A valid screenshot must be converted to a lighter JPEG payload before it is sent to Ollama."""
+    monkeypatch.setenv('PPT_VISION_ENABLED', 'true')
+    import base64
+    Image = pytest.importorskip("PIL.Image")
+    from services.ppt_vision_service import PPTVisionService
+
+    image_path = tmp_path / 'slide.png'
+    Image.new('RGB', (1800, 1000), color=(245, 238, 226)).save(image_path)
+
+    fake_resp = MagicMock(status_code=200)
+    fake_resp.json.return_value = {'response': '✅ 無視覺異常'}
+
+    with patch('services.ollama_service.resolve_ollama_host',
+               return_value='http://test:11434'), \
+         patch('services.ollama_service.requests.post', return_value=fake_resp) as mock_post:
+        svc = PPTVisionService()
+        result = svc.check_image(str(image_path))
+
+    payload = mock_post.call_args.kwargs['json']
+    encoded = base64.b64decode(payload['images'][0])
+    assert result.success is True
+    assert encoded.startswith(b'\xff\xd8')  # JPEG start-of-image marker
+    assert len(encoded) < image_path.stat().st_size
+    assert payload['options']['num_predict'] == 256
+    assert payload['keep_alive'] == '5m'
+
+
 def test_issues_detected(fake_image, monkeypatch):
     """minicpm-v 回多個 ⚠️ marker → issues_found 應含解析的問題"""
     monkeypatch.setenv('PPT_VISION_ENABLED', 'true')