From 1cf1fd01b18c82b0f5d6619aa43befdf6153bc5c Mon Sep 17 00:00:00 2001 From: OoO Date: Mon, 18 May 2026 21:32:15 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=20Ollama=20fallback=20?= =?UTF-8?q?=E8=88=87=20PPT=20vision=20payload?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TODO_NEXT_STEPS.txt | 1 + config.py | 2 +- services/ollama_service.py | 40 ++++++++++++++++++++++++++++---- services/ppt_vision_service.py | 35 +++++++++++++++++++++++----- tests/test_ollama_retry_chain.py | 25 ++++++++++++++++++++ tests/test_ppt_vision_service.py | 28 ++++++++++++++++++++++ 6 files changed, 119 insertions(+), 12 deletions(-) diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index 0b08b5f..d543a5d 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.212 修正 PPT 視覺 QA 的 Ollama 三主機 fallback:當 Primary/Secondary request timeout 超過 unhealthy TTL 時,第三輪仍強制打 111 final fallback;PPT 截圖送模型前轉輕量 JPEG、縮小輸出 token,降低單檔審核耗時。 - V10.211 補 `/observability/ppt_audit_history` 全類型視覺 QA:審核歷史不再限 daily,頁面新增「立即視覺 QA」非阻塞補跑,結果寫入 `ppt_audit_results`;模型失敗時也保留 slide error,避免產線狀態只剩空白。 - V10.210 補 `/observability/ppt_audit_history` 審核歷史同頁回放:每筆 daily 視覺審核紀錄的動作欄新增「回放」按鈕,沿用 PDF 預覽抽屜並保留下載/開新頁,讓問題追查不必再回檔案表找簡報。 - V10.208 修正 `/observability/ppt_audit_history` 同頁預覽抽屜 selector:Modal 標題改用獨立 `data-ppt-preview-modal-title`,避免與多個預覽連結的資料屬性衝突。 diff --git a/config.py b/config.py index 380b6a5..74592a2 100644 --- a/config.py +++ b/config.py @@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.211" +SYSTEM_VERSION = "V10.212" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/services/ollama_service.py b/services/ollama_service.py index ca2c28b..302e166 100644 --- a/services/ollama_service.py +++ b/services/ollama_service.py @@ -90,17 +90,32 @@ def _mark_unhealthy_best_effort(host: str) -> None: logger.debug("[OllamaHost] mark_unhealthy failed for host=%s", host, exc_info=True) +def _normalize_host(host: str) -> str: + return (host or '').rstrip('/') + + +def _canonical_host_chain() -> List[str]: + """Return the approved static fallback chain without duplicates.""" + chain: List[str] = [] + for host in (OLLAMA_HOST_PRIMARY, OLLAMA_HOST_SECONDARY, OLLAMA_HOST_FALLBACK): + clean = _normalize_host(host) + if clean and clean not in chain: + chain.append(clean) + return chain + + def _is_unhealthy(host: str) -> bool: """檢查 host 是否在 unhealthy TTL 內""" import time if not host: return False - ts = _unhealthy_marks.get(host.rstrip('/')) + clean_host = _normalize_host(host) + ts = _unhealthy_marks.get(clean_host) if ts is None: return False if time.time() - ts >= _UNHEALTHY_TTL: # TTL 過期,清除 - _unhealthy_marks.pop(host.rstrip('/'), None) + _unhealthy_marks.pop(clean_host, None) return False return True @@ -323,12 +338,27 @@ class OllamaService: # HOTFIX 三主機 retry 鏈 attempted_hosts: List[str] = [] last_error: Optional[str] = None + canonical_hosts = _canonical_host_chain() for attempt in range(3): - current_host = self.host # property 每次 lazy resolve + current_host = _normalize_host(self.host) # property 每次 lazy resolve if current_host in attempted_hosts: - # 已試過同主機(cache 還沒過期),跳出避免無限迴圈 - break + # 已試過同主機時,若是標準三主機鏈且 caller 沒指定 host, + # 改走尚未嘗試的下一台。避免 request timeout(60s) 大於 + # unhealthy TTL(30s) 時第三輪又 resolve 回 primary,導致 111 + # final fallback 永遠沒被打到。 + next_host = None + if self._explicit_host is None and current_host in canonical_hosts: + next_host = next((host for host in canonical_hosts if host not in attempted_hosts), None) + if not next_host: + # 非標準 host 或 explicit host 維持原行為:跳出避免無限迴圈。 + break + logger.info( + "[Ollama] resolver returned previously attempted host=%s; forcing next fallback host=%s", + current_host, + next_host, + ) + current_host = next_host attempted_hosts.append(current_host) logger.info(f"[Ollama] 嘗試 #{attempt+1}/3 host={current_host} model={model} timeout={request_timeout}s") diff --git a/services/ppt_vision_service.py b/services/ppt_vision_service.py index 4105f54..78dde1f 100644 --- a/services/ppt_vision_service.py +++ b/services/ppt_vision_service.py @@ -23,6 +23,7 @@ import base64 import logging import shutil import threading +from io import BytesIO from dataclasses import dataclass, field from typing import Optional, Dict, Any, List, Sequence @@ -32,7 +33,9 @@ logger = logging.getLogger(__name__) # Feature flag + 配置 # ───────────────────────────────────────────────────────────────────────────── PPT_VISION_MODEL = os.getenv('PPT_VISION_MODEL', 'minicpm-v:latest') -PPT_VISION_TIMEOUT = int(os.getenv('PPT_VISION_TIMEOUT', '60')) +PPT_VISION_TIMEOUT = int(os.getenv('PPT_VISION_TIMEOUT', '45')) +PPT_VISION_IMAGE_MAX_EDGE = int(os.getenv('PPT_VISION_IMAGE_MAX_EDGE', '1280')) +PPT_VISION_IMAGE_QUALITY = int(os.getenv('PPT_VISION_IMAGE_QUALITY', '82')) _AUDIT_LOCK = threading.Lock() _LAST_AUDIT_RUN: Dict[str, Any] | None = None @@ -105,6 +108,27 @@ class PPTVisionService: def is_available(self) -> bool: return is_ppt_vision_enabled() + def _encode_image_for_vision(self, image_path: str) -> str: + """Compress slide screenshots before sending to Ollama vision.""" + try: + from PIL import Image + + with Image.open(image_path) as im: + image = im.convert('RGB') + image.thumbnail((PPT_VISION_IMAGE_MAX_EDGE, PPT_VISION_IMAGE_MAX_EDGE)) + buffer = BytesIO() + image.save( + buffer, + format='JPEG', + quality=max(50, min(PPT_VISION_IMAGE_QUALITY, 95)), + optimize=True, + ) + return base64.b64encode(buffer.getvalue()).decode('ascii') + except Exception: + # Pillow is an optimization, not a hard dependency for the vision path. + with open(image_path, 'rb') as f: + return base64.b64encode(f.read()).decode('ascii') + def check_ppt_file(self, pptx_path: str, max_slides: int = 5) -> Dict[str, Any]: """檢查整份 .pptx — Phase 26 整合到 PPT 生成流程。 @@ -330,11 +354,9 @@ class PPTVisionService: error=f'image not found: {image_path}', ) - # 讀檔並 base64 編碼 + # 讀檔並 base64 編碼;可用時先壓縮縮圖,避免 Ollama vision 被大圖拖慢。 try: - with open(image_path, 'rb') as f: - img_bytes = f.read() - img_b64 = base64.b64encode(img_bytes).decode('ascii') + img_b64 = self._encode_image_for_vision(image_path) except Exception as e: return VisionResult( success=False, @@ -351,7 +373,8 @@ class PPTVisionService: system_prompt=PPT_VISION_SYSTEM_PROMPT, temperature=0.2, timeout=PPT_VISION_TIMEOUT, - options={'num_predict': 512}, + keep_alive='5m', + options={'num_predict': 256}, images=[img_b64], ) duration_ms = int((time.monotonic() - start) * 1000) diff --git a/tests/test_ollama_retry_chain.py b/tests/test_ollama_retry_chain.py index ee97a9e..740e5b9 100644 --- a/tests/test_ollama_retry_chain.py +++ b/tests/test_ollama_retry_chain.py @@ -138,6 +138,31 @@ def test_generate_same_host_breaks_loop(): assert mock_post.call_count == 1 +def test_generate_forces_final_fallback_when_unhealthy_ttl_expires_mid_request(): + """長 timeout 期間 unhealthy TTL 過期時,第三輪仍應打 111 fallback。""" + import requests + from services import ollama_service as oss + from services.ollama_service import OllamaService + + svc = OllamaService() + hosts = [ + oss.OLLAMA_HOST_PRIMARY, + oss.OLLAMA_HOST_SECONDARY, + oss.OLLAMA_HOST_PRIMARY, # 模擬 primary unhealthy mark 過期後 resolver 又選回 primary + ] + + with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \ + patch('services.ollama_service.requests.post', + side_effect=requests.Timeout('all timeout')) as mock_post: + resp = svc.generate('test') + + posted_hosts = [call.args[0].split('/api/generate')[0] for call in mock_post.call_args_list] + assert resp.success is False + assert mock_post.call_count == 3 + assert posted_hosts == [oss.OLLAMA_HOST_PRIMARY, oss.OLLAMA_HOST_SECONDARY, oss.OLLAMA_HOST_FALLBACK] + assert 'all 3 hosts failed' in (resp.error or '') + + def test_generate_token_parsing_phase13(): """Phase 13 補強:OllamaResponse 解 prompt_eval_count + eval_count""" from services.ollama_service import OllamaService diff --git a/tests/test_ppt_vision_service.py b/tests/test_ppt_vision_service.py index e755e5e..a58d07e 100644 --- a/tests/test_ppt_vision_service.py +++ b/tests/test_ppt_vision_service.py @@ -77,6 +77,34 @@ def test_no_issues_response(fake_image, monkeypatch): assert payload['images'] and isinstance(payload['images'][0], str) +def test_check_image_compresses_valid_png_before_ollama(monkeypatch, tmp_path): + """有效截圖送 Ollama 前應轉成較輕的 JPEG payload。""" + monkeypatch.setenv('PPT_VISION_ENABLED', 'true') + import base64 + Image = pytest.importorskip("PIL.Image") + from services.ppt_vision_service import PPTVisionService + + image_path = tmp_path / 'slide.png' + Image.new('RGB', (1800, 1000), color=(245, 238, 226)).save(image_path) + + fake_resp = MagicMock(status_code=200) + fake_resp.json.return_value = {'response': '✅ 無視覺異常'} + + with patch('services.ollama_service.resolve_ollama_host', + return_value='http://test:11434'), \ + patch('services.ollama_service.requests.post', return_value=fake_resp) as mock_post: + svc = PPTVisionService() + result = svc.check_image(str(image_path)) + + payload = mock_post.call_args.kwargs['json'] + encoded = base64.b64decode(payload['images'][0]) + assert result.success is True + assert encoded.startswith(b'\xff\xd8') + assert len(encoded) < image_path.stat().st_size + assert payload['options']['num_predict'] == 256 + assert payload['keep_alive'] == '5m' + + def test_issues_detected(fake_image, monkeypatch): """minicpm-v 回多個 ⚠️ marker → issues_found 應含解析的問題""" monkeypatch.setenv('PPT_VISION_ENABLED', 'true')