From 21cdbdb75c791cc50198049d5231d83674870644 Mon Sep 17 00:00:00 2001 From: OoO Date: Tue, 19 May 2026 09:15:31 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=20PPT=20=E8=A6=96=E8=A6=BA?= =?UTF-8?q?=20QA=20=E5=A4=9A=20worker=20=E7=8B=80=E6=85=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TODO_NEXT_STEPS.txt | 1 + config.py | 2 +- services/ppt_vision_service.py | 114 +++++++++++++++++++---- tests/test_admin_observability_routes.py | 68 +++++++++++++- 4 files changed, 164 insertions(+), 21 deletions(-) diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index 6c08d9f..5f29bd6 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -115,6 +115,7 @@ - V10.226 補 PPT 視覺 QA runtime checklist:`/observability/ppt_audit_history` 在視覺模型未就緒時顯示 Feature Flag、LibreOffice、Vision Model 三段檢查與下一步操作,避免只看到「停用」而不知道卡在哪。 - Phase 54 manual sample candidate queue transaction:新增 `/api/market_intel/manual_sample_review/candidate_queue_transaction` POST 與 UI transaction preview 按鈕,將 queue row preview 轉成 `market_alert_review_queue` idempotent insert statement、payload hash 與 rollback plan;不開 DB connection、不開 transaction、不 commit、不建立 approval record;版本同步至 V10.227。 - V10.228 補 PPT 視覺 QA 背景狀態卡:新增 `/observability/ppt_audit/vision_status` 與頁面 Vision QA 狀態卡,讓立即視覺 QA 排入後可看 queued/running/completed/error 與最近審核摘要,不必刷新猜測。 + - V10.229 修正 PPT 視覺 QA 多 worker 狀態漂移:將 queued/running/completed/error 寫入 `/app/data/ppt_vision_audit_status.json` runtime state,所有 Gunicorn worker 共用同一份狀態並阻擋重複排入。 - Schema smoke:`tests/test_market_intel_skeleton.py` 檢查 `Base.metadata` 內含 ADR-035 八張 `market_*` tables。 - Desktop UI QA:本機只註冊 `market_intel_bp` 的 Flask harness 載入 `/market_intel`,確認 Phase 15、候選預覽、writer preview、安全 flags、點陣暖紙視覺正常,console error 0。 - API QA:`/api/market_intel/schema_smoke` 通過 7 張表與 `market_platforms` 必要欄位檢查;`/api/market_intel/platform_seed_writer_plan` 回傳 4 筆 dry-run upsert preview,`writes_executed=false`,四平台皆 `blocked_dry_run_only`。 diff --git a/config.py b/config.py index 65764d9..7e003b6 100644 --- a/config.py +++ b/config.py @@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.228" +SYSTEM_VERSION = "V10.229" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/services/ppt_vision_service.py b/services/ppt_vision_service.py index 4d364ea..6653fa2 100644 --- a/services/ppt_vision_service.py +++ b/services/ppt_vision_service.py @@ -20,6 +20,7 @@ from __future__ import annotations import os import time import base64 +import json import logging import shutil import threading @@ -39,6 +40,7 @@ PPT_VISION_IMAGE_MAX_EDGE = int(os.getenv('PPT_VISION_IMAGE_MAX_EDGE', '1280')) PPT_VISION_IMAGE_QUALITY = int(os.getenv('PPT_VISION_IMAGE_QUALITY', '82')) _AUDIT_LOCK = threading.Lock() _LAST_AUDIT_RUN: Dict[str, Any] | None = None +_ACTIVE_AUDIT_TTL_SECONDS = int(os.getenv('PPT_VISION_ACTIVE_TTL_SECONDS', '7200')) def is_ppt_vision_enabled() -> bool: @@ -106,6 +108,80 @@ def get_ppt_vision_runtime_status() -> Dict[str, Any]: } +def _audit_state_path() -> str: + return os.getenv( + 'PPT_VISION_STATE_PATH', + os.path.join(os.getenv('DATA_DIR', os.path.join(os.getcwd(), 'data')), 'ppt_vision_audit_status.json'), + ) + + +def _now_label() -> str: + return time.strftime('%Y-%m-%d %H:%M:%S') + + +def _read_persisted_audit_run() -> Dict[str, Any] | None: + path = _audit_state_path() + try: + if not os.path.isfile(path): + return None + with open(path, 'r', encoding='utf-8') as handle: + payload = json.load(handle) + return payload if isinstance(payload, dict) else None + except Exception: + logger.debug("[PPTVision] read audit state failed", exc_info=True) + return None + + +def _write_persisted_audit_run(run: Dict[str, Any]) -> None: + path = _audit_state_path() + directory = os.path.dirname(path) + try: + os.makedirs(directory, exist_ok=True) + tmp_path = f"{path}.tmp" + with open(tmp_path, 'w', encoding='utf-8') as handle: + json.dump(run, handle, ensure_ascii=False) + os.replace(tmp_path, path) + except Exception: + logger.debug("[PPTVision] write audit state failed", exc_info=True) + + +def _record_audit_run(run: Dict[str, Any]) -> Dict[str, Any]: + global _LAST_AUDIT_RUN + payload = dict(run) + payload['updated_at'] = payload.get('updated_at') or _now_label() + _LAST_AUDIT_RUN = payload + _write_persisted_audit_run(payload) + return payload + + +def _load_last_audit_run() -> Dict[str, Any] | None: + persisted = _read_persisted_audit_run() + if not _LAST_AUDIT_RUN: + return persisted + if not persisted: + return _LAST_AUDIT_RUN + if str(persisted.get('updated_at') or '') >= str(_LAST_AUDIT_RUN.get('updated_at') or ''): + return persisted + return _LAST_AUDIT_RUN + + +def _timestamp_age_seconds(value: str | None) -> float | None: + if not value: + return None + try: + parsed = datetime.strptime(value, '%Y-%m-%d %H:%M:%S') + return max(0.0, (datetime.now() - parsed).total_seconds()) + except Exception: + return None + + +def _is_recent_active_audit_run(run: Dict[str, Any] | None) -> bool: + if not run or run.get('status') not in {'queued', 'running'}: + return False + age = _timestamp_age_seconds(run.get('updated_at') or run.get('started_at') or run.get('queued_at')) + return age is None or age < _ACTIVE_AUDIT_TTL_SECONDS + + def _public_audit_run_payload(run: Dict[str, Any] | None) -> Dict[str, Any] | None: if not run: return None @@ -126,6 +202,7 @@ def _public_audit_run_payload(run: Dict[str, Any] | None) -> Dict[str, Any] | No 'queued_at': run.get('queued_at') or '', 'started_at': run.get('started_at') or '', 'finished_at': run.get('finished_at') or '', + 'updated_at': run.get('updated_at') or '', 'filenames': [ os.path.basename(str(name)) for name in (run.get('filenames') or []) @@ -146,8 +223,9 @@ def _public_audit_run_payload(run: Dict[str, Any] | None) -> Dict[str, Any] | No def get_ppt_vision_audit_status() -> Dict[str, Any]: """Return the current/last background visual QA run without touching DB.""" - running = _AUDIT_LOCK.locked() - last_run = _public_audit_run_payload(_LAST_AUDIT_RUN) + raw_run = _load_last_audit_run() + running = _AUDIT_LOCK.locked() or _is_recent_active_audit_run(raw_run) + last_run = _public_audit_run_payload(raw_run) if running: status = 'running' status_label = '執行中' @@ -619,14 +697,13 @@ def start_ppt_vision_audit_background( filenames: Sequence[str] | None = None, ) -> Dict[str, Any]: """Queue a non-blocking PPT vision audit run for the admin UI.""" - global _LAST_AUDIT_RUN - - if _AUDIT_LOCK.locked(): + current_run = _load_last_audit_run() + if _AUDIT_LOCK.locked() or _is_recent_active_audit_run(current_run): return { 'ok': True, 'status': 'already_running', 'message': 'PPT vision audit is already running.', - 'last_run': _public_audit_run_payload(_LAST_AUDIT_RUN), + 'last_run': _public_audit_run_payload(current_run), } clean_filenames = [ @@ -634,27 +711,26 @@ def start_ppt_vision_audit_background( for name in (filenames or []) if str(name).lower().endswith('.pptx') ] - queued_at = time.strftime('%Y-%m-%d %H:%M:%S') - _LAST_AUDIT_RUN = { + queued_at = _now_label() + _record_audit_run({ 'ok': True, 'status': 'queued', 'queued_at': queued_at, 'filenames': clean_filenames, 'max_files': max_files, - } + }) def _run(): - global _LAST_AUDIT_RUN with _AUDIT_LOCK: - started_at = time.strftime('%Y-%m-%d %H:%M:%S') - _LAST_AUDIT_RUN = { + started_at = _now_label() + _record_audit_run({ 'ok': True, 'status': 'running', 'queued_at': queued_at, 'started_at': started_at, 'filenames': clean_filenames, 'max_files': max_files, - } + }) try: summary = audit_recent_ppts( reports_dir=reports_dir, @@ -662,27 +738,27 @@ def start_ppt_vision_audit_background( max_files=max_files, filenames=clean_filenames or None, ) - _LAST_AUDIT_RUN = { + _record_audit_run({ 'ok': True, 'status': 'completed', 'queued_at': queued_at, 'started_at': started_at, - 'finished_at': time.strftime('%Y-%m-%d %H:%M:%S'), + 'finished_at': _now_label(), 'filenames': clean_filenames, 'max_files': max_files, 'summary': summary, - } + }) except Exception as exc: - _LAST_AUDIT_RUN = { + _record_audit_run({ 'ok': False, 'status': 'error', 'queued_at': queued_at, 'started_at': started_at, - 'finished_at': time.strftime('%Y-%m-%d %H:%M:%S'), + 'finished_at': _now_label(), 'filenames': clean_filenames, 'max_files': max_files, 'error': f'{type(exc).__name__}: {str(exc)[:200]}', - } + }) logger.error("[PPTVision] background audit failed: %s", exc, exc_info=True) thread = threading.Thread(target=_run, name='ppt-vision-audit', daemon=True) diff --git a/tests/test_admin_observability_routes.py b/tests/test_admin_observability_routes.py index 673daad..55efe76 100644 --- a/tests/test_admin_observability_routes.py +++ b/tests/test_admin_observability_routes.py @@ -696,10 +696,11 @@ def test_ppt_audit_run_vision_queues_background_audit(client, monkeypatch): assert captured['max_files'] == 1 -def test_ppt_vision_audit_status_sanitizes_last_run(monkeypatch): +def test_ppt_vision_audit_status_sanitizes_last_run(monkeypatch, tmp_path): """背景視覺 QA 狀態只回檔名與摘要,不把 reports_dir 絕對路徑曝露到頁面。""" from services import ppt_vision_service as svc + monkeypatch.setenv('PPT_VISION_STATE_PATH', str(tmp_path / 'vision_state.json')) monkeypatch.setattr(svc, '_LAST_AUDIT_RUN', { 'ok': True, 'status': 'completed', @@ -731,6 +732,71 @@ def test_ppt_vision_audit_status_sanitizes_last_run(monkeypatch): assert '/app/data/reports' not in str(status) +def test_ppt_vision_audit_status_reads_persisted_state(monkeypatch, tmp_path): + """多 worker 下狀態需從 runtime state 檔讀取,不能只靠單一 worker 記憶體。""" + import json + from services import ppt_vision_service as svc + + state_path = tmp_path / 'vision_state.json' + monkeypatch.setenv('PPT_VISION_STATE_PATH', str(state_path)) + monkeypatch.setattr(svc, '_LAST_AUDIT_RUN', None) + state_path.write_text(json.dumps({ + 'ok': True, + 'status': 'completed', + 'queued_at': '2026-05-19 12:00:00', + 'started_at': '2026-05-19 12:00:01', + 'finished_at': '2026-05-19 12:00:05', + 'updated_at': '2026-05-19 12:00:05', + 'filenames': ['/app/data/reports/ocbot_daily_20260518.pptx'], + 'max_files': 1, + 'summary': { + 'audited_files': [{ + 'path': '/app/data/reports/ocbot_daily_20260518.pptx', + 'slides_checked': 1, + 'issues': 0, + 'error': None, + }], + 'total_issues': 0, + 'errors': [], + }, + }), encoding='utf-8') + + status = svc.get_ppt_vision_audit_status() + + assert status['status'] == 'completed' + assert status['last_run']['updated_at'] == '2026-05-19 12:00:05' + assert status['last_run']['filenames'] == ['ocbot_daily_20260518.pptx'] + assert '/app/data/reports' not in str(status) + + +def test_ppt_vision_audit_background_respects_persisted_running(monkeypatch, tmp_path): + """另一個 worker 已在跑時,新 request 應回 already_running,避免重複開模型任務。""" + import json + from services import ppt_vision_service as svc + + state_path = tmp_path / 'vision_state.json' + monkeypatch.setenv('PPT_VISION_STATE_PATH', str(state_path)) + monkeypatch.setattr(svc, '_LAST_AUDIT_RUN', None) + state_path.write_text(json.dumps({ + 'ok': True, + 'status': 'running', + 'queued_at': '2999-05-19 12:00:00', + 'started_at': '2999-05-19 12:00:01', + 'updated_at': '2999-05-19 12:00:01', + 'filenames': ['/app/data/reports/ocbot_daily_20260518.pptx'], + 'max_files': 1, + }), encoding='utf-8') + + result = svc.start_ppt_vision_audit_background( + filenames=['ocbot_daily_20260518.pptx'], + max_files=1, + ) + + assert result['ok'] is True + assert result['status'] == 'already_running' + assert result['last_run']['filenames'] == ['ocbot_daily_20260518.pptx'] + + def test_ppt_audit_vision_status_route_returns_json(client, monkeypatch): """頁面輪詢用 status endpoint 要能回最近一次背景視覺 QA 狀態。""" from services import ppt_vision_service as svc