From c383a37f3fcd4eac62947576eefe311b03bfe47c Mon Sep 17 00:00:00 2001 From: OoO Date: Tue, 19 May 2026 10:03:10 +0800 Subject: [PATCH] =?UTF-8?q?=E8=A3=9C=20PPT=20=E8=A6=96=E8=A6=BA=20QA=20sta?= =?UTF-8?q?le=20recovery?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + TODO_NEXT_STEPS.txt | 1 + config.py | 2 +- services/ppt_vision_service.py | 31 ++++++++++++ tests/test_admin_observability_routes.py | 62 ++++++++++++++++++++++++ 5 files changed, 96 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index f824610..3d8f764 100644 --- a/.gitignore +++ b/.gitignore @@ -66,6 +66,7 @@ data/*.db-wal data/*.sqlite data/*.sqlite3 data/*.lock +data/ppt_vision_audit_status.json data/*.pkl database/*.db database/*.db-journal diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index 305d8a8..2cecec4 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -119,6 +119,7 @@ - Phase 55 candidate queue writer CLI gate:新增 `/api/market_intel/manual_sample_review/candidate_queue_writer_status` POST、`scripts/market_intel_candidate_queue_writer.py` 與 UI writer gate 按鈕,定義 `MARKET_INTEL_QUEUE_WRITE_APPROVAL` 一次性 token、execute/apply flags、備份、migration smoke 與 rollback gate;本階段仍不開 DB connection、不寫 `market_alert_review_queue`、不 commit、不掛 scheduler;版本同步至 V10.230。 - Phase 56 candidate queue writer preflight:新增 `/api/market_intel/manual_sample_review/candidate_queue_writer_preflight` POST 與 `services/market_intel/candidate_queue_writer_preflight.py`,檢查 transaction payload key 到 `market_alert_review_queue` 欄位映射、缺欄與 dedupe unique index;頁面預設 execute=false 不連 DB,CLI 可明確 `--read-only-preflight` 只讀 catalog;版本同步至 V10.232。 - Phase 57 candidate queue writer CLI transaction:`scripts/market_intel_candidate_queue_writer.py` 在 CLI-only 情境支援受控 transaction,必須同時通過 transaction payload、read-only preflight、`--execute`、`--apply-real-write`、一次性 token、備份確認與 migration live smoke 才會以 SQLAlchemy Core idempotent insert `market_alert_review_queue`;API/UI 仍不傳 token、不連 DB、不寫 queue、不掛 scheduler;版本同步至 V10.234。 + - V10.235 補 PPT 視覺 QA stale recovery:背景狀態寫入 worker PID;若部署 reload 後舊 PID 已不存在,`/observability/ppt_audit/vision_status` 會自動把 running 轉為可診斷 error 並允許重新排入,避免人工清 runtime state。 - Schema smoke:`tests/test_market_intel_skeleton.py` 檢查 `Base.metadata` 內含 ADR-035 八張 `market_*` tables。 - Desktop UI QA:本機只註冊 `market_intel_bp` 的 Flask harness 載入 `/market_intel`,確認 Phase 15、候選預覽、writer preview、安全 flags、點陣暖紙視覺正常,console error 0。 - API QA:`/api/market_intel/schema_smoke` 通過 7 張表與 `market_platforms` 必要欄位檢查;`/api/market_intel/platform_seed_writer_plan` 回傳 4 筆 dry-run upsert preview,`writes_executed=false`,四平台皆 `blocked_dry_run_only`。 diff --git a/config.py b/config.py index 5e102aa..107f98c 100644 --- a/config.py +++ b/config.py @@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.234" +SYSTEM_VERSION = "V10.235" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/services/ppt_vision_service.py b/services/ppt_vision_service.py index 4caea3e..fef68cd 100644 --- a/services/ppt_vision_service.py +++ b/services/ppt_vision_service.py @@ -150,6 +150,7 @@ def _record_audit_run(run: Dict[str, Any]) -> Dict[str, Any]: global _LAST_AUDIT_RUN payload = dict(run) payload['updated_at'] = payload.get('updated_at') or _now_label() + payload['pid'] = payload.get('pid') or os.getpid() _LAST_AUDIT_RUN = payload _write_persisted_audit_run(payload) return payload @@ -176,13 +177,40 @@ def _timestamp_age_seconds(value: str | None) -> float | None: return None +def _pid_exists(pid: Any) -> bool: + try: + pid_int = int(pid or 0) + except Exception: + return False + if pid_int <= 0: + return False + try: + os.kill(pid_int, 0) + return True + except OSError: + return False + + def _is_recent_active_audit_run(run: Dict[str, Any] | None) -> bool: if not run or run.get('status') not in {'queued', 'running'}: return False + if run.get('pid') and not _pid_exists(run.get('pid')): + return False age = _timestamp_age_seconds(run.get('updated_at') or run.get('started_at') or run.get('queued_at')) return age is None or age < _ACTIVE_AUDIT_TTL_SECONDS +def _mark_stale_audit_run(run: Dict[str, Any]) -> Dict[str, Any]: + payload = dict(run) + payload.update({ + 'ok': False, + 'status': 'error', + 'finished_at': payload.get('finished_at') or _now_label(), + 'error': 'background worker no longer running; audit state marked stale', + }) + return _record_audit_run(payload) + + def _is_vision_infra_error(error: str | None) -> bool: text = (error or '').lower() return any(marker in text for marker in ( @@ -214,6 +242,7 @@ def _public_audit_run_payload(run: Dict[str, Any] | None) -> Dict[str, Any] | No 'started_at': run.get('started_at') or '', 'finished_at': run.get('finished_at') or '', 'updated_at': run.get('updated_at') or '', + 'pid': run.get('pid') or None, 'filenames': [ os.path.basename(str(name)) for name in (run.get('filenames') or []) @@ -235,6 +264,8 @@ def _public_audit_run_payload(run: Dict[str, Any] | None) -> Dict[str, Any] | No def get_ppt_vision_audit_status() -> Dict[str, Any]: """Return the current/last background visual QA run without touching DB.""" raw_run = _load_last_audit_run() + if raw_run and raw_run.get('status') in {'queued', 'running'} and not _is_recent_active_audit_run(raw_run): + raw_run = _mark_stale_audit_run(raw_run) running = _AUDIT_LOCK.locked() or _is_recent_active_audit_run(raw_run) last_run = _public_audit_run_payload(raw_run) if running: diff --git a/tests/test_admin_observability_routes.py b/tests/test_admin_observability_routes.py index 55efe76..04d6132 100644 --- a/tests/test_admin_observability_routes.py +++ b/tests/test_admin_observability_routes.py @@ -797,6 +797,68 @@ def test_ppt_vision_audit_background_respects_persisted_running(monkeypatch, tmp assert result['last_run']['filenames'] == ['ocbot_daily_20260518.pptx'] +def test_ppt_vision_audit_status_marks_dead_worker_stale(monkeypatch, tmp_path): + """部署 reload 後若舊 worker 已不存在,running state 要自動轉為可診斷錯誤。""" + import json + from services import ppt_vision_service as svc + + state_path = tmp_path / 'vision_state.json' + monkeypatch.setenv('PPT_VISION_STATE_PATH', str(state_path)) + monkeypatch.setattr(svc, '_LAST_AUDIT_RUN', None) + monkeypatch.setattr(svc, '_pid_exists', lambda _pid: False) + state_path.write_text(json.dumps({ + 'ok': True, + 'status': 'running', + 'queued_at': '2999-05-19 12:00:00', + 'started_at': '2999-05-19 12:00:01', + 'updated_at': '2999-05-19 12:00:01', + 'pid': 999999, + 'filenames': ['/app/data/reports/ocbot_daily_20260518.pptx'], + 'max_files': 1, + }), encoding='utf-8') + + status = svc.get_ppt_vision_audit_status() + + assert status['running'] is False + assert status['status'] == 'error' + assert 'worker no longer running' in status['last_run']['error'] + assert status['last_run']['filenames'] == ['ocbot_daily_20260518.pptx'] + + +def test_ppt_vision_audit_background_allows_retry_after_dead_worker(monkeypatch, tmp_path): + """dead PID 的 running state 不應阻擋下一次手動補跑。""" + import json + from services import ppt_vision_service as svc + + state_path = tmp_path / 'vision_state.json' + monkeypatch.setenv('PPT_VISION_STATE_PATH', str(state_path)) + monkeypatch.setattr(svc, '_LAST_AUDIT_RUN', None) + monkeypatch.setattr(svc, '_pid_exists', lambda _pid: False) + monkeypatch.setattr(svc, 'audit_recent_ppts', lambda **_kwargs: { + 'audited_files': [], + 'total_issues': 0, + 'errors': [], + }) + state_path.write_text(json.dumps({ + 'ok': True, + 'status': 'running', + 'queued_at': '2999-05-19 12:00:00', + 'started_at': '2999-05-19 12:00:01', + 'updated_at': '2999-05-19 12:00:01', + 'pid': 999999, + 'filenames': ['/app/data/reports/ocbot_daily_20260518.pptx'], + 'max_files': 1, + }), encoding='utf-8') + + result = svc.start_ppt_vision_audit_background( + filenames=['ocbot_daily_20260518.pptx'], + max_files=1, + ) + + assert result['ok'] is True + assert result['status'] == 'queued' + + def test_ppt_audit_vision_status_route_returns_json(client, monkeypatch): """頁面輪詢用 status endpoint 要能回最近一次背景視覺 QA 狀態。""" from services import ppt_vision_service as svc