diff --git a/TODO_NEXT_STEPS.txt b/TODO_NEXT_STEPS.txt index d308848..0b08b5f 100644 --- a/TODO_NEXT_STEPS.txt +++ b/TODO_NEXT_STEPS.txt @@ -4,6 +4,7 @@ ================================================================================ 【已完成】 + - V10.211 補 `/observability/ppt_audit_history` 全類型視覺 QA:審核歷史不再限 daily,頁面新增「立即視覺 QA」非阻塞補跑,結果寫入 `ppt_audit_results`;模型失敗時也保留 slide error,避免產線狀態只剩空白。 - V10.210 補 `/observability/ppt_audit_history` 審核歷史同頁回放:每筆 daily 視覺審核紀錄的動作欄新增「回放」按鈕,沿用 PDF 預覽抽屜並保留下載/開新頁,讓問題追查不必再回檔案表找簡報。 - V10.208 修正 `/observability/ppt_audit_history` 同頁預覽抽屜 selector:Modal 標題改用獨立 `data-ppt-preview-modal-title`,避免與多個預覽連結的資料屬性衝突。 - V10.207 強化 `/observability/ppt_audit_history` 同頁線上預覽:所有可預覽簡報按鈕改為開啟頁內 PDF 預覽抽屜,保留開新頁與下載,降低產線頁來回跳轉成本並改善手機操作。 diff --git a/config.py b/config.py index 0a748a7..380b6a5 100644 --- a/config.py +++ b/config.py @@ -320,7 +320,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '') # ========================================== # 系統版本與路徑 # ========================================== -SYSTEM_VERSION = "V10.210" +SYSTEM_VERSION = "V10.211" LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log') public_url = PUBLIC_URL # 用於模板顯示 diff --git a/routes/admin_observability_routes.py b/routes/admin_observability_routes.py index 0db7806..c00fbae 100644 --- a/routes/admin_observability_routes.py +++ b/routes/admin_observability_routes.py @@ -1860,6 +1860,35 @@ def ppt_audit_generate_missing(): return jsonify({'ok': False, 'error': f'{type(e).__name__}: {str(e)[:200]}'}), 500 +@admin_observability_bp.route('/ppt_audit/run_vision', methods=['POST']) +@login_required +def ppt_audit_run_vision(): + """Queue a non-blocking visual QA run for selected generated PPT files.""" + try: + from services.ppt_vision_service import start_ppt_vision_audit_background + + data = request.get_json(silent=True) or {} + filenames = data.get('filenames') or [] + if isinstance(filenames, str): + filenames = [filenames] + filenames = [str(name) for name in filenames if str(name).lower().endswith('.pptx')] + max_files = data.get('max_files') or (len(filenames) if filenames else 10) + try: + max_files = max(1, min(int(max_files), 20)) + except Exception: + max_files = 10 + + result = start_ppt_vision_audit_background( + reports_dir=None, + filenames=filenames, + max_files=max_files, + hours=24, + ) + return jsonify(result), 202 if result.get('status') == 'queued' else 200 + except Exception as e: + return jsonify({'ok': False, 'error': f'{type(e).__name__}: {str(e)[:200]}'}), 500 + + def _resolve_ppt_report_path(filename: str): """在 REPORTS_DIR 內解析簡報檔名,並阻擋路徑逃逸。""" import os @@ -2349,7 +2378,7 @@ def _build_ppt_pipeline_view(files, auto_generation, audit_stats, generation_run qa_status = 'ready' if pass_rate >= 80 and audit_issues == 0 else 'partial' else: qa_value = '待審核' - qa_meta = '每日報表才進入 minicpm-v 視覺 QA' + qa_meta = '可立即補跑,或等待 22:00 排程' qa_status = 'planned' stages = [ @@ -2757,40 +2786,45 @@ def ppt_audit_history(): except Exception as e: error = f'{type(e).__name__}: {str(e)[:200]}' - # Phase 38:讀指定月份 daily audit 歷史(僅限 daily 類型) - if report_type == 'daily': + audit_filter_sql = "" + audit_params = {'month_start': month_start, 'month_end': month_end} + if report_prefix != 'all': + audit_filter_sql = " AND pptx_filename LIKE :audit_prefix" + audit_params['audit_prefix'] = f"{report_prefix}%" + + # Phase 38+:讀指定月份 / 指定簡報類型 audit 歷史 + try: + session = get_session() try: - session = get_session() - try: - audit_rows = session.execute( - sa_text(""" - SELECT audited_at, pptx_filename, audit_status, - issues_count, confidence, duration_ms, error_msg - FROM ppt_audit_results - WHERE audited_at >= :month_start - AND audited_at < :month_end - AND pptx_filename LIKE 'ocbot_daily_%' - ORDER BY audited_at DESC - LIMIT 1000 - """), - {'month_start': month_start, 'month_end': month_end}, - ).fetchall() - audit_records = [ - { - 'audited_at': r[0].strftime('%Y-%m-%d %H:%M'), - 'pptx_filename': r[1], - 'audit_status': r[2], - 'issues_count': int(r[3] or 0), - 'confidence': float(r[4] or 0), - 'duration_ms': int(r[5] or 0), - 'error_msg': r[6], - } - for r in audit_rows - ] - finally: - session.close() - except Exception: - logger.debug("PPT audit history table unavailable; rendering empty audit history", exc_info=True) + audit_rows = session.execute( + sa_text(f""" + SELECT audited_at, pptx_filename, audit_status, + issues_count, confidence, duration_ms, error_msg + FROM ppt_audit_results + WHERE audited_at >= :month_start + AND audited_at < :month_end + {audit_filter_sql} + ORDER BY audited_at DESC + LIMIT 1000 + """), + audit_params, + ).fetchall() + audit_records = [ + { + 'audited_at': r[0].strftime('%Y-%m-%d %H:%M'), + 'pptx_filename': r[1], + 'audit_status': r[2], + 'issues_count': int(r[3] or 0), + 'confidence': float(r[4] or 0), + 'duration_ms': int(r[5] or 0), + 'error_msg': r[6], + } + for r in audit_rows + ] + finally: + session.close() + except Exception: + logger.debug("PPT audit history table unavailable; rendering empty audit history", exc_info=True) # PPT vision 啟用狀態 vision_status = {'enabled': False, 'ready': False, 'blockers': ['視覺狀態讀取失敗']} @@ -2804,65 +2838,64 @@ def ppt_audit_history(): # Phase 47 K-6: 月報表統計 + top failure files audit_30d_stats = {} top_failure_files = [] - if report_type == 'daily': + try: + s_ppt = get_session() try: - s_ppt = get_session() - try: - stat_row = s_ppt.execute( - sa_text(""" - SELECT COUNT(*), - COUNT(*) FILTER (WHERE audit_status = 'passed'), - COUNT(*) FILTER (WHERE audit_status = 'failed'), - COUNT(*) FILTER (WHERE audit_status = 'skipped'), - COUNT(*) FILTER (WHERE audit_status = 'error'), - COALESCE(AVG(confidence) FILTER (WHERE audit_status = 'passed'), 0), - COALESCE(SUM(issues_count), 0) - FROM ppt_audit_results - WHERE audited_at >= :month_start - AND audited_at < :month_end - AND pptx_filename LIKE 'ocbot_daily_%' - """), - {'month_start': month_start, 'month_end': month_end}, - ).fetchone() - total_30d = int(stat_row[0] or 0) - audit_30d_stats = { - 'total': total_30d, - 'passed': int(stat_row[1] or 0), - 'failed': int(stat_row[2] or 0), - 'skipped': int(stat_row[3] or 0), - 'error': int(stat_row[4] or 0), - 'avg_confidence': round(float(stat_row[5] or 0), 3), - 'total_issues': int(stat_row[6] or 0), - 'pass_rate': (float(stat_row[1] or 0) / total_30d * 100) if total_30d else 0, - } + stat_row = s_ppt.execute( + sa_text(f""" + SELECT COUNT(*), + COUNT(*) FILTER (WHERE audit_status = 'passed'), + COUNT(*) FILTER (WHERE audit_status = 'failed'), + COUNT(*) FILTER (WHERE audit_status = 'skipped'), + COUNT(*) FILTER (WHERE audit_status = 'error'), + COALESCE(AVG(confidence) FILTER (WHERE audit_status = 'passed'), 0), + COALESCE(SUM(issues_count), 0) + FROM ppt_audit_results + WHERE audited_at >= :month_start + AND audited_at < :month_end + {audit_filter_sql} + """), + audit_params, + ).fetchone() + total_30d = int(stat_row[0] or 0) + audit_30d_stats = { + 'total': total_30d, + 'passed': int(stat_row[1] or 0), + 'failed': int(stat_row[2] or 0), + 'skipped': int(stat_row[3] or 0), + 'error': int(stat_row[4] or 0), + 'avg_confidence': round(float(stat_row[5] or 0), 3), + 'total_issues': int(stat_row[6] or 0), + 'pass_rate': (float(stat_row[1] or 0) / total_30d * 100) if total_30d else 0, + } - top_fail_rows = s_ppt.execute( - sa_text(""" - SELECT pptx_filename, COUNT(*) AS attempts, - SUM(issues_count) AS total_issues, - MAX(audited_at) AS last_audit - FROM ppt_audit_results - WHERE audit_status IN ('failed', 'error') - AND audited_at >= :month_start - AND audited_at < :month_end - AND pptx_filename LIKE 'ocbot_daily_%' - GROUP BY pptx_filename - ORDER BY attempts DESC, total_issues DESC LIMIT 10 - """), - {'month_start': month_start, 'month_end': month_end}, - ).fetchall() - top_failure_files = [ - { - 'filename': r[0], 'attempts': int(r[1] or 0), - 'total_issues': int(r[2] or 0), - 'last_audit': r[3].strftime('%Y-%m-%d %H:%M') if r[3] else '', - } - for r in top_fail_rows - ] - finally: - s_ppt.close() - except Exception: - pass + top_fail_rows = s_ppt.execute( + sa_text(f""" + SELECT pptx_filename, COUNT(*) AS attempts, + SUM(issues_count) AS total_issues, + MAX(audited_at) AS last_audit + FROM ppt_audit_results + WHERE audit_status IN ('failed', 'error') + AND audited_at >= :month_start + AND audited_at < :month_end + {audit_filter_sql} + GROUP BY pptx_filename + ORDER BY attempts DESC, total_issues DESC LIMIT 10 + """), + audit_params, + ).fetchall() + top_failure_files = [ + { + 'filename': r[0], 'attempts': int(r[1] or 0), + 'total_issues': int(r[2] or 0), + 'last_audit': r[3].strftime('%Y-%m-%d %H:%M') if r[3] else '', + } + for r in top_fail_rows + ] + finally: + s_ppt.close() + except Exception: + pass # Phase 41 E-2: 對最近 3 筆 failed audit 跑 RAG 找相似修法 rag_fixes = [] @@ -2950,6 +2983,11 @@ def ppt_audit_history(): vision_status=vision_status, audit_records=audit_records, ) + vision_audit_filenames = [ + item.get('name') + for item in files + if item.get('file_exists') and item.get('is_valid_ppt') and item.get('name') + ][:10] return render_template( 'admin/ppt_audit_history.html', @@ -2973,6 +3011,7 @@ def ppt_audit_history(): auto_generation_missing_report_types=auto_generation.get('missing_report_types', []), generation_runs=generation_runs, pipeline_view=pipeline_view, + vision_audit_filenames=vision_audit_filenames, error=error, ) diff --git a/services/ppt_vision_service.py b/services/ppt_vision_service.py index 3e3bf4f..4105f54 100644 --- a/services/ppt_vision_service.py +++ b/services/ppt_vision_service.py @@ -22,8 +22,9 @@ import time import base64 import logging import shutil +import threading from dataclasses import dataclass, field -from typing import Optional, Dict, Any, List +from typing import Optional, Dict, Any, List, Sequence logger = logging.getLogger(__name__) @@ -32,6 +33,8 @@ logger = logging.getLogger(__name__) # ───────────────────────────────────────────────────────────────────────────── PPT_VISION_MODEL = os.getenv('PPT_VISION_MODEL', 'minicpm-v:latest') PPT_VISION_TIMEOUT = int(os.getenv('PPT_VISION_TIMEOUT', '60')) +_AUDIT_LOCK = threading.Lock() +_LAST_AUDIT_RUN: Dict[str, Any] | None = None def is_ppt_vision_enabled() -> bool: @@ -142,8 +145,22 @@ class PPTVisionService: result['error'] = 'libreoffice not installed (skip vision check)' return result + def _finish_with_error(message: str, duration_ms: int = 0) -> Dict[str, Any]: + result['error'] = message + try: + self._persist_audit_result( + pptx_path=pptx_path, + result=result, + avg_confidence=0.0, + duration_ms=duration_ms, + ) + except Exception as e: + logger.warning(f"[PPTVision] persist audit result failed: {e}") + return result + # 1. LibreOffice 轉 png with tempfile.TemporaryDirectory() as tmpdir: + convert_started = time.monotonic() try: proc = subprocess.run( [converter, '--headless', '--convert-to', 'png', @@ -151,17 +168,23 @@ class PPTVisionService: capture_output=True, timeout=60, ) if proc.returncode != 0: - result['error'] = f'libreoffice convert failed: {proc.stderr.decode()[:200]}' - return result + return _finish_with_error( + f'libreoffice convert failed: {proc.stderr.decode()[:200]}', + int((time.monotonic() - convert_started) * 1000), + ) except FileNotFoundError: result['error'] = 'libreoffice not installed (skip vision check)' return result except subprocess.TimeoutExpired: - result['error'] = 'libreoffice convert timeout (60s)' - return result + return _finish_with_error( + 'libreoffice convert timeout (60s)', + int((time.monotonic() - convert_started) * 1000), + ) except Exception as e: - result['error'] = f'{type(e).__name__}: {str(e)[:200]}' - return result + return _finish_with_error( + f'{type(e).__name__}: {str(e)[:200]}', + int((time.monotonic() - convert_started) * 1000), + ) # LibreOffice 對 .pptx 預設只輸出第一頁;多頁需 --convert-to png:impress_png_Export png_files = sorted([ @@ -170,13 +193,16 @@ class PPTVisionService: ]) if not png_files: - result['error'] = 'libreoffice 未產出 png (可能需要 --convert-to png:impress_png_Export)' - return result + return _finish_with_error( + 'libreoffice 未產出 png (可能需要 --convert-to png:impress_png_Export)', + int((time.monotonic() - convert_started) * 1000), + ) # 2. 對前 N 張跑 check_image import time as _time t0 = _time.monotonic() confidences = [] + slide_errors = [] for idx, png in enumerate(png_files[:max_slides]): try: vr = self.check_image(png) @@ -186,10 +212,18 @@ class PPTVisionService: if vr.issues_found: result['total_issues'] += len(vr.issues_found) result['issues_by_slide'].append((idx + 1, vr.issues_found)) + else: + slide_errors.append(f"slide {idx + 1}: {vr.error or 'vision model failed'}") except Exception as exc: + message = f"slide {idx + 1}: {type(exc).__name__}: {str(exc)[:160]}" + slide_errors.append(message) logger.warning(f"[PPTVision] slide {idx+1} check failed: {exc}") result['success'] = result['slides_checked'] > 0 + if not result['success'] and slide_errors: + result['error'] = ';'.join(slide_errors[:3]) + if slide_errors: + result['slide_errors'] = slide_errors duration_ms = int((_time.monotonic() - t0) * 1000) # Phase 38:寫入 ppt_audit_results 留歷史(失敗安全) @@ -365,7 +399,8 @@ ppt_vision_service = PPTVisionService() def audit_recent_ppts(reports_dir: str | None = None, hours: int = 24, - max_files: int = 10) -> Dict[str, Any]: + max_files: int = 10, + filenames: Sequence[str] | None = None) -> Dict[str, Any]: """Phase 26 整合 hook — 每日 22:00 cron 跑:掃 reports/ 當天新增 .pptx 跑視覺檢查。 Args: @@ -396,18 +431,30 @@ def audit_recent_ppts(reports_dir: str | None = None, hours: int = 24, summary['errors'].append(f'{reports_dir} not found') return summary - # 掃當天新增 .pptx + requested_names = { + os.path.basename(str(name)) + for name in (filenames or []) + if str(name).lower().endswith('.pptx') + } + + # 掃當天新增 .pptx;若指定 filenames,直接審指定檔,不受 hours 視窗限制。 cutoff = time.time() - hours * 3600 pptx_files = [] for f in os.listdir(reports_dir): if not f.lower().endswith('.pptx'): continue + if requested_names and f not in requested_names: + continue full = os.path.join(reports_dir, f) try: - if os.path.getmtime(full) >= cutoff: + if requested_names or os.path.getmtime(full) >= cutoff: pptx_files.append((os.path.getmtime(full), full)) except OSError: continue + if requested_names: + found_names = {os.path.basename(path) for _mtime, path in pptx_files} + for missing in sorted(requested_names - found_names): + summary['errors'].append(f'{missing}: file not found') pptx_files.sort(reverse=True) pptx_files = pptx_files[:max_files] @@ -432,6 +479,69 @@ def audit_recent_ppts(reports_dir: str | None = None, hours: int = 24, return summary +def start_ppt_vision_audit_background( + *, + reports_dir: str | None = None, + hours: int = 24, + max_files: int = 10, + filenames: Sequence[str] | None = None, +) -> Dict[str, Any]: + """Queue a non-blocking PPT vision audit run for the admin UI.""" + global _LAST_AUDIT_RUN + + if _AUDIT_LOCK.locked(): + return { + 'ok': True, + 'status': 'already_running', + 'message': 'PPT vision audit is already running.', + 'last_run': _LAST_AUDIT_RUN, + } + + clean_filenames = [ + os.path.basename(str(name)) + for name in (filenames or []) + if str(name).lower().endswith('.pptx') + ] + + def _run(): + global _LAST_AUDIT_RUN + with _AUDIT_LOCK: + started_at = time.strftime('%Y-%m-%d %H:%M:%S') + try: + summary = audit_recent_ppts( + reports_dir=reports_dir, + hours=hours, + max_files=max_files, + filenames=clean_filenames or None, + ) + _LAST_AUDIT_RUN = { + 'ok': True, + 'status': 'completed', + 'started_at': started_at, + 'finished_at': time.strftime('%Y-%m-%d %H:%M:%S'), + 'summary': summary, + } + except Exception as exc: + _LAST_AUDIT_RUN = { + 'ok': False, + 'status': 'error', + 'started_at': started_at, + 'finished_at': time.strftime('%Y-%m-%d %H:%M:%S'), + 'error': f'{type(exc).__name__}: {str(exc)[:200]}', + } + logger.error("[PPTVision] background audit failed: %s", exc, exc_info=True) + + thread = threading.Thread(target=_run, name='ppt-vision-audit', daemon=True) + thread.start() + return { + 'ok': True, + 'status': 'queued', + 'message': 'PPT vision audit queued.', + 'filenames': clean_filenames, + 'max_files': max_files, + } + + def push_ppt_audit_to_telegram(summary: Dict[str, Any]) -> bool: """有 issues 才推 Telegram(避免靜默報「無問題」洗版)""" if summary['total_issues'] <= 0: @@ -470,5 +580,6 @@ __all__ = [ 'get_ppt_vision_runtime_status', 'PPT_VISION_SYSTEM_PROMPT', 'audit_recent_ppts', + 'start_ppt_vision_audit_background', 'push_ppt_audit_to_telegram', ] diff --git a/templates/admin/ppt_audit_history.html b/templates/admin/ppt_audit_history.html index 858d2c8..66f72bb 100644 --- a/templates/admin/ppt_audit_history.html +++ b/templates/admin/ppt_audit_history.html @@ -8,7 +8,6 @@ {% block ewooo_content %} {% import "admin/_observability_labels.html" as obs_label %} -{% set report_is_daily = report_type == 'daily' %}
| 時間 | 檔名 | 結果 | 問題 | 信心 | 耗時 | 錯誤 | 動作 |
|---|---|---|---|---|---|---|---|
| 目前無 daily 審核歷史;請確認 {{ report_month }} 是否已完成 22:00 排程。 | |||||||
| 目前無 {{ selected_report_type.label }} 審核歷史;可按「立即視覺 QA」補跑,或等待每日 22:00 排程。 | |||||||