diff --git a/migrations/029_create_host_health_probes.sql b/migrations/029_create_host_health_probes.sql
new file mode 100644
index 0000000..5340644
--- /dev/null
+++ b/migrations/029_create_host_health_probes.sql
@@ -0,0 +1,48 @@
+-- =============================================================================
+-- Migration 029: host_health_probes — 三主機健康歷史
+-- Operation Ollama-First v5.0 — Phase 38
+-- 日期: 2026-05-04 台北
+-- 對應頁面: /observability/host_health
+-- =============================================================================
+-- 說明:
+-- 原本 host_health 頁面每次刷新都即時 HTTP probe 三主機 /api/tags,
+-- 無歷史 → 無法看趨勢、無法回查「昨天 GCP 是不是有掛過」。
+-- 本 migration 加表,每次 probe 寫一筆,留 30 天歷史(cron 清理)。
+--
+-- 寫入點:
+-- 1. routes/admin_observability_routes.py::host_health_dashboard 每次 render 寫
+-- 2. scheduler.py 加每 5 分鐘 background probe(即使無人開頁也記錄)
+--
+-- 索引設計:
+-- - (probed_at DESC) 給最新 N 筆查詢
+-- - (host_label, probed_at DESC) 給「某台主機過去 24h 趨勢」
+-- =============================================================================
+
+CREATE TABLE IF NOT EXISTS host_health_probes (
+ id BIGSERIAL PRIMARY KEY,
+ probed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- probe time; defaults to the insert time
+ host_label VARCHAR(64) NOT NULL, -- 'Primary (GCP)' / 'Secondary (GCP)' / 'Fallback (111)'
+ host_url VARCHAR(256) NOT NULL, -- base URL of the probed host, e.g. http://34.143.170.20:11434
+ healthy BOOLEAN NOT NULL, -- probe outcome as shown on the dashboard
+ unhealthy_mark BOOLEAN NOT NULL DEFAULT FALSE, -- value of _is_unhealthy(host) at probe time
+ models_count INTEGER DEFAULT 0, -- model count; NOTE(review): the dashboard writer counts a list it caps at 15
+ response_ms INTEGER, -- elapsed time of the HTTP probe in ms
+ error_msg TEXT, -- failure text; the dashboard writer stores "HTTP <code>" or "<ExceptionType>: <first 200 chars>"
+ -- Only the three known host labels are accepted; any other label makes the INSERT fail.
+ CONSTRAINT chk_host_label_029
+ CHECK (host_label IN ('Primary (GCP)', 'Secondary (GCP)', 'Fallback (111)'))
+);
+
+CREATE INDEX IF NOT EXISTS idx_host_health_probes_at -- newest-first scans across all hosts
+ ON host_health_probes (probed_at DESC);
+
+CREATE INDEX IF NOT EXISTS idx_host_health_probes_label_at -- per-host trend queries (e.g. the last 24 h)
+ ON host_health_probes (host_label, probed_at DESC);
+
+-- Retention (keep 30 days): intended to be run by the scheduler daily at 03:00, not by this migration:
+-- DELETE FROM host_health_probes WHERE probed_at < NOW() - INTERVAL '30 days';
+
+COMMENT ON TABLE host_health_probes IS
+ '三主機 Ollama 健康歷史;每次 host_health 頁面 render 或 scheduler 5min cron 寫入';
+COMMENT ON COLUMN host_health_probes.host_label IS
+ 'services/ollama_service.py::get_host_label() 對應標籤';
diff --git a/migrations/030_create_ppt_audit_results.sql b/migrations/030_create_ppt_audit_results.sql
new file mode 100644
index 0000000..4ea907a
--- /dev/null
+++ b/migrations/030_create_ppt_audit_results.sql
@@ -0,0 +1,57 @@
+-- =============================================================================
+-- Migration 030: ppt_audit_results — PPT 視覺審核歷史持久化
+-- Operation Ollama-First v5.0 — Phase 38
+-- 日期: 2026-05-04 台北
+-- 對應頁面: /observability/ppt_audit_history
+-- =============================================================================
+-- 說明:
+-- 原本 ppt_audit_history 頁面只 os.listdir(reports/) 列檔,
+-- PPT_VISION minicpm-v 跑出的審核結論(issues_found, confidence)完全遺失。
+-- 本 migration 加表,audit 完一律寫入,方便:
+-- 1. 觀測頁面顯示「audit 結果」而不只「檔案存在」
+-- 2. 趨勢分析(過去 30 天 PPT 通過率?常見 issue 類型?)
+-- 3. Telegram 推播去重(同檔案同問題 7 天內不重推)
+--
+-- 寫入點:
+-- 1. services/ppt_vision_service.py::check_ppt_file 跑完 minicpm-v 後寫
+-- 2. scheduler.py daily 22:00 cron 跑完所有當日 PPT 後 batch 寫
+--
+-- 索引設計:
+-- - (audited_at DESC) 最新 audit
+-- - (pptx_filename) 同檔多次審核
+-- - (audited_at DESC) WHERE audit_status = 'failed' — partial index for failed-only filtering
+-- =============================================================================
+
+CREATE TABLE IF NOT EXISTS ppt_audit_results (
+ id BIGSERIAL PRIMARY KEY,
+ audited_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- audit time; defaults to the insert time
+ pptx_filename VARCHAR(256) NOT NULL, -- file name of the audited deck (the service writer stores the basename)
+ pptx_size_kb INTEGER, -- file size in whole KB
+ pptx_mtime TIMESTAMPTZ, -- the file's own mtime (tells apart regenerated files with the same name)
+ vision_enabled BOOLEAN NOT NULL, -- PPT_VISION_ENABLED state at audit time
+ audit_status VARCHAR(32) NOT NULL, -- 'passed' / 'failed' / 'skipped' / 'error' / 'pending' (enforced by the CHECK below)
+ issues_count INTEGER DEFAULT 0, -- total number of issues found
+ issues_found JSONB, -- issues serialised by ppt_vision_service
+ confidence NUMERIC(4,3), -- minicpm-v confidence, expected in the 0-1 range
+ duration_ms INTEGER, -- audit duration in ms
+ error_msg TEXT, -- error/exception text on failure
+ reviewer_notes TEXT, -- manual notes (may be filled in later from the admin UI)
+ -- audit_status is restricted to the five values listed in the CHECK.
+ CONSTRAINT chk_audit_status_030
+ CHECK (audit_status IN ('passed', 'failed', 'skipped', 'error', 'pending'))
+);
+
+CREATE INDEX IF NOT EXISTS idx_ppt_audit_at -- newest-first listing of audits
+ ON ppt_audit_results (audited_at DESC);
+
+CREATE INDEX IF NOT EXISTS idx_ppt_audit_filename -- all audits of one file name
+ ON ppt_audit_results (pptx_filename);
+
+CREATE INDEX IF NOT EXISTS idx_ppt_audit_failed -- partial index: failed audits only, newest first
+ ON ppt_audit_results (audited_at DESC)
+ WHERE audit_status = 'failed';
+
+COMMENT ON TABLE ppt_audit_results IS
+ 'PPT 視覺審核結果歷史;by services/ppt_vision_service.py minicpm-v 推論';
+COMMENT ON COLUMN ppt_audit_results.issues_found IS -- must mirror the shape serialised by PPTVisionService._persist_audit_result
+ 'JSONB 陣列:[{slide, issues: [...]}]';
diff --git a/routes/admin_observability_routes.py b/routes/admin_observability_routes.py
index 07caf07..ead4ef6 100644
--- a/routes/admin_observability_routes.py
+++ b/routes/admin_observability_routes.py
@@ -371,11 +371,12 @@ def budget_update(budget_id: int):
@admin_observability_bp.route('/ppt_audit_history')
@login_required
def ppt_audit_history():
- """掃 reports/ 目錄列近 7 日 .pptx 檔 + 即時跑 audit(如已啟用)"""
+ """掃 reports/ 目錄列近 7 日 .pptx 檔 + 從 ppt_audit_results 表讀 audit 歷史(Phase 38)"""
import os
import time
reports_dir = 'reports'
files = []
+ audit_records = []
error = None
try:
@@ -405,6 +406,37 @@ def ppt_audit_history():
except Exception as e:
error = f'{type(e).__name__}: {str(e)[:200]}'
+ # Phase 38:讀過去 7 日 audit 歷史
+ try:
+ session = get_session()
+ try:
+ audit_rows = session.execute(
+ sa_text("""
+ SELECT audited_at, pptx_filename, audit_status,
+ issues_count, confidence, duration_ms, error_msg
+ FROM ppt_audit_results
+ WHERE audited_at >= NOW() - INTERVAL '7 days'
+ ORDER BY audited_at DESC
+ LIMIT 100
+ """),
+ ).fetchall()
+ audit_records = [
+ {
+ 'audited_at': r[0].strftime('%Y-%m-%d %H:%M'),
+ 'pptx_filename': r[1],
+ 'audit_status': r[2],
+ 'issues_count': int(r[3] or 0),
+ 'confidence': float(r[4] or 0),
+ 'duration_ms': int(r[5] or 0),
+ 'error_msg': r[6],
+ }
+ for r in audit_rows
+ ]
+ finally:
+ session.close()
+ except Exception:
+ pass # 表可能尚未 migration,失敗安全
+
# PPT vision 啟用狀態
try:
from services.ppt_vision_service import is_ppt_vision_enabled
@@ -416,6 +448,7 @@ def ppt_audit_history():
'admin/ppt_audit_history.html',
active_page='obs_ppt_audit',
files=files,
+ audit_records=audit_records,
vision_enabled=vision_enabled,
error=error,
)
@@ -428,8 +461,10 @@ def ppt_audit_history():
@admin_observability_bp.route('/host_health')
@login_required
def host_health_dashboard():
- """三主機 Ollama + 4 個 MCP server 即時健康"""
+ """三主機 Ollama + 4 個 MCP server 即時健康(同時寫入 host_health_probes 留歷史)"""
+ import time as _time
ollama_hosts = []
+ probe_records = [] # 收集本次 probe 結果以批次寫 DB
try:
from services.ollama_service import (
OLLAMA_HOST_PRIMARY, OLLAMA_HOST_SECONDARY, OLLAMA_HOST_FALLBACK,
@@ -443,6 +478,8 @@ def host_health_dashboard():
]:
entry = {'label': label, 'host': host, 'healthy': False,
'unhealthy_mark': _is_unhealthy(host), 'models': []}
+ t0 = _time.monotonic()
+ err = None
try:
resp = _r.get(f"{host.rstrip('/')}/api/tags", timeout=3)
if resp.status_code == 200:
@@ -450,12 +487,44 @@ def host_health_dashboard():
entry['models'] = [
m.get('name', '') for m in resp.json().get('models', [])
][:15]
- except Exception:
- pass
+ else:
+ err = f"HTTP {resp.status_code}"
+ except Exception as e:
+ err = f"{type(e).__name__}: {str(e)[:200]}"
+ response_ms = int((_time.monotonic() - t0) * 1000)
+ probe_records.append({
+ 'host_label': label, 'host_url': host, 'healthy': entry['healthy'],
+ 'unhealthy_mark': entry['unhealthy_mark'],
+ 'models_count': len(entry['models']), 'response_ms': response_ms,
+ 'error_msg': err,
+ })
ollama_hosts.append(entry)
except Exception:
pass
+ # Phase 38:寫入 host_health_probes 留歷史(失敗安全,不擋頁面渲染)
+ if probe_records:
+ try:
+ _session = get_session()
+ try:
+ for rec in probe_records:
+ _session.execute(
+ sa_text("""
+ INSERT INTO host_health_probes
+ (host_label, host_url, healthy, unhealthy_mark,
+ models_count, response_ms, error_msg)
+ VALUES
+ (:host_label, :host_url, :healthy, :unhealthy_mark,
+ :models_count, :response_ms, :error_msg)
+ """),
+ rec,
+ )
+ _session.commit()
+ finally:
+ _session.close()
+ except Exception:
+ pass # DB 寫入失敗不影響頁面顯示
+
# MCP server 健康
mcp_status = {}
try:
@@ -472,10 +541,45 @@ def host_health_dashboard():
except Exception:
pass
+ # Phase 38:讀過去 24h 三主機健康歷史(給趨勢卡片)
+ health_history = []
+ try:
+ _session2 = get_session()
+ try:
+ history_rows = _session2.execute(
+ sa_text("""
+ SELECT host_label,
+ COUNT(*) FILTER (WHERE healthy) AS up_count,
+ COUNT(*) FILTER (WHERE NOT healthy) AS down_count,
+ COALESCE(AVG(response_ms) FILTER (WHERE healthy), 0) AS avg_ms,
+ COUNT(*) AS total
+ FROM host_health_probes
+ WHERE probed_at >= NOW() - INTERVAL '24 hours'
+ GROUP BY host_label
+ ORDER BY host_label
+ """),
+ ).fetchall()
+ health_history = [
+ {
+ 'host_label': r[0],
+ 'up_count': int(r[1] or 0),
+ 'down_count': int(r[2] or 0),
+ 'avg_ms': int(r[3] or 0),
+ 'total': int(r[4] or 0),
+ 'uptime_pct': (float(r[1] or 0) / float(r[4]) * 100) if r[4] else 0,
+ }
+ for r in history_rows
+ ]
+ finally:
+ _session2.close()
+ except Exception:
+ pass # 表可能尚未 migration,失敗安全
+
return render_template(
'admin/host_health.html',
active_page='obs_host_health',
ollama_hosts=ollama_hosts,
mcp_status=mcp_status,
throttle_state=throttle_state,
+ health_history=health_history,
)
diff --git a/services/code_review_pipeline_service.py b/services/code_review_pipeline_service.py
index 3b57a28..5374c77 100644
--- a/services/code_review_pipeline_service.py
+++ b/services/code_review_pipeline_service.py
@@ -204,7 +204,7 @@ class CodeReviewPipeline:
# ── Step 2:Hermes 掃描 ───────────────────────────────────────────────────
def _hermes_scan(self, files: Dict[str, str]) -> List[Dict]:
- """直呼內網 Ollama(http://192.168.0.188:11434),免認證"""
+ """走 resolve_ollama_host() 三主機級聯:GCP-A → GCP-B → 111(ADR-027 Phase 2)"""
try:
import requests as _req
diff --git a/services/ppt_vision_service.py b/services/ppt_vision_service.py
index b822439..5a0819a 100644
--- a/services/ppt_vision_service.py
+++ b/services/ppt_vision_service.py
@@ -149,11 +149,15 @@ class PPTVisionService:
return result
# 2. 對前 N 張跑 check_image
+ import time as _time
+ t0 = _time.monotonic()
+ confidences = []
for idx, png in enumerate(png_files[:max_slides]):
try:
vr = self.check_image(png)
if vr.success:
result['slides_checked'] += 1
+ confidences.append(vr.confidence)
if vr.issues_found:
result['total_issues'] += len(vr.issues_found)
result['issues_by_slide'].append((idx + 1, vr.issues_found))
@@ -161,8 +165,89 @@ class PPTVisionService:
logger.warning(f"[PPTVision] slide {idx+1} check failed: {exc}")
result['success'] = result['slides_checked'] > 0
+ duration_ms = int((_time.monotonic() - t0) * 1000)
+
+ # Phase 38:寫入 ppt_audit_results 留歷史(失敗安全)
+ try:
+ self._persist_audit_result(
+ pptx_path=pptx_path,
+ result=result,
+ avg_confidence=(sum(confidences) / len(confidences)) if confidences else 0.0,
+ duration_ms=duration_ms,
+ )
+ except Exception as e:
+ logger.warning(f"[PPTVision] persist audit result failed: {e}")
+
return result
+    def _persist_audit_result(self, pptx_path: str, result: Dict[str, Any],
+                              avg_confidence: float, duration_ms: int) -> None:
+        """Phase 38: insert one finished audit as a row of ppt_audit_results.
+
+        Reads 'error', 'success', 'total_issues' and 'issues_by_slide' from
+        ``result``; stores only the basename, size and mtime of ``pptx_path``,
+        plus ``avg_confidence`` rounded to 3 decimals and ``duration_ms``.
+
+        Raises: DB and serialisation errors propagate. The call site in this
+        class wraps the call in try/except and only logs a warning.
+        """
+        import json as _json
+        import os
+        from datetime import datetime as _dt, timezone as _tz
+        from sqlalchemy import text as _sa_text
+        from database.manager import get_session
+
+        # Derive audit_status; a reported error takes precedence over issue counts.
+        err_text = str(result['error']) if result.get('error') else None
+        if err_text:
+            skipped = ('libreoffice not installed' in err_text
+                       or 'PPT_VISION_ENABLED' in err_text)
+            status = 'skipped' if skipped else 'error'
+        elif result.get('total_issues', 0) > 0:
+            status = 'failed'
+        else:
+            status = 'passed' if result.get('success') else 'error'
+
+        # JSON text for the issues_found column (cast to JSONB in the INSERT).
+        issues_json = _json.dumps(
+            [{'slide': slide_num, 'issues': issues}
+             for slide_num, issues in result.get('issues_by_slide', [])],
+            ensure_ascii=False)
+
+        # pptx_size_kb is INTEGER and pptx_mtime is TIMESTAMPTZ, so send whole KB
+        # and a timezone-aware UTC datetime instead of a float and a naive local time.
+        size_kb = mtime = None
+        try:
+            if os.path.isfile(pptx_path):
+                file_stat = os.stat(pptx_path)
+                size_kb = round(file_stat.st_size / 1024)
+                mtime = _dt.fromtimestamp(file_stat.st_mtime, tz=_tz.utc)
+        except OSError:
+            pass  # file disappeared mid-check: leave both columns NULL
+
+        # close() in finally releases the session even when the INSERT raises.
+        session = get_session()
+        try:
+            session.execute(
+                _sa_text("""
+                    INSERT INTO ppt_audit_results
+                        (pptx_filename, pptx_size_kb, pptx_mtime, vision_enabled,
+                         audit_status, issues_count, issues_found, confidence,
+                         duration_ms, error_msg)
+                    VALUES
+                        (:fname, :sz, :mt, :ve, :st, :ic, CAST(:if AS JSONB),
+                         :cf, :du, :em)
+                """),
+                {'fname': os.path.basename(pptx_path), 'sz': size_kb, 'mt': mtime,
+                 've': True,  # original note: reaching this point implies vision is enabled
+                 'st': status, 'ic': result.get('total_issues', 0),
+                 'if': issues_json, 'cf': round(avg_confidence, 3),
+                 'du': duration_ms, 'em': err_text},
+            )
+            session.commit()
+        finally:
+            session.close()
+
def check_image(self, image_path: str) -> VisionResult:
"""檢查單張 PPT 截圖。
diff --git a/templates/admin/host_health.html b/templates/admin/host_health.html
index e410355..0ed8c5c 100644
--- a/templates/admin/host_health.html
+++ b/templates/admin/host_health.html
@@ -112,8 +112,47 @@
+
+ {% if health_history %}
+
+
+
+
+
+
+ | 角色 |
+ 總探針次數 |
+ 正常次數 |
+ 離線次數 |
+ 在線率 |
+ 平均回應 ms |
+
+
+
+ {% for h in health_history %}
+
+ | {{ h.host_label }} |
+ {{ h.total }} |
+ {{ h.up_count }} |
+ {{ h.down_count }} |
+
+
+ {{ "%.1f"|format(h.uptime_pct) }}%
+
+ |
+ {{ h.avg_ms }} |
+
+ {% endfor %}
+
+
+
+
+ {% endif %}
+
- Operation Ollama-First v5.0 / Phase 29 — 主機健康監控
+ Operation Ollama-First v5.0 / Phase 38 — 主機健康監控(含 24h 歷史)
{% endblock %}
diff --git a/templates/admin/ppt_audit_history.html b/templates/admin/ppt_audit_history.html
index 09d1744..abb01aa 100644
--- a/templates/admin/ppt_audit_history.html
+++ b/templates/admin/ppt_audit_history.html
@@ -19,28 +19,75 @@
{% endif %}
-
-
-
- | 檔名 | 大小 (KB) |
- 修改時間 | 動作 |
-
-
-
- {% for f in files %}
-
- {{ f.name }} |
- {{ f.size_kb }} |
- {{ f.mtime }} |
-
- 由 audit cron 22:00 自動執行
- |
-
- {% else %}
- | 過去 7 日無 PPT 生成 |
- {% endfor %}
-
-
+
+
+
+
+
+
+ | 檔名 | 大小 (KB) |
+ 修改時間 | 動作 |
+
+
+
+ {% for f in files %}
+
+ {{ f.name }} |
+ {{ f.size_kb }} |
+ {{ f.mtime }} |
+
+ 由 audit cron 22:00 自動執行
+ |
+
+ {% else %}
+ | 過去 7 日無 PPT 生成 |
+ {% endfor %}
+
+
+
+
+
+
+
+
+
+
+
+ | 審核時間 | 檔名 | 結果 |
+ 問題數 | 信心度 |
+ 耗時 ms | 錯誤訊息 |
+
+
+
+ {% for r in audit_records %}
+
+ | {{ r.audited_at }} |
+ {{ r.pptx_filename }} |
+
+ {% if r.audit_status == 'passed' %}
+ 通過
+ {% elif r.audit_status == 'failed' %}
+ 有問題
+ {% elif r.audit_status == 'skipped' %}
+ 跳過
+ {% elif r.audit_status == 'error' %}
+ 錯誤
+ {% else %}
+ {{ r.audit_status }}
+ {% endif %}
+ |
+ {{ r.issues_count }} |
+ {{ "%.2f"|format(r.confidence) }} |
+ {{ r.duration_ms }} |
+ {{ (r.error_msg or '')[:80] }} |
+
+ {% else %}
+ | 尚無審核紀錄(migration 030 跑過後即會累積) |
+ {% endfor %}
+
+
+
+
審核結果:有問題才推 Telegram(避免靜默無問題洗版)。