# ─────────────────────────────────────────────────────────────────────────────
# /observability/overview — Phase 45 overview (single page aggregating the KPIs)
# ─────────────────────────────────────────────────────────────────────────────

@admin_observability_bp.route('/')
@admin_observability_bp.route('/overview')
@login_required
def observability_overview():
    """Render the Phase 45 observability overview page.

    Aggregates the headline KPIs of the observability sub-pages into one
    ``summary`` dict (web counterpart of the Phase 44 daily Telegram summary;
    serves as the sidebar landing page).

    Every card is fail-safe: when a card's query raises, the failure is
    logged, the card falls back to an empty value and the session is rolled
    back so the remaining cards can still be queried. Without the rollback a
    single failed statement leaves a PostgreSQL transaction in the aborted
    state and every later query on the same session fails as well.

    Returns:
        The rendered ``admin/observability_overview.html`` template with
        ``summary`` (dict of per-card data), ``today`` (``YYYY-MM-DD``) and
        ``active_page='obs_overview'``.
    """
    import logging
    from datetime import datetime as _dt

    log = logging.getLogger(__name__)
    today = _dt.now()
    # Naive first-of-month timestamp used as the "current month" lower bound.
    month_start = _dt(today.year, today.month, 1)
    summary = {}

    session = get_session()

    def _pct(part, whole):
        """Return part/whole as a percentage, or 0 when whole is falsy."""
        return (float(part or 0) / float(whole) * 100) if whole else 0

    def _skip_card(key, fallback):
        """Log the active exception, store the fallback and reset the session."""
        log.exception('observability overview: "%s" card query failed', key)
        summary[key] = fallback
        try:
            # Clear the aborted transaction so later cards are not poisoned.
            session.rollback()
        except Exception:
            log.exception('observability overview: rollback after "%s" failed', key)

    try:
        # Per-host availability over the last 24 hours.
        try:
            host_rows = session.execute(
                sa_text("""
                    SELECT host_label, COUNT(*) AS total,
                           COUNT(*) FILTER (WHERE healthy) AS up,
                           COALESCE(AVG(response_ms) FILTER (WHERE healthy), 0) AS avg_ms
                    FROM host_health_probes
                    WHERE probed_at >= NOW() - INTERVAL '24 hours'
                    GROUP BY host_label ORDER BY host_label
                """),
            ).fetchall()
            summary['hosts'] = [
                {
                    'label': r[0],
                    'total': int(r[1] or 0),
                    'up': int(r[2] or 0),
                    'avg_ms': int(r[3] or 0),
                    'uptime_pct': _pct(r[2], r[1]),
                }
                for r in host_rows
            ]
        except Exception:
            _skip_card('hosts', [])

        # AI calls over the last 24 hours.
        try:
            ai = session.execute(
                sa_text("""
                    SELECT COUNT(*), COALESCE(SUM(input_tokens + output_tokens), 0),
                           COALESCE(SUM(cost_usd), 0),
                           COUNT(*) FILTER (WHERE status NOT IN ('ok','cache_only')),
                           COUNT(*) FILTER (WHERE rag_hit),
                           COUNT(*) FILTER (WHERE cache_hit)
                    FROM ai_calls
                    WHERE called_at >= NOW() - INTERVAL '24 hours'
                """),
            ).fetchone()
            total = int(ai[0] or 0)
            summary['ai_calls'] = {
                'total': total,
                'tokens': int(ai[1] or 0),
                'cost_24h': float(ai[2] or 0),
                'errors': int(ai[3] or 0),
                'rag_hits': int(ai[4] or 0),
                'cache_hits': int(ai[5] or 0),
                'error_rate': _pct(ai[3], total),
                'rag_rate': _pct(ai[4], total),
                'cache_rate': _pct(ai[5], total),
            }
        except Exception:
            _skip_card('ai_calls', {})

        # Month-to-date cost.
        try:
            month_cost = session.execute(
                sa_text("SELECT COALESCE(SUM(cost_usd), 0) FROM ai_calls WHERE called_at >= :ms"),
                {'ms': month_start},
            ).fetchone()[0]
            summary['month_cost'] = float(month_cost or 0)
        except Exception:
            _skip_card('month_cost', 0)

        # Budgets whose spend ratio reached their alert threshold (default 80%).
        # NOTE(review): spend is always measured from month_start regardless of
        # b.period — confirm that non-monthly budget periods are not expected.
        try:
            budgets = session.execute(
                sa_text("""
                    SELECT b.period, b.provider, b.budget_usd, b.alert_pct,
                           COALESCE((
                               SELECT SUM(cost_usd) FROM ai_calls
                               WHERE called_at >= :ms
                                 AND (b.provider IS NULL OR provider = b.provider)
                           ), 0) AS spent
                    FROM ai_call_budgets b
                """),
                {'ms': month_start},
            ).fetchall()
            over_threshold = []
            for r in budgets:
                budget = float(r[2] or 0)
                spent = float(r[4] or 0)
                # A zero/NULL budget never alerts (ratio forced to 0).
                ratio = spent / budget if budget > 0 else 0
                threshold = float(r[3] or 80) / 100
                if ratio >= threshold:
                    over_threshold.append({
                        'period': r[0], 'provider': r[1] or '(全部)',
                        'spent': spent, 'budget': budget, 'ratio': ratio,
                    })
            summary['budget_alerts'] = over_threshold
        except Exception:
            _skip_card('budget_alerts', [])

        # Learning episodes: pending review + 30-day totals.
        try:
            ep_pending = session.execute(
                sa_text("SELECT COUNT(*) FROM learning_episodes WHERE promotion_status = 'awaiting_review' AND reviewed_at IS NULL"),
            ).fetchone()[0]
            ep_total_30d = session.execute(
                sa_text("SELECT COUNT(*) FROM learning_episodes WHERE created_at >= NOW() - INTERVAL '30 days'"),
            ).fetchone()[0]
            ep_approved_30d = session.execute(
                sa_text("SELECT COUNT(*) FROM learning_episodes WHERE created_at >= NOW() - INTERVAL '30 days' AND promotion_status = 'approved'"),
            ).fetchone()[0]
            summary['episodes'] = {
                'pending': int(ep_pending or 0),
                'total_30d': int(ep_total_30d or 0),
                'approved_30d': int(ep_approved_30d or 0),
                'approval_rate': _pct(ep_approved_30d, ep_total_30d),
            }
        except Exception:
            _skip_card('episodes', {})

        # PPT visual audit results over the last 7 days.
        try:
            ppt = session.execute(
                sa_text("""
                    SELECT COUNT(*),
                           COUNT(*) FILTER (WHERE audit_status='passed'),
                           COUNT(*) FILTER (WHERE audit_status='failed')
                    FROM ppt_audit_results
                    WHERE audited_at >= NOW() - INTERVAL '7 days'
                """),
            ).fetchone()
            ppt_total = int(ppt[0] or 0)
            summary['ppt'] = {
                'total': ppt_total,
                'passed': int(ppt[1] or 0),
                'failed': int(ppt[2] or 0),
                'pass_rate': _pct(ppt[1], ppt_total),
            }
        except Exception:
            _skip_card('ppt', {})

        # AIOps incidents and heal attempts over the last 7 days.
        try:
            inc = session.execute(
                sa_text("""
                    SELECT COUNT(*),
                           COUNT(*) FILTER (WHERE status='open'),
                           COUNT(*) FILTER (WHERE severity IN ('P0','P1'))
                    FROM incidents
                    WHERE created_at >= NOW() - INTERVAL '7 days'
                """),
            ).fetchone()
            heal = session.execute(
                sa_text("""
                    SELECT COUNT(*),
                           COUNT(*) FILTER (WHERE result='success')
                    FROM heal_logs
                    WHERE created_at >= NOW() - INTERVAL '7 days'
                """),
            ).fetchone()
            summary['aiops'] = {
                'incidents_total': int(inc[0] or 0),
                'incidents_open': int(inc[1] or 0),
                'incidents_p0_p1': int(inc[2] or 0),
                'heals_total': int(heal[0] or 0),
                'heals_success': int(heal[1] or 0),
                'heal_rate': _pct(heal[1], heal[0]),
            }
        except Exception:
            _skip_card('aiops', {})

        # MCP tool calls over the last 24 hours.
        try:
            mcp = session.execute(
                sa_text("""
                    SELECT COUNT(*), COUNT(DISTINCT server),
                           COUNT(*) FILTER (WHERE cache_hit),
                           COALESCE(SUM(cost_usd), 0)
                    FROM mcp_calls
                    WHERE called_at >= NOW() - INTERVAL '24 hours'
                """),
            ).fetchone()
            mcp_total = int(mcp[0] or 0)
            summary['mcp'] = {
                'total': mcp_total,
                'servers': int(mcp[1] or 0),
                'cache_hits': int(mcp[2] or 0),
                'cost': float(mcp[3] or 0),
                'cache_rate': _pct(mcp[2], mcp_total),
            }
        except Exception:
            _skip_card('mcp', {})
    finally:
        session.close()

    return render_template(
        'admin/observability_overview.html',
        active_page='obs_overview',
        summary=summary,
        today=today.strftime('%Y-%m-%d'),
    )

AI 呼叫總覽 過去 {{ hours }} 小時 diff --git a/templates/admin/budget.html b/templates/admin/budget.html index e3780e0..3cffa5e 100644 --- a/templates/admin/budget.html +++ b/templates/admin/budget.html @@ -1,8 +1,8 @@ -{% extends "base.html" %} +{% extends "ewoooc_base.html" %} {% block title %}預算控管{% endblock %} -{% block content %} +{% block ewooo_content %}

預算控管 ai_call_budgets × 當月實際支出即時對比 diff --git a/templates/admin/host_health.html b/templates/admin/host_health.html index 1dd9645..0275243 100644 --- a/templates/admin/host_health.html +++ b/templates/admin/host_health.html @@ -1,8 +1,8 @@ -{% extends "base.html" %} +{% extends "ewoooc_base.html" %} {% block title %}主機健康監控{% endblock %} -{% block content %} +{% block ewooo_content %}

主機健康監控 三主機 Ollama + MCP + 成本節流即時狀態 diff --git a/templates/admin/observability_overview.html b/templates/admin/observability_overview.html new file mode 100644 index 0000000..e73fb99 --- /dev/null +++ b/templates/admin/observability_overview.html @@ -0,0 +1,273 @@ +{% extends "ewoooc_base.html" %} + +{% block title %}觀測台總覽{% endblock %} + +{% block ewooo_content %} +
+

AI 觀測台總覽 + {{ today }} · 全景一頁看(資料來源 8 表跨 JOIN) +

+ + +
+ {% if summary.hosts %} + {% for h in summary.hosts %} +
+
+
+
+
+ {{ h.label }} +

{{ "%.1f"|format(h.uptime_pct) }}%

+ 24h 在線率({{ h.up }}/{{ h.total }} probe) +
+
+ +
{{ h.avg_ms }} ms
+
+
+
+
+
+ {% endfor %} + {% else %} +
+
+ + host_health_probes 表無資料(migration 029 是否已跑?scheduler probe job 是否已啟動?) +
+
+ {% endif %} +
+ + +
+ {% if summary.ai_calls %} +
+
+
+ 24h AI 呼叫 +

{{ "{:,}".format(summary.ai_calls.total) }}

+ Token:{{ "{:,}".format(summary.ai_calls.tokens) }} +
+
+
+
+
+
+ 成本 +

${{ "%.2f"|format(summary.ai_calls.cost_24h) }}/24h

+ 當月累計 ${{ "%.2f"|format(summary.month_cost) }} +
+
+
+
+
+
+ 錯誤率 +

{{ "%.1f"|format(summary.ai_calls.error_rate) }}%

+ {{ summary.ai_calls.errors }} 次失敗 +
+
+
+
+
+
+ RAG 命中率 +

{{ "%.1f"|format(summary.ai_calls.rag_rate) }}%

+ {{ summary.ai_calls.rag_hits }} hits · cache {{ "%.0f"|format(summary.ai_calls.cache_rate) }}% +
+
+
+ {% endif %} +
+ + + {% if summary.budget_alerts %} +
+
+ 預算告警 — 共 {{ summary.budget_alerts|length }} 項超出閾值 +
+
+ + + + + + {% for b in summary.budget_alerts %} + + + + + + + + {% endfor %} + +
週期供應商已花費預算使用率
{{ b.period }}{{ b.provider }}${{ "%.2f"|format(b.spent) }}${{ "%.2f"|format(b.budget) }} + + {{ "%.0f"|format(b.ratio * 100) }}% + +
+
+ +
+ {% endif %} + + +
+ {% if summary.aiops %} +
+
+
AIOps 自癒 7d
+
+
+
事件總數

{{ summary.aiops.incidents_total }}

+
未解決

{{ summary.aiops.incidents_open }}

+
P0/P1

{{ summary.aiops.incidents_p0_p1 }}

+
自癒成功率

{{ "%.0f"|format(summary.aiops.heal_rate) }}%

+
+
+ +
+
+ {% endif %} + + {% if summary.mcp %} +
+
+
MCP 24h
+
+
+
tool 呼叫

{{ "{:,}".format(summary.mcp.total) }}

+
使用 server

{{ summary.mcp.servers }}

+
cache 命中

{{ summary.mcp.cache_hits }}

+
成本

${{ "%.4f"|format(summary.mcp.cost) }}

+
+
+ +
+
+ {% endif %} + + {% if summary.episodes %} +
+
+
RAG 學習鏈路 30d
+
+
+
待審核

{{ summary.episodes.pending }}

+
總 episodes

{{ summary.episodes.total_30d }}

+
已晉升

{{ summary.episodes.approved_30d }}

+
晉升率

{{ "%.0f"|format(summary.episodes.approval_rate) }}%

+
+
+ +
+
+ {% endif %} +
+ + + {% if summary.ppt and summary.ppt.total > 0 %} +
+
PPT 視覺審核 7d
+
+
+
總筆數

{{ summary.ppt.total }}

+
通過

{{ summary.ppt.passed }}

+
失敗

{{ summary.ppt.failed }}

+
通過率

{{ "%.0f"|format(summary.ppt.pass_rate) }}%

+
+
+ +
+ {% endif %} + + + + +

+ Operation Ollama-First v5.0 / Phase 45 — AI 觀測台總覽 + (資料來源:host_health_probes / ai_calls / ai_call_budgets / learning_episodes / + mcp_calls / incidents / heal_logs / ppt_audit_results) +

+
+{% endblock %} diff --git a/templates/admin/ppt_audit_history.html b/templates/admin/ppt_audit_history.html index 532ce87..9eb1002 100644 --- a/templates/admin/ppt_audit_history.html +++ b/templates/admin/ppt_audit_history.html @@ -1,8 +1,8 @@ -{% extends "base.html" %} +{% extends "ewoooc_base.html" %} {% block title %}PPT 視覺審核歷史{% endblock %} -{% block content %} +{% block ewooo_content %}

PPT 視覺審核歷史 reports/ 目錄過去 7 日 .pptx diff --git a/templates/admin/promotion_review.html b/templates/admin/promotion_review.html index 55f38cc..c6693f3 100644 --- a/templates/admin/promotion_review.html +++ b/templates/admin/promotion_review.html @@ -1,8 +1,8 @@ -{% extends "base.html" %} +{% extends "ewoooc_base.html" %} {% block title %}RAG 學習晉升審核{% endblock %} -{% block content %} +{% block ewooo_content %}

RAG 學習晉升審核 待審核 × {{ episodes|length }} 筆 · 知識庫 ai_insights × {{ kb_size or 0 }} 筆 diff --git a/templates/admin/quality_trend.html b/templates/admin/quality_trend.html index b85f092..ab214ae 100644 --- a/templates/admin/quality_trend.html +++ b/templates/admin/quality_trend.html @@ -1,8 +1,8 @@ -{% extends "base.html" %} +{% extends "ewoooc_base.html" %} {% block title %}Caller 反饋趨勢{% endblock %} -{% block content %} +{% block ewooo_content %}

Caller 反饋趨勢 過去 {{ days }} 日 diff --git a/templates/components/_ewoooc_shell.html b/templates/components/_ewoooc_shell.html index efec0a7..7b0cd64 100644 --- a/templates/components/_ewoooc_shell.html +++ b/templates/components/_ewoooc_shell.html @@ -74,12 +74,51 @@

+ +