# ─────────────────────────────────────────────────────────────────────────────
# /observability/overview — Phase 45 overview (single page aggregating 6 KPIs)
# ─────────────────────────────────────────────────────────────────────────────

@admin_observability_bp.route('/')
@admin_observability_bp.route('/overview')
@login_required
def observability_overview():
    """Phase 45 — observability overview: one page aggregating key KPIs.

    Web counterpart of the Phase 44 daily Telegram summary; serves as the
    sidebar entry page.

    Every card is fail-safe: if the queries for one card fail, that card
    gets an empty/zero placeholder in ``summary`` and the page still renders.
    To make that hold, a failed card also rolls the session back (a failed
    statement otherwise leaves the transaction unusable for the cards that
    follow) and logs the traceback instead of discarding it.

    Returns:
        The rendered ``admin/observability_overview.html`` template with
        ``summary`` (dict of per-card KPI data), ``today`` (``YYYY-MM-DD``
        string) and ``active_page='obs_overview'``.
    """
    import logging
    from datetime import datetime as _dt

    log = logging.getLogger(__name__)

    # NOTE(review): naive local time, while the SQL below uses the database's
    # NOW() — confirm app server and DB agree on the timezone.
    today = _dt.now()
    month_start = _dt(today.year, today.month, 1)
    summary = {}

    session = get_session()

    def _section_failed(section):
        """Log the active exception and reset the session for later cards."""
        log.exception("observability overview: section %r failed", section)
        try:
            # Presumably a SQLAlchemy session on PostgreSQL: without a
            # rollback every later statement would fail as well.
            session.rollback()
        except Exception:
            log.exception(
                "observability overview: rollback after %r failed", section
            )

    try:
        # Per-host uptime over the last 24h
        try:
            host_rows = session.execute(
                sa_text("""
                    SELECT host_label, COUNT(*) AS total,
                           COUNT(*) FILTER (WHERE healthy) AS up,
                           COALESCE(AVG(response_ms) FILTER (WHERE healthy), 0) AS avg_ms
                    FROM host_health_probes
                    WHERE probed_at >= NOW() - INTERVAL '24 hours'
                    GROUP BY host_label ORDER BY host_label
                """),
            ).fetchall()
            summary['hosts'] = [
                {
                    'label': r[0],
                    'total': int(r[1] or 0),
                    'up': int(r[2] or 0),
                    'avg_ms': int(r[3] or 0),
                    'uptime_pct': (float(r[2] or 0) / float(r[1]) * 100) if r[1] else 0,
                }
                for r in host_rows
            ]
        except Exception:
            _section_failed('hosts')
            summary['hosts'] = []

        # AI calls over the last 24h
        try:
            ai = session.execute(
                sa_text("""
                    SELECT COUNT(*), COALESCE(SUM(input_tokens + output_tokens), 0),
                           COALESCE(SUM(cost_usd), 0),
                           COUNT(*) FILTER (WHERE status NOT IN ('ok','cache_only')),
                           COUNT(*) FILTER (WHERE rag_hit),
                           COUNT(*) FILTER (WHERE cache_hit)
                    FROM ai_calls
                    WHERE called_at >= NOW() - INTERVAL '24 hours'
                """),
            ).fetchone()
            total = int(ai[0] or 0)
            summary['ai_calls'] = {
                'total': total,
                'tokens': int(ai[1] or 0),
                'cost_24h': float(ai[2] or 0),
                'errors': int(ai[3] or 0),
                'rag_hits': int(ai[4] or 0),
                'cache_hits': int(ai[5] or 0),
                'error_rate': (float(ai[3] or 0) / total * 100) if total else 0,
                'rag_rate': (float(ai[4] or 0) / total * 100) if total else 0,
                'cache_rate': (float(ai[5] or 0) / total * 100) if total else 0,
            }
        except Exception:
            _section_failed('ai_calls')
            summary['ai_calls'] = {}

        # Month-to-date cost
        try:
            month_cost = session.execute(
                sa_text("SELECT COALESCE(SUM(cost_usd), 0) FROM ai_calls WHERE called_at >= :ms"),
                {'ms': month_start},
            ).fetchone()[0]
            summary['month_cost'] = float(month_cost or 0)
        except Exception:
            _section_failed('month_cost')
            summary['month_cost'] = 0

        # Budgets at or above their alert threshold (default 80%)
        try:
            # NOTE(review): spend is always measured from the start of the
            # current month, whatever b.period says — confirm this is intended.
            budgets = session.execute(
                sa_text("""
                    SELECT b.period, b.provider, b.budget_usd, b.alert_pct,
                           COALESCE((
                               SELECT SUM(cost_usd) FROM ai_calls
                               WHERE called_at >= :ms
                                 AND (b.provider IS NULL OR provider = b.provider)
                           ), 0) AS spent
                    FROM ai_call_budgets b
                """),
                {'ms': month_start},
            ).fetchall()
            over_threshold = []
            for r in budgets:
                budget = float(r[2] or 0)
                spent = float(r[4] or 0)
                # A zero/negative budget can never trigger an alert.
                ratio = spent / budget if budget > 0 else 0
                threshold = float(r[3] or 80) / 100
                if ratio >= threshold:
                    over_threshold.append({
                        'period': r[0], 'provider': r[1] or '(全部)',
                        'spent': spent, 'budget': budget, 'ratio': ratio,
                    })
            summary['budget_alerts'] = over_threshold
        except Exception:
            _section_failed('budget_alerts')
            summary['budget_alerts'] = []

        # Episodes awaiting review + 30-day distillation pool
        try:
            ep_pending = session.execute(
                sa_text("SELECT COUNT(*) FROM learning_episodes WHERE promotion_status = 'awaiting_review' AND reviewed_at IS NULL"),
            ).fetchone()[0]
            ep_total_30d = session.execute(
                sa_text("SELECT COUNT(*) FROM learning_episodes WHERE created_at >= NOW() - INTERVAL '30 days'"),
            ).fetchone()[0]
            ep_approved_30d = session.execute(
                sa_text("SELECT COUNT(*) FROM learning_episodes WHERE created_at >= NOW() - INTERVAL '30 days' AND promotion_status = 'approved'"),
            ).fetchone()[0]
            summary['episodes'] = {
                'pending': int(ep_pending or 0),
                'total_30d': int(ep_total_30d or 0),
                'approved_30d': int(ep_approved_30d or 0),
                'approval_rate': (float(ep_approved_30d or 0) / float(ep_total_30d) * 100) if ep_total_30d else 0,
            }
        except Exception:
            _section_failed('episodes')
            summary['episodes'] = {}

        # PPT visual audit over the last 7 days
        try:
            ppt = session.execute(
                sa_text("""
                    SELECT COUNT(*),
                           COUNT(*) FILTER (WHERE audit_status='passed'),
                           COUNT(*) FILTER (WHERE audit_status='failed')
                    FROM ppt_audit_results
                    WHERE audited_at >= NOW() - INTERVAL '7 days'
                """),
            ).fetchone()
            ppt_total = int(ppt[0] or 0)
            summary['ppt'] = {
                'total': ppt_total,
                'passed': int(ppt[1] or 0),
                'failed': int(ppt[2] or 0),
                'pass_rate': (float(ppt[1] or 0) / ppt_total * 100) if ppt_total else 0,
            }
        except Exception:
            _section_failed('ppt')
            summary['ppt'] = {}

        # AIOps (incidents + self-heal attempts) over the last 7 days
        try:
            inc = session.execute(
                sa_text("""
                    SELECT COUNT(*),
                           COUNT(*) FILTER (WHERE status='open'),
                           COUNT(*) FILTER (WHERE severity IN ('P0','P1'))
                    FROM incidents
                    WHERE created_at >= NOW() - INTERVAL '7 days'
                """),
            ).fetchone()
            heal = session.execute(
                sa_text("""
                    SELECT COUNT(*),
                           COUNT(*) FILTER (WHERE result='success')
                    FROM heal_logs
                    WHERE created_at >= NOW() - INTERVAL '7 days'
                """),
            ).fetchone()
            summary['aiops'] = {
                'incidents_total': int(inc[0] or 0),
                'incidents_open': int(inc[1] or 0),
                'incidents_p0_p1': int(inc[2] or 0),
                'heals_total': int(heal[0] or 0),
                'heals_success': int(heal[1] or 0),
                'heal_rate': (float(heal[1] or 0) / float(heal[0]) * 100) if heal[0] else 0,
            }
        except Exception:
            _section_failed('aiops')
            summary['aiops'] = {}

        # MCP calls over the last 24h
        try:
            mcp = session.execute(
                sa_text("""
                    SELECT COUNT(*), COUNT(DISTINCT server),
                           COUNT(*) FILTER (WHERE cache_hit),
                           COALESCE(SUM(cost_usd), 0)
                    FROM mcp_calls
                    WHERE called_at >= NOW() - INTERVAL '24 hours'
                """),
            ).fetchone()
            mcp_total = int(mcp[0] or 0)
            summary['mcp'] = {
                'total': mcp_total,
                'servers': int(mcp[1] or 0),
                'cache_hits': int(mcp[2] or 0),
                'cost': float(mcp[3] or 0),
                'cache_rate': (float(mcp[2] or 0) / mcp_total * 100) if mcp_total else 0,
            }
        except Exception:
            _section_failed('mcp')
            summary['mcp'] = {}
    finally:
        session.close()

    return render_template(
        'admin/observability_overview.html',
        active_page='obs_overview',
        summary=summary,
        today=today.strftime('%Y-%m-%d'),
    )


# ─────────────────────────────────────────────────────────────────────────────
# /observability/ai_calls — Phase 27 main entry point
# ─────────────────────────────────────────────────────────────────────────────
| 週期 | 供應商 | 已花費 | 預算 | 使用率 |
|---|---|---|---|---|
| {{ b.period }} | +{{ b.provider }} |
+ ${{ "%.2f"|format(b.spent) }} | +${{ "%.2f"|format(b.budget) }} | ++ + {{ "%.0f"|format(b.ratio * 100) }}% + + | +
+ Operation Ollama-First v5.0 / Phase 45 — AI 觀測台總覽 + (資料來源:host_health_probes / ai_calls / ai_call_budgets / learning_episodes / ai_insights / + rag_query_log / mcp_calls / incidents / heal_logs / ppt_audit_results) +
+