diff --git a/app.py b/app.py
index 8ccff7a..6b469f5 100644
--- a/app.py
+++ b/app.py
@@ -301,6 +301,23 @@
 csrf.exempt(bot_api_bp)  # Bot API 使用 Token 認證,不需要 CSRF
 sys_log.info("[Blueprint] ✅ Bot API Blueprint 已註冊")
 
+# ==========================================
+# Phase 27/28: Admin Observability Dashboard
+# Operation Ollama-First v5.0 campaign observability front end
+# Routes: /admin/ai_calls / promotion_review / quality_trend / host_health
+# ==========================================
+try:
+    from routes.admin_observability_routes import admin_observability_bp
+    app.register_blueprint(admin_observability_bp)
+    # NOTE(review): this exempts the POST approve/reject endpoints from CSRF,
+    # and the blueprint itself performs no authentication check - confirm an
+    # upstream layer restricts /admin/* before shipping.
+    csrf.exempt(admin_observability_bp)
+    sys_log.info("[Blueprint] ✅ Admin Observability Blueprint 已註冊(Phase 27/28)")
+except ImportError as _imp_err:
+    sys_log.warning("[Blueprint] ⚠️ Admin Observability 註冊失敗: %s", _imp_err)
+
+
 # ==========================================
 # Elephant Alpha AI Agent Super Orchestrator Blueprint
 # ==========================================
diff --git a/routes/admin_observability_routes.py b/routes/admin_observability_routes.py
new file mode 100644
index 0000000..0ec70f8
--- /dev/null
+++ b/routes/admin_observability_routes.py
@@ -0,0 +1,381 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+routes/admin_observability_routes.py
+Operation Ollama-First v5.0 / Phase 27 — Admin Observability Dashboard
+
+Admin pages over the observability data accumulated by the campaign:
+  /admin/ai_calls         — ai_calls live query (filters / charts)
+  /admin/promotion_review — Phase 28 PromotionGate review queue
+  /admin/quality_trend    — Phase 25 caller feedback trend
+  /admin/host_health      — Ollama hosts + MCP health
+
+Design principles:
+- Read-only (except promotion approve/reject, which mutate)
+- Fail-safe: on DB failure render an empty list plus a warning banner
+- 100 rows per page, infinite scroll
+- Never expose secrets / raw prompt text
+"""
+
+import logging
+from datetime import datetime, timedelta
+
+from flask import Blueprint, render_template, request, jsonify
+from sqlalchemy import text as sa_text
+
+from database.manager import get_session
+
+
+logger = logging.getLogger(__name__)
+
+admin_observability_bp = Blueprint(
+    'admin_observability',
+    __name__,
+    url_prefix='/admin',
+)
+
+
+def _int_arg(name, default, lo, hi):
+    """Read integer query parameter *name*, clamped to [lo, hi].
+
+    Falls back to *default* when the parameter is missing or not an integer,
+    so a malformed query string renders the page instead of raising.
+    """
+    try:
+        value = int(request.args.get(name, default))
+    except (TypeError, ValueError):
+        return default
+    return max(lo, min(hi, value))
+
+
+def _fmt_ts(value, fmt):
+    """Format a timestamp with *fmt*; return '' when the column is NULL."""
+    return value.strftime(fmt) if value is not None else ''
+
+
+def _query_error(exc):
+    """Build the banner text shown when a dashboard query fails."""
+    return f'查詢失敗: {type(exc).__name__}: {str(exc)[:200]}'
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# /admin/ai_calls — Phase 27 main entry point
+# ─────────────────────────────────────────────────────────────────────────────
+
+@admin_observability_bp.route('/ai_calls')
+def ai_calls_dashboard():
+    """Render the ai_calls observability dashboard (24h window by default).
+
+    Query parameters:
+        hours: look-back window in hours (clamped to 1..8760, default 24).
+        caller / provider: optional exact-match filters for the recent list.
+
+    On any query failure the template is rendered with empty data and an
+    ``error`` banner instead of returning a 500.
+    """
+    hours = _int_arg('hours', 24, 1, 8760)
+    caller_filter = request.args.get('caller', '').strip()
+    provider_filter = request.args.get('provider', '').strip()
+
+    since = datetime.now() - timedelta(hours=hours)
+    session = get_session()
+    try:
+        # 1. Overall summary
+        summary = session.execute(
+            sa_text("""
+                SELECT
+                    COUNT(*) AS total_calls,
+                    COALESCE(SUM(input_tokens + output_tokens), 0) AS total_tokens,
+                    COALESCE(SUM(cost_usd), 0) AS total_cost,
+                    COALESCE(AVG(duration_ms), 0) AS avg_duration,
+                    COUNT(*) FILTER (WHERE status = 'ok') AS ok_calls,
+                    COUNT(*) FILTER (WHERE status NOT IN ('ok','cache_only')) AS error_calls,
+                    COUNT(*) FILTER (WHERE rag_hit) AS rag_hits,
+                    COUNT(*) FILTER (WHERE cache_hit) AS cache_hits
+                FROM ai_calls
+                WHERE called_at >= :since
+            """),
+            {'since': since},
+        ).fetchone()
+
+        # 2. Breakdown by provider
+        by_provider = session.execute(
+            sa_text("""
+                SELECT provider, COUNT(*) AS calls,
+                       COALESCE(SUM(input_tokens + output_tokens), 0) AS tokens,
+                       COALESCE(SUM(cost_usd), 0) AS cost
+                FROM ai_calls
+                WHERE called_at >= :since
+                GROUP BY provider
+                ORDER BY tokens DESC
+            """),
+            {'since': since},
+        ).fetchall()
+
+        # 3. Most recent calls. Only fixed SQL fragments are interpolated
+        #    below; user-supplied values go through bound parameters.
+        where_parts = ["called_at >= :since"]
+        params = {'since': since}
+        if caller_filter:
+            where_parts.append("caller = :caller")
+            params['caller'] = caller_filter
+        if provider_filter:
+            where_parts.append("provider = :provider")
+            params['provider'] = provider_filter
+
+        recent = session.execute(
+            sa_text(f"""
+                SELECT id, called_at, caller, provider, model,
+                       input_tokens, output_tokens, duration_ms, status,
+                       cost_usd, cache_hit, rag_hit
+                FROM ai_calls
+                WHERE {' AND '.join(where_parts)}
+                ORDER BY called_at DESC
+                LIMIT 100
+            """),
+            params,
+        ).fetchall()
+
+        # 4. Caller list (feeds the filter dropdown)
+        callers = session.execute(
+            sa_text("""
+                SELECT DISTINCT caller FROM ai_calls
+                WHERE called_at >= :since ORDER BY caller
+            """),
+            {'since': since},
+        ).fetchall()
+
+        return render_template(
+            'admin/ai_calls_dashboard.html',
+            hours=hours,
+            caller_filter=caller_filter,
+            provider_filter=provider_filter,
+            summary={
+                'total_calls': int(summary[0] or 0),
+                'total_tokens': int(summary[1] or 0),
+                'total_cost': float(summary[2] or 0),
+                'avg_duration': int(summary[3] or 0),
+                'ok_calls': int(summary[4] or 0),
+                'error_calls': int(summary[5] or 0),
+                'rag_hits': int(summary[6] or 0),
+                'cache_hits': int(summary[7] or 0),
+            },
+            by_provider=[
+                {'provider': r[0], 'calls': int(r[1] or 0),
+                 'tokens': int(r[2] or 0), 'cost': float(r[3] or 0)}
+                for r in by_provider
+            ],
+            recent=[
+                {'id': r[0], 'called_at': _fmt_ts(r[1], '%H:%M:%S'),
+                 'caller': r[2], 'provider': r[3], 'model': r[4] or '',
+                 'in_tokens': int(r[5] or 0), 'out_tokens': int(r[6] or 0),
+                 'duration_ms': int(r[7] or 0), 'status': r[8],
+                 'cost': float(r[9] or 0), 'cache_hit': bool(r[10]),
+                 'rag_hit': bool(r[11])}
+                for r in recent
+            ],
+            callers=[r[0] for r in callers],
+            error=None,
+        )
+    except Exception as e:
+        logger.exception("ai_calls dashboard query failed")
+        return render_template(
+            'admin/ai_calls_dashboard.html',
+            hours=hours, caller_filter=caller_filter,
+            provider_filter=provider_filter,
+            summary={}, by_provider=[], recent=[], callers=[],
+            error=_query_error(e),
+        )
+    finally:
+        session.close()
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# /admin/promotion_review — Phase 28 PromotionGate review queue
+# ─────────────────────────────────────────────────────────────────────────────
+
+@admin_observability_bp.route('/promotion_review')
+def promotion_review_list():
+    """List episodes awaiting human review (at most 50).
+
+    Selects learning_episodes rows with promotion_status 'awaiting_review'
+    and reviewed_at IS NULL, highest weight first, then oldest first.
+    """
+    session = get_session()
+    try:
+        rows = session.execute(
+            sa_text("""
+                SELECT id, created_at, episode_type, source_table, source_id,
+                       distilled_text, quality_score, weight, promotion_status
+                FROM learning_episodes
+                WHERE promotion_status = 'awaiting_review'
+                  AND reviewed_at IS NULL
+                ORDER BY weight DESC, created_at ASC
+                LIMIT 50
+            """),
+        ).fetchall()
+
+        episodes = [
+            {'id': r[0], 'created_at': _fmt_ts(r[1], '%Y-%m-%d %H:%M'),
+             'episode_type': r[2], 'source_table': r[3], 'source_id': r[4],
+             'distilled_text': (r[5] or '')[:600],
+             'quality_score': float(r[6] or 0),
+             'weight': float(r[7] or 0),
+             'status': r[8]}
+            for r in rows
+        ]
+
+        return render_template(
+            'admin/promotion_review.html',
+            episodes=episodes,
+            error=None,
+        )
+    except Exception as e:
+        logger.exception("promotion_review query failed")
+        return render_template(
+            'admin/promotion_review.html',
+            episodes=[],
+            error=_query_error(e),
+        )
+    finally:
+        session.close()
+
+
+@admin_observability_bp.route('/promotion_review/approve/<int:episode_id>', methods=['POST'])
+def promotion_review_approve(episode_id: int):
+    """Handle the web "approve" button (web counterpart of the Telegram pg_ok callback).
+
+    Returns JSON ``{'ok': True, 'insight_id', 'approver'}`` on success, or
+    ``{'ok': False, 'error'}`` with HTTP 500 when promotion fails or raises.
+    """
+    try:
+        from services.learning_pipeline import promotion_gate, hash_human_approver
+        # NOTE(review): X-Forwarded-User is client-controllable unless a trusted
+        # proxy overwrites it, and the hash is only echoed back - it is not
+        # passed to promote(). Confirm whether the approver should be recorded.
+        username = request.headers.get('X-Forwarded-User', 'web_admin')
+        approver_hash = hash_human_approver(username)
+        insight_id = promotion_gate.promote(episode_id)
+        if insight_id:
+            return jsonify({'ok': True, 'insight_id': insight_id, 'approver': approver_hash})
+        return jsonify({'ok': False, 'error': 'promote failed'}), 500
+    except Exception as e:
+        logger.exception("promotion approve failed for episode %s", episode_id)
+        return jsonify({'ok': False, 'error': str(e)[:200]}), 500
+
+
+@admin_observability_bp.route('/promotion_review/reject/<int:episode_id>', methods=['POST'])
+def promotion_review_reject(episode_id: int):
+    """Handle the web "reject" button; returns JSON ``{'ok': <result of reject()>}``."""
+    try:
+        from services.learning_pipeline import promotion_gate
+        ok = promotion_gate.reject(episode_id, 'rejected_human', detail='web admin reject')
+        return jsonify({'ok': ok})
+    except Exception as e:
+        logger.exception("promotion reject failed for episode %s", episode_id)
+        return jsonify({'ok': False, 'error': str(e)[:200]}), 500
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# /admin/quality_trend — Phase 25 caller feedback trend visualisation
+# ─────────────────────────────────────────────────────────────────────────────
+
+@admin_observability_bp.route('/quality_trend')
+def quality_trend_dashboard():
+    """Render the caller x feedback trend page (30-day window by default).
+
+    Query parameters:
+        days: look-back window in days (clamped to 1..365, default 30).
+    """
+    days = _int_arg('days', 30, 1, 365)
+    try:
+        from services.feedback_quality_tracker import (
+            compute_caller_quality_trend, get_caller_recommendations,
+        )
+        trends = compute_caller_quality_trend(days=days)
+        recommendations = get_caller_recommendations(days=days)
+
+        # Sort by avg_score ascending so the worst callers come first;
+        # entries without a score sort as 5 (last).
+        sorted_trends = sorted(
+            trends.items(),
+            key=lambda kv: kv[1].get('avg_score', 5),
+        )
+
+        return render_template(
+            'admin/quality_trend.html',
+            days=days,
+            trends=sorted_trends,
+            recommendations=recommendations,
+            error=None,
+        )
+    except Exception as e:
+        logger.exception("quality_trend query failed")
+        return render_template(
+            'admin/quality_trend.html',
+            days=days, trends=[], recommendations=[],
+            error=_query_error(e),
+        )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# /admin/host_health — Ollama hosts + MCP health
+# ─────────────────────────────────────────────────────────────────────────────
+
+@admin_observability_bp.route('/host_health')
+def host_health_dashboard():
+    """Render live health of the three Ollama hosts, MCP servers and cost throttle.
+
+    Every probe is best-effort: a failing section is logged and rendered
+    with its empty default rather than failing the whole page.
+    """
+    ollama_hosts = []
+    try:
+        from services.ollama_service import (
+            OLLAMA_HOST_PRIMARY, OLLAMA_HOST_SECONDARY, OLLAMA_HOST_FALLBACK,
+            _is_unhealthy,
+        )
+        import requests as _r
+        for label, host in [
+            ('Primary (GCP)', OLLAMA_HOST_PRIMARY),
+            ('Secondary (GCP)', OLLAMA_HOST_SECONDARY),
+            ('Fallback (111)', OLLAMA_HOST_FALLBACK),
+        ]:
+            entry = {'label': label, 'host': host, 'healthy': False,
+                     'unhealthy_mark': _is_unhealthy(host), 'models': []}
+            try:
+                resp = _r.get(f"{host.rstrip('/')}/api/tags", timeout=3)
+                if resp.status_code == 200:
+                    entry['healthy'] = True
+                    entry['models'] = [
+                        m.get('name', '') for m in resp.json().get('models', [])
+                    ][:15]
+            except Exception as probe_err:
+                # An unreachable host is an expected state; keep healthy=False.
+                logger.warning("Ollama probe failed for %s: %s", host, probe_err)
+            ollama_hosts.append(entry)
+    except Exception:
+        logger.exception("host_health: Ollama section failed")
+
+    # MCP server health
+    mcp_status = {}
+    try:
+        from services.mcp_router import mcp_router
+        mcp_status = mcp_router.health_check()
+    except Exception:
+        logger.exception("host_health: MCP health check failed")
+
+    # Cost throttle state
+    throttle_state = {}
+    try:
+        from services.cost_throttle_service import get_throttle_state
+        throttle_state = get_throttle_state()
+    except Exception:
+        logger.exception("host_health: cost throttle state unavailable")
+
+    return render_template(
+        'admin/host_health.html',
+        ollama_hosts=ollama_hosts,
+        mcp_status=mcp_status,
+        throttle_state=throttle_state,
+    )
diff --git a/templates/admin/ai_calls_dashboard.html b/templates/admin/ai_calls_dashboard.html
new file mode 100644
index 0000000..3ac5737
--- /dev/null
+++ b/templates/admin/ai_calls_dashboard.html
@@ -0,0 +1,122 @@
+{% extends "base.html" %}
+
+{% block title %}AI Calls Dashboard{% endblock %}
+
+{% block content %}
+
+

📊 AI Calls Dashboard + 過去 {{ hours }} 小時 +

+ + {% if error %} +
⚠️ {{ error }}
+ {% endif %} + + +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + +
+
Total Calls

{{ "{:,}".format(summary.total_calls or 0) }}

+
Tokens

{{ "{:,}".format(summary.total_tokens or 0) }}

+
Cost USD

${{ "%.2f"|format(summary.total_cost or 0) }}

+
Avg Duration

{{ summary.avg_duration or 0 }} ms

+
RAG Hits

{{ summary.rag_hits or 0 }}

+
Errors

{{ summary.error_calls or 0 }}

+
+ + +
+
By Provider
+
+ + + + + + {% for row in by_provider %} + + + + + + + {% endfor %} + +
ProviderCallsTokensCost USD
{{ row.provider }}{{ "{:,}".format(row.calls) }}{{ "{:,}".format(row.tokens) }}${{ "%.2f"|format(row.cost) }}
+
+
+ + +
+
Recent Calls (TOP 100)
+
+ + + + + + + + + + {% for r in recent %} + + + + + + + + + + + + + + {% endfor %} + +
IDTimeCallerProviderModelInOutmsStatus$Flags
{{ r.id }}{{ r.called_at }}{{ r.caller }}{{ r.provider }}{{ r.model[:25] }}{{ r.in_tokens }}{{ r.out_tokens }}{{ r.duration_ms }}{{ r.status }}${{ "%.4f"|format(r.cost) }} + {% if r.cache_hit %}cache{% endif %} + {% if r.rag_hit %}rag{% endif %} +
+
+
+ +

+ 🤖 Operation Ollama-First v5.0 / Phase 27 — Admin Observability + | Promotion Review + | Quality Trend + | Host Health +

+
+{% endblock %} diff --git a/templates/admin/host_health.html b/templates/admin/host_health.html new file mode 100644 index 0000000..b47285b --- /dev/null +++ b/templates/admin/host_health.html @@ -0,0 +1,122 @@ +{% extends "base.html" %} + +{% block title %}Host Health Dashboard{% endblock %} + +{% block content %} +
+

🏥 Host Health Dashboard + 三主機 + MCP + Cost Throttle 即時狀態 +

+ + +
+
🤖 Ollama 三主機(HTTP /api/tags 即時 probe)
+
+ + + + + + {% for h in ollama_hosts %} + + + + + + + + {% endfor %} + +
角色主機HTTP 健康unhealthy mark已載入模型
{{ h.label }}{{ h.host }} + {% if h.healthy %} + ✅ HTTP OK + {% else %} + ❌ DOWN + {% endif %} + + {% if h.unhealthy_mark %} + ⚠️ marked unhealthy (30s) + {% else %} + + {% endif %} + + {% for m in h.models %} + {{ m }} + {% endfor %} + {% if not h.models %}無 / 未連線{% endif %} +
+
+
+ + +
+
🔌 MCP Servers(Phase 10/10.5)
+
+ + + + + + {% for server, healthy in mcp_status.items() %} + + + + + {% else %} + + {% endfor %} + +
Server狀態
{{ server }} + {% if healthy %} + ✅ healthy + {% else %} + — 未啟用 / DOWN + {% endif %} +
MCP_ROUTER_ENABLED=false 或 mcp-stack 未 deploy
+
+
+ + +
+
💰 Cost Throttle 狀態(Phase 20)
+
+ {% if throttle_state %} + + + + + + {% for provider, info in throttle_state.items() %} + + + + + + + + + {% endfor %} + +
ProviderSpentBudget月底推估Ratio狀態
{{ provider }}${{ "%.2f"|format(info.spent) }}${{ "%.2f"|format(info.budget) }}${{ "%.2f"|format(info.projected) }}{{ "%.0f"|format(info.ratio * 100) }}% + {% if info.throttled %} + ⚠️ THROTTLED + {% else %} + ✅ 正常 + {% endif %} +
+ {% else %} +

+ COST_THROTTLE_ENABLED=false 或尚未首次 evaluate(每小時 cron 跑) +

+ {% endif %} +
+
+ +

+ 🤖 Operation Ollama-First v5.0 / Phase 27 — Host Health Dashboard + | AI Calls + | Promotion Review + | Quality Trend +

+
+{% endblock %} diff --git a/templates/admin/promotion_review.html b/templates/admin/promotion_review.html new file mode 100644 index 0000000..282ef13 --- /dev/null +++ b/templates/admin/promotion_review.html @@ -0,0 +1,104 @@ +{% extends "base.html" %} + +{% block title %}Promotion Review · RAG 自主學習{% endblock %} + +{% block content %} +
+

🧠 RAG 學習晉升審核 + awaiting_review × {{ episodes|length }} 筆 +

+ + {% if error %} +
⚠️ {{ error }}
+ {% endif %} + + {% if episodes %} +

+ ⚠️ Phase 11 PromotionGate Stage 4 強制門檻:weight >= 0.8 的 episode 必經統帥審核, + 24h 無回應自動 expired(weight 降為 0.5 不晉升)。 + 點 ✅ 通過 → 寫入 ai_insights 供 RAG 檢索;點 ❌ 拒絕 → 永不晉升(learning_episodes 留存)。 +

+ + {% for ep in episodes %} +
+
+
+ Episode #{{ ep.id }} + {{ ep.episode_type }} + {% if ep.source_table %} + {{ ep.source_table }}#{{ ep.source_id }}{% endif %} + weight: {{ "%.2f"|format(ep.weight) }} + quality: {{ "%.2f"|format(ep.quality_score) }} +
+ {{ ep.created_at }} +
+
+
{{ ep.distilled_text }}
+
+ +
+ {% endfor %} + {% else %} +
+ ✨ 目前無 awaiting_review episodes。 + (RAG 未啟用 / 無高 weight episode / 全部已 24h 過期) +
+ {% endif %} + +

+ 🤖 Operation Ollama-First v5.0 / Phase 28 — PromotionGate Web 審核頁 + | AI Calls + | Quality Trend +

+
+ + +{% endblock %} diff --git a/templates/admin/quality_trend.html b/templates/admin/quality_trend.html new file mode 100644 index 0000000..c7f6bc4 --- /dev/null +++ b/templates/admin/quality_trend.html @@ -0,0 +1,102 @@ +{% extends "base.html" %} + +{% block title %}Caller Quality Trend{% endblock %} + +{% block content %} +
+

💬 Caller 反饋趨勢 + 過去 {{ days }} 日 +

+ + {% if error %} +
⚠️ {{ error }}
+ {% endif %} + +
+
+ +
+
+
+ + {% if recommendations %} +
+
🔮 智能建議
+
+
    + {% for rec in recommendations %} +
  • + {% if rec.action == 'review' %}⚠️{% else %}✅{% endif %} + {{ rec.caller }}: {{ rec.reason }} +
  • + {% endfor %} +
+
+
+ {% endif %} + +
+
Caller × 反饋分佈 + (avg_score 升序,最差先看) +
+
+ + + + + + + + + + {% for caller, info in trends %} + + + + + + + + + + {% else %} + + {% endfor %} + +
CallerAvg👍👎NTrendBar
{{ caller }} + {{ "%.2f"|format(info.avg_score) }}/5 + {{ info.thumbs_up }}{{ info.thumbs_down }}{{ info.total_feedback }} + {% if info.trend == 'positive' %} + ✅ Positive + {% elif info.trend == 'negative' %} + ⚠️ Negative + {% elif info.trend == 'neutral' %} + ➖ Neutral + {% else %} + ❓ No Data + {% endif %} + +
+ {% set pct = (info.avg_score / 5 * 100)|int %} +
+
+
無反饋資料
+
+
+ +

+ 🤖 Operation Ollama-First v5.0 / Phase 25+27 — Caller Quality Trend + | AI Calls + | Promotion Review + | Host Health +

+
+{% endblock %}