diff --git a/routes/admin_observability_routes.py b/routes/admin_observability_routes.py
index ead4ef6..b434d95 100644
--- a/routes/admin_observability_routes.py
+++ b/routes/admin_observability_routes.py
@@ -109,6 +109,33 @@ def ai_calls_dashboard():
             {'since': since},
         ).fetchall()
 
+        # 5. Phase 39 D-3: per caller, share of calls that used RAG / MCP, plus RAG feedback.
+        # mcp_calls is de-duplicated and rag_query_log sub-queried so COUNT(*) is not fanned out.
+        caller_richness = session.execute(
+            sa_text("""
+                SELECT a.caller,
+                       COUNT(*) AS total_calls,
+                       COUNT(*) FILTER (WHERE a.rag_hit) AS rag_hits,
+                       COUNT(DISTINCT m.request_id) AS mcp_orchestrated,
+                       COALESCE((SELECT AVG(rl.feedback_score) FROM rag_query_log rl
+                                  WHERE rl.caller = a.caller
+                                    AND rl.queried_at >= :since), 0) AS avg_rag_feedback,
+                       (SELECT COUNT(rl.feedback_score) FROM rag_query_log rl
+                         WHERE rl.caller = a.caller
+                           AND rl.queried_at >= :since) AS feedback_count
+                FROM ai_calls a
+                LEFT JOIN (SELECT DISTINCT request_id FROM mcp_calls
+                            WHERE called_at >= :since) m
+                       ON m.request_id = a.request_id
+                WHERE a.called_at >= :since
+                GROUP BY a.caller
+                HAVING COUNT(*) >= 5
+                ORDER BY total_calls DESC
+                LIMIT 12
+            """),
+            {'since': since},
+        ).fetchall()
+
         return render_template(
             'admin/ai_calls_dashboard.html',
             active_page='obs_ai_calls',
@@ -140,6 +167,19 @@ def ai_calls_dashboard():
                 for r in recent
             ],
             callers=[r[0] for r in callers],
+            caller_richness=[
+                {
+                    'caller': r[0],
+                    'total_calls': int(r[1] or 0),
+                    'rag_hits': int(r[2] or 0),
+                    'mcp_orchestrated': int(r[3] or 0),
+                    'avg_rag_feedback': round(float(r[4] or 0), 2),
+                    'feedback_count': int(r[5] or 0),
+                    'rag_hit_rate': (float(r[2] or 0) / float(r[1]) * 100) if r[1] else 0,
+                    'mcp_rate': (float(r[3] or 0) / float(r[1]) * 100) if r[1] else 0,
+                }
+                for r in caller_richness
+            ],
             error=None,
         )
     except Exception as e:
@@ -148,7 +188,7 @@ def ai_calls_dashboard():
             active_page='obs_ai_calls',
             hours=hours, caller_filter=caller_filter,
             provider_filter=provider_filter,
-            summary={}, by_provider=[], recent=[], callers=[],
+            summary={}, by_provider=[], recent=[], callers=[], caller_richness=[],
             error=f'查詢失敗: {type(e).__name__}: {str(e)[:200]}',
         )
     finally:
@@ -162,7 +202,11 @@ def ai_calls_dashboard():
 @admin_observability_bp.route('/promotion_review')
 @login_required
 def promotion_review_list():
-    """awaiting_review episodes 列表(24h 內 reviewed_at IS NULL)"""
+    """List awaiting_review episodes (within 24h, reviewed_at IS NULL).
+
+    Phase 39 (D-1): each episode runs RAG to find the top 3 similar promoted
+    ai_insights as a review aid. RAG is fail-safe: on failure similar_insights=[].
+    """
     session = get_session()
     try:
         rows = session.execute(
@@ -177,20 +221,59 @@ def promotion_review_list():
             """),
         ).fetchall()
 
+        # Total ai_insights row count (baseline for "KB growth after promotion")
+        kb_size = 0
+        try:
+            kb_row = session.execute(
+                sa_text("SELECT COUNT(*) FROM ai_insights"),
+            ).fetchone()
+            kb_size = int(kb_row[0] or 0)
+        except Exception:
+            pass
+
         episodes = [
             {'id': r[0],
              'created_at': r[1].strftime('%Y-%m-%d %H:%M'),
              'episode_type': r[2], 'source_table': r[3], 'source_id': r[4],
              'distilled_text': (r[5] or '')[:600],
              'quality_score': float(r[6] or 0), 'weight': float(r[7] or 0),
-             'status': r[8]}
+             'status': r[8],
+             'similar_insights': []}
             for r in rows
         ]
 
+        # Phase 39 D-1: run RAG per episode to find the top 3 similar promoted insights
+        try:
+            from services.rag_service import rag_service
+            for ep in episodes:
+                try:
+                    rag_result = rag_service.query(
+                        text=ep['distilled_text'][:500],
+                        caller='admin_promotion_review',
+                        top_k=3,
+                        threshold=0.7,
+                    )
+                    ep['similar_insights'] = [
+                        {
+                            'id': h.get('id'),
+                            'insight_type': h.get('insight_type'),
+                            'content': (h.get('content') or '')[:180],
+                            'similarity': round(float(h.get('similarity', 0)), 3),
+                            'created_at': h.get('created_at').strftime('%Y-%m-%d')
+                            if h.get('created_at') else '',
+                        }
+                        for h in rag_result.hits[:3]
+                    ]
+                except Exception:
+                    pass  # one episode's RAG failure must not affect the rest
+        except Exception:
+            pass  # rag_service import failed (feature flag OFF) -> skip
+
         return render_template(
             'admin/promotion_review.html',
             active_page='obs_promotion_review',
             episodes=episodes,
+            kb_size=kb_size,
             error=None,
         )
     except Exception as e:
@@ -198,6 +281,7 @@ def promotion_review_list():
             'admin/promotion_review.html',
             active_page='obs_promotion_review',
             episodes=[],
+            kb_size=0,
             error=f'查詢失敗: {type(e).__name__}: {str(e)[:200]}',
         )
     finally:
@@ -328,14 +412,79 @@ def budget_dashboard():
                 'updated_at': b[5].strftime('%Y-%m-%d %H:%M') if b[5] else '-',
             })
 
-        return render_template('admin/budget.html', active_page='obs_budget', rows=rows, error=None)
+        # Phase 39 D-4: RAG-suggested strategies (for rows at or above 80%)
+        budget_strategies = []
+        over_threshold_rows = [r for r in rows if r.get('ratio', 0) >= 0.8]
+        if over_threshold_rows:
+            try:
+                from services.rag_service import rag_service
+                top_breach = max(over_threshold_rows, key=lambda r: r.get('ratio', 0))
+                query_text = (
+                    f"預算超出 alert_pct provider={top_breach['provider']} "
+                    f"ratio={int(top_breach['ratio']*100)}% 應採取什麼節流策略"
+                )
+                rag_result = rag_service.query(
+                    text=query_text,
+                    caller='admin_budget_dashboard',
+                    top_k=3,
+                    threshold=0.65,
+                )
+                budget_strategies = [
+                    {
+                        'id': h.get('id'),
+                        'insight_type': h.get('insight_type'),
+                        'content': (h.get('content') or '')[:240],
+                        'similarity': round(float(h.get('similarity', 0)), 3),
+                    }
+                    for h in rag_result.hits[:3]
+                ]
+            except Exception:
+                pass
+
+        return render_template(
+            'admin/budget.html',
+            active_page='obs_budget',
+            rows=rows,
+            budget_strategies=budget_strategies,
+            error=None,
+        )
     except Exception as e:
         return render_template('admin/budget.html', active_page='obs_budget', rows=[],
+                               budget_strategies=[],
                                error=f'查詢失敗: {type(e).__name__}: {str(e)[:200]}')
     finally:
         session.close()
 
 
+@admin_observability_bp.route('/budget/force_throttle', methods=['POST'])
+@login_required
+def budget_force_throttle():
+    """Phase 39 D-4 (L2 automation): run cost_throttle evaluate now, not at the next hourly cron.
+
+    Use case: when an admin sees a ratio spike past 110% on the dashboard, this forces a
+    re-evaluation of the three-host throttle state (claude→gemini fallback applies at once).
+    """
+    try:
+        from services.cost_throttle_service import (
+            evaluate_throttle_status, is_cost_throttle_enabled,
+        )
+        if not is_cost_throttle_enabled():
+            return jsonify({
+                'ok': False,
+                'error': 'COST_THROTTLE_ENABLED=false(先設環境變數)',
+            }), 400
+        new_state = evaluate_throttle_status()
+        throttled = [p for p, s in new_state.items() if s.get('throttled')]
+        return jsonify({
+            'ok': True,
+            'throttled_providers': throttled,
+            'state': new_state,
+            'message': f'已立即重算 throttle 狀態,被節流的 provider:{throttled or "(無)"}',
+        })
+    except Exception as e:
+        return jsonify({'ok': False, 'error': f'{type(e).__name__}: {str(e)[:200]}'}), 500
+
+
 @admin_observability_bp.route('/budget/update/', methods=['POST'])
 @login_required
 def budget_update(budget_id: int):
@@ -543,6 +692,8 @@ def host_health_dashboard():
 
     # Phase 38:讀過去 24h 三主機健康歷史(給趨勢卡片)
     health_history = []
+    mcp_24h = []  # Phase 39 D-2: per-server MCP workload, last 24h
+    aiops_summary = {}  # Phase 39 D-5: incidents + heal_logs stats, last 7d
     try:
         _session2 = get_session()
         try:
@@ -570,6 +721,79 @@ def host_health_dashboard():
                 }
                 for r in history_rows
             ]
+
+            # Phase 39 D-5: incidents + heal_logs stats over the past 7 days
+            try:
+                inc_rows = _session2.execute(
+                    sa_text("""
+                        SELECT
+                            COUNT(*) AS total_incidents,
+                            COUNT(*) FILTER (WHERE status = 'open') AS open_count,
+                            COUNT(*) FILTER (WHERE status = 'resolved') AS resolved_count,
+                            COUNT(*) FILTER (WHERE severity = 'P0') AS p0_count,
+                            COUNT(*) FILTER (WHERE severity = 'P1') AS p1_count
+                        FROM incidents
+                        WHERE created_at >= NOW() - INTERVAL '7 days'
+                    """),
+                ).fetchone()
+                heal_rows = _session2.execute(
+                    sa_text("""
+                        SELECT
+                            COUNT(*) AS total_heals,
+                            COUNT(*) FILTER (WHERE result = 'success') AS heal_success,
+                            COUNT(*) FILTER (WHERE result = 'failed') AS heal_failed,
+                            COALESCE(AVG(duration_ms) FILTER (WHERE result = 'success'), 0) AS avg_ms
+                        FROM heal_logs
+                        WHERE created_at >= NOW() - INTERVAL '7 days'
+                    """),
+                ).fetchone()
+                aiops_summary = {
+                    'incidents_total': int(inc_rows[0] or 0),
+                    'incidents_open': int(inc_rows[1] or 0),
+                    'incidents_resolved': int(inc_rows[2] or 0),
+                    'incidents_p0': int(inc_rows[3] or 0),
+                    'incidents_p1': int(inc_rows[4] or 0),
+                    'heals_total': int(heal_rows[0] or 0),
+                    'heals_success': int(heal_rows[1] or 0),
+                    'heals_failed': int(heal_rows[2] or 0),
+                    'heals_avg_ms': int(heal_rows[3] or 0),
+                    'heal_success_rate': (
+                        float(heal_rows[1] or 0) / float(heal_rows[0]) * 100
+                    ) if heal_rows[0] else 0,
+                }
+            except Exception:
+                _session2.rollback()  # clear the aborted txn so the MCP query below still runs
+
+            # Phase 39 D-2: MCP workload over the past 24h (per server)
+            mcp_rows = _session2.execute(
+                sa_text("""
+                    SELECT server,
+                           COUNT(*) AS total_calls,
+                           COUNT(*) FILTER (WHERE status = 'ok') AS ok_calls,
+                           COUNT(*) FILTER (WHERE cache_hit) AS cache_hits,
+                           COALESCE(SUM(cost_usd), 0) AS total_cost,
+                           COALESCE(AVG(duration_ms), 0) AS avg_ms,
+                           COUNT(DISTINCT tool) AS tools_used
+                    FROM mcp_calls
+                    WHERE called_at >= NOW() - INTERVAL '24 hours'
+                    GROUP BY server
+                    ORDER BY total_calls DESC
+                """),
+            ).fetchall()
+            mcp_24h = [
+                {
+                    'server': r[0],
+                    'total_calls': int(r[1] or 0),
+                    'ok_calls': int(r[2] or 0),
+                    'cache_hits': int(r[3] or 0),
+                    'total_cost': float(r[4] or 0),
+                    'avg_ms': int(r[5] or 0),
+                    'tools_used': int(r[6] or 0),
+                    'success_rate': (float(r[2] or 0) / float(r[1]) * 100) if r[1] else 0,
+                    'cache_rate': (float(r[3] or 0) / float(r[1]) * 100) if r[1] else 0,
+                }
+                for r in mcp_rows
+            ]
         finally:
             _session2.close()
     except Exception:
@@ -582,4 +806,6 @@ def host_health_dashboard():
         mcp_status=mcp_status,
         throttle_state=throttle_state,
         health_history=health_history,
+        mcp_24h=mcp_24h,
+        aiops_summary=aiops_summary,
     )
diff --git a/templates/admin/ai_calls_dashboard.html b/templates/admin/ai_calls_dashboard.html
index 2ab9103..a45f749 100644
--- a/templates/admin/ai_calls_dashboard.html
+++ b/templates/admin/ai_calls_dashboard.html
@@ -54,6 +54,65 @@
錯誤次數

{{ summary.error_calls or 0 }}

+ + {% if caller_richness %} +
+
呼叫端 × RAG × MCP 編排矩陣 + 資料來源:ai_calls × mcp_calls × rag_query_log({{ hours }}h 內呼叫 ≥ 5 次的 caller) +
+
+ + + + + + + + + + + + + {% for c in caller_richness %} + + + + + + + + + {% endfor %} + +
呼叫端總呼叫RAG 命中率MCP 編排率RAG 反饋分數反饋筆數
{{ c.caller }}{{ "{:,}".format(c.total_calls) }} + + {{ "%.1f"|format(c.rag_hit_rate) }}% + + ({{ c.rag_hits }}) + + + {{ "%.1f"|format(c.mcp_rate) }}% + + ({{ c.mcp_orchestrated }}) + + {% if c.feedback_count > 0 %} + + {{ "%.2f"|format(c.avg_rag_feedback) }}/5 + + {% else %} + + {% endif %} + {{ c.feedback_count }}
+
+ +
+ {% endif %} +
依供應商分組
diff --git a/templates/admin/budget.html b/templates/admin/budget.html index 231b4d4..e3780e0 100644 --- a/templates/admin/budget.html +++ b/templates/admin/budget.html @@ -16,6 +16,35 @@ 手動編輯預算後立即生效(不需重啟)。

+ +
+ + 用途:發現某 provider 飆超 110% 時立即 evaluate,毋需等下次每小時 cron。 +
+ + + {% if budget_strategies %} +
+
+ RAG 自動策略建議 + — 從知識庫 ai_insights 召回過去類似超支情境的應對策略 +
+
+
    + {% for s in budget_strategies %} +
  • + {{ s.insight_type }} + 相似度 {{ "%.2f"|format(s.similarity) }} + {{ s.content }}{% if s.content|length >= 240 %}…{% endif %} +
  • + {% endfor %} +
+
+
+ {% endif %} + @@ -76,6 +105,24 @@