ewoooc/routes/admin_observability_routes.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
routes/admin_observability_routes.py
Operation Ollama-First v5.0 / Phase 27 — Admin Observability Dashboard

提供 admin 介面看戰役累積的觀測資料：
  /observability/ai_calls              — ai_calls 即時查詢（含篩選 / 圖表）
  /observability/promotion_review      — Phase 28 PromotionGate 待審核列表
  /observability/quality_trend         — Phase 25 caller 反饋趨勢
  /observability/host_health           — 三主機 Ollama + MCP 健康度

設計原則：
- 純讀（除了 promotion approve/reject 是 mutation）
- 失敗安全：DB 失敗回空清單 + 警告 banner
- 每頁 100 筆分頁，無限捲動
- 不暴露 secret / prompt 原文
"""

from datetime import datetime, timedelta
from flask import Blueprint, render_template, request, jsonify
from sqlalchemy import text as sa_text

from auth import login_required, get_current_user
from database.manager import get_session


admin_observability_bp = Blueprint(
    'admin_observability',
    __name__,
    url_prefix='/observability',
)


# ─────────────────────────────────────────────────────────────────────────────
# /observability/overview — Phase 45 總覽（單頁聚合 6 項 KPI）
# ─────────────────────────────────────────────────────────────────────────────

@admin_observability_bp.route('/')
@admin_observability_bp.route('/overview')
@login_required
def observability_overview():
    """Phase 45 — 觀測台總覽：一頁式聚合 6 個 sub-page 的關鍵 KPI。

    對應 Phase 44 daily Telegram summary 的 web 版本，做為 sidebar 入口頁。
    所有區塊失敗安全：個別 query 失敗只跳過該卡片，不擋整頁渲染。
    """
    from datetime import datetime as _dt
    today = _dt.now()
    month_start = _dt(today.year, today.month, 1)
    summary = {}

    session = get_session()
    try:
        # 三主機 24h 在線率
        try:
            host_rows = session.execute(
                sa_text("""
                    SELECT host_label, COUNT(*) AS total,
                           COUNT(*) FILTER (WHERE healthy) AS up,
                           COALESCE(AVG(response_ms) FILTER (WHERE healthy), 0) AS avg_ms
                    FROM host_health_probes
                    WHERE probed_at >= NOW() - INTERVAL '24 hours'
                    GROUP BY host_label ORDER BY host_label
                """),
            ).fetchall()
            summary['hosts'] = [
                {
                    'label': r[0],
                    'total': int(r[1] or 0),
                    'up': int(r[2] or 0),
                    'avg_ms': int(r[3] or 0),
                    'uptime_pct': (float(r[2] or 0) / float(r[1]) * 100) if r[1] else 0,
                }
                for r in host_rows
            ]
        except Exception:
            summary['hosts'] = []

        # AI 呼叫 24h
        try:
            ai = session.execute(
                sa_text("""
                    SELECT COUNT(*), COALESCE(SUM(input_tokens + output_tokens), 0),
                           COALESCE(SUM(cost_usd), 0),
                           COUNT(*) FILTER (WHERE status NOT IN ('ok','cache_only')),
                           COUNT(*) FILTER (WHERE rag_hit),
                           COUNT(*) FILTER (WHERE cache_hit)
                    FROM ai_calls
                    WHERE called_at >= NOW() - INTERVAL '24 hours'
                """),
            ).fetchone()
            total = int(ai[0] or 0)
            summary['ai_calls'] = {
                'total': total,
                'tokens': int(ai[1] or 0),
                'cost_24h': float(ai[2] or 0),
                'errors': int(ai[3] or 0),
                'rag_hits': int(ai[4] or 0),
                'cache_hits': int(ai[5] or 0),
                'error_rate': (float(ai[3] or 0) / total * 100) if total else 0,
                'rag_rate': (float(ai[4] or 0) / total * 100) if total else 0,
                'cache_rate': (float(ai[5] or 0) / total * 100) if total else 0,
            }
        except Exception:
            summary['ai_calls'] = {}

        # 當月成本
        try:
            month_cost = session.execute(
                sa_text("SELECT COALESCE(SUM(cost_usd), 0) FROM ai_calls WHERE called_at >= :ms"),
                {'ms': month_start},
            ).fetchone()[0]
            summary['month_cost'] = float(month_cost or 0)
        except Exception:
            summary['month_cost'] = 0

        # 預算 over 80%
        try:
            budgets = session.execute(
                sa_text("""
                    SELECT b.period, b.provider, b.budget_usd, b.alert_pct,
                           COALESCE((
                               SELECT SUM(cost_usd) FROM ai_calls
                               WHERE called_at >= :ms
                                 AND (b.provider IS NULL OR provider = b.provider)
                           ), 0) AS spent
                    FROM ai_call_budgets b
                """),
                {'ms': month_start},
            ).fetchall()
            over_threshold = []
            for r in budgets:
                budget = float(r[2] or 0)
                spent = float(r[4] or 0)
                ratio = spent / budget if budget > 0 else 0
                threshold = float(r[3] or 80) / 100
                if ratio >= threshold:
                    over_threshold.append({
                        'period': r[0], 'provider': r[1] or '(全部)',
                        'spent': spent, 'budget': budget, 'ratio': ratio,
                    })
            summary['budget_alerts'] = over_threshold
        except Exception:
            summary['budget_alerts'] = []

        # 待審 + 蒸餾池
        try:
            ep_pending = session.execute(
                sa_text("SELECT COUNT(*) FROM learning_episodes WHERE promotion_status = 'awaiting_review' AND reviewed_at IS NULL"),
            ).fetchone()[0]
            ep_total_30d = session.execute(
                sa_text("SELECT COUNT(*) FROM learning_episodes WHERE created_at >= NOW() - INTERVAL '30 days'"),
            ).fetchone()[0]
            ep_approved_30d = session.execute(
                sa_text("SELECT COUNT(*) FROM learning_episodes WHERE created_at >= NOW() - INTERVAL '30 days' AND promotion_status = 'approved'"),
            ).fetchone()[0]
            summary['episodes'] = {
                'pending': int(ep_pending or 0),
                'total_30d': int(ep_total_30d or 0),
                'approved_30d': int(ep_approved_30d or 0),
                'approval_rate': (float(ep_approved_30d or 0) / float(ep_total_30d) * 100) if ep_total_30d else 0,
            }
        except Exception:
            summary['episodes'] = {}

        # PPT 視覺審核 7d
        try:
            ppt = session.execute(
                sa_text("""
                    SELECT COUNT(*),
                           COUNT(*) FILTER (WHERE audit_status='passed'),
                           COUNT(*) FILTER (WHERE audit_status='failed')
                    FROM ppt_audit_results
                    WHERE audited_at >= NOW() - INTERVAL '7 days'
                """),
            ).fetchone()
            ppt_total = int(ppt[0] or 0)
            summary['ppt'] = {
                'total': ppt_total,
                'passed': int(ppt[1] or 0),
                'failed': int(ppt[2] or 0),
                'pass_rate': (float(ppt[1] or 0) / ppt_total * 100) if ppt_total else 0,
            }
        except Exception:
            summary['ppt'] = {}

        # AIOps 7d
        try:
            inc = session.execute(
                sa_text("""
                    SELECT COUNT(*),
                           COUNT(*) FILTER (WHERE status='open'),
                           COUNT(*) FILTER (WHERE severity IN ('P0','P1'))
                    FROM incidents
                    WHERE created_at >= NOW() - INTERVAL '7 days'
                """),
            ).fetchone()
            heal = session.execute(
                sa_text("""
                    SELECT COUNT(*),
                           COUNT(*) FILTER (WHERE result='success')
                    FROM heal_logs
                    WHERE created_at >= NOW() - INTERVAL '7 days'
                """),
            ).fetchone()
            summary['aiops'] = {
                'incidents_total': int(inc[0] or 0),
                'incidents_open': int(inc[1] or 0),
                'incidents_p0_p1': int(inc[2] or 0),
                'heals_total': int(heal[0] or 0),
                'heals_success': int(heal[1] or 0),
                'heal_rate': (float(heal[1] or 0) / float(heal[0]) * 100) if heal[0] else 0,
            }
        except Exception:
            summary['aiops'] = {}

        # MCP 24h
        try:
            mcp = session.execute(
                sa_text("""
                    SELECT COUNT(*), COUNT(DISTINCT server),
                           COUNT(*) FILTER (WHERE cache_hit),
                           COALESCE(SUM(cost_usd), 0)
                    FROM mcp_calls
                    WHERE called_at >= NOW() - INTERVAL '24 hours'
                """),
            ).fetchone()
            mcp_total = int(mcp[0] or 0)
            summary['mcp'] = {
                'total': mcp_total,
                'servers': int(mcp[1] or 0),
                'cache_hits': int(mcp[2] or 0),
                'cost': float(mcp[3] or 0),
                'cache_rate': (float(mcp[2] or 0) / mcp_total * 100) if mcp_total else 0,
            }
        except Exception:
            summary['mcp'] = {}
    finally:
        session.close()

    # Phase 51 O-3: 24h 三主機健康 sparkline 資料（每小時 bucket × 3 host）
    host_sparkline = {}
    try:
        s_sp = get_session()
        try:
            sp_rows = s_sp.execute(
                sa_text("""
                    SELECT host_label,
                           date_trunc('hour', probed_at) AS hr,
                           COUNT(*) AS total,
                           COUNT(*) FILTER (WHERE healthy) AS up
                    FROM host_health_probes
                    WHERE probed_at >= NOW() - INTERVAL '24 hours'
                    GROUP BY host_label, hr
                    ORDER BY host_label, hr ASC
                """),
            ).fetchall()
            for r in sp_rows:
                label, hr, total, up = r[0], r[1], int(r[2] or 0), int(r[3] or 0)
                if label not in host_sparkline:
                    host_sparkline[label] = {'hours': [], 'uptime_pct': []}
                host_sparkline[label]['hours'].append(
                    hr.strftime('%H:00') if hr else ''
                )
                host_sparkline[label]['uptime_pct'].append(
                    (up / total * 100) if total else 0
                )
        finally:
            s_sp.close()
    except Exception:
        pass

    return render_template(
        'admin/observability_overview.html',
        active_page='obs_overview',
        summary=summary,
        host_sparkline=host_sparkline,
        today=today.strftime('%Y-%m-%d'),
    )


# ─────────────────────────────────────────────────────────────────────────────
# /observability/rag_queries — Phase 51 RAG 召回詳情
# ─────────────────────────────────────────────────────────────────────────────

@admin_observability_bp.route('/rag_queries')
@login_required
def rag_queries_dashboard():
    """Phase 51 — RAG 召回詳情：每筆 query 的命中、saved_call、反饋。

    補完 RAG 觀測深度：之前只看 caller 級命中率，現在看每筆查詢的真實內容。
    """
    hours = int(request.args.get('hours', '24'))
    caller_filter = request.args.get('caller', '').strip()
    saved_only = request.args.get('saved_only', '').strip() == '1'

    session = get_session()
    try:
        rag_query_log_exists = bool(session.execute(
            sa_text("SELECT to_regclass('public.rag_query_log') IS NOT NULL")
        ).scalar())
        if not rag_query_log_exists:
            return render_template(
                'admin/rag_queries.html',
                active_page='obs_rag_queries',
                hours=hours,
                caller_filter=caller_filter,
                saved_only=saved_only,
                summary={},
                callers=[],
                by_caller=[],
                queries=[],
                error='rag_query_log 尚未建立，RAG 召回資料待接入。',
            )

        # 整體統計
        summary_row = session.execute(
            sa_text("""
                SELECT COUNT(*) AS total,
                       COUNT(*) FILTER (WHERE saved_call) AS saved,
                       COUNT(*) FILTER (WHERE hit_count > 0) AS with_hits,
                       COALESCE(AVG(hit_count), 0) AS avg_hits,
                       COALESCE(AVG(feedback_score) FILTER (WHERE feedback_score IS NOT NULL), 0) AS avg_score,
                       COUNT(*) FILTER (WHERE feedback_score IS NOT NULL) AS feedback_count,
                       COUNT(DISTINCT caller) AS distinct_callers
                FROM rag_query_log
                WHERE queried_at >= NOW() - (:h * INTERVAL '1 hour')
            """),
            {'h': hours},
        ).fetchone()
        total = int(summary_row[0] or 0)
        saved = int(summary_row[1] or 0)
        with_hits = int(summary_row[2] or 0)
        summary = {
            'total': total,
            'saved': saved,
            'with_hits': with_hits,
            'no_hits': total - with_hits,
            'avg_hits': round(float(summary_row[3] or 0), 2),
            'avg_score': round(float(summary_row[4] or 0), 2),
            'feedback_count': int(summary_row[5] or 0),
            'distinct_callers': int(summary_row[6] or 0),
            'saved_rate': (float(saved) / total * 100) if total else 0,
            'hit_rate': (float(with_hits) / total * 100) if total else 0,
        }

        # caller 列表（dropdown）
        callers = session.execute(
            sa_text("""
                SELECT DISTINCT caller FROM rag_query_log
                WHERE queried_at >= NOW() - (:h * INTERVAL '1 hour')
                ORDER BY caller
            """),
            {'h': hours},
        ).fetchall()
        caller_list = [r[0] for r in callers]

        # by caller 統計
        by_caller = session.execute(
            sa_text("""
                SELECT caller,
                       COUNT(*) AS total,
                       COUNT(*) FILTER (WHERE saved_call) AS saved,
                       COUNT(*) FILTER (WHERE hit_count > 0) AS with_hits,
                       COALESCE(AVG(feedback_score) FILTER (WHERE feedback_score IS NOT NULL), 0) AS avg_score,
                       COUNT(*) FILTER (WHERE feedback_score IS NOT NULL) AS fb_count
                FROM rag_query_log
                WHERE queried_at >= NOW() - (:h * INTERVAL '1 hour')
                GROUP BY caller
                ORDER BY total DESC
            """),
            {'h': hours},
        ).fetchall()

        # 最近 50 筆查詢（套 caller filter + saved_only）
        params = {'h': hours, 'caller_f': caller_filter}
        recent_queries = session.execute(
            sa_text(f"""
                SELECT id, queried_at, caller, LEFT(query_text, 200) AS qtext,
                       top_k, threshold, hit_count, used_results, saved_call,
                       feedback_score, request_id
                FROM rag_query_log
                WHERE queried_at >= NOW() - (:h * INTERVAL '1 hour')
                  AND (:caller_f = '' OR caller = :caller_f)
                  {"AND saved_call = TRUE" if saved_only else ""}
                ORDER BY queried_at DESC
                LIMIT 50
            """),
            params,
        ).fetchall()
        queries = []
        for r in recent_queries:
            used_ids = list(r[7]) if r[7] else []
            queries.append({
                'id': int(r[0]),
                'queried_at': r[1].strftime('%Y-%m-%d %H:%M:%S') if r[1] else '',
                'caller': r[2],
                'query_text': r[3] or '',
                'top_k': int(r[4] or 0),
                'threshold': round(float(r[5] or 0), 3),
                'hit_count': int(r[6] or 0),
                'used_results': used_ids,
                'saved_call': bool(r[8]),
                'feedback_score': int(r[9]) if r[9] is not None else None,
                'request_id': r[10],
            })

        return render_template(
            'admin/rag_queries.html',
            active_page='obs_rag_queries',
            hours=hours,
            caller_filter=caller_filter,
            saved_only=saved_only,
            summary=summary,
            callers=caller_list,
            by_caller=[
                {
                    'caller': r[0], 'total': int(r[1] or 0),
                    'saved': int(r[2] or 0), 'with_hits': int(r[3] or 0),
                    'avg_score': round(float(r[4] or 0), 2),
                    'fb_count': int(r[5] or 0),
                    'saved_rate': (float(r[2] or 0) / float(r[1]) * 100) if r[1] else 0,
                    'hit_rate': (float(r[3] or 0) / float(r[1]) * 100) if r[1] else 0,
                }
                for r in by_caller
            ],
            queries=queries,
            error=None,
        )
    except Exception as e:
        return render_template(
            'admin/rag_queries.html',
            active_page='obs_rag_queries', hours=hours,
            caller_filter=caller_filter, saved_only=saved_only,
            summary={}, callers=[], by_caller=[], queries=[],
            error='RAG 召回資料暫時不可用，已切換安全空狀態。',
        )
    finally:
        session.close()


@admin_observability_bp.route('/rag_queries/<int:query_id>/hits', methods=['GET'])
@login_required
def rag_query_hits(query_id: int):
    """Phase 51 — JSON API：回傳單筆 query 的 hits 詳細內容（給 modal 展開）。"""
    try:
        session = get_session()
        try:
            row = session.execute(
                sa_text("""
                    SELECT id, query_text, used_results, hit_count, threshold
                    FROM rag_query_log WHERE id = :id
                """),
                {'id': query_id},
            ).fetchone()
            if not row:
                return jsonify({'ok': False, 'error': 'not found'}), 404

            used_ids = list(row[2]) if row[2] else []
            hits = []
            if used_ids:
                rows = session.execute(
                    sa_text("""
                        SELECT id, insight_type, period, product_sku,
                               LEFT(content, 300) AS preview, created_at
                        FROM ai_insights
                        WHERE id = ANY(:ids)
                        ORDER BY created_at DESC
                    """),
                    {'ids': used_ids},
                ).fetchall()
                hits = [
                    {
                        'id': int(h[0]),
                        'insight_type': h[1],
                        'period': h[2],
                        'product_sku': h[3],
                        'content': h[4] or '',
                        'created_at': h[5].strftime('%Y-%m-%d') if h[5] else '',
                    }
                    for h in rows
                ]
            return jsonify({
                'ok': True,
                'query_id': query_id,
                'query_text': row[1],
                'hit_count': int(row[3] or 0),
                'threshold': round(float(row[4] or 0), 3),
                'hits': hits,
            })
        finally:
            session.close()
    except Exception as e:
        return jsonify({'ok': False, 'error': f'{type(e).__name__}: {str(e)[:200]}'}), 500


# ─────────────────────────────────────────────────────────────────────────────
# /observability/business_intel — Phase 48 商業面 × AI 編排
# ─────────────────────────────────────────────────────────────────────────────

@admin_observability_bp.route('/business_intel')
@login_required
def business_intel_dashboard():
    """Phase 48 — 商業面 × AI 編排：把 AI 觀測台延伸到商業層級。

    展現「AI 在做什麼生意」：
    - ai_price_recommendations × competitor_prices: AI 看到什麼定價機會
    - action_plans × action_outcomes: 計畫到 verdict 的閉環
    - competitor_match_attempts: 競品比對失敗追蹤
    """
    days = int(request.args.get('days', '7'))
    session = get_session()
    try:
        # 1. ai_price_recommendations 30d 總覽
        rec_summary = session.execute(
            sa_text(f"""
                SELECT strategy, COUNT(*) AS cnt,
                       COALESCE(AVG(confidence), 0) AS avg_conf,
                       COALESCE(AVG(gap_pct), 0) AS avg_gap_pct,
                       COALESCE(AVG(sales_7d_delta), 0) AS avg_sales_delta
                FROM ai_price_recommendations
                WHERE created_at >= NOW() - INTERVAL '{int(days)} days'
                GROUP BY strategy ORDER BY cnt DESC
            """),
        ).fetchall()
        rec_by_strategy = [
            {
                'strategy': r[0], 'count': int(r[1] or 0),
                'avg_confidence': round(float(r[2] or 0), 3),
                'avg_gap_pct': round(float(r[3] or 0), 2),
                'avg_sales_delta': round(float(r[4] or 0), 2),
            }
            for r in rec_summary
        ]

        # 2. ai_price_recommendations 最近 20 筆詳細
        latest_recs = session.execute(
            sa_text("""
                SELECT id, sku, LEFT(name, 50), strategy, confidence,
                       momo_price, pchome_price, gap_pct, sales_7d_delta,
                       LEFT(reason, 120), created_at
                FROM ai_price_recommendations
                ORDER BY created_at DESC LIMIT 20
            """),
        ).fetchall()
        latest_recommendations = [
            {
                'id': r[0], 'sku': r[1], 'name': r[2], 'strategy': r[3],
                'confidence': round(float(r[4] or 0), 3),
                'momo_price': float(r[5] or 0),
                'pchome_price': float(r[6] or 0) if r[6] else None,
                'gap_pct': round(float(r[7] or 0), 2),
                'sales_delta': round(float(r[8] or 0), 2) if r[8] is not None else None,
                'reason': r[9] or '',
                'created_at': r[10].strftime('%m-%d %H:%M') if r[10] else '',
            }
            for r in latest_recs
        ]

        # 3. action_plans × action_outcomes 閉環（30d）
        closed_loops = session.execute(
            sa_text(f"""
                SELECT p.id, p.sku, p.plan_type, p.status,
                       p.created_by, p.created_at, p.executed_at,
                       o.verdict, o.metric_type, o.before_val, o.after_val
                FROM action_plans p
                LEFT JOIN action_outcomes o ON o.plan_id = p.id
                WHERE p.created_at >= NOW() - INTERVAL '{int(days)} days'
                ORDER BY p.created_at DESC LIMIT 25
            """),
        ).fetchall()
        loop_records = []
        for r in closed_loops:
            before = float(r[9]) if r[9] is not None else None
            after = float(r[10]) if r[10] is not None else None
            change_pct = None
            if before and before != 0 and after is not None:
                change_pct = (after - before) / abs(before) * 100
            loop_records.append({
                'plan_id': r[0], 'sku': r[1], 'plan_type': r[2],
                'status': r[3], 'created_by': r[4],
                'created_at': r[5].strftime('%m-%d %H:%M') if r[5] else '',
                'executed_at': r[6].strftime('%m-%d %H:%M') if r[6] else None,
                'verdict': r[7], 'metric_type': r[8],
                'before': before, 'after': after, 'change_pct': change_pct,
            })

        # 4. action_outcomes verdict 統計
        verdict_summary = session.execute(
            sa_text(f"""
                SELECT verdict, COUNT(*) AS cnt,
                       AVG(after_val - before_val) AS avg_delta
                FROM action_outcomes
                WHERE created_at >= NOW() - INTERVAL '{int(days)} days'
                  AND before_val IS NOT NULL AND after_val IS NOT NULL
                GROUP BY verdict ORDER BY cnt DESC
            """),
        ).fetchall()
        verdict_stats = [
            {
                'verdict': r[0] or 'unknown', 'count': int(r[1] or 0),
                'avg_delta': round(float(r[2] or 0), 2),
            }
            for r in verdict_summary
        ]

        # 5. competitor_match_attempts 失敗統計（30d）
        match_attempts = session.execute(
            sa_text(f"""
                SELECT attempt_status, COUNT(*) AS cnt,
                       COALESCE(AVG(candidate_count), 0) AS avg_candidates,
                       COALESCE(AVG(best_match_score), 0) AS avg_score
                FROM competitor_match_attempts
                WHERE attempted_at >= NOW() - INTERVAL '{int(days)} days'
                GROUP BY attempt_status ORDER BY cnt DESC
            """),
        ).fetchall()
        match_stats = [
            {
                'status': r[0], 'count': int(r[1] or 0),
                'avg_candidates': round(float(r[2] or 0), 1),
                'avg_score': round(float(r[3] or 0), 3),
            }
            for r in match_attempts
        ]

        # 6. competitor_prices 24h 變動 TOP 10
        recent_competitor = session.execute(
            sa_text("""
                SELECT cph.sku, cph.competitor_product_name, cph.price,
                       cph.momo_price, cph.discount_pct, cph.match_score,
                       cph.crawled_at
                FROM competitor_price_history cph
                WHERE cph.crawled_at >= NOW() - INTERVAL '24 hours'
                  AND cph.match_score >= 0.7
                ORDER BY cph.crawled_at DESC LIMIT 12
            """),
        ).fetchall()
        recent_competitor_prices = [
            {
                'sku': r[0],
                'product_name': (r[1] or '')[:50],
                'pchome_price': float(r[2] or 0),
                'momo_price': float(r[3] or 0) if r[3] else None,
                'discount_pct': int(r[4]) if r[4] else None,
                'match_score': round(float(r[5] or 0), 3),
                'gap': (float(r[3]) - float(r[2])) if (r[2] and r[3]) else None,
                'crawled_at': r[6].strftime('%m-%d %H:%M') if r[6] else '',
            }
            for r in recent_competitor
        ]

        # 7. 高 confidence 但未 follow-through (recommendation 沒對應 action_plan)
        unfollowed = session.execute(
            sa_text(f"""
                SELECT COUNT(*)
                FROM ai_price_recommendations r
                WHERE r.created_at >= NOW() - INTERVAL '{int(days)} days'
                  AND r.confidence >= 0.7
                  AND NOT EXISTS (
                      SELECT 1 FROM action_plans p
                      WHERE p.sku = r.sku
                        AND p.created_at >= r.created_at
                        AND p.created_at < r.created_at + INTERVAL '7 days'
                  )
            """),
        ).fetchone()
        unfollowed_count = int(unfollowed[0] or 0) if unfollowed else 0

        return render_template(
            'admin/business_intel.html',
            active_page='obs_business_intel',
            days=days,
            rec_by_strategy=rec_by_strategy,
            latest_recommendations=latest_recommendations,
            loop_records=loop_records,
            verdict_stats=verdict_stats,
            match_stats=match_stats,
            recent_competitor_prices=recent_competitor_prices,
            unfollowed_count=unfollowed_count,
            error=None,
        )
    except Exception as e:
        return render_template(
            'admin/business_intel.html',
            active_page='obs_business_intel', days=days,
            rec_by_strategy=[], latest_recommendations=[], loop_records=[],
            verdict_stats=[], match_stats=[], recent_competitor_prices=[],
            unfollowed_count=0,
            error='商業 AI 資料暫時不可用，已切換安全空狀態。',
        )
    finally:
        session.close()


# ─────────────────────────────────────────────────────────────────────────────
# /observability/agent_orchestration — Phase 46 編排矩陣
# ─────────────────────────────────────────────────────────────────────────────

# caller → agent 歸類規則（同 services/* 各 agent 真實 caller 值）
_AGENT_CALLER_GROUPS = {
    'openclaw': [
        'openclaw_qa', 'openclaw_daily', 'openclaw_daily_insight',
        'openclaw_meta', 'openclaw_monthly', 'openclaw_weekly',
        'openclaw_bot_main', 'openclaw_bot_gemini', 'openclaw_bot_nim',
        'sales_copy', 'code_review_openclaw',
    ],
    'hermes': [
        'hermes_analyst', 'hermes_intent', 'code_review_hermes',
    ],
    'nemotron': [
        'nemotron_dispatch',
    ],
    'elephant_alpha': [
        'ea_engine', 'code_review_elephant',
    ],
}
_AGENT_LABELS = {
    'openclaw': ('🤖 OpenClaw', '主編排者 / Bot 對話 / 報告生成'),
    'hermes': ('🔍 Hermes', '價格/程式碼分析師'),
    'nemotron': ('🧬 NemoTron', '任務 dispatcher'),
    'elephant_alpha': ('🐘 ElephantAlpha', '自主決策引擎'),
}

# Provider → 類別歸類
_PROVIDER_TIER = {
    'gcp_ollama': 'ollama_local',
    'ollama_secondary': 'ollama_local',
    'ollama_111': 'ollama_local',
    'ollama_other': 'ollama_local',
    'gemini': 'paid_external',
    'claude': 'paid_external',
    'nim': 'paid_external',
    'nim_via_elephant': 'paid_external',
    'openrouter': 'paid_external',
}


@admin_observability_bp.route('/agent_orchestration')
@login_required
def agent_orchestration_dashboard():
    """Phase 46 — 4 Agent × Models × MCP × RAG 編排矩陣

    展現「組合發揮」：每個 agent 在 24h 內如何調用 Ollama/Gemini，
    搭配 MCP tool（外部 + 內部 mcp_collector），與 RAG 知識庫的協作。

    資料來源：ai_calls × mcp_calls × rag_query_log 三表跨 JOIN + caller 分組。
    """
    hours = int(request.args.get('hours', '24'))
    session = get_session()
    try:
        # 1. 整體統計
        overall = session.execute(
            sa_text("""
                SELECT COUNT(*),
                       COALESCE(SUM(cost_usd), 0),
                       COUNT(*) FILTER (WHERE provider IN ('gemini','claude','nim','openrouter','nim_via_elephant')),
                       COUNT(*) FILTER (WHERE provider IN ('gcp_ollama','ollama_secondary','ollama_111','ollama_other')),
                       COUNT(*) FILTER (WHERE rag_hit),
                       COALESCE(SUM(input_tokens + output_tokens), 0)
                FROM ai_calls
                WHERE called_at >= NOW() - (:h * INTERVAL '1 hour')
            """),
            {'h': hours},
        ).fetchone()
        total_calls = int(overall[0] or 0)
        total_cost = float(overall[1] or 0)
        paid_calls = int(overall[2] or 0)
        local_calls = int(overall[3] or 0)
        rag_hits = int(overall[4] or 0)
        total_tokens = int(overall[5] or 0)
        mcp_calls_table_exists = bool(session.execute(
            sa_text("SELECT to_regclass('public.mcp_calls') IS NOT NULL")
        ).scalar())

        # 2. 每個 agent group 的細節
        agent_matrix = []
        for agent_key, callers in _AGENT_CALLER_GROUPS.items():
            ag_row = session.execute(
                sa_text("""
                    SELECT COUNT(*) AS calls,
                           COALESCE(SUM(input_tokens + output_tokens), 0) AS tokens,
                           COALESCE(SUM(cost_usd), 0) AS cost,
                           COUNT(*) FILTER (WHERE rag_hit) AS rag_hits,
                           COUNT(*) FILTER (WHERE provider IN ('gcp_ollama','ollama_secondary','ollama_111','ollama_other')) AS ollama,
                           COUNT(*) FILTER (WHERE provider = 'gcp_ollama') AS ollama_gcp_a,
                           COUNT(*) FILTER (WHERE provider = 'ollama_secondary') AS ollama_gcp_b,
                           COUNT(*) FILTER (WHERE provider = 'ollama_111') AS ollama_111,
                           COUNT(*) FILTER (WHERE provider = 'gemini') AS gemini,
                           COUNT(*) FILTER (WHERE provider IN ('claude','nim','openrouter','nim_via_elephant')) AS other_paid,
                           COUNT(*) FILTER (WHERE status NOT IN ('ok','cache_only')) AS errors,
                           COALESCE(AVG(duration_ms), 0) AS avg_ms
                    FROM ai_calls
                    WHERE called_at >= NOW() - (:h * INTERVAL '1 hour')
                      AND caller = ANY(:callers)
                """),
                {'h': hours, 'callers': callers},
            ).fetchone()
            calls = int(ag_row[0] or 0)
            if calls == 0:
                # 沒呼叫也佔位顯示
                agent_matrix.append({
                    'key': agent_key, 'label': _AGENT_LABELS[agent_key][0],
                    'desc': _AGENT_LABELS[agent_key][1],
                    'calls': 0, 'tokens': 0, 'cost': 0,
                    'rag_hits': 0, 'rag_rate': 0,
                    'ollama_pct': 0, 'gemini_pct': 0, 'paid_pct': 0,
                    'ollama_gcp_a': 0, 'ollama_gcp_b': 0, 'ollama_111': 0,
                    'gemini': 0, 'other_paid': 0,
                    'errors': 0, 'error_rate': 0,
                    'avg_ms': 0, 'mcp_calls': 0, 'mcp_rate': 0,
                    'callers_in_group': callers,
                })
                continue

            # MCP 編排率（透過 request_id 串接）。mcp_calls 尚未 migration 時安全降級為 0。
            if mcp_calls_table_exists:
                mcp_count = session.execute(
                    sa_text("""
                        SELECT COUNT(DISTINCT a.request_id)
                        FROM ai_calls a
                        INNER JOIN mcp_calls m ON m.request_id = a.request_id
                        WHERE a.called_at >= NOW() - (:h * INTERVAL '1 hour')
                          AND a.caller = ANY(:callers)
                          AND a.request_id IS NOT NULL
                    """),
                    {'h': hours, 'callers': callers},
                ).fetchone()[0] or 0
            else:
                mcp_count = 0

            errors = int(ag_row[10] or 0)
            ollama = int(ag_row[4] or 0)
            gemini = int(ag_row[8] or 0)
            other_paid = int(ag_row[9] or 0)

            agent_matrix.append({
                'key': agent_key,
                'label': _AGENT_LABELS[agent_key][0],
                'desc': _AGENT_LABELS[agent_key][1],
                'calls': calls,
                'tokens': int(ag_row[1] or 0),
                'cost': float(ag_row[2] or 0),
                'rag_hits': int(ag_row[3] or 0),
                'rag_rate': (float(ag_row[3] or 0) / calls * 100) if calls else 0,
                'ollama': ollama, 'ollama_pct': (ollama / calls * 100) if calls else 0,
                'ollama_gcp_a': int(ag_row[5] or 0),
                'ollama_gcp_b': int(ag_row[6] or 0),
                'ollama_111': int(ag_row[7] or 0),
                'gemini': gemini, 'gemini_pct': (gemini / calls * 100) if calls else 0,
                'other_paid': other_paid,
                'paid_pct': ((gemini + other_paid) / calls * 100) if calls else 0,
                'errors': errors, 'error_rate': (errors / calls * 100) if calls else 0,
                'avg_ms': int(ag_row[11] or 0),
                'mcp_calls': int(mcp_count),
                'mcp_rate': (float(mcp_count) / calls * 100) if calls else 0,
                'callers_in_group': callers,
            })

        # 3. MCP server 24h 工作量（同 host_health 邏輯）
        if mcp_calls_table_exists:
            mcp_servers = session.execute(
                sa_text("""
                    SELECT server, caller, COUNT(*) AS calls,
                           COUNT(*) FILTER (WHERE cache_hit) AS cache_hits,
                           COALESCE(SUM(cost_usd), 0) AS cost
                    FROM mcp_calls
                    WHERE called_at >= NOW() - (:h * INTERVAL '1 hour')
                    GROUP BY server, caller
                    ORDER BY calls DESC
                    LIMIT 30
                """),
                {'h': hours},
            ).fetchall()
            mcp_matrix = [
                {
                    'server': r[0], 'caller': r[1],
                    'calls': int(r[2] or 0),
                    'cache_hits': int(r[3] or 0),
                    'cost': float(r[4] or 0),
                    'cache_rate': (float(r[3] or 0) / float(r[2]) * 100) if r[2] else 0,
                }
                for r in mcp_servers
            ]
        else:
            mcp_matrix = []

        # 4. 自動編排建議（rule-based 提案）
        recommendations = []
        if not mcp_calls_table_exists:
            recommendations.append({
                'severity': 'med', 'agent': 'MCP 觀測',
                'finding': 'mcp_calls 尚未建立，MCP 編排率目前以 0 顯示',
                'suggestion': '執行 Phase 10.7 migration 後，本頁會自動接回 MCP server/caller 矩陣',
            })
        for ag in agent_matrix:
            if ag['calls'] == 0:
                continue
            # 規則 1：付費比例 > 50% 且 ollama 比例 < 20% → 建議切 Hermes-first
            if ag['paid_pct'] > 50 and ag['ollama_pct'] < 20:
                recommendations.append({
                    'severity': 'high', 'agent': ag['label'],
                    'finding': f"付費 LLM 比例 {ag['paid_pct']:.0f}%（cost ${ag['cost']:.2f}）",
                    'suggestion': '改用 Hermes-first 短路機制：先試 Ollama 三主機 5s timeout，0 hits 才 escalate Gemini',
                })
            # 規則 2：錯誤率 > 10% → 建議跑 code review
            if ag['error_rate'] > 10:
                recommendations.append({
                    'severity': 'high', 'agent': ag['label'],
                    'finding': f"錯誤率 {ag['error_rate']:.1f}%（{ag['errors']}/{ag['calls']}）",
                    'suggestion': '觸發 Code Review Pipeline 找 regression（ai_calls 觀測台一鍵）',
                })
            # 規則 3：MCP 編排率 < 5% 但 calls 多 → 建議擴大 MCP 使用
            if mcp_calls_table_exists and ag['mcp_rate'] < 5 and ag['calls'] > 50:
                recommendations.append({
                    'severity': 'med', 'agent': ag['label'],
                    'finding': f"MCP 編排率僅 {ag['mcp_rate']:.1f}%，未善用外部工具",
                    'suggestion': '考慮加 MCP omnisearch / firecrawl 補強事實查證鏈',
                })
            # 規則 4：RAG 命中率高（≥40%）但有 saved_call=False 的多 → 提醒 feedback
            if ag['rag_rate'] >= 40 and ag['rag_hits'] >= 20:
                recommendations.append({
                    'severity': 'low', 'agent': ag['label'],
                    'finding': f"RAG 命中率 {ag['rag_rate']:.1f}%（{ag['rag_hits']} hits）— 知識庫貢獻度高",
                    'suggestion': '推 Telegram inline button 收集 feedback_score 強化 promotion gate',
                })
            # 規則 5：111 fallback 比例 > 20% → 警示
            if ag['calls'] > 0 and ag['ollama_111'] / max(ag['calls'], 1) > 0.20:
                fb_pct = ag['ollama_111'] / ag['calls'] * 100
                recommendations.append({
                    'severity': 'med', 'agent': ag['label'],
                    'finding': f"111 fallback 比例 {fb_pct:.0f}%（GCP 兩台不可達？）",
                    'suggestion': '檢查 mo.wooo.work/observability/host_health AIOps incidents',
                })

        return render_template(
            'admin/agent_orchestration.html',
            active_page='obs_agent_orchestration',
            hours=hours,
            agent_matrix=agent_matrix,
            mcp_matrix=mcp_matrix,
            recommendations=recommendations,
            overall={
                'total_calls': total_calls,
                'total_cost': total_cost,
                'total_tokens': total_tokens,
                'paid_calls': paid_calls,
                'local_calls': local_calls,
                'rag_hits': rag_hits,
                'paid_pct': (paid_calls / total_calls * 100) if total_calls else 0,
                'local_pct': (local_calls / total_calls * 100) if total_calls else 0,
                'rag_rate': (rag_hits / total_calls * 100) if total_calls else 0,
            },
            error=None,
        )
    except Exception as e:
        return render_template(
            'admin/agent_orchestration.html',
            active_page='obs_agent_orchestration', hours=hours,
            agent_matrix=[], mcp_matrix=[], recommendations=[], overall={},
            error='資料查詢暫時不可用，已切換安全空狀態。',
        )
    finally:
        session.close()


# ─────────────────────────────────────────────────────────────────────────────
# /observability/ai_calls — Phase 27 主入口
# ─────────────────────────────────────────────────────────────────────────────

@admin_observability_bp.route('/ai_calls')
@login_required
def ai_calls_dashboard():
    """ai_calls 表觀測 dashboard（24h 預設視窗）"""
    hours = int(request.args.get('hours', '24'))
    caller_filter = request.args.get('caller', '').strip()
    provider_filter = request.args.get('provider', '').strip()

    since = datetime.now() - timedelta(hours=hours)
    session = get_session()
    try:
        # 1. 總覽
        summary = session.execute(
            sa_text("""
                SELECT
                  COUNT(*) AS total_calls,
                  COALESCE(SUM(input_tokens + output_tokens), 0) AS total_tokens,
                  COALESCE(SUM(cost_usd), 0) AS total_cost,
                  COALESCE(AVG(duration_ms), 0) AS avg_duration,
                  COUNT(*) FILTER (WHERE status = 'ok') AS ok_calls,
                  COUNT(*) FILTER (WHERE status NOT IN ('ok','cache_only')) AS error_calls,
                  COUNT(*) FILTER (WHERE rag_hit) AS rag_hits,
                  COUNT(*) FILTER (WHERE cache_hit) AS cache_hits
                FROM ai_calls
                WHERE called_at >= :since
            """),
            {'since': since},
        ).fetchone()

        # 2. by provider
        by_provider = session.execute(
            sa_text("""
                SELECT provider, COUNT(*) AS calls,
                       COALESCE(SUM(input_tokens + output_tokens), 0) AS tokens,
                       COALESCE(SUM(cost_usd), 0) AS cost
                FROM ai_calls
                WHERE called_at >= :since
                GROUP BY provider
                ORDER BY tokens DESC
            """),
            {'since': since},
        ).fetchall()

        # 3. TOP 100 calls — Phase 33 Critic HIGH #2 修補：
        # 改用固定 SQL + 全綁參數，移除 f-string 動態 WHERE 拼接（防後人不慎注入）
        recent = session.execute(
            sa_text("""
                SELECT id, called_at, caller, provider, model,
                       input_tokens, output_tokens, duration_ms, status,
                       cost_usd, cache_hit, rag_hit
                FROM ai_calls
                WHERE called_at >= :since
                  AND (:caller_f = '' OR caller = :caller_f)
                  AND (:provider_f = '' OR provider = :provider_f)
                ORDER BY called_at DESC
                LIMIT 100
            """),
            {
                'since': since,
                'caller_f': caller_filter,
                'provider_f': provider_filter,
            },
        ).fetchall()

        # 4. caller 列表（給篩選 dropdown）
        callers = session.execute(
            sa_text("""
                SELECT DISTINCT caller FROM ai_calls
                WHERE called_at >= :since ORDER BY caller
            """),
            {'since': since},
        ).fetchall()

        # 5b. Phase 47 K-2: by model 細分（不只 provider，到實際 model）
        by_model = session.execute(
            sa_text("""
                SELECT model, provider, COUNT(*) AS calls,
                       COALESCE(SUM(input_tokens + output_tokens), 0) AS tokens,
                       COALESCE(SUM(cost_usd), 0) AS cost,
                       COALESCE(AVG(duration_ms), 0) AS avg_ms,
                       COUNT(*) FILTER (WHERE status NOT IN ('ok','cache_only')) AS errors
                FROM ai_calls
                WHERE called_at >= :since
                  AND model IS NOT NULL AND model != ''
                GROUP BY model, provider
                ORDER BY calls DESC
                LIMIT 15
            """),
            {'since': since},
        ).fetchall()

        # 5c. Phase 47 K-2: hourly 呼叫量趨勢（24 個 bucket）
        hourly_trend = session.execute(
            sa_text("""
                SELECT date_trunc('hour', called_at) AS hr,
                       COUNT(*) AS calls,
                       COALESCE(SUM(cost_usd), 0) AS cost,
                       COUNT(*) FILTER (WHERE status NOT IN ('ok','cache_only')) AS errors
                FROM ai_calls
                WHERE called_at >= NOW() - INTERVAL '24 hours'
                GROUP BY hr ORDER BY hr ASC
            """),
        ).fetchall()

        # 5d. Phase 47 K-2: agent_context 最近 10 筆（OpenClaw/Hermes 對話上下文）
        recent_contexts = session.execute(
            sa_text("""
                SELECT created_at, agent_name, context_key, ttl_minutes,
                       LEFT(context_val, 120) AS preview
                FROM agent_context
                ORDER BY created_at DESC LIMIT 10
            """),
        ).fetchall()

        # 5. Phase 39 D-3: caller × RAG 命中率 × MCP 編排率（跨表 JOIN）
        # mcp_calls / rag_query_log 尚未 migration 時安全降級，不曝露 DB exception。
        mcp_calls_table_exists = bool(session.execute(
            sa_text("SELECT to_regclass('public.mcp_calls') IS NOT NULL")
        ).scalar())
        rag_query_log_exists = bool(session.execute(
            sa_text("SELECT to_regclass('public.rag_query_log') IS NOT NULL")
        ).scalar())
        if mcp_calls_table_exists and rag_query_log_exists:
            caller_richness = session.execute(
                sa_text("""
                    SELECT a.caller,
                           COUNT(*) AS total_calls,
                           COUNT(*) FILTER (WHERE a.rag_hit) AS rag_hits,
                           COUNT(DISTINCT m.request_id) AS mcp_orchestrated,
                           COALESCE(AVG(rl.feedback_score) FILTER (WHERE rl.feedback_score IS NOT NULL), 0)
                               AS avg_rag_feedback,
                           COUNT(rl.feedback_score) AS feedback_count
                    FROM ai_calls a
                    LEFT JOIN mcp_calls m
                           ON m.request_id = a.request_id
                          AND m.called_at >= :since
                    LEFT JOIN rag_query_log rl
                           ON rl.caller = a.caller
                          AND rl.queried_at >= :since
                    WHERE a.called_at >= :since
                    GROUP BY a.caller
                    HAVING COUNT(*) >= 5
                    ORDER BY total_calls DESC
                    LIMIT 12
                """),
                {'since': since},
            ).fetchall()
        else:
            caller_richness = session.execute(
                sa_text("""
                    SELECT caller,
                           COUNT(*) AS total_calls,
                           COUNT(*) FILTER (WHERE rag_hit) AS rag_hits
                    FROM ai_calls
                    WHERE called_at >= :since
                    GROUP BY caller
                    HAVING COUNT(*) >= 5
                    ORDER BY total_calls DESC
                    LIMIT 12
                """),
                {'since': since},
            ).fetchall()

        return render_template(
            'admin/ai_calls_dashboard.html',
            active_page='obs_ai_calls',
            hours=hours,
            caller_filter=caller_filter,
            provider_filter=provider_filter,
            summary={
                'total_calls': int(summary[0] or 0),
                'total_tokens': int(summary[1] or 0),
                'total_cost': float(summary[2] or 0),
                'avg_duration': int(summary[3] or 0),
                'ok_calls': int(summary[4] or 0),
                'error_calls': int(summary[5] or 0),
                'rag_hits': int(summary[6] or 0),
                'cache_hits': int(summary[7] or 0),
            },
            by_provider=[
                {'provider': r[0], 'calls': int(r[1] or 0),
                 'tokens': int(r[2] or 0), 'cost': float(r[3] or 0)}
                for r in by_provider
            ],
            recent=[
                {'id': r[0], 'called_at': r[1].strftime('%H:%M:%S'),
                 'caller': r[2], 'provider': r[3], 'model': r[4],
                 'in_tokens': int(r[5] or 0), 'out_tokens': int(r[6] or 0),
                 'duration_ms': int(r[7] or 0), 'status': r[8],
                 'cost': float(r[9] or 0), 'cache_hit': bool(r[10]),
                 'rag_hit': bool(r[11])}
                for r in recent
            ],
            callers=[r[0] for r in callers],
            by_model=[
                {
                    'model': r[0], 'provider': r[1],
                    'calls': int(r[2] or 0), 'tokens': int(r[3] or 0),
                    'cost': float(r[4] or 0), 'avg_ms': int(r[5] or 0),
                    'errors': int(r[6] or 0),
                }
                for r in by_model
            ],
            hourly_trend=[
                {
                    'hour': r[0].strftime('%H:%M') if r[0] else '',
                    'calls': int(r[1] or 0),
                    'cost': float(r[2] or 0),
                    'errors': int(r[3] or 0),
                }
                for r in hourly_trend
            ],
            recent_contexts=[
                {
                    'created_at': r[0].strftime('%Y-%m-%d %H:%M') if r[0] else '',
                    'agent_name': r[1], 'context_key': r[2],
                    'ttl_minutes': int(r[3] or 0),
                    'preview': r[4] or '',
                }
                for r in recent_contexts
            ],
            caller_richness=[
                {
                    'caller': r[0],
                    'total_calls': int(r[1] or 0),
                    'rag_hits': int(r[2] or 0),
                    'mcp_orchestrated': int(r[3] or 0) if mcp_calls_table_exists and rag_query_log_exists else 0,
                    'avg_rag_feedback': round(float(r[4] or 0), 2) if mcp_calls_table_exists and rag_query_log_exists else 0,
                    'feedback_count': int(r[5] or 0) if mcp_calls_table_exists and rag_query_log_exists else 0,
                    'rag_hit_rate': (float(r[2] or 0) / float(r[1]) * 100) if r[1] else 0,
                    'mcp_rate': (float(r[3] or 0) / float(r[1]) * 100) if mcp_calls_table_exists and rag_query_log_exists and r[1] else 0,
                }
                for r in caller_richness
            ],
            error=None,
        )
    except Exception as e:
        return render_template(
            'admin/ai_calls_dashboard.html',
            active_page='obs_ai_calls',
            hours=hours, caller_filter=caller_filter,
            provider_filter=provider_filter,
            summary={}, by_provider=[], recent=[], callers=[], caller_richness=[],
            by_model=[], hourly_trend=[], recent_contexts=[],
            error='AI 呼叫資料暫時不可用，已切換安全空狀態。',
        )
    finally:
        session.close()


# ─────────────────────────────────────────────────────────────────────────────
# /observability/promotion_review — Phase 28 PromotionGate 待審核列表
# ─────────────────────────────────────────────────────────────────────────────

@admin_observability_bp.route('/promotion_review')
@login_required
def promotion_review_list():
    """awaiting_review episodes 列表（24h 內 reviewed_at IS NULL）

    Phase 39（D-1）：每筆 episode 自動跑 RAG 找 Top 3 相似已晉升 ai_insights，
    輔助人工判斷晉升價值。RAG fail-safe：失敗則 similar_insights=[]，不擋頁面。
    """
    session = get_session()
    try:
        rows = session.execute(
            sa_text("""
                SELECT id, created_at, episode_type, source_table, source_id,
                       distilled_text, quality_score, weight, promotion_status
                FROM learning_episodes
                WHERE promotion_status = 'awaiting_review'
                  AND reviewed_at IS NULL
                ORDER BY weight DESC, created_at ASC
                LIMIT 50
            """),
        ).fetchall()

        # ai_insights 全表大小（給「晉升後 KB 增長」對照）
        kb_size = 0
        try:
            kb_row = session.execute(
                sa_text("SELECT COUNT(*) FROM ai_insights"),
            ).fetchone()
            kb_size = int(kb_row[0] or 0)
        except Exception:
            pass

        episodes = [
            {'id': r[0], 'created_at': r[1].strftime('%Y-%m-%d %H:%M'),
             'episode_type': r[2], 'source_table': r[3], 'source_id': r[4],
             'distilled_text': (r[5] or '')[:600],
             'quality_score': float(r[6] or 0),
             'weight': float(r[7] or 0),
             'status': r[8],
             'similar_insights': []}
            for r in rows
        ]

        # Phase 39 D-1：對每筆 episode 跑 RAG 找 Top 3 相似已晉升
        try:
            from services.rag_service import rag_service
            for ep in episodes:
                try:
                    rag_result = rag_service.query(
                        text=ep['distilled_text'][:500],
                        caller='admin_promotion_review',
                        top_k=3,
                        threshold=0.7,
                    )
                    ep['similar_insights'] = [
                        {
                            'id': h.get('id'),
                            'insight_type': h.get('insight_type'),
                            'content': (h.get('content') or '')[:180],
                            'similarity': round(float(h.get('similarity', 0)), 3),
                            'created_at': h.get('created_at').strftime('%Y-%m-%d')
                                          if h.get('created_at') else '',
                        }
                        for h in rag_result.hits[:3]
                    ]
                except Exception:
                    pass  # 單筆 RAG 失敗不影響其餘
        except Exception:
            pass  # rag_service import 失敗（feature flag OFF）→ 略過

        # Phase 47 K-4: 蒸餾池 status 分布（30d）
        ep_distribution = session.execute(
            sa_text("""
                SELECT promotion_status, COUNT(*) AS cnt
                FROM learning_episodes
                WHERE created_at >= NOW() - INTERVAL '30 days'
                GROUP BY promotion_status ORDER BY cnt DESC
            """),
        ).fetchall()
        episode_distribution_30d = {r[0]: int(r[1] or 0) for r in ep_distribution}

        # Phase 47 K-4: ai_insights 最近 10 筆已晉升（type/created_at 視覺）
        latest_insights = session.execute(
            sa_text("""
                SELECT id, insight_type, period, product_sku, created_at,
                       LEFT(content, 160) AS preview
                FROM ai_insights
                ORDER BY created_at DESC LIMIT 10
            """),
        ).fetchall()

        # Phase 47 K-4: agent_strategy_weights TOP 12（OpenClaw 學習權重）
        strategy_weights = session.execute(
            sa_text("""
                SELECT strategy_key, weight, success_cnt, fail_cnt, updated_at
                FROM agent_strategy_weights
                ORDER BY (success_cnt + fail_cnt) DESC
                LIMIT 12
            """),
        ).fetchall()

        return render_template(
            'admin/promotion_review.html',
            active_page='obs_promotion_review',
            episodes=episodes,
            kb_size=kb_size,
            episode_distribution_30d=episode_distribution_30d,
            latest_insights=[
                {
                    'id': r[0], 'insight_type': r[1], 'period': r[2],
                    'product_sku': r[3],
                    'created_at': r[4].strftime('%Y-%m-%d %H:%M') if r[4] else '',
                    'preview': r[5] or '',
                }
                for r in latest_insights
            ],
            strategy_weights=[
                {
                    'strategy_key': r[0], 'weight': float(r[1] or 0),
                    'success': int(r[2] or 0), 'fail': int(r[3] or 0),
                    'updated_at': r[4].strftime('%Y-%m-%d') if r[4] else '',
                    'success_rate': (
                        float(r[2] or 0) / float((r[2] or 0) + (r[3] or 0)) * 100
                    ) if ((r[2] or 0) + (r[3] or 0)) > 0 else 0,
                }
                for r in strategy_weights
            ],
            error=None,
        )
    except Exception as e:
        return render_template(
            'admin/promotion_review.html',
            active_page='obs_promotion_review',
            episodes=[],
            kb_size=0,
            episode_distribution_30d={},
            latest_insights=[],
            strategy_weights=[],
            error='RAG 晉升資料暫時不可用，已切換安全空狀態。',
        )
    finally:
        session.close()


@admin_observability_bp.route('/promotion_review/approve/<int:episode_id>', methods=['POST'])
@login_required
def promotion_review_approve(episode_id: int):
    """Web 介面「通過」按鈕 — 等同於 Telegram pg_ok callback"""
    try:
        from services.learning_pipeline import promotion_gate, hash_human_approver
        # 從 Flask session 取（已過 @login_required）— 不信任 client header
        user = get_current_user() or {}
        username = user.get('username', 'web_admin')
        approver_hash = hash_human_approver(username)
        insight_id = promotion_gate.promote(episode_id)
        if insight_id:
            return jsonify({'ok': True, 'insight_id': insight_id, 'approver': approver_hash})
        return jsonify({'ok': False, 'error': 'promote failed'}), 500
    except Exception as e:
        return jsonify({'ok': False, 'error': str(e)[:200]}), 500


@admin_observability_bp.route('/promotion_review/reject/<int:episode_id>', methods=['POST'])
@login_required
def promotion_review_reject(episode_id: int):
    """Web 介面「拒絕」按鈕"""
    try:
        from services.learning_pipeline import promotion_gate
        ok = promotion_gate.reject(episode_id, 'rejected_human', detail='web admin reject')
        return jsonify({'ok': ok})
    except Exception as e:
        return jsonify({'ok': False, 'error': str(e)[:200]}), 500


# ─────────────────────────────────────────────────────────────────────────────
# /observability/quality_trend — Phase 25 caller 反饋趨勢視覺化
# ─────────────────────────────────────────────────────────────────────────────

@admin_observability_bp.route('/quality_trend')
@login_required
def quality_trend_dashboard():
    """caller × feedback 趨勢（30 日窗格）"""
    days = int(request.args.get('days', '30'))
    try:
        from services.feedback_quality_tracker import (
            compute_caller_quality_trend, get_caller_recommendations,
        )
        trends = compute_caller_quality_trend(days=days)
        recommendations = get_caller_recommendations(days=days)

        # 排序：avg_score 升序（最差先看）
        sorted_trends = sorted(
            trends.items(),
            key=lambda kv: kv[1].get('avg_score', 5),
        )

        # Phase 40 D-6: learning_episodes 各 status 分布（蒸餾池飽和度）
        episode_distribution = {}
        try:
            session = get_session()
            try:
                rows = session.execute(
                    sa_text("""
                        SELECT promotion_status, COUNT(*) AS cnt
                        FROM learning_episodes
                        WHERE created_at >= NOW() - INTERVAL ':days days'
                        GROUP BY promotion_status
                    """).bindparams(days=days),
                ).fetchall() if False else session.execute(
                    sa_text(f"""
                        SELECT promotion_status, COUNT(*) AS cnt
                        FROM learning_episodes
                        WHERE created_at >= NOW() - INTERVAL '{int(days)} days'
                        GROUP BY promotion_status
                    """),
                ).fetchall()
                episode_distribution = {r[0]: int(r[1] or 0) for r in rows}
            finally:
                session.close()
        except Exception:
            pass

        # Phase 40 D-6: 對最差 3 名 caller 跑 RAG 找根因建議
        rag_root_causes = []
        try:
            from services.rag_service import rag_service
            worst_3 = sorted_trends[:3] if len(sorted_trends) >= 3 else sorted_trends
            for caller, info in worst_3:
                if info.get('avg_score', 5) < 3.0 and info.get('total_feedback', 0) >= 3:
                    try:
                        q = (
                            f"caller {caller} 反饋分數低 平均 "
                            f"{info.get('avg_score', 0):.1f}/5 應採取什麼根因排查"
                        )
                        rag_result = rag_service.query(
                            text=q,
                            caller='admin_quality_trend',
                            top_k=2,
                            threshold=0.6,
                        )
                        if rag_result.hits:
                            rag_root_causes.append({
                                'caller': caller,
                                'avg_score': info.get('avg_score', 0),
                                'feedback_n': info.get('total_feedback', 0),
                                'hits': [
                                    {
                                        'id': h.get('id'),
                                        'insight_type': h.get('insight_type'),
                                        'content': (h.get('content') or '')[:200],
                                        'similarity': round(float(h.get('similarity', 0)), 3),
                                    }
                                    for h in rag_result.hits[:2]
                                ],
                            })
                    except Exception:
                        pass
        except Exception:
            pass

        # Phase 47 K-5: action_outcomes verdict 統計（ADR-012 閉環學習結果）
        action_outcomes_stats = []
        action_plans_status = []
        rag_overall_dist = []
        try:
            session = get_session()
            try:
                # action_outcomes verdict 分布（30d）
                ao_rows = session.execute(
                    sa_text(f"""
                        SELECT verdict, COUNT(*) AS cnt
                        FROM action_outcomes
                        WHERE created_at >= NOW() - INTERVAL '{int(days)} days'
                        GROUP BY verdict ORDER BY cnt DESC
                    """),
                ).fetchall()
                action_outcomes_stats = [{'verdict': r[0] or 'unknown', 'count': int(r[1] or 0)} for r in ao_rows]

                # action_plans status 分布（30d）
                ap_rows = session.execute(
                    sa_text(f"""
                        SELECT status, plan_type, COUNT(*) AS cnt
                        FROM action_plans
                        WHERE created_at >= NOW() - INTERVAL '{int(days)} days'
                        GROUP BY status, plan_type ORDER BY cnt DESC
                    """),
                ).fetchall()
                action_plans_status = [
                    {'status': r[0], 'plan_type': r[1] or 'misc', 'count': int(r[2] or 0)}
                    for r in ap_rows
                ]

                # rag_query_log 整體 feedback 分布（不只 caller-level，整體）
                rag_dist_rows = session.execute(
                    sa_text(f"""
                        SELECT feedback_score, COUNT(*) AS cnt
                        FROM rag_query_log
                        WHERE queried_at >= NOW() - INTERVAL '{int(days)} days'
                          AND feedback_score IS NOT NULL
                        GROUP BY feedback_score ORDER BY feedback_score
                    """),
                ).fetchall()
                rag_overall_dist = [{'score': int(r[0] or 0), 'count': int(r[1] or 0)} for r in rag_dist_rows]
            finally:
                session.close()
        except Exception:
            pass

        return render_template(
            'admin/quality_trend.html',
            active_page='obs_quality_trend',
            days=days,
            trends=[(c, info) for c, info in sorted_trends],
            recommendations=recommendations,
            episode_distribution=episode_distribution,
            rag_root_causes=rag_root_causes,
            action_outcomes_stats=action_outcomes_stats,
            action_plans_status=action_plans_status,
            rag_overall_dist=rag_overall_dist,
            error=None,
        )
    except Exception as e:
        return render_template(
            'admin/quality_trend.html',
            active_page='obs_quality_trend',
            days=days, trends=[], recommendations=[],
            episode_distribution={}, rag_root_causes=[],
            action_outcomes_stats=[], action_plans_status=[], rag_overall_dist=[],
            error='AI 品質資料暫時不可用，已切換安全空狀態。',
        )


# ─────────────────────────────────────────────────────────────────────────────
# /observability/budget — Phase 29 預算管理 + 手動 throttle
# ─────────────────────────────────────────────────────────────────────────────

@admin_observability_bp.route('/budget')
@login_required
def budget_dashboard():
    """ai_call_budgets 編輯 + 當月 spent 即時對比"""
    from datetime import datetime as _dt
    today = _dt.now()
    month_start = _dt(today.year, today.month, 1)

    session = get_session()
    try:
        ai_call_budgets_exists = bool(session.execute(
            sa_text("SELECT to_regclass('public.ai_call_budgets') IS NOT NULL")
        ).scalar())
        if ai_call_budgets_exists:
            budgets = session.execute(
                sa_text("""
                    SELECT id, period, provider, budget_usd, alert_pct, updated_at
                    FROM ai_call_budgets
                    ORDER BY period, provider NULLS FIRST
                """),
            ).fetchall()
        else:
            budgets = []

        spent_rows = session.execute(
            sa_text("""
                SELECT provider, COALESCE(SUM(cost_usd), 0) AS spent
                FROM ai_calls
                WHERE called_at >= :ms
                GROUP BY provider
            """),
            {'ms': month_start},
        ).fetchall()
        spent_map = {r[0]: float(r[1] or 0) for r in spent_rows}

        # throttle 狀態
        throttle_state = {}
        try:
            from services.cost_throttle_service import get_throttle_state
            throttle_state = get_throttle_state()
        except Exception:
            pass

        rows = []
        for b in budgets:
            provider = b[2]  # 可能 None（全供應商總額）
            spent = spent_map.get(provider, 0.0) if provider else sum(spent_map.values())
            budget_usd = float(b[3] or 0)
            ratio = (spent / budget_usd) if budget_usd > 0 else 0
            rows.append({
                'id': b[0], 'period': b[1], 'provider': provider or '(all)',
                'budget_usd': budget_usd, 'alert_pct': int(b[4] or 80),
                'spent': spent, 'ratio': ratio,
                'throttled': throttle_state.get(provider, {}).get('throttled', False) if provider else False,
                'updated_at': b[5].strftime('%Y-%m-%d %H:%M') if b[5] else '-',
            })

        # Phase 47 K-3: 30d daily cost trend by provider
        cost_30d = session.execute(
            sa_text("""
                SELECT date_trunc('day', called_at)::date AS d,
                       provider, COALESCE(SUM(cost_usd), 0) AS cost
                FROM ai_calls
                WHERE called_at >= NOW() - INTERVAL '30 days'
                GROUP BY d, provider
                ORDER BY d DESC, cost DESC
            """),
        ).fetchall()
        cost_trend_30d = []
        for r in cost_30d:
            cost_trend_30d.append({
                'date': r[0].strftime('%m-%d') if r[0] else '',
                'provider': r[1],
                'cost': float(r[2] or 0),
            })

        # Phase 55 S-3: 當月各 provider cost 分布（給圓餅圖用）
        provider_cost_month = session.execute(
            sa_text("""
                SELECT provider, COALESCE(SUM(cost_usd), 0) AS cost
                FROM ai_calls
                WHERE called_at >= :ms AND cost_usd > 0
                GROUP BY provider ORDER BY cost DESC
            """),
            {'ms': month_start},
        ).fetchall()

        # Phase 47 K-3: top 5 cost-burning caller (當月)
        top_cost_callers = session.execute(
            sa_text("""
                SELECT caller, COUNT(*) AS calls,
                       COALESCE(SUM(cost_usd), 0) AS cost,
                       COALESCE(SUM(input_tokens + output_tokens), 0) AS tokens
                FROM ai_calls
                WHERE called_at >= :ms
                  AND cost_usd > 0
                GROUP BY caller
                ORDER BY cost DESC LIMIT 5
            """),
            {'ms': month_start},
        ).fetchall()

        # Phase 47 K-3: ai_price_recommendations 7d 統計
        price_rec_7d = session.execute(
            sa_text("""
                SELECT strategy, COUNT(*) AS cnt,
                       COALESCE(AVG(confidence), 0) AS avg_conf
                FROM ai_price_recommendations
                WHERE created_at >= NOW() - INTERVAL '7 days'
                GROUP BY strategy ORDER BY cnt DESC
            """),
        ).fetchall()

        # Phase 39 D-4: RAG 自動建議策略（針對超 80% 的 row）
        budget_strategies = []
        over_threshold_rows = [r for r in rows if r.get('ratio', 0) >= 0.8]
        if over_threshold_rows:
            try:
                from services.rag_service import rag_service
                top_breach = max(over_threshold_rows, key=lambda r: r.get('ratio', 0))
                query_text = (
                    f"預算超出 alert_pct provider={top_breach['provider']} "
                    f"ratio={int(top_breach['ratio']*100)}% 應採取什麼節流策略"
                )
                rag_result = rag_service.query(
                    text=query_text,
                    caller='admin_budget_dashboard',
                    top_k=3,
                    threshold=0.65,
                )
                budget_strategies = [
                    {
                        'id': h.get('id'),
                        'insight_type': h.get('insight_type'),
                        'content': (h.get('content') or '')[:240],
                        'similarity': round(float(h.get('similarity', 0)), 3),
                    }
                    for h in rag_result.hits[:3]
                ]
            except Exception:
                pass

        return render_template(
            'admin/budget.html',
            active_page='obs_budget',
            rows=rows,
            budget_strategies=budget_strategies,
            cost_trend_30d=cost_trend_30d,
            top_cost_callers=[
                {
                    'caller': r[0], 'calls': int(r[1] or 0),
                    'cost': float(r[2] or 0), 'tokens': int(r[3] or 0),
                }
                for r in top_cost_callers
            ],
            provider_cost_month=[
                {'provider': r[0], 'cost': float(r[1] or 0)}
                for r in provider_cost_month
            ],
            price_rec_7d=[
                {
                    'strategy': r[0], 'count': int(r[1] or 0),
                    'avg_confidence': round(float(r[2] or 0), 3),
                }
                for r in price_rec_7d
            ],
            error=None,
        )
    except Exception as e:
        return render_template('admin/budget.html', active_page='obs_budget', rows=[],
                                budget_strategies=[], cost_trend_30d=[],
                                top_cost_callers=[], price_rec_7d=[],
                                provider_cost_month=[],
                                error='預算資料暫時不可用，已切換安全空狀態。')
    finally:
        session.close()


@admin_observability_bp.route('/ai_calls/trigger_code_review', methods=['POST'])
@login_required
def ai_calls_trigger_code_review():
    """Phase 40 D-7 (L2 自動化)：對高錯誤率時段觸發 Code Review Pipeline。

    用途：admin 在觀測台看到某 caller 錯誤率飆高時，一鍵觸發 5-step
         pipeline (read→hermes_scan→openclaw_summary→ea_decision→nemoton_act)
         在 daemon thread 自動審查最近 commit 變更檔案，找出可能的 regression。
    """
    try:
        import subprocess
        import threading
        from services.code_review_pipeline_service import CodeReviewPipeline

        # 取最新 commit + 變更檔案
        commit_sha = subprocess.check_output(
            ['git', 'rev-parse', 'HEAD'], stderr=subprocess.DEVNULL,
        ).decode().strip()
        changed = subprocess.check_output(
            ['git', 'diff-tree', '--no-commit-id', '--name-only', '-r', commit_sha],
            stderr=subprocess.DEVNULL,
        ).decode().strip().split('\n')
        changed = [f for f in changed if f]

        if not changed:
            return jsonify({'ok': False, 'error': '最新 commit 無變更檔案'}), 400

        pipeline = CodeReviewPipeline(
            commit_sha=commit_sha,
            changed_files=changed,
            branch='main',
            deploy_type='manual_observability',
        )
        threading.Thread(target=pipeline.run, daemon=True).start()
        return jsonify({
            'ok': True,
            'pipeline_id': pipeline.pipeline_id,
            'commit_sha': commit_sha[:8],
            'changed_files_count': len(changed),
            'message': f'已觸發 Code Review (pipeline_id={pipeline.pipeline_id}) 在背景執行，'
                       f'5 step 完成後會推 Telegram 通知。',
        })
    except Exception as e:
        return jsonify({'ok': False, 'error': f'{type(e).__name__}: {str(e)[:200]}'}), 500


@admin_observability_bp.route('/ppt_audit/trigger_aider_heal', methods=['POST'])
@login_required
def ppt_audit_trigger_aider_heal():
    """Phase 40 D-8 (L2 自動化)：對失敗 PPT audit 觸發 AiderHeal 修 generator。

    用途：admin 在觀測台看到 PPT vision audit 連續失敗時，一鍵觸發 AiderHeal
         自動修 services/ppt_generator.py（或對應 template generator），
         結果會 git push 到 main 觸發 CD 自動部署。
    """
    try:
        from services.aider_heal_executor import execute_code_fix
        data = request.json or {}
        error_msg = (data.get('error_msg') or '').strip()
        pptx_filename = (data.get('pptx_filename') or '').strip()
        if not error_msg:
            return jsonify({'ok': False, 'error': '需提供 error_msg'}), 400

        # 構造 context 給 AiderHeal
        context = {
            'error_type': 'ppt_vision_audit_failure',
            'error_message': error_msg[:500],
            'target_file': 'services/ppt_generator.py',
            'pptx_filename': pptx_filename,
            'triggered_by': 'admin_observability',
        }
        result = execute_code_fix(context)
        return jsonify({
            'ok': bool(getattr(result, 'success', False)),
            'action': getattr(result, 'action', None),
            'message': getattr(result, 'message', '') or '已派出 AiderHeal',
        })
    except Exception as e:
        return jsonify({'ok': False, 'error': f'{type(e).__name__}: {str(e)[:200]}'}), 500


@admin_observability_bp.route('/api/health_indicator')
@login_required
def health_indicator_api():
    """Phase 52 P-1：給 topbar 觀測台 indicator 用的輕量 JSON API。

    回傳當前是否有「需要關注」的事件：
    - 三主機掛掉
    - 待審 episode > 0
    - 過去 1h 錯誤率 ≥ 30%
    - 預算 ≥ 90%
    """
    try:
        session = get_session()
        try:
            # 三主機最新狀態
            host_unhealthy = 0
            try:
                rows = session.execute(
                    sa_text("""
                        WITH latest AS (
                            SELECT host_label,
                                   FIRST_VALUE(healthy) OVER (
                                       PARTITION BY host_label ORDER BY probed_at DESC
                                   ) AS healthy
                            FROM host_health_probes
                            WHERE probed_at >= NOW() - INTERVAL '1 hour'
                        )
                        SELECT host_label, BOOL_AND(NOT healthy) AS down
                        FROM latest
                        GROUP BY host_label
                    """),
                ).fetchall()
                host_unhealthy = sum(1 for r in rows if r[1])
            except Exception:
                pass

            # 待審 episode
            ep_pending = 0
            try:
                ep_pending = int(session.execute(
                    sa_text("SELECT COUNT(*) FROM learning_episodes WHERE promotion_status = 'awaiting_review' AND reviewed_at IS NULL"),
                ).fetchone()[0] or 0)
            except Exception:
                pass

            # 1h 錯誤率
            error_rate = 0
            try:
                row = session.execute(
                    sa_text("""
                        SELECT COUNT(*),
                               COUNT(*) FILTER (WHERE status NOT IN ('ok','cache_only'))
                        FROM ai_calls WHERE called_at >= NOW() - INTERVAL '1 hour'
                    """),
                ).fetchone()
                total = int(row[0] or 0)
                errs = int(row[1] or 0)
                error_rate = (errs / total * 100) if total > 20 else 0
            except Exception:
                pass

            # 預算告警（任一 ≥ 90%）
            budget_alert = False
            try:
                from datetime import datetime as _dt
                today = _dt.now()
                ms = _dt(today.year, today.month, 1)
                bgs = session.execute(
                    sa_text("""
                        SELECT b.budget_usd,
                               COALESCE((SELECT SUM(cost_usd) FROM ai_calls
                                         WHERE called_at >= :ms
                                           AND (b.provider IS NULL OR provider = b.provider)), 0) AS spent
                        FROM ai_call_budgets b
                    """),
                    {'ms': ms},
                ).fetchall()
                for budget, spent in bgs:
                    if budget and float(budget) > 0 and float(spent) / float(budget) >= 0.9:
                        budget_alert = True
                        break
            except Exception:
                pass

            alert_count = (
                host_unhealthy
                + (1 if ep_pending > 0 else 0)
                + (1 if error_rate >= 30 else 0)
                + (1 if budget_alert else 0)
            )
            return jsonify({
                'ok': True,
                'alert_count': alert_count,
                'host_unhealthy': host_unhealthy,
                'ep_pending': ep_pending,
                'error_rate_high': error_rate >= 30,
                'budget_alert': budget_alert,
                'tooltip': _build_indicator_tooltip(host_unhealthy, ep_pending, error_rate, budget_alert),
            })
        finally:
            session.close()
    except Exception as e:
        return jsonify({'ok': False, 'error': f'{type(e).__name__}: {str(e)[:200]}'}), 500


def _build_indicator_tooltip(host_unhealthy, ep_pending, error_rate, budget_alert) -> str:
    parts = []
    if host_unhealthy:
        parts.append(f"{host_unhealthy} 主機異常")
    if ep_pending > 0:
        parts.append(f"{ep_pending} 待審")
    if error_rate >= 30:
        parts.append(f"錯誤率 {error_rate:.0f}%")
    if budget_alert:
        parts.append("預算 ≥ 90%")
    if not parts:
        return "AI 觀測台（一切正常）"
    return "AI 觀測台 — " + " / ".join(parts)


def _latest_host_probe_unhealthy(host_label: str, window_minutes: int = 30) -> bool:
    """查 DB 最新 host_health_probe，作為 AutoHeal 按鈕的真實狀態來源。

    `_is_unhealthy()` 只代表 Ollama client 在 30 秒 TTL 內的記憶體標記；
    scheduler / 頁面 probe 寫入的是 `host_health_probes`。L2 AutoHeal 入口
    必須接受 DB 最新探針異常，避免 Telegram 或 Web 顯示主機已掛、按鈕卻拒絕執行。
    """
    if not host_label:
        return False
    session = get_session()
    try:
        row = session.execute(
            sa_text("""
                SELECT healthy
                FROM host_health_probes
                WHERE host_label = :label
                  AND probed_at >= NOW() - (:minutes || ' minutes')::interval
                ORDER BY probed_at DESC
                LIMIT 1
            """),
            {'label': host_label, 'minutes': int(window_minutes)},
        ).fetchone()
        return bool(row is not None and row[0] is False)
    except Exception:
        return False
    finally:
        session.close()


@admin_observability_bp.route('/playbooks/toggle/<int:playbook_id>', methods=['POST'])
@login_required
def playbook_toggle(playbook_id: int):
    """Phase 50 N-3：一鍵啟用/停用 playbook（is_active 翻轉）。

    用途：在 host_health 觀測台直接管理 AutoHeal playbook，
         不需 SSH 188 改 DB。
    """
    try:
        session = get_session()
        try:
            row = session.execute(
                sa_text("SELECT id, name, is_active FROM playbooks WHERE id = :id"),
                {'id': playbook_id},
            ).fetchone()
            if not row:
                return jsonify({'ok': False, 'error': f'playbook #{playbook_id} 不存在'}), 404
            new_active = not bool(row[2])
            session.execute(
                sa_text("UPDATE playbooks SET is_active = :a, updated_at = NOW() WHERE id = :id"),
                {'a': new_active, 'id': playbook_id},
            )
            session.commit()
            return jsonify({
                'ok': True,
                'playbook_id': playbook_id,
                'name': row[1],
                'is_active': new_active,
                'message': f'Playbook 「{row[1]}」已{"啟用" if new_active else "停用"}',
            })
        finally:
            session.close()
    except Exception as e:
        return jsonify({'ok': False, 'error': f'{type(e).__name__}: {str(e)[:200]}'}), 500


@admin_observability_bp.route('/host_health/trigger_autoheal', methods=['POST'])
@login_required
def host_health_trigger_autoheal():
    """Phase 40 D-9 (L2 自動化)：對掛掉的主機觸發 AutoHeal playbook。

    用途：admin 看到某台 Ollama 主機標記 unhealthy 時一鍵觸發 AutoHeal
         (ADR-013) 跑對應 playbook（DOCKER_RESTART / SSH_CMD / ALERT_ONLY）。

    安全：只能對已標記 unhealthy 的 host 觸發；不接受任意 host URL（防 SSRF）。
    """
    try:
        data = request.json or {}
        host_label = (data.get('host_label') or '').strip()
        from services.auto_heal_service import auto_heal_service
        from services.ollama_service import _is_unhealthy, OLLAMA_HOST_PRIMARY, OLLAMA_HOST_SECONDARY, OLLAMA_HOST_FALLBACK

        # 白名單對應
        host_map = {
            'Primary (GCP)': OLLAMA_HOST_PRIMARY,
            'Secondary (GCP)': OLLAMA_HOST_SECONDARY,
            'Fallback (111)': OLLAMA_HOST_FALLBACK,
        }
        host_url = host_map.get(host_label)
        if not host_url:
            return jsonify({'ok': False, 'error': f'未知 host_label: {host_label}'}), 400

        if not (_is_unhealthy(host_url) or _latest_host_probe_unhealthy(host_label)):
            return jsonify({
                'ok': False,
                'error': f'{host_label} 目前未標記異常，無需 AutoHeal',
            }), 400

        result = auto_heal_service.handle_exception(
            error_type='ollama_unhealthy',
            context={
                'host_label': host_label,
                'host_url': host_url,
                'error_message': f'Ollama host {host_label} ({host_url}) marked unhealthy',
                'triggered_by': 'admin_observability',
            },
        )
        return jsonify({
            'ok': bool(getattr(result, 'success', False)),
            'action': getattr(result, 'action', None),
            'message': getattr(result, 'message', '') or 'AutoHeal 已派遣',
        })
    except Exception as e:
        return jsonify({'ok': False, 'error': f'{type(e).__name__}: {str(e)[:200]}'}), 500


@admin_observability_bp.route('/budget/force_throttle', methods=['POST'])
@login_required
def budget_force_throttle():
    """Phase 39 D-4 (L2 自動化)：立即強制執行 cost_throttle evaluate（不等 hourly cron）。

    用途：admin 在觀測台看到 ratio 飆超 110% 時不需等下次 cron，
         直接點按鈕強制 re-evaluate 三主機 throttle 狀態（claude→gemini fallback 立即生效）。
    """
    try:
        from services.cost_throttle_service import (
            evaluate_throttle_status, is_cost_throttle_enabled,
        )
        if not is_cost_throttle_enabled():
            return jsonify({
                'ok': False,
                'error': 'COST_THROTTLE_ENABLED=false（先設環境變數）',
            }), 400
        new_state = evaluate_throttle_status()
        throttled = [p for p, s in new_state.items() if s.get('throttled')]
        return jsonify({
            'ok': True,
            'throttled_providers': throttled,
            'state': new_state,
            'message': f'已立即重算 throttle 狀態，被節流的 provider：{throttled or "（無）"}',
        })
    except Exception as e:
        return jsonify({'ok': False, 'error': f'{type(e).__name__}: {str(e)[:200]}'}), 500


@admin_observability_bp.route('/budget/update/<int:budget_id>', methods=['POST'])
@login_required
def budget_update(budget_id: int):
    """更新 budget_usd / alert_pct"""
    try:
        new_budget = float(request.json.get('budget_usd'))
        new_alert = int(request.json.get('alert_pct', 80))
        if new_budget <= 0 or not (1 <= new_alert <= 100):
            return jsonify({'ok': False, 'error': 'invalid range'}), 400

        session = get_session()
        try:
            session.execute(
                sa_text("""
                    UPDATE ai_call_budgets
                    SET budget_usd = :b, alert_pct = :a, updated_at = NOW()
                    WHERE id = :id
                """),
                {'b': new_budget, 'a': new_alert, 'id': budget_id},
            )
            session.commit()
            return jsonify({'ok': True})
        finally:
            session.close()
    except Exception as e:
        return jsonify({'ok': False, 'error': str(e)[:200]}), 500


# ─────────────────────────────────────────────────────────────────────────────
# /observability/ppt_audit_history — Phase 29 PPT 視覺審核歷史
# ─────────────────────────────────────────────────────────────────────────────

@admin_observability_bp.route('/ppt_audit_history')
@login_required
def ppt_audit_history():
    """掃 reports/ 目錄列近 7 日 .pptx 檔 + 從 ppt_audit_results 表讀 audit 歷史（Phase 38）"""
    import os
    import time
    reports_dir = 'reports'
    files = []
    audit_records = []
    error = None

    try:
        if not os.path.isdir(reports_dir):
            error = f'{reports_dir} 目錄不存在'
        else:
            cutoff = time.time() - 7 * 86400
            for f in os.listdir(reports_dir):
                if not f.lower().endswith('.pptx'):
                    continue
                full = os.path.join(reports_dir, f)
                # symlink 防護：reports/ 內不接受 symlink，避免目錄逃逸（Critic MEDIUM #2）
                if os.path.islink(full):
                    continue
                try:
                    mtime = os.path.getmtime(full)
                    if mtime >= cutoff:
                        files.append({
                            'name': f,
                            'size_kb': round(os.path.getsize(full) / 1024, 1),
                            'mtime': datetime.fromtimestamp(mtime).strftime('%Y-%m-%d %H:%M'),
                            'mtime_ts': mtime,
                        })
                except OSError:
                    continue
            files.sort(key=lambda x: x['mtime_ts'], reverse=True)
    except Exception as e:
        error = f'{type(e).__name__}: {str(e)[:200]}'

    # Phase 38：讀過去 7 日 audit 歷史
    try:
        session = get_session()
        try:
            audit_rows = session.execute(
                sa_text("""
                    SELECT audited_at, pptx_filename, audit_status,
                           issues_count, confidence, duration_ms, error_msg
                    FROM ppt_audit_results
                    WHERE audited_at >= NOW() - INTERVAL '7 days'
                    ORDER BY audited_at DESC
                    LIMIT 100
                """),
            ).fetchall()
            audit_records = [
                {
                    'audited_at': r[0].strftime('%Y-%m-%d %H:%M'),
                    'pptx_filename': r[1],
                    'audit_status': r[2],
                    'issues_count': int(r[3] or 0),
                    'confidence': float(r[4] or 0),
                    'duration_ms': int(r[5] or 0),
                    'error_msg': r[6],
                }
                for r in audit_rows
            ]
        finally:
            session.close()
    except Exception:
        pass  # 表可能尚未 migration，失敗安全

    # PPT vision 啟用狀態
    try:
        from services.ppt_vision_service import is_ppt_vision_enabled
        vision_enabled = is_ppt_vision_enabled()
    except Exception:
        vision_enabled = False

    # Phase 47 K-6: 30d 統計 + top failure files
    audit_30d_stats = {}
    top_failure_files = []
    try:
        s_ppt = get_session()
        try:
            stat_row = s_ppt.execute(
                sa_text("""
                    SELECT COUNT(*),
                           COUNT(*) FILTER (WHERE audit_status = 'passed'),
                           COUNT(*) FILTER (WHERE audit_status = 'failed'),
                           COUNT(*) FILTER (WHERE audit_status = 'skipped'),
                           COUNT(*) FILTER (WHERE audit_status = 'error'),
                           COALESCE(AVG(confidence) FILTER (WHERE audit_status = 'passed'), 0),
                           COALESCE(SUM(issues_count), 0)
                    FROM ppt_audit_results
                    WHERE audited_at >= NOW() - INTERVAL '30 days'
                """),
            ).fetchone()
            total_30d = int(stat_row[0] or 0)
            audit_30d_stats = {
                'total': total_30d,
                'passed': int(stat_row[1] or 0),
                'failed': int(stat_row[2] or 0),
                'skipped': int(stat_row[3] or 0),
                'error': int(stat_row[4] or 0),
                'avg_confidence': round(float(stat_row[5] or 0), 3),
                'total_issues': int(stat_row[6] or 0),
                'pass_rate': (float(stat_row[1] or 0) / total_30d * 100) if total_30d else 0,
            }

            top_fail_rows = s_ppt.execute(
                sa_text("""
                    SELECT pptx_filename, COUNT(*) AS attempts,
                           SUM(issues_count) AS total_issues,
                           MAX(audited_at) AS last_audit
                    FROM ppt_audit_results
                    WHERE audit_status IN ('failed', 'error')
                      AND audited_at >= NOW() - INTERVAL '30 days'
                    GROUP BY pptx_filename
                    ORDER BY attempts DESC, total_issues DESC LIMIT 10
                """),
            ).fetchall()
            top_failure_files = [
                {
                    'filename': r[0], 'attempts': int(r[1] or 0),
                    'total_issues': int(r[2] or 0),
                    'last_audit': r[3].strftime('%Y-%m-%d %H:%M') if r[3] else '',
                }
                for r in top_fail_rows
            ]
        finally:
            s_ppt.close()
    except Exception:
        pass

    # Phase 41 E-2: 對最近 3 筆 failed audit 跑 RAG 找相似修法
    rag_fixes = []
    failed_records = [r for r in audit_records if r.get('audit_status') in ('failed', 'error')][:3]
    if failed_records:
        try:
            from services.rag_service import rag_service
            for fr in failed_records:
                try:
                    err_text = fr.get('error_msg') or 'PPT vision audit failed'
                    rag_result = rag_service.query(
                        text=f"PPT 視覺審核失敗 {err_text[:200]} 怎麼修",
                        caller='admin_ppt_audit',
                        top_k=2,
                        threshold=0.6,
                    )
                    if rag_result.hits:
                        rag_fixes.append({
                            'pptx_filename': fr.get('pptx_filename'),
                            'audited_at': fr.get('audited_at'),
                            'error_msg': (err_text or '')[:160],
                            'hits': [
                                {
                                    'id': h.get('id'),
                                    'insight_type': h.get('insight_type'),
                                    'content': (h.get('content') or '')[:200],
                                    'similarity': round(float(h.get('similarity', 0)), 3),
                                }
                                for h in rag_result.hits[:2]
                            ],
                        })
                except Exception:
                    pass
        except Exception:
            pass

    return render_template(
        'admin/ppt_audit_history.html',
        active_page='obs_ppt_audit',
        files=files,
        audit_records=audit_records,
        rag_fixes=rag_fixes,
        audit_30d_stats=audit_30d_stats,
        top_failure_files=top_failure_files,
        vision_enabled=vision_enabled,
        error=error,
    )


# ─────────────────────────────────────────────────────────────────────────────
# /observability/host_health — 三主機 + MCP 健康度
# ─────────────────────────────────────────────────────────────────────────────

@admin_observability_bp.route('/host_health')
@login_required
def host_health_dashboard():
    """三主機 Ollama + 4 個 MCP server 即時健康（同時寫入 host_health_probes 留歷史）"""
    import time as _time
    ollama_hosts = []
    probe_records = []  # 收集本次 probe 結果以批次寫 DB
    try:
        from services.ollama_service import (
            OLLAMA_HOST_PRIMARY, OLLAMA_HOST_SECONDARY, OLLAMA_HOST_FALLBACK,
            _is_unhealthy, _unhealthy_marks,
        )
        import requests as _r
        for label, host in [
            ('Primary (GCP)', OLLAMA_HOST_PRIMARY),
            ('Secondary (GCP)', OLLAMA_HOST_SECONDARY),
            ('Fallback (111)', OLLAMA_HOST_FALLBACK),
        ]:
            entry = {'label': label, 'host': host, 'healthy': False,
                     'unhealthy_mark': _is_unhealthy(host), 'models': []}
            t0 = _time.monotonic()
            err = None
            try:
                resp = _r.get(f"{host.rstrip('/')}/api/tags", timeout=3)
                if resp.status_code == 200:
                    entry['healthy'] = True
                    entry['models'] = [
                        m.get('name', '') for m in resp.json().get('models', [])
                    ][:15]
                else:
                    err = f"HTTP {resp.status_code}"
            except Exception as e:
                err = f"{type(e).__name__}: {str(e)[:200]}"
            response_ms = int((_time.monotonic() - t0) * 1000)
            probe_records.append({
                'host_label': label, 'host_url': host, 'healthy': entry['healthy'],
                'unhealthy_mark': entry['unhealthy_mark'],
                'models_count': len(entry['models']), 'response_ms': response_ms,
                'error_msg': err,
            })
            ollama_hosts.append(entry)
    except Exception:
        pass

    # Phase 38：寫入 host_health_probes 留歷史（失敗安全，不擋頁面渲染）
    if probe_records:
        try:
            _session = get_session()
            try:
                for rec in probe_records:
                    _session.execute(
                        sa_text("""
                            INSERT INTO host_health_probes
                                (host_label, host_url, healthy, unhealthy_mark,
                                 models_count, response_ms, error_msg)
                            VALUES
                                (:host_label, :host_url, :healthy, :unhealthy_mark,
                                 :models_count, :response_ms, :error_msg)
                        """),
                        rec,
                    )
                _session.commit()
            finally:
                _session.close()
        except Exception:
            pass  # DB 寫入失敗不影響頁面顯示

    # MCP server 健康
    mcp_status = {}
    try:
        from services.mcp_router import mcp_router
        mcp_status = mcp_router.health_check()
    except Exception:
        pass

    # cost throttle 狀態
    throttle_state = {}
    try:
        from services.cost_throttle_service import get_throttle_state
        throttle_state = get_throttle_state()
    except Exception:
        pass

    # Phase 38：讀過去 24h 三主機健康歷史（給趨勢卡片）
    health_history = []
    mcp_24h = []  # Phase 39 D-2: MCP 24h 各 server 工作量
    aiops_summary = {}  # Phase 39 D-5: incidents + heal_logs 7d 統計
    try:
        _session2 = get_session()
        try:
            history_rows = _session2.execute(
                sa_text("""
                    SELECT host_label,
                           COUNT(*) FILTER (WHERE healthy)            AS up_count,
                           COUNT(*) FILTER (WHERE NOT healthy)        AS down_count,
                           COALESCE(AVG(response_ms) FILTER (WHERE healthy), 0) AS avg_ms,
                           COUNT(*) AS total
                    FROM host_health_probes
                    WHERE probed_at >= NOW() - INTERVAL '24 hours'
                    GROUP BY host_label
                    ORDER BY host_label
                """),
            ).fetchall()
            health_history = [
                {
                    'host_label': r[0],
                    'up_count': int(r[1] or 0),
                    'down_count': int(r[2] or 0),
                    'avg_ms': int(r[3] or 0),
                    'total': int(r[4] or 0),
                    'uptime_pct': (float(r[1] or 0) / float(r[4]) * 100) if r[4] else 0,
                }
                for r in history_rows
            ]

            # Phase 39 D-5：incidents + heal_logs 過去 7d 統計
            try:
                inc_rows = _session2.execute(
                    sa_text("""
                        SELECT
                            COUNT(*) AS total_incidents,
                            COUNT(*) FILTER (WHERE status = 'open') AS open_count,
                            COUNT(*) FILTER (WHERE status = 'resolved') AS resolved_count,
                            COUNT(*) FILTER (WHERE severity = 'P0') AS p0_count,
                            COUNT(*) FILTER (WHERE severity = 'P1') AS p1_count
                        FROM incidents
                        WHERE created_at >= NOW() - INTERVAL '7 days'
                    """),
                ).fetchone()
                heal_rows = _session2.execute(
                    sa_text("""
                        SELECT
                            COUNT(*) AS total_heals,
                            COUNT(*) FILTER (WHERE result = 'success') AS heal_success,
                            COUNT(*) FILTER (WHERE result = 'failed') AS heal_failed,
                            COALESCE(AVG(duration_ms) FILTER (WHERE result = 'success'), 0) AS avg_ms
                        FROM heal_logs
                        WHERE created_at >= NOW() - INTERVAL '7 days'
                    """),
                ).fetchone()
                aiops_summary = {
                    'incidents_total': int(inc_rows[0] or 0),
                    'incidents_open': int(inc_rows[1] or 0),
                    'incidents_resolved': int(inc_rows[2] or 0),
                    'incidents_p0': int(inc_rows[3] or 0),
                    'incidents_p1': int(inc_rows[4] or 0),
                    'heals_total': int(heal_rows[0] or 0),
                    'heals_success': int(heal_rows[1] or 0),
                    'heals_failed': int(heal_rows[2] or 0),
                    'heals_avg_ms': int(heal_rows[3] or 0),
                    'heal_success_rate': (
                        float(heal_rows[1] or 0) / float(heal_rows[0]) * 100
                    ) if heal_rows[0] else 0,
                }

                # Phase 54 R-3: heal 7d daily success rate sparkline
                heal_daily = _session2.execute(
                    sa_text("""
                        SELECT date_trunc('day', created_at)::date AS d,
                               COUNT(*) AS total,
                               COUNT(*) FILTER (WHERE result = 'success') AS ok
                        FROM heal_logs
                        WHERE created_at >= NOW() - INTERVAL '7 days'
                        GROUP BY d ORDER BY d ASC
                    """),
                ).fetchall()
                aiops_summary['heal_sparkline'] = [
                    {
                        'date': r[0].strftime('%m-%d') if r[0] else '',
                        'total': int(r[1] or 0),
                        'ok': int(r[2] or 0),
                        'rate': (float(r[2] or 0) / float(r[1]) * 100) if r[1] else 0,
                    }
                    for r in heal_daily
                ]
            except Exception:
                aiops_summary = {}

            # Phase 39 D-2：MCP 24h 工作量（每個 server）
            mcp_rows = _session2.execute(
                sa_text("""
                    SELECT server,
                           COUNT(*) AS total_calls,
                           COUNT(*) FILTER (WHERE status = 'ok') AS ok_calls,
                           COUNT(*) FILTER (WHERE cache_hit) AS cache_hits,
                           COALESCE(SUM(cost_usd), 0) AS total_cost,
                           COALESCE(AVG(duration_ms), 0) AS avg_ms,
                           COUNT(DISTINCT tool) AS tools_used
                    FROM mcp_calls
                    WHERE called_at >= NOW() - INTERVAL '24 hours'
                    GROUP BY server
                    ORDER BY total_calls DESC
                """),
            ).fetchall()
            mcp_24h = [
                {
                    'server': r[0],
                    'total_calls': int(r[1] or 0),
                    'ok_calls': int(r[2] or 0),
                    'cache_hits': int(r[3] or 0),
                    'total_cost': float(r[4] or 0),
                    'avg_ms': int(r[5] or 0),
                    'tools_used': int(r[6] or 0),
                    'success_rate': (float(r[2] or 0) / float(r[1]) * 100) if r[1] else 0,
                    'cache_rate': (float(r[3] or 0) / float(r[1]) * 100) if r[1] else 0,
                }
                for r in mcp_rows
            ]
        finally:
            _session2.close()
    except Exception:
        pass  # 表可能尚未 migration，失敗安全

    # Phase 47 K-1: incidents + heal_logs 詳細列表 + playbooks 排行 + backup + embed queue
    recent_incidents = []
    recent_heals = []
    playbook_ranking = []
    backup_history = []
    embed_queue_pending = 0
    embed_queue_failed = 0
    try:
        s3 = get_session()
        try:
            inc_rows = s3.execute(
                sa_text("""
                    SELECT id, created_at, task_name, error_type, severity,
                           status, error_message, retry_count, resolved_at
                    FROM incidents
                    ORDER BY created_at DESC LIMIT 10
                """),
            ).fetchall()
            recent_incidents = [
                {
                    'id': r[0], 'created_at': r[1].strftime('%Y-%m-%d %H:%M'),
                    'task_name': r[2], 'error_type': r[3], 'severity': r[4],
                    'status': r[5], 'error_message': (r[6] or '')[:200],
                    'retry_count': int(r[7] or 0),
                    'resolved_at': r[8].strftime('%Y-%m-%d %H:%M') if r[8] else None,
                }
                for r in inc_rows
            ]
            heal_rows = s3.execute(
                sa_text("""
                    SELECT h.id, h.created_at, h.action_type, h.result,
                           h.duration_ms, h.action_detail, h.incident_id,
                           i.error_type
                    FROM heal_logs h
                    LEFT JOIN incidents i ON i.id = h.incident_id
                    ORDER BY h.created_at DESC LIMIT 10
                """),
            ).fetchall()
            recent_heals = [
                {
                    'id': r[0], 'created_at': r[1].strftime('%Y-%m-%d %H:%M'),
                    'action_type': r[2], 'result': r[3],
                    'duration_ms': int(r[4] or 0),
                    'action_detail': (r[5] or '')[:160],
                    'incident_id': r[6], 'error_type': r[7],
                }
                for r in heal_rows
            ]

            # playbooks 庫排行（success_count + fail_count + 是否 active）
            pb_rows = s3.execute(
                sa_text("""
                    SELECT id, name, error_type, action_type, severity_min,
                           success_count, fail_count, is_active, cooldown_min
                    FROM playbooks
                    ORDER BY (success_count + fail_count) DESC, success_count DESC
                    LIMIT 12
                """),
            ).fetchall()
            playbook_ranking = [
                {
                    'id': int(r[0]),
                    'name': r[1], 'error_type': r[2], 'action_type': r[3],
                    'severity': r[4], 'success': int(r[5] or 0),
                    'fail': int(r[6] or 0), 'is_active': bool(r[7]),
                    'cooldown_min': int(r[8] or 0),
                    'success_rate': (
                        float(r[5] or 0) / float((r[5] or 0) + (r[6] or 0)) * 100
                    ) if ((r[5] or 0) + (r[6] or 0)) > 0 else 0,
                }
                for r in pb_rows
            ]

            # backup_log 7d 歷史
            bk_rows = s3.execute(
                sa_text("""
                    SELECT created_at, backup_type, status, file_size_bytes,
                           duration_seconds, error_message
                    FROM backup_log
                    WHERE created_at >= NOW() - INTERVAL '7 days'
                    ORDER BY created_at DESC LIMIT 10
                """),
            ).fetchall()
            backup_history = [
                {
                    'created_at': r[0].strftime('%Y-%m-%d %H:%M'),
                    'backup_type': r[1], 'status': r[2],
                    'size_mb': round(float(r[3] or 0) / (1024 * 1024), 1),
                    'duration_s': round(float(r[4] or 0), 1),
                    'error': (r[5] or '')[:120],
                }
                for r in bk_rows
            ]

            # embedding_retry_queue pending / failed
            embed_q = s3.execute(
                sa_text("""
                    SELECT
                        COUNT(*) FILTER (WHERE status = 'pending'),
                        COUNT(*) FILTER (WHERE status = 'failed')
                    FROM embedding_retry_queue
                """),
            ).fetchone()
            embed_queue_pending = int(embed_q[0] or 0)
            embed_queue_failed = int(embed_q[1] or 0)
        finally:
            s3.close()
    except Exception:
        pass

    return render_template(
        'admin/host_health.html',
        active_page='obs_host_health',
        ollama_hosts=ollama_hosts,
        mcp_status=mcp_status,
        throttle_state=throttle_state,
        health_history=health_history,
        mcp_24h=mcp_24h,
        aiops_summary=aiops_summary,
        recent_incidents=recent_incidents,
        recent_heals=recent_heals,
        playbook_ranking=playbook_ranking,
        backup_history=backup_history,
        embed_queue_pending=embed_queue_pending,
        embed_queue_failed=embed_queue_failed,
    )