# ewoooc/services/feedback_quality_tracker.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
services/feedback_quality_tracker.py
Operation Ollama-First v5.0 / Phase 25 — 反饋環深化（caller-level quality 趨勢）

設計原則：
- 純 SQL 統計，零 LLM 成本（不跑 LLM 評估反饋）
- 從 rag_query_log.feedback_score (1-5) + learning_episodes.promotion_status
  推算每個 caller 的近 7/30 日反饋趨勢
- 持續 👎 ≥ N 次 → 標記 caller 為「需檢視」（給 token 日報 + ROI 月報用）
- 持續 👍 → 提升 caller 信任分
- 不直接改 caller 行為（避免循環自動修正失控）；只產出「建議」給統帥

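Public API:
- compute_caller_quality_trend() — per-caller feedback summary for the last N days
- get_caller_recommendations() — review/promote suggestions for Section 6 of the token daily report
- should_demote_caller(caller) — whether a caller should be demoted (sustained 👎)
- render_quality_summary(trends) — render a trend summary as text for the token daily / ROI monthly reports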
"""

from __future__ import annotations
import logging
from datetime import datetime, timedelta
from typing import Dict, Any, List, Optional

logger = logging.getLogger(__name__)


# ─────────────────────────────────────────────────────────────────────────────
# Feedback rule thresholds (plain Python constants; tune as needed)
# ─────────────────────────────────────────────────────────────────────────────
DEMOTE_THUMBS_DOWN_THRESHOLD = 5      # 👎 (score = 1) count in the window at or above which a caller is flagged for review
DEMOTE_AVG_SCORE_THRESHOLD = 2.5       # average score in the window below this value => 'negative' trend / review
PROMOTE_THUMBS_UP_THRESHOLD = 10       # 👍 (score = 5) count in the window required before a promote suggestion
PROMOTE_AVG_SCORE_THRESHOLD = 4.5      # average score in the window at or above this value => 'positive' trend
TREND_DAYS = 7                         # default look-back window, in days, for every function in this module


# ─────────────────────────────────────────────────────────────────────────────
# Public API
# ─────────────────────────────────────────────────────────────────────────────
def compute_caller_quality_trend(
    days: int = TREND_DAYS,
) -> Dict[str, Dict[str, Any]]:
    """Summarise per-caller feedback over the last ``days`` days.

    Runs one aggregate SQL query against ``rag_query_log`` (no LLM involved),
    grouping rows by ``caller``. Callers without any non-NULL
    ``feedback_score`` in the window are left out by the ``HAVING`` clause.

    Args:
        days: Size of the look-back window in days, counted back from
            ``datetime.now()``.

    Returns:
        A mapping ``caller -> summary`` where each summary contains:

        - ``'total_feedback'``: number of rows with a non-NULL score
        - ``'thumbs_up'``: number of rows with score 5
        - ``'thumbs_down'``: number of rows with score 1
        - ``'avg_score'``: mean score, rounded to 2 decimals
        - ``'trend'``: ``'no_data'`` when fewer than 3 scored rows exist,
          otherwise ``'positive'`` / ``'negative'`` / ``'neutral'`` based on
          the unrounded mean and the module thresholds

        An empty dict is returned (after logging a warning) when the DB
        modules cannot be imported, when no session can be obtained, or when
        the query itself fails.
    """
    try:
        from sqlalchemy import text as sa_text
        from database.manager import get_session
    except Exception as exc:
        logger.warning('[FeedbackTracker] DB import failed: %s', exc)
        return {}

    since = datetime.now() - timedelta(days=days)

    # Obtaining the session is guarded as well, so that every DB-side failure
    # degrades to "no data" instead of only some of them.
    try:
        session = get_session()
    except Exception as exc:
        logger.warning('[FeedbackTracker] get_session failed: %s', exc)
        return {}

    try:
        rows = session.execute(
            sa_text("""
                SELECT
                  caller,
                  COUNT(*) FILTER (WHERE feedback_score IS NOT NULL) AS total_feedback,
                  COUNT(*) FILTER (WHERE feedback_score = 5) AS thumbs_up,
                  COUNT(*) FILTER (WHERE feedback_score = 1) AS thumbs_down,
                  AVG(feedback_score) FILTER (WHERE feedback_score IS NOT NULL) AS avg_score
                FROM rag_query_log
                WHERE queried_at >= :since
                GROUP BY caller
                HAVING COUNT(*) FILTER (WHERE feedback_score IS NOT NULL) > 0
            """),
            {'since': since},
        ).fetchall()
    except Exception as exc:
        logger.warning('[FeedbackTracker] SQL failed (rag_query_log 可能未建): %s', exc)
        return {}
    finally:
        # The only close point: `finally` also runs on the `return` above,
        # so the session is closed exactly once on success and on failure.
        session.close()

    result: Dict[str, Dict[str, Any]] = {}
    for caller, raw_total, raw_up, raw_down, raw_avg in rows:
        # `or 0` maps NULL aggregates to zero before the numeric conversion.
        total = int(raw_total or 0)
        thumbs_up = int(raw_up or 0)
        thumbs_down = int(raw_down or 0)
        avg_score = float(raw_avg or 0)

        # Classify on the unrounded mean; too few samples means no verdict.
        if total < 3:
            trend = 'no_data'
        elif avg_score >= PROMOTE_AVG_SCORE_THRESHOLD:
            trend = 'positive'
        elif avg_score < DEMOTE_AVG_SCORE_THRESHOLD:
            trend = 'negative'
        else:
            trend = 'neutral'

        result[caller] = {
            'total_feedback': total,
            'thumbs_up': thumbs_up,
            'thumbs_down': thumbs_down,
            'avg_score': round(avg_score, 2),
            'trend': trend,
        }
    return result


def get_caller_recommendations(days: int = TREND_DAYS) -> List[Dict[str, Any]]:
    """Turn the per-caller feedback trend into review/promote suggestions.

    Intended for Section 6 of the token daily report / ROI monthly report.
    Each caller produces at most one suggestion; the rules are tried in this
    order and the first match wins:

    1. thumbs-down count >= ``DEMOTE_THUMBS_DOWN_THRESHOLD`` -> ``'review'``
    2. ``'negative'`` trend with at least 5 scored rows -> ``'review'``
    3. thumbs-up count >= ``PROMOTE_THUMBS_UP_THRESHOLD`` together with a
       ``'positive'`` trend -> ``'promote'``

    Args:
        days: Look-back window in days, forwarded to
            ``compute_caller_quality_trend``.

    Returns:
        A list of ``{'caller': ..., 'action': 'review' | 'promote',
        'reason': ...}`` dicts, in the iteration order of the trend mapping.
        Callers matching no rule are omitted.
    """
    suggestions = []

    for caller, info in compute_caller_quality_trend(days=days).items():
        if info['thumbs_down'] >= DEMOTE_THUMBS_DOWN_THRESHOLD:
            # Rule 1: too many thumbs-down in the window.
            action = 'review'
            reason = (
                f'近 {days} 日 👎 反饋 {info["thumbs_down"]} 次 '
                f'(avg {info["avg_score"]}/5) — 建議統帥檢視 prompt 或切換 model'
            )
        elif info['trend'] == 'negative' and info['total_feedback'] >= 5:
            # Rule 2: low average backed by enough samples.
            action = 'review'
            reason = f'近 {days} 日 avg={info["avg_score"]}/5 過低 (n={info["total_feedback"]})'
        elif (info['thumbs_up'] >= PROMOTE_THUMBS_UP_THRESHOLD
                and info['trend'] == 'positive'):
            # Rule 3: many thumbs-up and a high average.
            action = 'promote'
            reason = (
                f'近 {days} 日 👍 反饋 {info["thumbs_up"]} 次 '
                f'(avg {info["avg_score"]}/5) — 可考慮關閉 Gemini fallback 純走 Ollama'
            )
        else:
            # No rule matched: nothing to report for this caller.
            continue

        suggestions.append({'caller': caller, 'action': action, 'reason': reason})

    return suggestions


def should_demote_caller(caller: str, days: int = TREND_DAYS) -> bool:
    """Decide whether ``caller`` should be demoted because of poor feedback.

    Meant for ai_call_logger start-up / dynamic RAG worker adjustment, but
    not enabled by default.

    Args:
        caller: Caller name to look up in the trend summary.
        days: Look-back window in days, forwarded to
            ``compute_caller_quality_trend``.

    Returns:
        ``False`` when the caller has no summary or fewer than 5 scored rows.
        Otherwise ``True`` when its thumbs-down count reaches
        ``DEMOTE_THUMBS_DOWN_THRESHOLD`` or its (rounded) average score is
        below ``DEMOTE_AVG_SCORE_THRESHOLD``.
    """
    stats = compute_caller_quality_trend(days=days).get(caller)

    # Unknown caller, or an empty summary: never demote.
    if not stats:
        return False
    # Too few samples to justify a demotion.
    if stats['total_feedback'] < 5:
        return False

    if stats['thumbs_down'] >= DEMOTE_THUMBS_DOWN_THRESHOLD:
        return True
    return stats['avg_score'] < DEMOTE_AVG_SCORE_THRESHOLD


def render_quality_summary(trends: Dict[str, Dict[str, Any]]) -> str:
    """Render a feedback trend summary as text (token daily / ROI monthly report).

    Args:
        trends: Mapping ``caller -> summary`` with the keys ``'avg_score'``,
            ``'trend'``, ``'thumbs_up'``, ``'thumbs_down'`` and
            ``'total_feedback'``.

    Returns:
        A placeholder string when ``trends`` is empty; otherwise one line per
        caller for the 10 callers with the lowest ``avg_score`` (lowest
        first), joined with newlines.
    """
    if not trends:
        return '（無反饋資料）'

    trend_icons = {'positive': '✅', 'negative': '⚠️', 'neutral': '➖', 'no_data': '❓'}

    # Sorting the keys is stable, so ties keep the mapping's own order.
    ranked = sorted(trends, key=lambda name: trends[name]['avg_score'])

    rendered = []
    for caller in ranked[:10]:  # only the 10 lowest averages are shown
        info = trends[caller]
        emoji = trend_icons[info['trend']]
        rendered.append(
            f"  {emoji} {caller}: avg {info['avg_score']:.2f}/5 "
            f"(👍{info['thumbs_up']} 👎{info['thumbs_down']} n={info['total_feedback']})"
        )
    return '\n'.join(rendered)


# Names exported by a star-import of this module: the four public functions
# followed by the four demote/promote thresholds.
# NOTE(review): TREND_DAYS is not listed here — confirm that is intentional.
__all__ = [
    'compute_caller_quality_trend',
    'get_caller_recommendations',
    'should_demote_caller',
    'render_quality_summary',
    'DEMOTE_THUMBS_DOWN_THRESHOLD',
    'DEMOTE_AVG_SCORE_THRESHOLD',
    'PROMOTE_THUMBS_UP_THRESHOLD',
    'PROMOTE_AVG_SCORE_THRESHOLD',
]