#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
services/feedback_quality_tracker.py
Operation Ollama-First v5.0 / Phase 25 — deepening the feedback loop
(caller-level quality trend).

Design principles:
  - Pure SQL statistics, zero LLM cost (no LLM is run to evaluate feedback).
  - Derive each caller's recent feedback trend from
    rag_query_log.feedback_score (1-5). The original design note also mentions
    learning_episodes.promotion_status; this module only queries rag_query_log.
  - Repeated thumbs-down (>= N in the window) -> flag the caller as
    "needs review" (consumed by the daily token report and monthly ROI report).
  - Sustained thumbs-up -> raise the caller's trust score.
  - Never change caller behaviour directly (avoids a runaway self-correcting
    loop); this module only produces *suggestions* for the operator.

Public API:
  - compute_caller_quality_trend()  — per-caller feedback summary for the last N days
  - get_caller_recommendations()    — used by Section 6 of the daily token report
  - should_demote_caller(caller)    — whether a caller should be demoted
  - render_quality_summary(trends)  — plain-text rendering of a trend summary
"""
from __future__ import annotations

import logging
from datetime import datetime, timedelta
from typing import Dict, Any, List, Optional

logger = logging.getLogger(__name__)


# ─────────────────────────────────────────────────────────────────────────────
# Feedback rule thresholds (plain Python constants, tunable)
# ─────────────────────────────────────────────────────────────────────────────
DEMOTE_THUMBS_DOWN_THRESHOLD = 5   # thumbs-down (score=1) count >= 5 in window -> suggest review
DEMOTE_AVG_SCORE_THRESHOLD = 2.5   # window avg score < 2.5 -> suggest review
PROMOTE_THUMBS_UP_THRESHOLD = 10   # thumbs-up (score=5) count >= 10 in window -> raise trust
PROMOTE_AVG_SCORE_THRESHOLD = 4.5  # window avg score >= 4.5 -> raise trust
TREND_DAYS = 7                     # default trend window, in days

# Below this many scored rows a caller's trend is reported as 'no_data'.
_MIN_FEEDBACK_FOR_TREND = 3
# Minimum scored rows before a low average alone may trigger review/demotion.
_MIN_FEEDBACK_FOR_ACTION = 5

# Marker shown in front of each caller in the rendered summary.
_TREND_EMOJI = {
    'positive': '✅',
    'negative': '⚠️',
    'neutral': '➖',
    'no_data': '❓',
}
# Fallback marker for a trend label that is not in _TREND_EMOJI.
_UNKNOWN_TREND_EMOJI = '❓'

# Per-caller aggregation over the window. Relies on the SQL aggregate FILTER
# clause, so the backing database must support it. Callers with no scored rows
# in the window are dropped by the HAVING clause.
_TREND_SQL = """
    SELECT
        caller,
        COUNT(*) FILTER (WHERE feedback_score IS NOT NULL) AS total_feedback,
        COUNT(*) FILTER (WHERE feedback_score = 5) AS thumbs_up,
        COUNT(*) FILTER (WHERE feedback_score = 1) AS thumbs_down,
        AVG(feedback_score) FILTER (WHERE feedback_score IS NOT NULL) AS avg_score
    FROM rag_query_log
    WHERE queried_at >= :since
    GROUP BY caller
    HAVING COUNT(*) FILTER (WHERE feedback_score IS NOT NULL) > 0
"""


# ─────────────────────────────────────────────────────────────────────────────
# Internal helpers
# ─────────────────────────────────────────────────────────────────────────────
def _classify_trend(total: int, avg_score: float) -> str:
    """Map a feedback count and (unrounded) average score to a trend label."""
    if total < _MIN_FEEDBACK_FOR_TREND:
        return 'no_data'
    if avg_score >= PROMOTE_AVG_SCORE_THRESHOLD:
        return 'positive'
    if avg_score < DEMOTE_AVG_SCORE_THRESHOLD:
        return 'negative'
    return 'neutral'


def _summarize_row(row: Any) -> Dict[str, Any]:
    """Turn one aggregated row (caller, total, up, down, avg) into a summary dict."""
    total = int(row[1] or 0)
    avg_score = float(row[4] or 0)
    return {
        'total_feedback': total,
        'thumbs_up': int(row[2] or 0),
        'thumbs_down': int(row[3] or 0),
        # Rounded for display only; the trend is classified on the exact value
        # so a caller sitting just under a threshold is not misreported.
        'avg_score': round(avg_score, 2),
        'trend': _classify_trend(total, avg_score),
    }


def _recommend(caller: str, info: Dict[str, Any], days: int) -> Optional[Dict[str, Any]]:
    """Return the single recommendation for one caller, or None if no rule fires.

    Rules are checked in priority order and the first match wins.
    """
    # Rule 1: thumbs-down count in the window reached the threshold -> review.
    if info['thumbs_down'] >= DEMOTE_THUMBS_DOWN_THRESHOLD:
        return {
            'caller': caller,
            'action': 'review',
            'reason': (
                f'近 {days} 日 👎 反饋 {info["thumbs_down"]} 次 '
                f'(avg {info["avg_score"]}/5) — 建議統帥檢視 prompt 或切換 model'
            ),
        }

    # Rule 2: low average with enough samples -> review.
    if info['trend'] == 'negative' and info['total_feedback'] >= _MIN_FEEDBACK_FOR_ACTION:
        return {
            'caller': caller,
            'action': 'review',
            'reason': f'近 {days} 日 avg={info["avg_score"]}/5 過低 (n={info["total_feedback"]})',
        }

    # Rule 3: many thumbs-up and a high average -> promote.
    if (info['thumbs_up'] >= PROMOTE_THUMBS_UP_THRESHOLD
            and info['trend'] == 'positive'):
        return {
            'caller': caller,
            'action': 'promote',
            'reason': (
                f'近 {days} 日 👍 反饋 {info["thumbs_up"]} 次 '
                f'(avg {info["avg_score"]}/5) — 可考慮關閉 Gemini fallback 純走 Ollama'
            ),
        }

    return None


# ─────────────────────────────────────────────────────────────────────────────
# Public API
# ─────────────────────────────────────────────────────────────────────────────
def compute_caller_quality_trend(
    days: int = TREND_DAYS,
) -> Dict[str, Dict[str, Any]]:
    """Return a per-caller feedback summary for the last ``days`` days.

    The function is best-effort: if the database layer cannot be imported, a
    session cannot be obtained, or the query fails, a warning is logged and an
    empty dict is returned.

    Args:
        days: Size of the look-back window in days, counted back from
            ``datetime.now()``.

    Returns:
        {
            'caller_name': {
                'total_feedback': N,   # rows with a non-NULL feedback_score
                'thumbs_up': X,        # score = 5
                'thumbs_down': Y,      # score = 1
                'avg_score': float,    # rounded to 2 decimals
                'trend': 'positive' | 'neutral' | 'negative' | 'no_data',
            }
        }
        ``'no_data'`` means fewer than 3 scored rows in the window.
    """
    # Imported lazily so this module can be imported without the DB stack.
    try:
        from sqlalchemy import text as sa_text
        from database.manager import get_session
    except Exception as exc:
        logger.warning('[FeedbackTracker] DB import failed: %s', exc)
        return {}

    since = datetime.now() - timedelta(days=days)
    session = None
    try:
        # Acquiring the session sits inside the guard so a connection failure
        # degrades to "no data" exactly like a failed query does.
        session = get_session()
        rows = session.execute(sa_text(_TREND_SQL), {'since': since}).fetchall()
    except Exception as exc:
        logger.warning('[FeedbackTracker] SQL failed (rag_query_log 可能未建): %s', exc)
        return {}
    finally:
        # Single cleanup point for both the success and the failure path.
        if session is not None:
            session.close()

    return {row[0]: _summarize_row(row) for row in rows}


def get_caller_recommendations(days: int = TREND_DAYS) -> List[Dict[str, Any]]:
    """Build review/promote suggestions for the daily token / monthly ROI report.

    Each caller yields at most one recommendation; review rules take priority
    over the promote rule.

    Args:
        days: Look-back window passed to ``compute_caller_quality_trend``.

    Returns:
        [
            {'caller': '...', 'action': 'review' | 'promote', 'reason': '...'},
            ...
        ]
    """
    trends = compute_caller_quality_trend(days=days)
    candidates = (_recommend(caller, info, days) for caller, info in trends.items())
    return [rec for rec in candidates if rec is not None]


def should_demote_caller(caller: str, days: int = TREND_DAYS) -> bool:
    """Return True if ``caller`` should be demoted because of poor feedback.

    A caller qualifies only with at least 5 scored rows in the window and
    either enough thumbs-down or a 'negative' trend. The trend (classified on
    the unrounded average) is used rather than the rounded ``avg_score`` so the
    verdict agrees with ``get_caller_recommendations``.

    Per the original author's note this is intended for ai_call_logger start-up
    / dynamic adjustment in the RAG worker, and is not enabled by default.

    Args:
        caller: Caller name as stored in ``rag_query_log.caller``.
        days: Look-back window passed to ``compute_caller_quality_trend``.
    """
    info = compute_caller_quality_trend(days=days).get(caller)
    if not info or info['total_feedback'] < _MIN_FEEDBACK_FOR_ACTION:
        return False
    return (info['thumbs_down'] >= DEMOTE_THUMBS_DOWN_THRESHOLD
            or info['trend'] == 'negative')


def render_quality_summary(trends: Dict[str, Dict[str, Any]]) -> str:
    """Render a trend summary as text for the daily token / monthly ROI report.

    Only the 10 callers with the lowest average score are shown, lowest first.

    Args:
        trends: Mapping as returned by ``compute_caller_quality_trend``.

    Returns:
        One line per caller joined with newlines, or a placeholder string when
        ``trends`` is empty.
    """
    if not trends:
        return '(無反饋資料)'

    worst_first = sorted(trends.items(), key=lambda kv: kv[1]['avg_score'])
    lines = []
    for caller, info in worst_first[:10]:
        # An unexpected trend label gets the fallback marker instead of
        # aborting the whole report.
        emoji = _TREND_EMOJI.get(info['trend'], _UNKNOWN_TREND_EMOJI)
        lines.append(
            f" {emoji} {caller}: avg {info['avg_score']:.2f}/5 "
            f"(👍{info['thumbs_up']} 👎{info['thumbs_down']} n={info['total_feedback']})"
        )
    return '\n'.join(lines)


__all__ = [
    'compute_caller_quality_trend',
    'get_caller_recommendations',
    'should_demote_caller',
    'render_quality_summary',
    'DEMOTE_THUMBS_DOWN_THRESHOLD',
    'DEMOTE_AVG_SCORE_THRESHOLD',
    'PROMOTE_THUMBS_UP_THRESHOLD',
    'PROMOTE_AVG_SCORE_THRESHOLD',
]