All checks were successful
CD Pipeline / deploy (push) Successful in 3m2s
Operation Ollama-First v5.0 / Phase 25 — 反饋環自主學習深化 services/feedback_quality_tracker.py (180+ 行) - 純 SQL 統計,零 LLM 成本 - 4 個閾值常數(demote 👎×5/avg<2.5 / promote 👍×10/avg>=4.5) - compute_caller_quality_trend(days=7) — 取近 N 日各 caller 反饋 - get_caller_recommendations() — 給 token 日報/ROI 月報用 • 規則 1: 👎 ≥ 5 次 → review • 規則 2: avg < 2.5 + 樣本足 → review • 規則 3: 👍 ≥ 10 + avg ≥ 4.5 → promote(建議關閉 Gemini fallback) - should_demote_caller(caller) — 自動降權判斷(戰役預設不啟用) - render_quality_summary() — 給訊息用 emoji 摘要 ROI 月報整合(services/roi_report_service.py): - 加 Section 「💬 Caller 反饋趨勢(30 日)」TOP 10 by 最低 avg - 加 Section 「🔮 智能建議」最多 3 條(review / promote) - 失敗 swallow 不影響月報主流程 訊息範例: 💬 Caller 反饋趨勢(30 日) ⚠️ openclaw_qa: avg 1.85/5 (👍2 👎8 n=12) ➖ hermes_analyst: avg 3.10/5 (👍5 👎3 n=10) ✅ ppt_gemini: avg 4.75/5 (👍12 👎0 n=15) 🔮 智能建議 ⚠️ openclaw_qa: 近 30 日 👎 反饋 8 次 (avg 1.85/5) — 建議統帥檢視 prompt 或切換 model ✅ ppt_gemini: 近 30 日 👍 反饋 12 次 — 可考慮關閉 Gemini fallback 純走 Ollama tests/test_feedback_quality_tracker.py (10 tests 全綠) - 4 閾值常數 / DB fail 安全 / 空 trends 容錯 - demote 規則(👎 多次)/ promote 規則(👍 多次)/ neutral 不觸發 - should_demote_caller 樣本不足保護 - trend 分類(positive/negative/neutral/no_data)正確 依 ADR-032 RAG 自主學習迴圈 + ADR-033 護欄 #1 不直接改 caller 行為(避循環自動修正失控),只產出建議給統帥審視。 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
206 lines
8.0 KiB
Python
206 lines
8.0 KiB
Python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
services/feedback_quality_tracker.py
Operation Ollama-First v5.0 / Phase 25 — deepening the feedback loop
(caller-level quality trends).

Design principles:
- Pure SQL statistics with zero LLM cost (no LLM is run to evaluate feedback).
- Each caller's recent feedback trend (7-day default window; other windows
  such as 30 days can be requested) is derived from
  rag_query_log.feedback_score (1-5).
  NOTE(review): the original notes also list
  learning_episodes.promotion_status as an input, but no code in this module
  queries that table — confirm whether that part is still planned.
- Thumbs-down count >= N within the window -> the caller is flagged as
  "needs review" (consumed by the token daily report and the ROI monthly
  report).
- Sustained thumbs-up -> a "promote" recommendation (the original notes
  describe this as raising the caller's trust score).
- Caller behaviour is never changed directly (to avoid a runaway
  self-correction loop); the module only produces recommendations for the
  commander to review.

Public API:
- compute_caller_quality_trend() — per-caller feedback summary for the last N days
- get_caller_recommendations()   — recommendations for the token daily report, Section 6
- should_demote_caller(caller)   — whether a caller should be demoted (too much thumbs-down)
- render_quality_summary(trends) — text rendering of a trend summary
"""
|
||
|
||
from __future__ import annotations
|
||
import logging
|
||
from datetime import datetime, timedelta
|
||
from typing import Dict, Any, List, Optional
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
# Feedback rule thresholds (plain Python constants, tunable).
# "Window" below means the `days` argument of the public functions, which
# defaults to TREND_DAYS.
# ─────────────────────────────────────────────────────────────────────────────
DEMOTE_THUMBS_DOWN_THRESHOLD = 5     # >= 5 thumbs-down (score = 1) in the window -> suggest review
DEMOTE_AVG_SCORE_THRESHOLD = 2.5     # window average score < 2.5 -> suggest review
PROMOTE_THUMBS_UP_THRESHOLD = 10     # >= 10 thumbs-up (score = 5) in the window -> suggest promotion
PROMOTE_AVG_SCORE_THRESHOLD = 4.5    # window average score >= 4.5 -> suggest promotion
TREND_DAYS = 7                       # default trend window, in days
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
# Public API
|
||
# ─────────────────────────────────────────────────────────────────────────────
|
||
def _classify_trend(total: int, avg_score: float) -> str:
    """Map a caller's sample count and average score to a trend label."""
    if total < 3:
        # Fewer than three scored rows: the average is not meaningful yet.
        return 'no_data'
    if avg_score >= PROMOTE_AVG_SCORE_THRESHOLD:
        return 'positive'
    if avg_score < DEMOTE_AVG_SCORE_THRESHOLD:
        return 'negative'
    return 'neutral'


def compute_caller_quality_trend(
    days: int = TREND_DAYS,
) -> Dict[str, Dict[str, Any]]:
    """Summarise per-caller feedback over the last ``days`` days.

    Runs a single aggregate query over ``rag_query_log`` restricted to rows
    with ``queried_at >= now - days`` and grouped by ``caller``. Callers with
    no scored rows in the window are excluded by the ``HAVING`` clause.

    Args:
        days: Size of the look-back window in days.

    Returns:
        A dict keyed by caller name. Each value contains:

        - ``total_feedback``: number of rows with a non-NULL score
        - ``thumbs_up``: number of rows with score = 5
        - ``thumbs_down``: number of rows with score = 1
        - ``avg_score``: average score, rounded to two decimals
        - ``trend``: ``'positive'``, ``'neutral'``, ``'negative'`` or
          ``'no_data'`` (classified from the unrounded average)

        An empty dict is returned when the DB modules cannot be imported, the
        session cannot be opened, or the query fails. Each failure is logged
        at WARNING level and never raised.
    """
    # Imported lazily so that a missing DB stack degrades to "no data"
    # instead of breaking module import.
    try:
        from sqlalchemy import text as sa_text
        from database.manager import get_session
    except Exception as exc:
        logger.warning('[FeedbackTracker] DB import failed: %s', exc)
        return {}

    # Naive local timestamp, as in the original implementation.
    # NOTE(review): assumes queried_at is stored in the same convention — confirm.
    since = datetime.now() - timedelta(days=days)

    # Opening the session is part of the best-effort contract too: a failure
    # here yields "no data" just like a failed query does.
    try:
        session = get_session()
    except Exception as exc:
        logger.warning('[FeedbackTracker] get_session failed: %s', exc)
        return {}

    try:
        rows = session.execute(
            sa_text("""
                SELECT
                    caller,
                    COUNT(*) FILTER (WHERE feedback_score IS NOT NULL) AS total_feedback,
                    COUNT(*) FILTER (WHERE feedback_score = 5) AS thumbs_up,
                    COUNT(*) FILTER (WHERE feedback_score = 1) AS thumbs_down,
                    AVG(feedback_score) FILTER (WHERE feedback_score IS NOT NULL) AS avg_score
                FROM rag_query_log
                WHERE queried_at >= :since
                GROUP BY caller
                HAVING COUNT(*) FILTER (WHERE feedback_score IS NOT NULL) > 0
            """),
            {'since': since},
        ).fetchall()
    except Exception as exc:
        logger.warning('[FeedbackTracker] SQL failed (rag_query_log 可能未建): %s', exc)
        return {}
    finally:
        # Single close point; runs on both the success and the failure path.
        session.close()

    result: Dict[str, Dict[str, Any]] = {}
    for row in rows:
        total = int(row[1] or 0)
        # float() also normalises driver-specific numeric types for AVG().
        avg_score = float(row[4] or 0)
        result[row[0]] = {
            'total_feedback': total,
            'thumbs_up': int(row[2] or 0),
            'thumbs_down': int(row[3] or 0),
            'avg_score': round(avg_score, 2),
            'trend': _classify_trend(total, avg_score),
        }
    return result
|
||
|
||
|
||
def get_caller_recommendations(days: int = TREND_DAYS) -> List[Dict[str, Any]]:
    """Build review/promote recommendations for the daily and monthly reports.

    Each caller returned by ``compute_caller_quality_trend(days=days)`` is
    checked against three rules in order; the first rule that matches yields
    one recommendation, and callers matching none are skipped.

    1. thumbs-down count >= ``DEMOTE_THUMBS_DOWN_THRESHOLD`` -> ``'review'``
    2. trend is ``'negative'`` with at least 5 scored rows -> ``'review'``
    3. thumbs-up count >= ``PROMOTE_THUMBS_UP_THRESHOLD`` and trend is
       ``'positive'`` -> ``'promote'``

    Args:
        days: Look-back window in days, forwarded to the trend query and
            echoed in the reason text.

    Returns:
        A list of ``{'caller': ..., 'action': 'review' | 'promote',
        'reason': ...}`` dicts, in the iteration order of the trend dict.
    """
    suggestions = []

    for name, stats in compute_caller_quality_trend(days=days).items():
        if stats['thumbs_down'] >= DEMOTE_THUMBS_DOWN_THRESHOLD:
            # Rule 1: too many thumbs-down in the window.
            action = 'review'
            reason = (
                f"近 {days} 日 👎 反饋 {stats['thumbs_down']} 次 "
                f"(avg {stats['avg_score']}/5) — 建議統帥檢視 prompt 或切換 model"
            )
        elif stats['trend'] == 'negative' and stats['total_feedback'] >= 5:
            # Rule 2: low average backed by enough samples.
            action = 'review'
            reason = f"近 {days} 日 avg={stats['avg_score']}/5 過低 (n={stats['total_feedback']})"
        elif (stats['thumbs_up'] >= PROMOTE_THUMBS_UP_THRESHOLD
                and stats['trend'] == 'positive'):
            # Rule 3: many thumbs-up and a high average.
            action = 'promote'
            reason = (
                f"近 {days} 日 👍 反饋 {stats['thumbs_up']} 次 "
                f"(avg {stats['avg_score']}/5) — 可考慮關閉 Gemini fallback 純走 Ollama"
            )
        else:
            continue

        suggestions.append({'caller': name, 'action': action, 'reason': reason})

    return suggestions
|
||
|
||
|
||
def should_demote_caller(caller: str, days: int = TREND_DAYS) -> bool:
    """Decide whether ``caller`` qualifies for demotion.

    Per the original notes this is meant for ai_call_logger start-up and
    dynamic adjustment in the RAG worker, but it is not enabled by default
    in this campaign.

    Args:
        caller: Caller name to look up in the trend summary.
        days: Look-back window in days, forwarded to the trend query.

    Returns:
        ``False`` when the caller has no trend entry or fewer than 5 scored
        rows. Otherwise ``True`` if its thumbs-down count reaches
        ``DEMOTE_THUMBS_DOWN_THRESHOLD`` or its (rounded) average score is
        below ``DEMOTE_AVG_SCORE_THRESHOLD``.
    """
    stats = compute_caller_quality_trend(days=days).get(caller)

    # Small-sample guard: never demote on missing or thin data.
    if not stats:
        return False
    if stats['total_feedback'] < 5:
        return False

    if stats['thumbs_down'] >= DEMOTE_THUMBS_DOWN_THRESHOLD:
        return True
    return stats['avg_score'] < DEMOTE_AVG_SCORE_THRESHOLD
|
||
|
||
|
||
def render_quality_summary(
    trends: Dict[str, Dict[str, Any]],
    *,
    limit: int = 10,
) -> str:
    """Render a trend summary as text for the daily and monthly reports.

    Callers are sorted by ascending ``avg_score`` (worst first) and at most
    ``limit`` of them are rendered, one per line, each prefixed with an
    emoji badge for its trend.

    Args:
        trends: Mapping of caller name to a stats dict with the keys
            ``avg_score``, ``thumbs_up``, ``thumbs_down``, ``total_feedback``
            and ``trend``, as produced by ``compute_caller_quality_trend``.
        limit: Maximum number of callers to render. Defaults to 10; negative
            values are treated as 0.

    Returns:
        The rendered lines joined with newlines, or a fixed placeholder
        string when ``trends`` is empty.
    """
    if not trends:
        return '(無反饋資料)'

    # Built once instead of on every loop iteration.
    badges = {'positive': '✅', 'negative': '⚠️', 'neutral': '➖', 'no_data': '❓'}

    worst_first = sorted(trends.items(), key=lambda kv: kv[1]['avg_score'])
    lines = [
        # An unrecognised or missing trend label gets the "unknown" badge
        # instead of raising KeyError and losing the whole summary.
        f" {badges.get(info.get('trend'), '❓')} {caller}: avg {info['avg_score']:.2f}/5 "
        f"(👍{info['thumbs_up']} 👎{info['thumbs_down']} n={info['total_feedback']})"
        for caller, info in worst_first[:max(limit, 0)]
    ]
    return '\n'.join(lines)
|
||
|
||
|
||
# Names exported by a star-import: the four public functions followed by the
# four rule thresholds. TREND_DAYS is not part of this list.
__all__ = [
    'compute_caller_quality_trend',
    'get_caller_recommendations',
    'should_demote_caller',
    'render_quality_summary',
    'DEMOTE_THUMBS_DOWN_THRESHOLD',
    'DEMOTE_AVG_SCORE_THRESHOLD',
    'PROMOTE_THUMBS_UP_THRESHOLD',
    'PROMOTE_AVG_SCORE_THRESHOLD',
]
|