diff --git a/services/feedback_quality_tracker.py b/services/feedback_quality_tracker.py
new file mode 100644
index 0000000..2d33b90
--- /dev/null
+++ b/services/feedback_quality_tracker.py
@@ -0,0 +1,205 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+services/feedback_quality_tracker.py
+Operation Ollama-First v5.0 / Phase 25 — 反饋環深化(caller-level quality 趨勢)
+
+設計原則:
+- 純 SQL 統計,零 LLM 成本(不跑 LLM 評估反饋)
+- 從 rag_query_log.feedback_score (1-5) 推算每個 caller 的近 7/30 日反饋趨勢
+  (learning_episodes.promotion_status 目前未納入本模組的查詢)
+- 持續 👎 ≥ N 次 → 標記 caller 為「需檢視」(給 token 日報 + ROI 月報用)
+- 持續 👍 → 提升 caller 信任分
+- 不直接改 caller 行為(避免循環自動修正失控);只產出「建議」給統帥
+
+Public API:
+- compute_caller_quality_trend() — 取近 N 日各 caller 反饋摘要
+- get_caller_recommendations() — 給 token 日報 Section 6 用
+- should_demote_caller(caller) — 判斷是否該降權(持續 👎)
+"""
+
+from __future__ import annotations
+import logging
+from datetime import datetime, timedelta
+from typing import Dict, Any, List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Feedback rule thresholds (plain Python constants, tunable)
+# ─────────────────────────────────────────────────────────────────────────────
+DEMOTE_THUMBS_DOWN_THRESHOLD = 5  # thumbs-down (score == 1) count in the window at/above which review is suggested
+DEMOTE_AVG_SCORE_THRESHOLD = 2.5  # window average score below this -> review suggested
+PROMOTE_THUMBS_UP_THRESHOLD = 10  # thumbs-up (score == 5) count in the window needed for a promote suggestion
+PROMOTE_AVG_SCORE_THRESHOLD = 4.5  # window average score at/above this -> trend is 'positive'
+TREND_DAYS = 7  # default look-back window, in days
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Public API
+# ─────────────────────────────────────────────────────────────────────────────
+def compute_caller_quality_trend(days: int = TREND_DAYS) -> Dict[str, Dict[str, Any]]:
+    """Summarise per-caller feedback in ``rag_query_log`` for the last ``days`` days.
+
+    Rows with a NULL ``feedback_score`` are ignored; callers with none are omitted.
+
+    Args:
+        days: Look-back window in days, counted back from ``datetime.now()``.
+
+    Returns:
+        ``{caller: summary}`` with keys ``total_feedback``, ``thumbs_up``
+        (score == 5), ``thumbs_down`` (score == 1), ``avg_score`` (rounded to
+        2 decimals) and ``trend`` (``'no_data'`` below 3 samples, otherwise
+        ``'positive'`` / ``'negative'`` / ``'neutral'``). Returns ``{}`` when
+        the DB imports, the session factory, or the query fail.
+    """
+    try:
+        from sqlalchemy import text as sa_text
+        from database.manager import get_session
+    except Exception as exc:
+        logger.warning('[FeedbackTracker] DB import failed: %s', exc)
+        return {}
+
+    since = datetime.now() - timedelta(days=days)
+    session = None
+    try:
+        session = get_session()  # inside the try: a failing factory must also yield {}
+        rows = session.execute(
+            sa_text("""
+                SELECT
+                    caller,
+                    COUNT(*) FILTER (WHERE feedback_score IS NOT NULL) AS total_feedback,
+                    COUNT(*) FILTER (WHERE feedback_score = 5) AS thumbs_up,
+                    COUNT(*) FILTER (WHERE feedback_score = 1) AS thumbs_down,
+                    AVG(feedback_score) FILTER (WHERE feedback_score IS NOT NULL) AS avg_score
+                FROM rag_query_log
+                WHERE queried_at >= :since
+                GROUP BY caller
+                HAVING COUNT(*) FILTER (WHERE feedback_score IS NOT NULL) > 0
+            """),
+            {'since': since},
+        ).fetchall()
+    except Exception as exc:
+        logger.warning('[FeedbackTracker] SQL failed (rag_query_log 可能未建): %s', exc)
+        return {}
+    finally:
+        if session is not None:  # sole close point, reached on success and on failure
+            session.close()
+
+    result: Dict[str, Dict[str, Any]] = {}
+    for r in rows:
+        caller = r[0]
+        total = int(r[1] or 0)
+        thumbs_up = int(r[2] or 0)
+        thumbs_down = int(r[3] or 0)
+        avg_score = float(r[4] or 0)
+
+        # Classify on the unrounded average; under 3 samples counts as no data.
+        if total < 3:
+            trend = 'no_data'
+        elif avg_score >= PROMOTE_AVG_SCORE_THRESHOLD:
+            trend = 'positive'
+        elif avg_score < DEMOTE_AVG_SCORE_THRESHOLD:
+            trend = 'negative'
+        else:
+            trend = 'neutral'
+
+        result[caller] = {
+            'total_feedback': total,
+            'thumbs_up': thumbs_up,
+            'thumbs_down': thumbs_down,
+            'avg_score': round(avg_score, 2),
+            'trend': trend,
+        }
+    return result
+
+
+def get_caller_recommendations(days: int = TREND_DAYS) -> List[Dict[str, Any]]:
+    """Turn the per-caller trend summary into review / promote suggestions.
+
+    Consumed by the token daily report and the ROI monthly report (Section 6).
+    The first matching rule wins for each caller:
+
+    1. ``thumbs_down`` >= ``DEMOTE_THUMBS_DOWN_THRESHOLD``   -> ``review``
+    2. ``trend == 'negative'`` with at least 5 scored rows -> ``review``
+    3. ``thumbs_up`` >= ``PROMOTE_THUMBS_UP_THRESHOLD`` and
+       ``trend == 'positive'``                             -> ``promote``
+
+    Args:
+        days: Look-back window forwarded to ``compute_caller_quality_trend``.
+
+    Returns:
+        A list of ``{'caller', 'action', 'reason'}`` dicts, in the iteration
+        order of the trend mapping; callers matching no rule are left out.
+    """
+    suggestions = []
+    per_caller = compute_caller_quality_trend(days=days)
+
+    for name, stats in per_caller.items():
+        if stats['thumbs_down'] >= DEMOTE_THUMBS_DOWN_THRESHOLD:
+            # Rule 1: too many thumbs-down in the window.
+            action = 'review'
+            reason = (
+                f'近 {days} 日 👎 反饋 {stats["thumbs_down"]} 次 '
+                f'(avg {stats["avg_score"]}/5) — 建議統帥檢視 prompt 或切換 model'
+            )
+        elif stats['trend'] == 'negative' and stats['total_feedback'] >= 5:
+            # Rule 2: low average backed by enough samples.
+            action = 'review'
+            reason = f'近 {days} 日 avg={stats["avg_score"]}/5 過低 (n={stats["total_feedback"]})'
+        elif (stats['thumbs_up'] >= PROMOTE_THUMBS_UP_THRESHOLD
+              and stats['trend'] == 'positive'):
+            # Rule 3: many thumbs-up together with a positive trend.
+            action = 'promote'
+            reason = (
+                f'近 {days} 日 👍 反饋 {stats["thumbs_up"]} 次 '
+                f'(avg {stats["avg_score"]}/5) — 可考慮關閉 Gemini fallback 純走 Ollama'
+            )
+        else:
+            continue  # no rule matched: nothing to suggest for this caller
+
+        suggestions.append({'caller': name, 'action': action, 'reason': reason})
+
+    return suggestions
+
+
+def should_demote_caller(caller: str, days: int = TREND_DAYS) -> bool:
+    """Return True when ``caller`` has >= 5 scored rows and persistently poor feedback.
+
+    ``trend`` is checked too, since it is classified before ``avg_score`` is rounded.
+    Original note: meant for ai_call_logger / RAG worker tuning; off by default.
+    """
+    info = compute_caller_quality_trend(days=days).get(caller)
+    if not info or info['total_feedback'] < 5:
+        return False
+    low_avg = info.get('trend') == 'negative' or info['avg_score'] < DEMOTE_AVG_SCORE_THRESHOLD
+    return info['thumbs_down'] >= DEMOTE_THUMBS_DOWN_THRESHOLD or low_avg
+
+
+def render_quality_summary(trends: Dict[str, Dict[str, Any]]) -> str:
+    """Render up to 10 callers, lowest ``avg_score`` first, one text line each.
+
+    Returns a fixed placeholder string when ``trends`` is empty.
+    """
+    if not trends:
+        return '(無反饋資料)'
+    icons = {'positive': '✅', 'negative': '⚠️', 'neutral': '➖', 'no_data': '❓'}
+    worst_first = sorted(trends.items(), key=lambda pair: pair[1]['avg_score'])[:10]
+    return '\n'.join(
+        f" {icons[s['trend']]} {name}: avg {s['avg_score']:.2f}/5 "
+        f"(👍{s['thumbs_up']} 👎{s['thumbs_down']} n={s['total_feedback']})"
+        for name, s in worst_first
+    )
+
+
+__all__ = [  # names exported by ``from ... import *``; TREND_DAYS is not listed
+    'compute_caller_quality_trend',
+    'get_caller_recommendations',
+    'should_demote_caller',
+    'render_quality_summary',
+    'DEMOTE_THUMBS_DOWN_THRESHOLD',
+    'DEMOTE_AVG_SCORE_THRESHOLD',
+    'PROMOTE_THUMBS_UP_THRESHOLD',
+    'PROMOTE_AVG_SCORE_THRESHOLD',
+]
diff --git a/services/roi_report_service.py b/services/roi_report_service.py
index 63be39a..b52c88a 100644
--- a/services/roi_report_service.py
+++ b/services/roi_report_service.py
@@ -155,6 +155,26 @@ def render_roi_report(stats: Dict[str, Any]) -> str:
if BASELINE['gemini_monthly_tokens'] else 0
)
+ # Phase 25 整合:caller-level feedback 趨勢
+ feedback_summary = ''
+ recommendations_block = ''
+ try:
+ from services.feedback_quality_tracker import (
+ compute_caller_quality_trend, get_caller_recommendations,
+ render_quality_summary,
+ )
+ trends = compute_caller_quality_trend(days=30) # 月報用 30 日窗格
+ if trends:
+ feedback_summary = '\n💬 Caller 反饋趨勢(30 日)\n' + render_quality_summary(trends)
+ recs = get_caller_recommendations(days=30)
+ if recs:
+ recommendations_block = '\n🔮 智能建議\n'
+ for r in recs[:3]: # 最多 3 條
+ action_emoji = '⚠️' if r['action'] == 'review' else '✅'
+ recommendations_block += f" {action_emoji} {r['caller']}: {r['reason']}\n"
+ except Exception:
+ pass # 反饋查詢失敗不影響月報主流程
+
return (
f"📊 ROI 月報 {period}\n"
f"━━━━━━━━━━━━━━━━━━━━\n"
@@ -177,6 +197,8 @@ def render_roi_report(stats: Dict[str, Any]) -> str:
f"🔧 MCP + Cache\n"
f" MCP 呼叫: {stats['mcp_total']:,}\n"
f" Cache 命中: {stats['cache_hit_calls']:,} ai_calls + {stats['mcp_cache_hits']:,} mcp_calls\n"
+ f"{feedback_summary}"
+ f"{recommendations_block}"
f"\n"
f"📈 戰役 v5.0 KPI\n"
f" Gemini -23.5% 目標:{'✅ 達標' if saved_pct >= 23 else f'⚠️ {saved_pct:.1f}%'}\n"
diff --git a/tests/test_feedback_quality_tracker.py b/tests/test_feedback_quality_tracker.py
new file mode 100644
index 0000000..e5838a2
--- /dev/null
+++ b/tests/test_feedback_quality_tracker.py
@@ -0,0 +1,166 @@
+"""
+tests/test_feedback_quality_tracker.py
+─────────────────────────────────────────────────────────────────
+Operation Ollama-First v5.0 / Phase 25 — 反饋環深化驗證
+"""
+
+from datetime import datetime, timedelta
+from unittest.mock import MagicMock
+
+import pytest
+
+
+def test_constants_defined():
+    """The four threshold constants exist and keep their agreed values."""
+    import services.feedback_quality_tracker as fqt
+
+    thresholds = (
+        fqt.DEMOTE_THUMBS_DOWN_THRESHOLD, fqt.DEMOTE_AVG_SCORE_THRESHOLD,
+        fqt.PROMOTE_THUMBS_UP_THRESHOLD, fqt.PROMOTE_AVG_SCORE_THRESHOLD,
+    )
+    # Order: demote thumbs-down, demote average, promote thumbs-up, promote average.
+    assert thresholds == (5, 2.5, 10, 4.5)
+
+
+def test_compute_trend_db_fail_returns_empty(monkeypatch):
+    """A failing query makes the tracker return {} instead of raising."""
+    import services.feedback_quality_tracker as fqt
+
+    class _FailingSession:
+        def execute(self, *args, **kwargs):
+            raise RuntimeError('rag_query_log not exist')
+
+        def close(self):
+            return None
+
+    monkeypatch.setattr('database.manager.get_session', _FailingSession)
+    assert fqt.compute_caller_quality_trend(days=7) == {}
+
+
+def test_render_summary_empty():
+    import services.feedback_quality_tracker as fqt  # no trends -> placeholder text
+    assert '無反饋資料' in fqt.render_quality_summary({})
+
+
+def test_render_summary_with_trends():
+    """Both callers are rendered, worst average first, with their trend icons."""
+    import services.feedback_quality_tracker as fqt
+
+    neutral_stats = dict(total_feedback=20, thumbs_up=15, thumbs_down=2,
+                         avg_score=4.2, trend='neutral')
+    negative_stats = dict(total_feedback=8, thumbs_up=1, thumbs_down=6,
+                          avg_score=1.8, trend='negative')
+    rendered = fqt.render_quality_summary({
+        'openclaw_qa': neutral_stats,
+        'hermes_analyst': negative_stats,
+    })
+
+    for caller in ('openclaw_qa', 'hermes_analyst'):
+        assert caller in rendered
+    # Sorted by ascending avg_score, so the negative caller comes first.
+    assert rendered.index('hermes_analyst') < rendered.index('openclaw_qa')
+    for icon in ('⚠️', '➖'):  # negative / neutral markers
+        assert icon in rendered
+
+
+def test_get_recommendations_demote_on_thumbs_down(monkeypatch):
+    """Five or more thumbs-down produce a single 'review' recommendation."""
+    import services.feedback_quality_tracker as fqt
+
+    stub = {
+        'bad_caller': {
+            'total_feedback': 8, 'thumbs_up': 1, 'thumbs_down': 6,
+            'avg_score': 1.8, 'trend': 'negative',
+        },
+    }
+    monkeypatch.setattr(fqt, 'compute_caller_quality_trend', lambda days: stub)
+
+    recs = fqt.get_caller_recommendations(days=7)
+    assert len(recs) == 1
+    assert recs[0]['caller'] == 'bad_caller'
+    assert recs[0]['action'] == 'review'
+    assert '6' in recs[0]['reason']  # the thumbs-down count is quoted in the reason
+
+
+def test_get_recommendations_promote_on_thumbs_up(monkeypatch):
+    """Ten or more thumbs-up with a positive trend produce a 'promote' entry."""
+    import services.feedback_quality_tracker as fqt
+
+    stub = {
+        'great_caller': {
+            'total_feedback': 15, 'thumbs_up': 12, 'thumbs_down': 0,
+            'avg_score': 4.8, 'trend': 'positive',
+        },
+    }
+    monkeypatch.setattr(fqt, 'compute_caller_quality_trend', lambda days: stub)
+
+    recs = fqt.get_caller_recommendations(days=7)
+    assert len(recs) == 1
+    assert recs[0]['action'] == 'promote'
+    assert '可考慮關閉 Gemini fallback' in recs[0]['reason']
+
+
+def test_get_recommendations_neutral_no_action(monkeypatch):
+    """A middling caller with a neutral trend triggers no recommendation."""
+    import services.feedback_quality_tracker as fqt
+
+    stub = {
+        'avg_caller': {
+            'total_feedback': 5, 'thumbs_up': 2, 'thumbs_down': 1,
+            'avg_score': 3.5, 'trend': 'neutral',
+        },
+    }
+    monkeypatch.setattr(fqt, 'compute_caller_quality_trend', lambda days: stub)
+
+    recs = fqt.get_caller_recommendations(days=7)
+    assert recs == []
+
+
+def test_should_demote_caller_with_low_avg(monkeypatch):
+    """A caller with many thumbs-down is demoted; an unknown caller is not."""
+    import services.feedback_quality_tracker as fqt
+
+    stub = {
+        'troubled_caller': {
+            'total_feedback': 10, 'thumbs_up': 0, 'thumbs_down': 8,
+            'avg_score': 1.5, 'trend': 'negative',
+        },
+    }
+    monkeypatch.setattr(fqt, 'compute_caller_quality_trend', lambda days: stub)
+    assert fqt.should_demote_caller('troubled_caller', days=7) is True
+    assert fqt.should_demote_caller('not_in_trends', days=7) is False
+
+
+def test_should_demote_caller_insufficient_feedback(monkeypatch):
+    """Fewer than 5 scored rows never demote, however negative they are."""
+    import services.feedback_quality_tracker as fqt
+
+    stub = {
+        'new_caller': {
+            'total_feedback': 3, 'thumbs_up': 0, 'thumbs_down': 2,
+            'avg_score': 1.5, 'trend': 'negative',
+        },
+    }
+    monkeypatch.setattr(fqt, 'compute_caller_quality_trend', lambda days: stub)
+
+    assert fqt.should_demote_caller('new_caller', days=7) is False
+
+
+def test_compute_trend_classifies_correctly(monkeypatch):
+    """Fake aggregate rows are mapped to the expected trend labels."""
+    import services.feedback_quality_tracker as fqt
+
+    rows_by_trend = {
+        'positive': ('caller_positive', 12, 10, 0, 4.8),
+        'negative': ('caller_negative', 8, 0, 6, 1.5),
+        'neutral': ('caller_neutral', 6, 3, 2, 3.2),
+        'no_data': ('caller_no_data', 2, 1, 0, 4.0),  # n < 3
+    }
+    # get_session() hands back a mock whose query yields the rows above.
+    db = MagicMock()
+    db.execute.return_value.fetchall.return_value = list(rows_by_trend.values())
+    monkeypatch.setattr('database.manager.get_session', lambda: db)
+
+    summary = fqt.compute_caller_quality_trend(days=7)
+    for label, row in rows_by_trend.items():
+        assert summary[row[0]]['trend'] == label