All checks were successful
CD Pipeline / deploy (push) Successful in 3m2s
Operation Ollama-First v5.0 / Phase 25 — 反饋環自主學習深化 services/feedback_quality_tracker.py (180+ 行) - 純 SQL 統計,零 LLM 成本 - 4 個閾值常數(demote 👎×5/avg<2.5 / promote 👍×10/avg>=4.5) - compute_caller_quality_trend(days=7) — 取近 N 日各 caller 反饋 - get_caller_recommendations() — 給 token 日報/ROI 月報用 • 規則 1: 👎 ≥ 5 次 → review • 規則 2: avg < 2.5 + 樣本足 → review • 規則 3: 👍 ≥ 10 + avg ≥ 4.5 → promote(建議關閉 Gemini fallback) - should_demote_caller(caller) — 自動降權判斷(戰役預設不啟用) - render_quality_summary() — 給訊息用 emoji 摘要 ROI 月報整合(services/roi_report_service.py): - 加 Section 「💬 Caller 反饋趨勢(30 日)」TOP 10 by 最低 avg - 加 Section 「🔮 智能建議」最多 3 條(review / promote) - 失敗 swallow 不影響月報主流程 訊息範例: 💬 Caller 反饋趨勢(30 日) ⚠️ openclaw_qa: avg 1.85/5 (👍2 👎8 n=12) ➖ hermes_analyst: avg 3.10/5 (👍5 👎3 n=10) ✅ ppt_gemini: avg 4.75/5 (👍12 👎0 n=15) 🔮 智能建議 ⚠️ openclaw_qa: 近 30 日 👎 反饋 8 次 (avg 1.85/5) — 建議統帥檢視 prompt 或切換 model ✅ ppt_gemini: 近 30 日 👍 反饋 12 次 — 可考慮關閉 Gemini fallback 純走 Ollama tests/test_feedback_quality_tracker.py (10 tests 全綠) - 4 閾值常數 / DB fail 安全 / 空 trends 容錯 - demote 規則(👎 多次)/ promote 規則(👍 多次)/ neutral 不觸發 - should_demote_caller 樣本不足保護 - trend 分類(positive/negative/neutral/no_data)正確 依 ADR-032 RAG 自主學習迴圈 + ADR-033 護欄 #1 不直接改 caller 行為(避循環自動修正失控),只產出建議給統帥審視。 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
167 lines
6.0 KiB
Python
167 lines
6.0 KiB
Python
"""
tests/test_feedback_quality_tracker.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 25 — 反饋環深化驗證
"""
|
||
|
||
from datetime import datetime, timedelta
|
||
from unittest.mock import MagicMock
|
||
|
||
import pytest
|
||
|
||
|
||
def test_constants_defined():
    """The four demote/promote threshold constants exist with the expected values."""
    from services.feedback_quality_tracker import (
        DEMOTE_AVG_SCORE_THRESHOLD,
        DEMOTE_THUMBS_DOWN_THRESHOLD,
        PROMOTE_AVG_SCORE_THRESHOLD,
        PROMOTE_THUMBS_UP_THRESHOLD,
    )

    # Each pair is (thumbs-count threshold, average-score threshold).
    demote_pair = (DEMOTE_THUMBS_DOWN_THRESHOLD, DEMOTE_AVG_SCORE_THRESHOLD)
    promote_pair = (PROMOTE_THUMBS_UP_THRESHOLD, PROMOTE_AVG_SCORE_THRESHOLD)

    assert demote_pair == (5, 2.5)
    assert promote_pair == (10, 4.5)
|
||
|
||
|
||
def test_compute_trend_db_fail_returns_empty(monkeypatch):
    """A DB error must make compute_caller_quality_trend return {} rather than raise."""
    from services.feedback_quality_tracker import compute_caller_quality_trend

    class _BrokenSession:
        """Session stand-in whose execute() always raises."""

        def close(self):
            pass

        def execute(self, *args, **kwargs):
            raise RuntimeError('rag_query_log not exist')

    def _open_broken_session():
        # Zero-argument factory, matching how the patched get_session is replaced.
        return _BrokenSession()

    monkeypatch.setattr('database.manager.get_session', _open_broken_session)

    assert compute_caller_quality_trend(days=7) == {}
|
||
|
||
|
||
def test_render_summary_empty():
    """Rendering an empty trends mapping yields the no-feedback-data placeholder text."""
    from services.feedback_quality_tracker import render_quality_summary

    rendered = render_quality_summary({})
    assert '無反饋資料' in rendered
|
||
|
||
|
||
def test_render_summary_with_trends():
    """The summary names every caller, puts the lower avg_score first, and shows trend emoji."""
    from services.feedback_quality_tracker import render_quality_summary

    neutral_stats = {
        'total_feedback': 20,
        'thumbs_up': 15,
        'thumbs_down': 2,
        'avg_score': 4.2,
        'trend': 'neutral',
    }
    negative_stats = {
        'total_feedback': 8,
        'thumbs_up': 1,
        'thumbs_down': 6,
        'avg_score': 1.8,
        'trend': 'negative',
    }
    # The better-scoring caller is inserted first so the ordering check below
    # cannot pass merely because of input order.
    trends = {
        'openclaw_qa': neutral_stats,
        'hermes_analyst': negative_stats,
    }

    summary = render_quality_summary(trends)

    for caller in trends:
        assert caller in summary

    # Negative caller comes first (ascending avg_score).
    assert summary.index('hermes_analyst') < summary.index('openclaw_qa')

    # One marker for the negative trend, one for the neutral trend.
    for marker in ('⚠️', '➖'):
        assert marker in summary
|
||
|
||
|
||
def test_get_recommendations_demote_on_thumbs_down(monkeypatch):
    """A thumbs-down count >= 5 should produce a single 'review' recommendation."""
    import services.feedback_quality_tracker as fqt
    from services.feedback_quality_tracker import get_caller_recommendations

    def _fake_trend(days):
        # Canned trend data: one caller with 6 thumbs-down out of 8 feedback entries.
        return {
            'bad_caller': {
                'total_feedback': 8,
                'thumbs_up': 1,
                'thumbs_down': 6,
                'avg_score': 1.8,
                'trend': 'negative',
            },
        }

    monkeypatch.setattr(fqt, 'compute_caller_quality_trend', _fake_trend)

    recommendations = get_caller_recommendations(days=7)
    assert len(recommendations) == 1

    first = recommendations[0]
    assert first['caller'] == 'bad_caller'
    assert first['action'] == 'review'
    # The reason text should mention the thumbs-down count (6).
    assert '6' in first['reason']
|
||
|
||
|
||
def test_get_recommendations_promote_on_thumbs_up(monkeypatch):
    """Thumbs-up >= 10 with avg >= 4.5 should produce a single 'promote' recommendation."""
    import services.feedback_quality_tracker as fqt
    from services.feedback_quality_tracker import get_caller_recommendations

    def _fake_trend(days):
        # Canned trend data: one caller with 12 thumbs-up and a 4.8 average.
        return {
            'great_caller': {
                'total_feedback': 15,
                'thumbs_up': 12,
                'thumbs_down': 0,
                'avg_score': 4.8,
                'trend': 'positive',
            },
        }

    monkeypatch.setattr(fqt, 'compute_caller_quality_trend', _fake_trend)

    recommendations = get_caller_recommendations(days=7)
    assert len(recommendations) == 1

    first = recommendations[0]
    assert first['action'] == 'promote'
    assert '可考慮關閉 Gemini fallback' in first['reason']
|
||
|
||
|
||
def test_get_recommendations_neutral_no_action(monkeypatch):
    """A middling sample should not trigger any recommendation."""
    import services.feedback_quality_tracker as fqt
    from services.feedback_quality_tracker import get_caller_recommendations

    def _fake_trend(days):
        # Canned trend data: a single caller with a 3.5 average over 5 feedback entries.
        return {
            'avg_caller': {
                'total_feedback': 5,
                'thumbs_up': 2,
                'thumbs_down': 1,
                'avg_score': 3.5,
                'trend': 'neutral',
            },
        }

    monkeypatch.setattr(fqt, 'compute_caller_quality_trend', _fake_trend)

    assert get_caller_recommendations(days=7) == []
|
||
|
||
|
||
def test_should_demote_caller_with_low_avg(monkeypatch):
    """A caller with a 1.5 average over 10 feedback entries is demoted; an unknown caller is not."""
    import services.feedback_quality_tracker as fqt
    from services.feedback_quality_tracker import should_demote_caller

    def _fake_trend(days):
        # Canned trend data containing only 'troubled_caller'.
        return {
            'troubled_caller': {
                'total_feedback': 10,
                'thumbs_up': 0,
                'thumbs_down': 8,
                'avg_score': 1.5,
                'trend': 'negative',
            },
        }

    monkeypatch.setattr(fqt, 'compute_caller_quality_trend', _fake_trend)

    demote_known = should_demote_caller('troubled_caller', days=7)
    assert demote_known is True

    # A caller absent from the trend data must never be demoted.
    demote_unknown = should_demote_caller('not_in_trends', days=7)
    assert demote_unknown is False
|
||
|
||
|
||
def test_should_demote_caller_insufficient_feedback(monkeypatch):
    """Fewer than 5 samples must not demote (avoids misjudging on a few negatives)."""
    import services.feedback_quality_tracker as fqt
    from services.feedback_quality_tracker import should_demote_caller

    def _fake_trend(days):
        # Canned trend data: low average, but only 3 feedback entries in total.
        return {
            'new_caller': {
                'total_feedback': 3,
                'thumbs_up': 0,
                'thumbs_down': 2,
                'avg_score': 1.5,
                'trend': 'negative',
            },
        }

    monkeypatch.setattr(fqt, 'compute_caller_quality_trend', _fake_trend)

    verdict = should_demote_caller('new_caller', days=7)
    assert verdict is False
|
||
|
||
|
||
def test_compute_trend_classifies_correctly(monkeypatch):
    """Feed simulated SQL rows through compute_caller_quality_trend and check each trend label."""
    from services.feedback_quality_tracker import compute_caller_quality_trend

    # NOTE(review): columns are presumably (caller, total, thumbs_up,
    # thumbs_down, avg_score) — confirm against the tracker's SQL.
    rows = [
        ('caller_positive', 12, 10, 0, 4.8),
        ('caller_negative', 8, 0, 6, 1.5),
        ('caller_neutral', 6, 3, 2, 3.2),
        ('caller_no_data', 2, 1, 0, 4.0),
    ]
    session = MagicMock()
    session.execute.return_value.fetchall.return_value = rows
    monkeypatch.setattr('database.manager.get_session', lambda: session)

    trends = compute_caller_quality_trend(days=7)

    expected_labels = {
        'caller_positive': 'positive',
        'caller_negative': 'negative',
        'caller_neutral': 'neutral',
        'caller_no_data': 'no_data',  # n < 3
    }
    for caller, label in expected_labels.items():
        assert trends[caller]['trend'] == label
|