""" tests/test_feedback_quality_tracker.py ───────────────────────────────────────────────────────────────── Operation Ollama-First v5.0 / Phase 25 — 反饋環深化驗證 """ from datetime import datetime, timedelta from unittest.mock import MagicMock import pytest def test_constants_defined(): """4 個閾值常數應存在""" from services.feedback_quality_tracker import ( DEMOTE_THUMBS_DOWN_THRESHOLD, DEMOTE_AVG_SCORE_THRESHOLD, PROMOTE_THUMBS_UP_THRESHOLD, PROMOTE_AVG_SCORE_THRESHOLD, ) assert DEMOTE_THUMBS_DOWN_THRESHOLD == 5 assert DEMOTE_AVG_SCORE_THRESHOLD == 2.5 assert PROMOTE_THUMBS_UP_THRESHOLD == 10 assert PROMOTE_AVG_SCORE_THRESHOLD == 4.5 def test_compute_trend_db_fail_returns_empty(monkeypatch): """DB 異常應回 {} 不 raise""" from services.feedback_quality_tracker import compute_caller_quality_trend class _BrokenSession: def execute(self, *a, **kw): raise RuntimeError('rag_query_log not exist') def close(self): pass monkeypatch.setattr('database.manager.get_session', lambda: _BrokenSession()) result = compute_caller_quality_trend(days=7) assert result == {} def test_render_summary_empty(): from services.feedback_quality_tracker import render_quality_summary assert '無反饋資料' in render_quality_summary({}) def test_render_summary_with_trends(): from services.feedback_quality_tracker import render_quality_summary trends = { 'openclaw_qa': { 'total_feedback': 20, 'thumbs_up': 15, 'thumbs_down': 2, 'avg_score': 4.2, 'trend': 'neutral', }, 'hermes_analyst': { 'total_feedback': 8, 'thumbs_up': 1, 'thumbs_down': 6, 'avg_score': 1.8, 'trend': 'negative', }, } out = render_quality_summary(trends) assert 'openclaw_qa' in out assert 'hermes_analyst' in out # negative 排前面(avg_score 升序) assert out.index('hermes_analyst') < out.index('openclaw_qa') assert '⚠️' in out # negative emoji assert '➖' in out # neutral emoji def test_get_recommendations_demote_on_thumbs_down(monkeypatch): """👎 ≥ 5 → review 建議""" from services.feedback_quality_tracker import get_caller_recommendations import services.feedback_quality_tracker as fqt monkeypatch.setattr(fqt, 'compute_caller_quality_trend', lambda days: { 'bad_caller': { 'total_feedback': 8, 'thumbs_up': 1, 'thumbs_down': 6, 'avg_score': 1.8, 'trend': 'negative', }, }) recs = get_caller_recommendations(days=7) assert len(recs) == 1 assert recs[0]['caller'] == 'bad_caller' assert recs[0]['action'] == 'review' assert '6' in recs[0]['reason'] # 👎 6 次 def test_get_recommendations_promote_on_thumbs_up(monkeypatch): """👍 ≥ 10 + avg ≥ 4.5 → promote 建議""" from services.feedback_quality_tracker import get_caller_recommendations import services.feedback_quality_tracker as fqt monkeypatch.setattr(fqt, 'compute_caller_quality_trend', lambda days: { 'great_caller': { 'total_feedback': 15, 'thumbs_up': 12, 'thumbs_down': 0, 'avg_score': 4.8, 'trend': 'positive', }, }) recs = get_caller_recommendations(days=7) assert len(recs) == 1 assert recs[0]['action'] == 'promote' assert '可考慮關閉 Gemini fallback' in recs[0]['reason'] def test_get_recommendations_neutral_no_action(monkeypatch): """中等樣本不該觸發任何建議""" from services.feedback_quality_tracker import get_caller_recommendations import services.feedback_quality_tracker as fqt monkeypatch.setattr(fqt, 'compute_caller_quality_trend', lambda days: { 'avg_caller': { 'total_feedback': 5, 'thumbs_up': 2, 'thumbs_down': 1, 'avg_score': 3.5, 'trend': 'neutral', }, }) recs = get_caller_recommendations(days=7) assert recs == [] def test_should_demote_caller_with_low_avg(monkeypatch): from services.feedback_quality_tracker import should_demote_caller import services.feedback_quality_tracker as fqt monkeypatch.setattr(fqt, 'compute_caller_quality_trend', lambda days: { 'troubled_caller': { 'total_feedback': 10, 'thumbs_up': 0, 'thumbs_down': 8, 'avg_score': 1.5, 'trend': 'negative', }, }) assert should_demote_caller('troubled_caller', days=7) is True assert should_demote_caller('not_in_trends', days=7) is False def test_should_demote_caller_insufficient_feedback(monkeypatch): """樣本 < 5 不該降權(避免少量負面誤判)""" from services.feedback_quality_tracker import should_demote_caller import services.feedback_quality_tracker as fqt monkeypatch.setattr(fqt, 'compute_caller_quality_trend', lambda days: { 'new_caller': { 'total_feedback': 3, 'thumbs_up': 0, 'thumbs_down': 2, 'avg_score': 1.5, 'trend': 'negative', }, }) assert should_demote_caller('new_caller', days=7) is False def test_compute_trend_classifies_correctly(monkeypatch): """模擬 SQL 結果驗證 trend 分類""" from services.feedback_quality_tracker import compute_caller_quality_trend fake_session = MagicMock() fake_session.execute.return_value.fetchall.return_value = [ ('caller_positive', 12, 10, 0, 4.8), ('caller_negative', 8, 0, 6, 1.5), ('caller_neutral', 6, 3, 2, 3.2), ('caller_no_data', 2, 1, 0, 4.0), ] monkeypatch.setattr('database.manager.get_session', lambda: fake_session) trends = compute_caller_quality_trend(days=7) assert trends['caller_positive']['trend'] == 'positive' assert trends['caller_negative']['trend'] == 'negative' assert trends['caller_neutral']['trend'] == 'neutral' assert trends['caller_no_data']['trend'] == 'no_data' # n < 3