# ewoooc/tests/test_learning_pipeline.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/test_learning_pipeline.py
Operation Ollama-First v5.0 / Phase 11 — unit tests for Distiller + LearningPipeline

Covers:
  - Distiller quality_score rules (mcp / llm_response / user_feedback / manual_curated)
  - LearningPipeline.enqueue() DB write path
  - expire_stale_reviews() automatic 24h downgrade
  - push_awaiting_reviews_to_telegram() review push
  - hash_human_approver() PII protection
  - _detect_simple_contradiction() helper
"""

from __future__ import annotations

import json
from unittest.mock import MagicMock, patch

import pytest


# ─────────────────────────────────────────────────────────────────────────────
# Distiller 各規則
# ─────────────────────────────────────────────────────────────────────────────
class TestDistillerMcpResult:
    """Quality-score expectations for ``mcp_result`` episodes.

    Every test checks that ``distill`` returned an object before reading
    its attributes, so a regression surfaces as a clear assertion failure
    rather than an ``AttributeError``. Scores are compared with
    ``pytest.approx`` so the tests do not depend on exact float bits.
    """

    def test_long_with_keywords_high_quality(self):
        """Text over 200 characters that contains keywords scores 0.8."""
        from services.learning_pipeline import Distiller
        d = Distiller()
        text = "本週業績分析顯示，建議聚焦保濕品類。" + "詳細說明 " * 80  # > 200 characters
        result = d.distill(episode_type='mcp_result', raw_content=text)
        assert result is not None
        assert result.quality_score == pytest.approx(0.8)
        assert result.episode_type == 'mcp_result'

    def test_long_no_keywords_medium_quality(self):
        """Text over 200 characters without keywords scores 0.65."""
        from services.learning_pipeline import Distiller
        d = Distiller()
        text = "啦啦啦" * 100  # > 200 characters but no keywords
        result = d.distill(episode_type='mcp_result', raw_content=text)
        assert result is not None
        assert result.quality_score == pytest.approx(0.65)

    def test_short_low_quality(self):
        """Short content scores 0.5."""
        from services.learning_pipeline import Distiller
        d = Distiller()
        text = "短內容"
        result = d.distill(episode_type='mcp_result', raw_content=text)
        assert result is not None
        assert result.quality_score == pytest.approx(0.5)

    def test_empty_returns_none(self):
        """Empty or whitespace-only content yields no distilled result."""
        from services.learning_pipeline import Distiller
        d = Distiller()
        assert d.distill(episode_type='mcp_result', raw_content='') is None
        assert d.distill(episode_type='mcp_result', raw_content='   ') is None


class TestDistillerLlmResponse:
    """Quality-score expectations for ``llm_response`` episodes.

    Covers JSON payloads (dict and list) and free text of varying length.
    Each test asserts that ``distill`` returned an object before reading
    the score, and compares scores with ``pytest.approx``.
    """

    def test_json_structured_high_quality(self):
        """A JSON object carrying ``status``/``summary`` scores 0.9."""
        from services.learning_pipeline import Distiller
        d = Distiller()
        text = json.dumps({"status": "ok", "summary": "本週重點"})
        result = d.distill(episode_type='llm_response', raw_content=text)
        assert result is not None
        assert result.quality_score == pytest.approx(0.9)

    def test_json_array_non_empty_high(self):
        """A non-empty JSON array scores 0.9."""
        from services.learning_pipeline import Distiller
        d = Distiller()
        text = json.dumps([{"sku": "A001", "risk": "HIGH"}])
        result = d.distill(episode_type='llm_response', raw_content=text)
        assert result is not None
        assert result.quality_score == pytest.approx(0.9)

    def test_json_dict_no_status_lower(self):
        """A JSON object without ``status`` still scores 0.9.

        NOTE: despite "lower" in the test name, the asserted score is the
        same 0.9 as for the structured cases.
        """
        from services.learning_pipeline import Distiller
        d = Distiller()
        text = json.dumps({"some_field": "value"})
        result = d.distill(episode_type='llm_response', raw_content=text)
        assert result is not None
        # Non-empty dict → 0.9 (the status_ok condition includes "len(obj)>0")
        assert result.quality_score == pytest.approx(0.9)

    def test_free_text_long_with_numbers(self):
        """Free text over 500 characters that contains a number scores 0.65."""
        from services.learning_pipeline import Distiller
        d = Distiller()
        text = "本週業績漲了 15.3%。" + "詳細說明 " * 100  # > 500 characters + a number
        result = d.distill(episode_type='llm_response', raw_content=text)
        assert result is not None
        assert result.quality_score == pytest.approx(0.65)

    def test_free_text_long_no_numbers(self):
        """Free text over 500 characters without numbers scores 0.55."""
        from services.learning_pipeline import Distiller
        d = Distiller()
        text = "本週業績趨勢上升。" + "詳細說明 " * 100  # > 500 characters, no numbers
        result = d.distill(episode_type='llm_response', raw_content=text)
        assert result is not None
        assert result.quality_score == pytest.approx(0.55)

    def test_free_text_short_below_quality_gate(self):
        """Short free text scores 0.4."""
        from services.learning_pipeline import Distiller
        d = Distiller()
        text = "本週業績有變化"  # short text
        result = d.distill(episode_type='llm_response', raw_content=text)
        assert result is not None
        # 0.4 → Stage 1 will reject it
        assert result.quality_score == pytest.approx(0.4)


class TestDistillerUserFeedback:
    """Quality-score expectations for ``user_feedback`` episodes.

    The expected values follow the ``(score - 1) / 4`` mapping noted in the
    default-score test. Because the score is derived arithmetically, it is
    compared with ``pytest.approx`` rather than exact float equality.
    """

    def test_score_5_high_quality(self):
        """A feedback score of 5 gives quality 1.0 and weight 0.9."""
        from services.learning_pipeline import Distiller
        d = Distiller()
        result = d.distill(
            episode_type='user_feedback',
            raw_content='這個建議幫我增加了 8% 銷量',
            user_feedback_score=5,
        )
        assert result is not None
        assert result.quality_score == pytest.approx(1.0)
        assert result.weight == pytest.approx(0.9)  # high weight → Stage 4 manual review

    def test_score_1_negative_sample(self):
        """A feedback score of 1 gives quality 0.0."""
        from services.learning_pipeline import Distiller
        d = Distiller()
        result = d.distill(
            episode_type='user_feedback',
            raw_content='完全沒幫助',
            user_feedback_score=1,
        )
        assert result is not None
        assert result.quality_score == pytest.approx(0.0)  # Stage 1 reject

    def test_default_score_3_mid(self):
        """A missing feedback score gives quality 0.5."""
        from services.learning_pipeline import Distiller
        d = Distiller()
        result = d.distill(
            episode_type='user_feedback',
            raw_content='普通',
            user_feedback_score=None,
        )
        assert result is not None
        # Defaults to 3 → (3-1)/4 = 0.5
        assert result.quality_score == pytest.approx(0.5)


class TestDistillerManualCurated:
    """Expectations for ``manual_curated`` episodes."""

    def test_max_quality_and_weight(self):
        """Manually curated content gets quality 1.0 and weight 1.0."""
        from services.learning_pipeline import Distiller
        d = Distiller()
        result = d.distill(episode_type='manual_curated', raw_content='手動入庫')
        # Fail with a clear assertion instead of an AttributeError below.
        assert result is not None
        assert result.quality_score == pytest.approx(1.0)
        assert result.weight == pytest.approx(1.0)


class TestDistillerInvalidType:
    """Behaviour of the Distiller for an unrecognised episode type."""

    def test_unknown_type_returns_none(self):
        """``distill`` returns ``None`` for the episode type ``'garbage'``."""
        from services.learning_pipeline import Distiller

        outcome = Distiller().distill(
            episode_type='garbage',
            raw_content='whatever',
        )
        assert outcome is None


class TestDistillerLengthGuard:
    """Size cap on the distilled text."""

    def test_distilled_text_truncated_to_16kb(self):
        """Oversized input is cut down to ``DISTILLED_TEXT_MAX_BYTES`` of UTF-8."""
        from services.learning_pipeline import Distiller, DISTILLED_TEXT_MAX_BYTES
        d = Distiller()
        text = '建議分析 ' * 5000  # far beyond 16KB
        # Guard the premise: if the input does not exceed the cap, the final
        # assertion would pass without any truncation taking place.
        assert len(text.encode('utf-8')) > DISTILLED_TEXT_MAX_BYTES
        result = d.distill(episode_type='mcp_result', raw_content=text)
        assert result is not None
        encoded = result.distilled_text.encode('utf-8')
        assert len(encoded) <= DISTILLED_TEXT_MAX_BYTES


# ─────────────────────────────────────────────────────────────────────────────
# LearningPipeline.enqueue
# ─────────────────────────────────────────────────────────────────────────────
class TestLearningPipelineEnqueue:
    """``learning_pipeline.enqueue`` against a mocked database session."""

    def test_enqueue_returns_id_on_success(self, monkeypatch):
        """The id read from the fetched row is returned and a commit happens."""
        from services.learning_pipeline import learning_pipeline

        row = MagicMock()
        row.__getitem__.return_value = 42
        session = MagicMock()
        session.execute.return_value.fetchone.return_value = row

        def _get_session():
            return session

        monkeypatch.setattr('database.manager.get_session', _get_session)

        episode_id = learning_pipeline.enqueue(
            episode_type='manual_curated',
            raw_content='手動入庫測試內容',
        )

        assert episode_id == 42
        # Phase 11.5: after enqueue, _enqueue_embedding reuses the same fake
        # session → two commits (1: episode INSERT, 2: embedding_retry_queue
        # INSERT). Fail-safe: a failure in _enqueue_embedding is swallowed and
        # does not affect the returned episode_id, so only "at least one
        # commit" is required here.
        assert session.commit.called

    def test_enqueue_returns_none_when_distill_fails(self):
        """Empty content → distill returns None → enqueue returns None."""
        from services.learning_pipeline import learning_pipeline

        outcome = learning_pipeline.enqueue(episode_type='mcp_result', raw_content='')
        assert outcome is None

    def test_enqueue_db_failure_returns_none(self, monkeypatch):
        """An exception raised by ``session.execute`` results in ``None``."""
        from services.learning_pipeline import learning_pipeline

        session = MagicMock()
        session.execute.side_effect = RuntimeError("db down")

        def _get_session():
            return session

        monkeypatch.setattr('database.manager.get_session', _get_session)

        outcome = learning_pipeline.enqueue(
            episode_type='manual_curated',
            raw_content='測試內容',
        )
        assert outcome is None


# ─────────────────────────────────────────────────────────────────────────────
# expire_stale_reviews
# ─────────────────────────────────────────────────────────────────────────────
class TestExpireStaleReviews:
    """``expire_stale_reviews`` against a mocked database session."""

    def test_expire_uses_correct_sql(self, monkeypatch):
        """The statement's ``rowcount`` is returned and one commit is issued.

        NOTE(review): despite the name, the SQL text passed to ``execute``
        is not inspected here.
        """
        from services.learning_pipeline import expire_stale_reviews

        statement_result = MagicMock()
        statement_result.rowcount = 3
        session = MagicMock()
        session.execute.return_value = statement_result

        def _get_session():
            return session

        monkeypatch.setattr('database.manager.get_session', _get_session)

        expired = expire_stale_reviews(hours=24)

        assert expired == 3
        # Make sure the commit actually ran.
        session.commit.assert_called_once()

    def test_expire_db_failure_returns_zero(self, monkeypatch):
        """An exception raised by ``session.execute`` results in a count of 0."""
        from services.learning_pipeline import expire_stale_reviews

        session = MagicMock()
        session.execute.side_effect = RuntimeError("db down")

        def _get_session():
            return session

        monkeypatch.setattr('database.manager.get_session', _get_session)

        expired = expire_stale_reviews(hours=24)
        assert expired == 0


class TestAwaitingReviewPush:
    """``push_awaiting_reviews_to_telegram`` with the DB and sender stubbed."""

    def test_push_uses_chat_ids_keyword_for_telegram(self, monkeypatch):
        """One pending row produces one message sent to ``chat_ids=["12345"]``.

        Also checks that the message mentions the episode id, that the first
        inline button carries ``pg_ok:<id>``, and that the session is closed
        exactly once.
        """
        from services.learning_pipeline import push_awaiting_reviews_to_telegram

        pending_rows = [(7, "這是一段待審核學習內容", 0.91, 0.84)]
        session = MagicMock()
        session.execute.return_value.fetchall.return_value = pending_rows

        def _get_session():
            return session

        monkeypatch.setattr('database.manager.get_session', _get_session)

        captured = {}

        def fake_send(msg, chat_ids=None, reply_markup=None, parse_mode="HTML"):
            # Record every argument of the outgoing call for inspection below.
            captured.update(
                msg=msg,
                chat_ids=chat_ids,
                reply_markup=reply_markup,
                parse_mode=parse_mode,
            )
            return True

        monkeypatch.setattr("services.telegram_templates._send_telegram_raw", fake_send)

        pushed = push_awaiting_reviews_to_telegram(batch=1, chat_id="12345")

        assert pushed == 1
        assert captured["chat_ids"] == ["12345"]
        assert "episode #7" in captured["msg"]
        first_button = captured["reply_markup"]["inline_keyboard"][0][0]
        assert first_button["callback_data"] == "pg_ok:7"
        session.close.assert_called_once()


# ─────────────────────────────────────────────────────────────────────────────
# hash_human_approver
# ─────────────────────────────────────────────────────────────────────────────
class TestHashHumanApprover:
    """Shape and stability of the value returned by ``hash_human_approver``."""

    def test_returns_8_char_hex(self):
        """The digest is eight lowercase hexadecimal characters."""
        from services.learning_pipeline import hash_human_approver

        digest = hash_human_approver('owen.tsai')
        hex_alphabet = set('0123456789abcdef')
        assert len(digest) == 8
        assert set(digest) <= hex_alphabet

    def test_empty_returns_empty(self):
        """Both the empty string and ``None`` map to the empty string."""
        from services.learning_pipeline import hash_human_approver

        assert hash_human_approver('') == ''
        assert hash_human_approver(None) == ''  # type: ignore

    def test_deterministic(self):
        """Equal inputs give equal digests; 'alice' and 'bob' differ."""
        from services.learning_pipeline import hash_human_approver

        first_alice = hash_human_approver('alice')
        second_alice = hash_human_approver('alice')
        bob = hash_human_approver('bob')
        assert first_alice == second_alice
        assert first_alice != bob


# ─────────────────────────────────────────────────────────────────────────────
# 工具函式：_detect_simple_contradiction
# ─────────────────────────────────────────────────────────────────────────────
class TestContradictionDetector:
    """Checks for the ``_detect_simple_contradiction`` helper."""

    def test_no_contradiction_returns_none(self):
        """Two statements about different subjects return ``None``."""
        from services.learning_pipeline import _detect_simple_contradiction

        # subject=業績→上升, subject=市場→競爭: no contradiction
        consistent_text = "業績是上升。市場是競爭。"
        assert _detect_simple_contradiction(consistent_text) is None

    def test_contradiction_detected(self):
        """Two different claims about subject ``A`` give a result mentioning ``A``."""
        from services.learning_pipeline import _detect_simple_contradiction

        finding = _detect_simple_contradiction("A是黑色。A是白色。")
        assert finding is not None
        assert 'A' in finding