# ewoooc/tests/test_promotion_gate.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/test_promotion_gate.py
Operation Ollama-First v5.0 / Phase 11 — PromotionGate 4 階段晉升閘單元測試

涵蓋：
  Stage 1: quality_score < 0.7 → rejected_quality
  Stage 2: 規則引擎幻覺檢測（hedge words / 矛盾）
  Stage 3: cosine similarity >= 0.95 → rejected_duplicate
  Stage 4: weight >= 0.8 強制 awaiting_review（不能跳）
  promote() / reject() / mark_awaiting_review() DB 操作
"""

from __future__ import annotations

from unittest.mock import MagicMock

import pytest


# ─────────────────────────────────────────────────────────────────────────────
# 共用工具
# ─────────────────────────────────────────────────────────────────────────────
def _fake_episode(
    id_=1, episode_type='llm_response',
    distilled_text='內容', quality_score=0.8, weight=0.5,
    embedding=None, status='pending',
):
    """Build a plain-dict episode record for feeding the gate in tests.

    Every argument maps one-to-one onto a key of the returned dict; the only
    renames are ``id_`` -> ``'id'`` and ``status`` -> ``'promotion_status'``.
    The defaults describe a pending ``llm_response`` episode with
    quality 0.8, weight 0.5 and no embedding.
    """
    episode = dict(
        id=id_,
        episode_type=episode_type,
        distilled_text=distilled_text,
        quality_score=quality_score,
        weight=weight,
        embedding=embedding,
        promotion_status=status,
    )
    return episode


def _patch_load_episode(monkeypatch, episode):
    """Replace PromotionGate._load_episode with a stub that bypasses the DB.

    The stub ignores the requested episode id and always hands back
    ``episode``. Any falsy ``episode`` (``None``, an empty dict, ...) is
    returned as ``None``.
    """
    from services.learning_pipeline import PromotionGate

    def _stub(episode_id):
        # Truthiness check on purpose: falsy values are normalised to None.
        return episode if episode else None

    monkeypatch.setattr(PromotionGate, '_load_episode', staticmethod(_stub))


# ─────────────────────────────────────────────────────────────────────────────
# Stage 1: quality_score
# ─────────────────────────────────────────────────────────────────────────────
class TestStage1Quality:
    """Stage 1 of the gate: decisions driven by the episode's quality_score."""

    def test_low_quality_rejected(self, monkeypatch):
        """quality_score=0.5 is rejected and the score shows up in the detail."""
        from services.learning_pipeline import PromotionGate

        _patch_load_episode(monkeypatch, _fake_episode(quality_score=0.5))

        verdict = PromotionGate().can_promote(1)

        assert verdict.can_promote is False
        assert verdict.reason == 'rejected_quality'
        # detail may be None, so fall back to '' before the substring check.
        assert '0.500' in (verdict.detail or '')

    def test_quality_at_threshold_passes(self, monkeypatch):
        """A score exactly equal to STAGE_1_AUTO_QUALITY is approved."""
        from services.learning_pipeline import PromotionGate, STAGE_1_AUTO_QUALITY

        boundary_episode = _fake_episode(
            quality_score=STAGE_1_AUTO_QUALITY,
            weight=0.5,
        )
        _patch_load_episode(monkeypatch, boundary_episode)

        verdict = PromotionGate().can_promote(1)

        # Clears stages 1-4 and is promoted automatically.
        assert verdict.can_promote is True
        assert verdict.reason == 'approved'

    def test_episode_not_found(self, monkeypatch):
        """A missing episode is reported as rejected_quality with 'not found'."""
        from services.learning_pipeline import PromotionGate

        _patch_load_episode(monkeypatch, None)

        verdict = PromotionGate().can_promote(99999)

        assert verdict.can_promote is False
        assert verdict.reason == 'rejected_quality'
        assert 'not found' in (verdict.detail or '')


# ─────────────────────────────────────────────────────────────────────────────
# Stage 2: 幻覺檢測
# ─────────────────────────────────────────────────────────────────────────────
class TestStage2Hallucination:
    """Stage 2 of the gate: rule-based hallucination checks on distilled_text."""

    @staticmethod
    def _decide(monkeypatch, **episode_fields):
        """Stub episode loading with a fake episode and return the decision."""
        from services.learning_pipeline import PromotionGate

        _patch_load_episode(monkeypatch, _fake_episode(**episode_fields))
        return PromotionGate().can_promote(1)

    def test_hedge_words_without_numbers_rejected(self, monkeypatch):
        """Hedging text that carries no concrete numbers is rejected."""
        verdict = self._decide(
            monkeypatch,
            quality_score=0.8,
            distilled_text='我猜本週業績可能會有點成長吧，也許不錯。',
        )

        assert verdict.can_promote is False
        assert verdict.reason == 'rejected_hallucination'

    def test_hedge_words_with_numbers_passes(self, monkeypatch):
        """Hedging text backed by concrete numbers is allowed through."""
        verdict = self._decide(
            monkeypatch,
            quality_score=0.8, weight=0.5,
            distilled_text='我猜本週業績會漲 5.2%，根據過去 30 天平均。',
        )

        # Passes Stage 2 (concrete numbers present) -> reaches Stage 4 -> approved.
        assert verdict.can_promote is True

    def test_contradiction_rejected(self, monkeypatch):
        """Self-contradicting statements are rejected with a matching detail."""
        verdict = self._decide(
            monkeypatch,
            quality_score=0.8,
            distilled_text='A是黑色。A是白色。',
        )

        assert verdict.can_promote is False
        assert verdict.reason == 'rejected_hallucination'
        assert '自相矛盾' in (verdict.detail or '')

    def test_clean_text_passes(self, monkeypatch):
        """Plain factual text with numbers is approved."""
        verdict = self._decide(
            monkeypatch,
            quality_score=0.8, weight=0.5,
            distilled_text='本週業績漲 5.2%，建議聚焦保濕品類。',
        )

        assert verdict.can_promote is True
        assert verdict.reason == 'approved'


# ─────────────────────────────────────────────────────────────────────────────
# Stage 3: 去重
# ─────────────────────────────────────────────────────────────────────────────
class TestStage3Dedup:
    """Stage 3 of the gate: duplicate detection via embedding similarity.

    The DB is never touched: ``database.manager.get_session`` is replaced by
    a factory returning a ``MagicMock`` session whose
    ``execute(...).fetchone()`` yields a row exposing ``id`` and
    ``similarity``.
    """

    def test_high_similarity_rejected(self, monkeypatch):
        """A nearest match at/above the dedup threshold is rejected as duplicate."""
        from services.learning_pipeline import PromotionGate, STAGE_3_DEDUP_THRESHOLD
        ep = _fake_episode(
            quality_score=0.8, weight=0.5,
            distilled_text='本週業績漲 5%。',
            embedding=[0.1] * 1024,  # stands in for a non-empty embedding
        )
        _patch_load_episode(monkeypatch, ep)

        # Simulate the DB reporting similarity=0.96 for insight 999.
        fake_row = MagicMock()
        fake_row.id = 999
        fake_row.similarity = 0.96
        # Fixture guard: the mocked value must really be on the "duplicate"
        # side of the configured threshold, otherwise this test is meaningless.
        assert fake_row.similarity >= STAGE_3_DEDUP_THRESHOLD
        fake_session = MagicMock()
        fake_session.execute.return_value.fetchone.return_value = fake_row
        monkeypatch.setattr('database.manager.get_session', lambda: fake_session)

        gate = PromotionGate()
        decision = gate.can_promote(1)
        assert decision.can_promote is False
        assert decision.reason == 'rejected_duplicate'
        assert decision.similar_insight_id == 999

    def test_low_similarity_passes(self, monkeypatch):
        """A nearest match below the dedup threshold does not block promotion."""
        from services.learning_pipeline import PromotionGate, STAGE_3_DEDUP_THRESHOLD
        ep = _fake_episode(
            quality_score=0.8, weight=0.5,
            distilled_text='全新內容', embedding=[0.1] * 1024,
        )
        _patch_load_episode(monkeypatch, ep)

        fake_row = MagicMock()
        fake_row.id = 999
        fake_row.similarity = 0.5
        # Fixture guard: the mocked value must be on the "not duplicate" side.
        assert fake_row.similarity < STAGE_3_DEDUP_THRESHOLD
        fake_session = MagicMock()
        fake_session.execute.return_value.fetchone.return_value = fake_row
        monkeypatch.setattr('database.manager.get_session', lambda: fake_session)

        gate = PromotionGate()
        decision = gate.can_promote(1)
        assert decision.can_promote is True

    def test_null_embedding_skips_dedup(self, monkeypatch):
        """No embedding yet (not embedded at distill time) -> Stage 3 is skipped."""
        from services.learning_pipeline import PromotionGate
        ep = _fake_episode(quality_score=0.8, weight=0.5, embedding=None)
        _patch_load_episode(monkeypatch, ep)

        # The session factory must never be invoked; count calls to prove it.
        called = {'count': 0}

        def _spy_session():
            called['count'] += 1
            return MagicMock()

        monkeypatch.setattr('database.manager.get_session', _spy_session)

        gate = PromotionGate()
        decision = gate.can_promote(1)
        assert decision.can_promote is True
        assert called['count'] == 0

    def test_dedup_query_failure_passes(self, monkeypatch):
        """A failing dedup query counts as a pass so a DB outage cannot block promotion."""
        from services.learning_pipeline import PromotionGate
        ep = _fake_episode(
            quality_score=0.8, weight=0.5,
            embedding=[0.1] * 1024,
        )
        _patch_load_episode(monkeypatch, ep)

        fake_session = MagicMock()
        fake_session.execute.side_effect = RuntimeError("db down")
        monkeypatch.setattr('database.manager.get_session', lambda: fake_session)

        gate = PromotionGate()
        decision = gate.can_promote(1)
        assert decision.can_promote is True


# ─────────────────────────────────────────────────────────────────────────────
# Stage 4: 強制人工驗收（v5.0 護欄 #1 核心）
# ─────────────────────────────────────────────────────────────────────────────
class TestStage4HumanReview:
    """Stage 4 of the gate: mandatory human review for high-weight episodes.

    Every "forces review" test asserts both ``reason == 'awaiting_review'``
    and ``can_promote is False``, so a decision that carries the right reason
    but still allows promotion cannot slip through.
    """

    def test_high_weight_forces_awaiting_review(self, monkeypatch):
        """weight=0.85 (>= threshold) must go through human review; Stage 4 cannot be skipped."""
        from services.learning_pipeline import PromotionGate, STAGE_4_HUMAN_REVIEW_WEIGHT
        ep = _fake_episode(quality_score=0.9, weight=0.85)
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)
        assert decision.can_promote is False
        assert decision.reason == 'awaiting_review'
        # The detail message is expected to quote the threshold value.
        assert str(STAGE_4_HUMAN_REVIEW_WEIGHT) in (decision.detail or '')

    def test_high_weight_at_threshold_forces_review(self, monkeypatch):
        """weight exactly at the threshold also requires review (>=, not >)."""
        from services.learning_pipeline import PromotionGate, STAGE_4_HUMAN_REVIEW_WEIGHT
        # Use the real constant so the boundary case keeps tracking the
        # configured threshold, mirroring the Stage 1 threshold test.
        ep = _fake_episode(quality_score=0.9, weight=STAGE_4_HUMAN_REVIEW_WEIGHT)
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)
        assert decision.can_promote is False
        assert decision.reason == 'awaiting_review'

    def test_low_weight_auto_promoted(self, monkeypatch):
        """weight=0.79, just under the threshold, is promoted automatically."""
        from services.learning_pipeline import PromotionGate
        ep = _fake_episode(quality_score=0.9, weight=0.79)
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)
        assert decision.can_promote is True
        assert decision.reason == 'approved'

    def test_high_weight_user_feedback_forces_review(self, monkeypatch):
        """A user_feedback episode with weight=0.9 must go through human review."""
        from services.learning_pipeline import PromotionGate
        ep = _fake_episode(
            episode_type='user_feedback',
            quality_score=1.0, weight=0.9,
        )
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)
        assert decision.can_promote is False
        assert decision.reason == 'awaiting_review'


# ─────────────────────────────────────────────────────────────────────────────
# promote() DB 操作
# ─────────────────────────────────────────────────────────────────────────────
class TestPromote:
    """DB-facing behaviour of PromotionGate.promote(), run against a mocked session."""

    @staticmethod
    def _install_session(monkeypatch, returned_id):
        """Patch get_session with a mock whose fetchone() row indexes to returned_id."""
        row = MagicMock()
        row.__getitem__.return_value = returned_id
        session = MagicMock()
        session.execute.return_value.fetchone.return_value = row
        monkeypatch.setattr('database.manager.get_session', lambda: session)
        return session

    def test_promote_inserts_ai_insights_and_updates_episode(self, monkeypatch):
        """promote() returns the new insight id, executes at least twice, commits once."""
        from services.learning_pipeline import PromotionGate

        episode = _fake_episode(
            id_=1, episode_type='llm_response',
            quality_score=0.9, weight=0.5,
            distilled_text='本週業績漲 5%',
        )
        _patch_load_episode(monkeypatch, episode)
        # Simulates INSERT ... RETURNING id = 555.
        session = self._install_session(monkeypatch, 555)

        new_insight_id = PromotionGate().promote(1)

        assert new_insight_id == 555
        # One INSERT plus one UPDATE -> execute runs at least twice.
        assert session.execute.call_count >= 2
        session.commit.assert_called_once()

    def test_promote_records_human_approver_hash(self, monkeypatch):
        """The human_approver value is forwarded in the second execute's params."""
        from services.learning_pipeline import PromotionGate

        _patch_load_episode(monkeypatch, _fake_episode(quality_score=0.9, weight=0.5))
        session = self._install_session(monkeypatch, 555)

        gate = PromotionGate()
        assert gate.promote(1, human_approver='abc123ef') == 555

        # Index 1 = second execute call; its second positional arg is the params dict.
        second_call = session.execute.call_args_list[1]
        bound_params = second_call.args[1]
        assert bound_params['human_approver'] == 'abc123ef'

    def test_promote_episode_not_found_returns_none(self, monkeypatch):
        """promote() returns None when the episode cannot be loaded."""
        from services.learning_pipeline import PromotionGate

        _patch_load_episode(monkeypatch, None)

        assert PromotionGate().promote(99999) is None

    def test_promote_db_failure_returns_none(self, monkeypatch):
        """promote() returns None when the session's execute raises."""
        from services.learning_pipeline import PromotionGate

        _patch_load_episode(monkeypatch, _fake_episode(quality_score=0.9, weight=0.5))

        broken_session = MagicMock()
        broken_session.execute.side_effect = RuntimeError("db down")
        monkeypatch.setattr('database.manager.get_session', lambda: broken_session)

        assert PromotionGate().promote(1) is None


# ─────────────────────────────────────────────────────────────────────────────
# reject() / mark_awaiting_review()
# ─────────────────────────────────────────────────────────────────────────────
class TestRejectAndMark:
    """PromotionGate.reject() and mark_awaiting_review() against a mocked session."""

    def test_reject_valid_reason(self, monkeypatch):
        """A known reason is accepted and the session is committed once."""
        from services.learning_pipeline import PromotionGate

        session = MagicMock()
        monkeypatch.setattr('database.manager.get_session', lambda: session)

        outcome = PromotionGate().reject(
            1, 'rejected_quality', detail='quality 0.3 < 0.7',
        )

        assert outcome is True
        session.commit.assert_called_once()

    def test_reject_records_human_approver_hash(self, monkeypatch):
        """The human_approver value is forwarded in the execute params."""
        from services.learning_pipeline import PromotionGate

        session = MagicMock()
        monkeypatch.setattr('database.manager.get_session', lambda: session)

        outcome = PromotionGate().reject(
            1,
            'rejected_human',
            detail='manual reject',
            human_approver='abc123ef',
        )
        assert outcome is True

        # call_args is the most recent execute call; args[1] is its params dict.
        bound_params = session.execute.call_args.args[1]
        assert bound_params['human_approver'] == 'abc123ef'

    def test_reject_invalid_reason_returns_false(self):
        """An unknown reason string makes reject() return False."""
        from services.learning_pipeline import PromotionGate

        assert PromotionGate().reject(1, 'invalid_reason') is False

    def test_reject_db_failure_returns_false(self, monkeypatch):
        """reject() returns False when the session's execute raises."""
        from services.learning_pipeline import PromotionGate

        broken_session = MagicMock()
        broken_session.execute.side_effect = RuntimeError("db down")
        monkeypatch.setattr('database.manager.get_session', lambda: broken_session)

        assert PromotionGate().reject(1, 'rejected_quality') is False

    def test_mark_awaiting_review_runs_update(self, monkeypatch):
        """mark_awaiting_review() executes exactly once and commits exactly once."""
        from services.learning_pipeline import PromotionGate

        session = MagicMock()
        monkeypatch.setattr('database.manager.get_session', lambda: session)

        outcome = PromotionGate().mark_awaiting_review(1)

        assert outcome is True
        session.execute.assert_called_once()
        session.commit.assert_called_once()


# ─────────────────────────────────────────────────────────────────────────────
# 完整 4 階段流程串接（高 weight 必經人工）
# ─────────────────────────────────────────────────────────────────────────────
class TestEndToEndFlow:
    """Full four-stage walk-through: a high-weight episode must reach human review."""

    def test_user_feedback_high_quality_must_await_review(self, monkeypatch):
        """Core case for v5.0 guard-rail #1.

        A user_feedback episode with weight=0.9 is forced into
        awaiting_review; quality=1.0, no hallucination and no duplicate are
        still not enough for direct promotion.
        """
        from services.learning_pipeline import PromotionGate

        episode_fields = {
            'episode_type': 'user_feedback',
            # Stage 1 passes.
            'quality_score': 1.0,
            # Contains concrete numbers -> Stage 2 passes.
            'distilled_text': '2026-04-29 業績漲 12%，廣告 ROI 4.2 倍。',
            # Stage 3 is skipped (no embedding).
            'embedding': None,
            # >= 0.8 -> Stage 4 is mandatory.
            'weight': 0.9,
        }
        _patch_load_episode(monkeypatch, _fake_episode(**episode_fields))

        verdict = PromotionGate().can_promote(1)

        # Hard rule: high weight always goes through a human.
        assert verdict.can_promote is False
        assert verdict.reason == 'awaiting_review'