#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/test_promotion_gate.py
Operation Ollama-First v5.0 / Phase 11 — unit tests for the 4-stage PromotionGate.

Covers:
  Stage 1: quality_score < 0.7 → rejected_quality
  Stage 2: rule-engine hallucination detection (hedge words / contradictions)
  Stage 3: cosine similarity >= 0.95 → rejected_duplicate
  Stage 4: weight >= 0.8 forces awaiting_review (cannot be skipped)
  promote() / reject() / mark_awaiting_review() DB operations
"""

from __future__ import annotations

from unittest.mock import MagicMock

import pytest


# ─────────────────────────────────────────────────────────────────────────────
# Shared helpers
# ─────────────────────────────────────────────────────────────────────────────

def _fake_episode(
    id_: int = 1,
    episode_type: str = 'llm_response',
    distilled_text: str = '內容',
    quality_score: float = 0.8,
    weight: float = 0.5,
    embedding: list[float] | None = None,
    status: str = 'pending',
) -> dict[str, object]:
    """Build the episode dict that the patched ``_load_episode`` hands back.

    Every argument maps one-to-one onto a key of the returned dict; ``id_``
    becomes ``'id'`` and ``status`` becomes ``'promotion_status'``.
    """
    return {
        'id': id_,
        'episode_type': episode_type,
        'distilled_text': distilled_text,
        'quality_score': quality_score,
        'weight': weight,
        'embedding': embedding,
        'promotion_status': status,
    }


def _patch_load_episode(
    monkeypatch: pytest.MonkeyPatch,
    episode: dict[str, object] | None,
) -> None:
    """Make ``PromotionGate._load_episode`` return ``episode`` without a DB.

    A falsy ``episode`` (``None`` or an empty dict) is reported as ``None``,
    which the tests use to simulate "episode not found".
    """
    from services.learning_pipeline import PromotionGate

    monkeypatch.setattr(
        PromotionGate,
        '_load_episode',
        staticmethod(lambda episode_id: episode or None),
    )


def _patch_get_session(
    monkeypatch: pytest.MonkeyPatch,
    *,
    fetchone_row: object | None = None,
    execute_error: BaseException | None = None,
) -> MagicMock:
    """Replace ``database.manager.get_session`` with a factory for one mock.

    Args:
        monkeypatch: The pytest ``monkeypatch`` fixture of the calling test.
        fetchone_row: When given, ``session.execute(...).fetchone()`` returns it.
        execute_error: When given, ``session.execute(...)`` raises it.

    Returns:
        The ``MagicMock`` session, so the test can inspect the recorded calls.
    """
    fake_session = MagicMock()
    if execute_error is not None:
        fake_session.execute.side_effect = execute_error
    if fetchone_row is not None:
        fake_session.execute.return_value.fetchone.return_value = fetchone_row
    monkeypatch.setattr('database.manager.get_session', lambda: fake_session)
    return fake_session


# ─────────────────────────────────────────────────────────────────────────────
# Stage 1: quality_score
# ─────────────────────────────────────────────────────────────────────────────

class TestStage1Quality:
    """Stage 1: decisions driven by ``quality_score`` and missing episodes."""

    def test_low_quality_rejected(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        ep = _fake_episode(quality_score=0.5)
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)

        assert decision.can_promote is False
        assert decision.reason == 'rejected_quality'
        assert '0.500' in (decision.detail or '')

    def test_quality_at_threshold_passes(self, monkeypatch):
        from services.learning_pipeline import PromotionGate, STAGE_1_AUTO_QUALITY

        ep = _fake_episode(quality_score=STAGE_1_AUTO_QUALITY, weight=0.5)
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)

        # Passes Stage 1 + 2 + 3 + 4 and is auto-promoted.
        assert decision.can_promote is True
        assert decision.reason == 'approved'

    def test_episode_not_found(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        _patch_load_episode(monkeypatch, None)

        gate = PromotionGate()
        decision = gate.can_promote(99999)

        assert decision.can_promote is False
        assert decision.reason == 'rejected_quality'
        assert 'not found' in (decision.detail or '')


# ─────────────────────────────────────────────────────────────────────────────
# Stage 2: hallucination detection
# ─────────────────────────────────────────────────────────────────────────────

class TestStage2Hallucination:
    """Stage 2: hedge-word and self-contradiction checks on the text."""

    def test_hedge_words_without_numbers_rejected(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        ep = _fake_episode(
            quality_score=0.8,
            distilled_text='我猜本週業績可能會有點成長吧,也許不錯。',
        )
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)

        assert decision.can_promote is False
        assert decision.reason == 'rejected_hallucination'

    def test_hedge_words_with_numbers_passes(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        ep = _fake_episode(
            quality_score=0.8,
            weight=0.5,
            distilled_text='我猜本週業績會漲 5.2%,根據過去 30 天平均。',
        )
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)

        # Passes Stage 2 (concrete numbers present) → reaches Stage 4 → approved.
        assert decision.can_promote is True

    def test_contradiction_rejected(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        ep = _fake_episode(
            quality_score=0.8,
            distilled_text='A是黑色。A是白色。',
        )
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)

        assert decision.can_promote is False
        assert decision.reason == 'rejected_hallucination'
        assert '自相矛盾' in (decision.detail or '')

    def test_clean_text_passes(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        ep = _fake_episode(
            quality_score=0.8,
            weight=0.5,
            distilled_text='本週業績漲 5.2%,建議聚焦保濕品類。',
        )
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)

        assert decision.can_promote is True
        assert decision.reason == 'approved'


# ─────────────────────────────────────────────────────────────────────────────
# Stage 3: de-duplication
# ─────────────────────────────────────────────────────────────────────────────

class TestStage3Dedup:
    """Stage 3: similarity lookup against a mocked DB session."""

    def test_high_similarity_rejected(self, monkeypatch):
        from services.learning_pipeline import PromotionGate, STAGE_3_DEDUP_THRESHOLD

        ep = _fake_episode(
            quality_score=0.8,
            weight=0.5,
            distilled_text='本週業績漲 5%。',
            embedding=[0.1] * 1024,  # simulate a non-empty embedding
        )
        _patch_load_episode(monkeypatch, ep)

        # Simulate the DB returning a match just above the dedup threshold.
        # Derived from the constant so the test follows the threshold if it is
        # retuned; capped at 1.0, the largest possible cosine similarity.
        fake_row = MagicMock()
        fake_row.id = 999
        fake_row.similarity = min(1.0, STAGE_3_DEDUP_THRESHOLD + 0.01)
        _patch_get_session(monkeypatch, fetchone_row=fake_row)

        gate = PromotionGate()
        decision = gate.can_promote(1)

        assert decision.can_promote is False
        assert decision.reason == 'rejected_duplicate'
        assert decision.similar_insight_id == 999

    def test_low_similarity_passes(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        ep = _fake_episode(
            quality_score=0.8,
            weight=0.5,
            distilled_text='全新內容',
            embedding=[0.1] * 1024,
        )
        _patch_load_episode(monkeypatch, ep)

        fake_row = MagicMock()
        fake_row.id = 999
        fake_row.similarity = 0.5
        _patch_get_session(monkeypatch, fetchone_row=fake_row)

        gate = PromotionGate()
        decision = gate.can_promote(1)

        assert decision.can_promote is True

    def test_null_embedding_skips_dedup(self, monkeypatch):
        """Not embedded yet at distillation time → skip Stage 3, do not block."""
        from services.learning_pipeline import PromotionGate

        ep = _fake_episode(quality_score=0.8, weight=0.5, embedding=None)
        _patch_load_episode(monkeypatch, ep)

        # The DB must not be touched; count every get_session() call.
        called = {'count': 0}

        def _spy_session():
            called['count'] += 1
            return MagicMock()

        monkeypatch.setattr('database.manager.get_session', _spy_session)

        gate = PromotionGate()
        decision = gate.can_promote(1)

        assert decision.can_promote is True
        assert called['count'] == 0

    def test_dedup_query_failure_passes(self, monkeypatch):
        """DB query failure → treated as a pass (a DB outage must not block)."""
        from services.learning_pipeline import PromotionGate

        ep = _fake_episode(
            quality_score=0.8,
            weight=0.5,
            embedding=[0.1] * 1024,
        )
        _patch_load_episode(monkeypatch, ep)
        _patch_get_session(monkeypatch, execute_error=RuntimeError("db down"))

        gate = PromotionGate()
        decision = gate.can_promote(1)

        assert decision.can_promote is True


# ─────────────────────────────────────────────────────────────────────────────
# Stage 4: mandatory human review (core of v5.0 guardrail #1)
# ─────────────────────────────────────────────────────────────────────────────

class TestStage4HumanReview:
    """Stage 4: high-weight episodes must wait for human review."""

    def test_high_weight_forces_awaiting_review(self, monkeypatch):
        """weight=0.85 (>= 0.8) must go through human review; no skipping Stage 4."""
        from services.learning_pipeline import PromotionGate, STAGE_4_HUMAN_REVIEW_WEIGHT

        ep = _fake_episode(quality_score=0.9, weight=0.85)
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)

        assert decision.can_promote is False
        assert decision.reason == 'awaiting_review'
        assert str(STAGE_4_HUMAN_REVIEW_WEIGHT) in (decision.detail or '')

    def test_high_weight_at_threshold_forces_review(self, monkeypatch):
        """A weight exactly at the threshold also needs review (>=, not >)."""
        from services.learning_pipeline import PromotionGate, STAGE_4_HUMAN_REVIEW_WEIGHT

        # Use the constant itself so this keeps exercising the exact boundary
        # even if the threshold value is changed later.
        ep = _fake_episode(quality_score=0.9, weight=STAGE_4_HUMAN_REVIEW_WEIGHT)
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)

        assert decision.can_promote is False
        assert decision.reason == 'awaiting_review'

    def test_low_weight_auto_promoted(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        ep = _fake_episode(quality_score=0.9, weight=0.79)
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)

        assert decision.can_promote is True
        assert decision.reason == 'approved'

    def test_high_weight_user_feedback_forces_review(self, monkeypatch):
        """user_feedback episodes default to weight=0.9 → human review required."""
        from services.learning_pipeline import PromotionGate

        ep = _fake_episode(
            episode_type='user_feedback',
            quality_score=1.0,
            weight=0.9,
        )
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)

        assert decision.can_promote is False
        assert decision.reason == 'awaiting_review'


# ─────────────────────────────────────────────────────────────────────────────
# promote() DB operations
# ─────────────────────────────────────────────────────────────────────────────

class TestPromote:
    """promote(): return value and the calls made on the mocked session."""

    def test_promote_inserts_ai_insights_and_updates_episode(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        ep = _fake_episode(
            id_=1,
            episode_type='llm_response',
            quality_score=0.9,
            weight=0.5,
            distilled_text='本週業績漲 5%',
        )
        _patch_load_episode(monkeypatch, ep)

        # Simulate INSERT ... RETURNING id = 555.
        fake_row = MagicMock()
        fake_row.__getitem__.return_value = 555
        fake_session = _patch_get_session(monkeypatch, fetchone_row=fake_row)

        gate = PromotionGate()
        insight_id = gate.promote(1)

        assert insight_id == 555
        # INSERT and UPDATE each run once, so execute is called at least twice.
        assert fake_session.execute.call_count >= 2
        fake_session.commit.assert_called_once()

    def test_promote_records_human_approver_hash(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        ep = _fake_episode(quality_score=0.9, weight=0.5)
        _patch_load_episode(monkeypatch, ep)

        fake_row = MagicMock()
        fake_row.__getitem__.return_value = 555
        fake_session = _patch_get_session(monkeypatch, fetchone_row=fake_row)

        gate = PromotionGate()

        assert gate.promote(1, human_approver='abc123ef') == 555
        # Inspect the bound parameters of the second execute() call.
        update_params = fake_session.execute.call_args_list[1].args[1]
        assert update_params['human_approver'] == 'abc123ef'

    def test_promote_episode_not_found_returns_none(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        _patch_load_episode(monkeypatch, None)

        gate = PromotionGate()

        assert gate.promote(99999) is None

    def test_promote_db_failure_returns_none(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        ep = _fake_episode(quality_score=0.9, weight=0.5)
        _patch_load_episode(monkeypatch, ep)
        _patch_get_session(monkeypatch, execute_error=RuntimeError("db down"))

        gate = PromotionGate()

        assert gate.promote(1) is None


# ─────────────────────────────────────────────────────────────────────────────
# reject() / mark_awaiting_review()
# ─────────────────────────────────────────────────────────────────────────────

class TestRejectAndMark:
    """reject() and mark_awaiting_review() against a mocked session."""

    def test_reject_valid_reason(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        fake_session = _patch_get_session(monkeypatch)

        gate = PromotionGate()
        ok = gate.reject(1, 'rejected_quality', detail='quality 0.3 < 0.7')

        assert ok is True
        fake_session.commit.assert_called_once()

    def test_reject_records_human_approver_hash(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        fake_session = _patch_get_session(monkeypatch)

        gate = PromotionGate()
        ok = gate.reject(
            1,
            'rejected_human',
            detail='manual reject',
            human_approver='abc123ef',
        )

        assert ok is True
        # Inspect the bound parameters of the most recent execute() call.
        params = fake_session.execute.call_args.args[1]
        assert params['human_approver'] == 'abc123ef'

    def test_reject_invalid_reason_returns_false(self):
        from services.learning_pipeline import PromotionGate

        gate = PromotionGate()

        assert gate.reject(1, 'invalid_reason') is False

    def test_reject_db_failure_returns_false(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        _patch_get_session(monkeypatch, execute_error=RuntimeError("db down"))

        gate = PromotionGate()

        assert gate.reject(1, 'rejected_quality') is False

    def test_mark_awaiting_review_runs_update(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        fake_session = _patch_get_session(monkeypatch)

        gate = PromotionGate()
        ok = gate.mark_awaiting_review(1)

        assert ok is True
        fake_session.execute.assert_called_once()
        fake_session.commit.assert_called_once()


# ─────────────────────────────────────────────────────────────────────────────
# Full 4-stage flow chained together (high weight must pass human review)
# ─────────────────────────────────────────────────────────────────────────────

class TestEndToEndFlow:
    """One episode evaluated against all four stages in a single call."""

    def test_user_feedback_high_quality_must_await_review(self, monkeypatch):
        """Core case of v5.0 guardrail #1.

        A user_feedback episode with weight=0.9 is forced into awaiting_review;
        even with quality=1.0, no hallucination and no duplicate it must not
        be promoted directly.
        """
        from services.learning_pipeline import PromotionGate

        ep = _fake_episode(
            episode_type='user_feedback',
            quality_score=1.0,  # passes Stage 1
            distilled_text='2026-04-29 業績漲 12%,廣告 ROI 4.2 倍。',  # has numbers → passes Stage 2
            embedding=None,  # Stage 3 skipped (no embedding)
            weight=0.9,  # >= 0.8 → forced into Stage 4
        )
        _patch_load_episode(monkeypatch, ep)

        gate = PromotionGate()
        decision = gate.can_promote(1)

        # Hard rule: high weight always goes through human review.
        assert decision.can_promote is False
        assert decision.reason == 'awaiting_review'