Files
ewoooc/tests/test_promotion_gate.py
OoO f2b91beb61
All checks were successful
CD Pipeline / deploy (push) Successful in 58s
記錄 RAG 人工審核者 hash
2026-05-13 09:13:29 +08:00

427 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
tests/test_promotion_gate.py
Operation Ollama-First v5.0 / Phase 11 — PromotionGate 4 階段晉升閘單元測試
涵蓋:
Stage 1: quality_score < 0.7 → rejected_quality
Stage 2: 規則引擎幻覺檢測hedge words / 矛盾)
Stage 3: cosine similarity >= 0.95 → rejected_duplicate
Stage 4: weight >= 0.8 強制 awaiting_review不能跳
promote() / reject() / mark_awaiting_review() DB 操作
"""
from __future__ import annotations
from unittest.mock import MagicMock
import pytest
# ─────────────────────────────────────────────────────────────────────────────
# Shared helpers
# ─────────────────────────────────────────────────────────────────────────────
def _fake_episode(
id_=1, episode_type='llm_response',
distilled_text='內容', quality_score=0.8, weight=0.5,
embedding=None, status='pending',
):
return {
'id': id_,
'episode_type': episode_type,
'distilled_text': distilled_text,
'quality_score': quality_score,
'weight': weight,
'embedding': embedding,
'promotion_status': status,
}
def _patch_load_episode(monkeypatch, episode):
    """Make ``PromotionGate._load_episode`` return ``episode`` without touching the DB.

    The replacement ignores the requested id. A falsy ``episode`` (e.g. ``None``)
    is returned as ``None``, which the tests use for the "not found" case.
    """
    from services.learning_pipeline import PromotionGate

    def _stub(episode_id):
        return episode or None

    monkeypatch.setattr(PromotionGate, '_load_episode', staticmethod(_stub))
# ─────────────────────────────────────────────────────────────────────────────
# Stage 1: quality_score
# ─────────────────────────────────────────────────────────────────────────────
class TestStage1Quality:
    """Stage 1: quality-score gate, plus the missing-episode path."""

    @staticmethod
    def _decide(monkeypatch, episode, episode_id=1):
        """Stub the episode loader and return the gate's decision for ``episode_id``."""
        from services.learning_pipeline import PromotionGate

        _patch_load_episode(monkeypatch, episode)
        return PromotionGate().can_promote(episode_id)

    def test_low_quality_rejected(self, monkeypatch):
        verdict = self._decide(monkeypatch, _fake_episode(quality_score=0.5))

        assert verdict.can_promote is False
        assert verdict.reason == 'rejected_quality'
        # The detail text is expected to carry the score with three decimals.
        assert '0.500' in (verdict.detail or '')

    def test_quality_at_threshold_passes(self, monkeypatch):
        from services.learning_pipeline import STAGE_1_AUTO_QUALITY

        borderline = _fake_episode(quality_score=STAGE_1_AUTO_QUALITY, weight=0.5)
        verdict = self._decide(monkeypatch, borderline)

        # Clears stages 1 + 2 + 3 + 4 and is promoted automatically.
        assert verdict.can_promote is True
        assert verdict.reason == 'approved'

    def test_episode_not_found(self, monkeypatch):
        verdict = self._decide(monkeypatch, None, episode_id=99999)

        assert verdict.can_promote is False
        assert verdict.reason == 'rejected_quality'
        assert 'not found' in (verdict.detail or '')
# ─────────────────────────────────────────────────────────────────────────────
# Stage 2: hallucination detection
# ─────────────────────────────────────────────────────────────────────────────
class TestStage2Hallucination:
    """Stage 2: hedge-word and self-contradiction checks on the distilled text."""

    @staticmethod
    def _decide_for_text(monkeypatch, text, **episode_fields):
        """Run the gate on an episode with quality 0.8 and the given distilled text."""
        from services.learning_pipeline import PromotionGate

        sample = _fake_episode(
            quality_score=0.8, distilled_text=text, **episode_fields
        )
        _patch_load_episode(monkeypatch, sample)
        return PromotionGate().can_promote(1)

    def test_hedge_words_without_numbers_rejected(self, monkeypatch):
        verdict = self._decide_for_text(
            monkeypatch, '我猜本週業績可能會有點成長吧,也許不錯。'
        )

        assert verdict.can_promote is False
        assert verdict.reason == 'rejected_hallucination'

    def test_hedge_words_with_numbers_passes(self, monkeypatch):
        verdict = self._decide_for_text(
            monkeypatch, '我猜本週業績會漲 5.2%,根據過去 30 天平均。', weight=0.5
        )

        # Concrete numbers let it through Stage 2 → reaches Stage 4 → approved.
        assert verdict.can_promote is True

    def test_contradiction_rejected(self, monkeypatch):
        verdict = self._decide_for_text(monkeypatch, 'A是黑色。A是白色。')

        assert verdict.can_promote is False
        assert verdict.reason == 'rejected_hallucination'
        assert '自相矛盾' in (verdict.detail or '')

    def test_clean_text_passes(self, monkeypatch):
        verdict = self._decide_for_text(
            monkeypatch, '本週業績漲 5.2%,建議聚焦保濕品類。', weight=0.5
        )

        assert verdict.can_promote is True
        assert verdict.reason == 'approved'
# ─────────────────────────────────────────────────────────────────────────────
# Stage 3: de-duplication
# ─────────────────────────────────────────────────────────────────────────────
class TestStage3Dedup:
    """Stage 3: reject episodes whose nearest existing insight is too similar."""

    @staticmethod
    def _install_session(monkeypatch, *, row_id, similarity):
        """Patch ``database.manager.get_session`` with a mock whose query yields one row.

        The row exposes ``id`` and ``similarity`` attributes, which is what the
        assertions below rely on. Returns the mock session.
        """
        nearest = MagicMock()
        nearest.id = row_id
        nearest.similarity = similarity
        session = MagicMock()
        session.execute.return_value.fetchone.return_value = nearest
        monkeypatch.setattr('database.manager.get_session', lambda: session)
        return session

    def test_high_similarity_rejected(self, monkeypatch):
        from services.learning_pipeline import PromotionGate, STAGE_3_DEDUP_THRESHOLD
        ep = _fake_episode(
            quality_score=0.8, weight=0.5,
            distilled_text='本週業績漲 5%',
            embedding=[0.1] * 1024,  # simulate a non-empty embedding
        )
        _patch_load_episode(monkeypatch, ep)
        # Simulate the DB reporting a similarity just above the dedup threshold.
        # Deriving it from the constant (capped at 1.0, the cosine maximum) keeps
        # this test meaningful if the threshold is ever retuned.
        self._install_session(
            monkeypatch,
            row_id=999,
            similarity=min(1.0, STAGE_3_DEDUP_THRESHOLD + 0.01),
        )

        decision = PromotionGate().can_promote(1)

        assert decision.can_promote is False
        assert decision.reason == 'rejected_duplicate'
        assert decision.similar_insight_id == 999

    def test_low_similarity_passes(self, monkeypatch):
        from services.learning_pipeline import PromotionGate
        ep = _fake_episode(
            quality_score=0.8, weight=0.5,
            distilled_text='全新內容', embedding=[0.1] * 1024,
        )
        _patch_load_episode(monkeypatch, ep)
        self._install_session(monkeypatch, row_id=999, similarity=0.5)

        decision = PromotionGate().can_promote(1)

        assert decision.can_promote is True

    def test_null_embedding_skips_dedup(self, monkeypatch):
        """No embedding yet at distillation time → Stage 3 is skipped and does not block promotion."""
        from services.learning_pipeline import PromotionGate
        ep = _fake_episode(quality_score=0.8, weight=0.5, embedding=None)
        _patch_load_episode(monkeypatch, ep)
        # The DB must not be touched at all: spy on the session factory.
        session_factory = MagicMock()
        monkeypatch.setattr('database.manager.get_session', session_factory)

        decision = PromotionGate().can_promote(1)

        assert decision.can_promote is True
        session_factory.assert_not_called()

    def test_dedup_query_failure_passes(self, monkeypatch):
        """DB query failure → treated as a pass (so a DB outage does not block promotion)."""
        from services.learning_pipeline import PromotionGate
        ep = _fake_episode(
            quality_score=0.8, weight=0.5,
            embedding=[0.1] * 1024,
        )
        _patch_load_episode(monkeypatch, ep)
        broken_session = MagicMock()
        broken_session.execute.side_effect = RuntimeError("db down")
        monkeypatch.setattr('database.manager.get_session', lambda: broken_session)

        decision = PromotionGate().can_promote(1)

        assert decision.can_promote is True
# ─────────────────────────────────────────────────────────────────────────────
# Stage 4: mandatory human review (v5.0 guardrail #1 core)
# ─────────────────────────────────────────────────────────────────────────────
class TestStage4HumanReview:
    """Stage 4: high-weight episodes must wait for human review instead of auto-promoting."""

    @staticmethod
    def _decide(monkeypatch, episode):
        """Stub the episode loader and return the gate's decision for episode 1."""
        from services.learning_pipeline import PromotionGate

        _patch_load_episode(monkeypatch, episode)
        return PromotionGate().can_promote(1)

    def test_high_weight_forces_awaiting_review(self, monkeypatch):
        """weight=0.85 (>= 0.8) must go through human review; Stage 4 cannot be skipped."""
        from services.learning_pipeline import STAGE_4_HUMAN_REVIEW_WEIGHT

        decision = self._decide(
            monkeypatch, _fake_episode(quality_score=0.9, weight=0.85)
        )

        assert decision.can_promote is False
        assert decision.reason == 'awaiting_review'
        # The detail text is expected to mention the configured threshold.
        assert str(STAGE_4_HUMAN_REVIEW_WEIGHT) in (decision.detail or '')

    def test_high_weight_at_threshold_forces_review(self, monkeypatch):
        """weight exactly 0.8 must also go to human review (>=, not >)."""
        decision = self._decide(
            monkeypatch, _fake_episode(quality_score=0.9, weight=0.8)
        )

        # Check the flag as well as the reason, so a boundary regression that
        # reports 'awaiting_review' while still allowing promotion is caught.
        assert decision.can_promote is False
        assert decision.reason == 'awaiting_review'

    def test_low_weight_auto_promoted(self, monkeypatch):
        decision = self._decide(
            monkeypatch, _fake_episode(quality_score=0.9, weight=0.79)
        )

        assert decision.can_promote is True
        assert decision.reason == 'approved'

    def test_high_weight_user_feedback_forces_review(self, monkeypatch):
        """user_feedback episode_type defaults to weight=0.9 → human review is mandatory."""
        decision = self._decide(
            monkeypatch,
            _fake_episode(
                episode_type='user_feedback',
                quality_score=1.0, weight=0.9,
            ),
        )

        assert decision.can_promote is False
        assert decision.reason == 'awaiting_review'
# ─────────────────────────────────────────────────────────────────────────────
# promote() DB operations
# ─────────────────────────────────────────────────────────────────────────────
class TestPromote:
    """promote(): returns the new insight id on success and ``None`` on failure."""

    @staticmethod
    def _session_returning_id(monkeypatch, new_id=555):
        """Patch ``database.manager.get_session`` with a mock whose fetched row indexes to ``new_id``."""
        returned_row = MagicMock()
        returned_row.__getitem__.return_value = new_id
        session = MagicMock()
        session.execute.return_value.fetchone.return_value = returned_row
        monkeypatch.setattr('database.manager.get_session', lambda: session)
        return session

    def test_promote_inserts_ai_insights_and_updates_episode(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        _patch_load_episode(
            monkeypatch,
            _fake_episode(
                id_=1, episode_type='llm_response',
                quality_score=0.9, weight=0.5,
                distilled_text='本週業績漲 5%',
            ),
        )
        # Simulate INSERT ... RETURNING id = 555.
        session = self._session_returning_id(monkeypatch, 555)

        new_insight_id = PromotionGate().promote(1)

        assert new_insight_id == 555
        # INSERT and UPDATE should each have run (execute called at least twice).
        assert session.execute.call_count >= 2
        session.commit.assert_called_once()

    def test_promote_records_human_approver_hash(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        _patch_load_episode(monkeypatch, _fake_episode(quality_score=0.9, weight=0.5))
        session = self._session_returning_id(monkeypatch, 555)

        assert PromotionGate().promote(1, human_approver='abc123ef') == 555

        # The second execute() call is inspected; its second positional
        # argument is the bound-parameter mapping.
        second_call = session.execute.call_args_list[1]
        bound_params = second_call.args[1]
        assert bound_params['human_approver'] == 'abc123ef'

    def test_promote_episode_not_found_returns_none(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        _patch_load_episode(monkeypatch, None)

        assert PromotionGate().promote(99999) is None

    def test_promote_db_failure_returns_none(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        _patch_load_episode(monkeypatch, _fake_episode(quality_score=0.9, weight=0.5))
        failing_session = MagicMock()
        failing_session.execute.side_effect = RuntimeError("db down")
        monkeypatch.setattr('database.manager.get_session', lambda: failing_session)

        assert PromotionGate().promote(1) is None
# ─────────────────────────────────────────────────────────────────────────────
# reject() / mark_awaiting_review()
# ─────────────────────────────────────────────────────────────────────────────
class TestRejectAndMark:
    """reject() and mark_awaiting_review(): return values and session usage."""

    @staticmethod
    def _install_session(monkeypatch, session=None):
        """Patch ``database.manager.get_session`` to hand out ``session`` (a fresh mock by default)."""
        if session is None:
            session = MagicMock()
        monkeypatch.setattr('database.manager.get_session', lambda: session)
        return session

    def test_reject_valid_reason(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        session = self._install_session(monkeypatch)

        outcome = PromotionGate().reject(
            1, 'rejected_quality', detail='quality 0.3 < 0.7'
        )

        assert outcome is True
        session.commit.assert_called_once()

    def test_reject_records_human_approver_hash(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        session = self._install_session(monkeypatch)

        outcome = PromotionGate().reject(
            1,
            'rejected_human',
            detail='manual reject',
            human_approver='abc123ef',
        )

        assert outcome is True
        # Second positional argument of the last execute() call is the
        # bound-parameter mapping.
        bound_params = session.execute.call_args.args[1]
        assert bound_params['human_approver'] == 'abc123ef'

    def test_reject_invalid_reason_returns_false(self):
        from services.learning_pipeline import PromotionGate

        # No session is patched in for this case.
        assert PromotionGate().reject(1, 'invalid_reason') is False

    def test_reject_db_failure_returns_false(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        failing_session = MagicMock()
        failing_session.execute.side_effect = RuntimeError("db down")
        self._install_session(monkeypatch, failing_session)

        assert PromotionGate().reject(1, 'rejected_quality') is False

    def test_mark_awaiting_review_runs_update(self, monkeypatch):
        from services.learning_pipeline import PromotionGate

        session = self._install_session(monkeypatch)

        outcome = PromotionGate().mark_awaiting_review(1)

        assert outcome is True
        session.execute.assert_called_once()
        session.commit.assert_called_once()
# ─────────────────────────────────────────────────────────────────────────────
# Full 4-stage flow end to end (high weight must go through human review)
# ─────────────────────────────────────────────────────────────────────────────
class TestEndToEndFlow:
    """End-to-end pass through all four stages for a high-weight episode."""

    def test_user_feedback_high_quality_must_await_review(self, monkeypatch):
        """v5.0 guardrail #1 core case: user_feedback with weight=0.9 forces awaiting_review.

        Even with quality=1.0, no hallucination and no duplicate, the episode
        must not be promoted directly.
        """
        from services.learning_pipeline import PromotionGate

        fields = {
            'episode_type': 'user_feedback',
            'quality_score': 1.0,  # passes Stage 1
            # Contains concrete numbers → passes Stage 2.
            'distilled_text': '2026-04-29 業績漲 12%,廣告 ROI 4.2 倍。',
            'embedding': None,  # Stage 3 skipped (no embedding)
            'weight': 0.9,  # >= 0.8 → Stage 4 is mandatory
        }
        _patch_load_episode(monkeypatch, _fake_episode(**fields))

        verdict = PromotionGate().can_promote(1)

        # Iron rule: high weight must go through human review.
        assert verdict.can_promote is False
        assert verdict.reason == 'awaiting_review'