# ewoooc/services/learning_pipeline.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
services/learning_pipeline.py
Operation Ollama-First v5.0 / Phase 11 — 自主學習管線

兩大核心：
  1. Distiller        — LLM/MCP 結果 → learning_episodes（pending）
  2. PromotionGate    — learning_episodes → ai_insights 4 階段晉升閘
                        + expire_stale_reviews() 24h 自動降級

Owen 強調的 v5.0 護欄 #1（ADR-033）：
  反饋按鈕從「選配」升級為「強制晉升門檻」
    Stage 1: quality_score >= 0.7
    Stage 2: 規則引擎幻覺檢測
    Stage 3: 與既有 insight cosine < 0.95（去重）
    Stage 4: weight >= 0.8 必經 Telegram 👍/👎 人工驗收（24h 無回應降級 0.5）

對應:
  - migrations/028_create_learning_episodes.sql
  - docs/adr/ADR-033 (Promotion Gate)
"""

from __future__ import annotations

import hashlib
import json
import logging
import os
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)


# ─────────────────────────────────────────────────────────────────────────────
# Constants (named to avoid magic numbers)
# ─────────────────────────────────────────────────────────────────────────────
STAGE_1_AUTO_QUALITY = 0.7              # Stage 1: minimum quality_score
STAGE_3_DEDUP_THRESHOLD = 0.95          # Stage 3: cosine similarity at/above this is a duplicate
STAGE_4_HUMAN_REVIEW_WEIGHT = 0.8       # Stage 4: weight >= this value forces human review
HUMAN_REVIEW_TIMEOUT_HOURS = 24         # age threshold for expiring awaiting_review rows
EXPIRED_FALLBACK_WEIGHT = 0.5           # weight assigned when a review expires

DISTILLED_TEXT_MAX_BYTES = 16384        # aligned with the octet_length<=16384 CHECK in migration 028

# Distillation quality rules
_MCP_MIN_LEN = 200                      # minimum length for an MCP result
_LLM_FREE_TEXT_MIN = 500                # minimum length for LLM free text
# Keyword hints; in this file they are consulted by the mcp_result branch of Distiller.distill.
_LLM_KEYWORDS_HINT = ['結論', '建議', '分析', '預測', '趨勢', '威脅', '機會']

# Hallucination rules (Stage 2)
_HALLUCINATION_HEDGE_WORDS = ['我猜', '可能是', '也許', '大概是', '應該是', '我猜測']
# "Concrete number" heuristic: at least one Arabic digit
_NUMBER_PATTERN = re.compile(r'\d')
# "Self-contradiction" heuristic: simplified check — the same subject assigned
# different values (v5.0 does no deep semantics, rule-only). The rule itself
# lives in _detect_simple_contradiction.

# Default weight after distilling user_feedback (high weight, always hits Stage 4 human review)
_USER_FEEDBACK_DEFAULT_WEIGHT = 0.9
_MANUAL_CURATED_DEFAULT_WEIGHT = 1.0


# ─────────────────────────────────────────────────────────────────────────────
# 容器
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class DistillResult:
    """Candidate episode produced by the Distiller (not yet written to the DB).

    The caller may adjust it further or enqueue it as-is.
    """

    episode_type: str            # mcp_result / llm_response / user_feedback / manual_curated
    distilled_text: str          # stripped (and, if oversized, byte-truncated) text
    quality_score: float         # heuristic score; compared against STAGE_1_AUTO_QUALITY later
    weight: float                # compared against STAGE_4_HUMAN_REVIEW_WEIGHT later
    source_table: Optional[str] = None   # passed through unchanged from the caller
    source_id: Optional[int] = None      # passed through unchanged from the caller


@dataclass
class GateDecision:
    """Outcome of the four-stage PromotionGate check."""

    can_promote: bool            # True only for the 'approved' outcome
    reason: str                  # approved / awaiting_review / rejected_*
    detail: Optional[str] = None # human-readable explanation when not approved
    similar_insight_id: Optional[int] = None  # id of the similar insight hit by Stage 3


# ─────────────────────────────────────────────────────────────────────────────
# Distiller
# ─────────────────────────────────────────────────────────────────────────────
class Distiller:
    """Rule-based distiller for LLM / MCP output.

    Scoring uses local heuristics only; no LLM is called from here (the
    original note gives the reason: avoid a circular, costly LLM loop).
    ``distill`` only builds a :class:`DistillResult`; the caller decides
    whether to enqueue it.
    """

    # Episode types accepted by ``distill``; anything else is logged and dropped.
    _EPISODE_TYPES = ('mcp_result', 'llm_response', 'user_feedback', 'manual_curated')
    # Score assumed when ``user_feedback_score`` is missing or unusable.
    _FALLBACK_FEEDBACK_SCORE = 3

    def distill(
        self,
        episode_type: str,
        raw_content: str,
        source_table: Optional[str] = None,
        source_id: Optional[int] = None,
        user_feedback_score: Optional[int] = None,
    ) -> Optional[DistillResult]:
        """Turn raw content into a scored candidate episode.

        Args:
            episode_type:        mcp_result / llm_response / user_feedback / manual_curated.
            raw_content:         Raw text (MCP result, LLM response, user message).
            source_table:        Origin table name, passed through unchanged.
            source_id:           Row id in ``source_table``, passed through unchanged.
            user_feedback_score: 1-5, used only for ``user_feedback``.
                                 5 maps to quality 1.0, 1 maps to 0.0.

        Returns:
            A DistillResult, or None when the content is empty/blank or the
            episode type is unknown.
        """
        if not raw_content or not raw_content.strip():
            return None

        cleaned = raw_content.strip()
        # Truncate by UTF-8 byte length (DISTILLED_TEXT_MAX_BYTES); a multi-byte
        # character cut in half at the boundary is dropped by errors='ignore'.
        encoded = cleaned.encode('utf-8', errors='replace')
        if len(encoded) > DISTILLED_TEXT_MAX_BYTES:
            cleaned = encoded[:DISTILLED_TEXT_MAX_BYTES].decode('utf-8', errors='ignore')

        et = (episode_type or '').strip()
        if et not in self._EPISODE_TYPES:
            logger.warning("[Distiller] unknown episode_type=%s", et)
            return None

        # ── Per-type quality rules ──
        if et == 'manual_curated':
            quality, weight = 1.0, _MANUAL_CURATED_DEFAULT_WEIGHT

        elif et == 'user_feedback':
            score = self._coerce_feedback_score(user_feedback_score)
            # 5 → 1.0 (high quality); 1 → 0.0 (negative sample)
            quality = round((score - 1) / 4.0, 3)
            weight = _USER_FEEDBACK_DEFAULT_WEIGHT

        elif et == 'mcp_result':
            # length >= _MCP_MIN_LEN and 2+ keyword hits → 0.8;
            # length >= _MCP_MIN_LEN only → 0.65; shorter → 0.5
            keyword_hits = sum(1 for kw in _LLM_KEYWORDS_HINT if kw in cleaned)
            if len(cleaned) >= _MCP_MIN_LEN and keyword_hits >= 2:
                quality = 0.8
            elif len(cleaned) >= _MCP_MIN_LEN:
                quality = 0.65
            else:
                quality = 0.5
            weight = 0.6

        else:  # llm_response
            quality, weight = self._distill_llm_response(cleaned)

        return DistillResult(
            episode_type=et,
            distilled_text=cleaned,
            quality_score=round(quality, 3),
            weight=round(weight, 3),
            source_table=source_table,
            source_id=source_id,
        )

    @classmethod
    def _coerce_feedback_score(cls, raw: Any) -> int:
        """Return ``raw`` as an int clamped to 1..5.

        Falsy input (None, 0, '') falls back to the mid-scale default, as the
        original ``int(score or 3)`` did. Values that cannot be converted
        (e.g. 'abc', NaN, inf) used to raise out of ``distill``; they now fall
        back to the same default with a warning.
        """
        try:
            score = int(raw or cls._FALLBACK_FEEDBACK_SCORE)
        except (TypeError, ValueError, OverflowError):
            logger.warning(
                "[Distiller] invalid user_feedback_score=%r, using %d",
                raw, cls._FALLBACK_FEEDBACK_SCORE,
            )
            score = cls._FALLBACK_FEEDBACK_SCORE
        return max(1, min(score, 5))

    @staticmethod
    def _distill_llm_response(text: str) -> Tuple[float, float]:
        """Score an LLM response; returns ``(quality, weight)``.

        Path 1 (structured): text that parses as a JSON object/array scores
        (0.9, 0.7) when non-empty and (0.7, 0.6) when empty.
        Path 2 (free text): at least _LLM_FREE_TEXT_MIN characters scores
        (0.65, 0.55) with a digit present, (0.55, 0.5) without; shorter text
        scores (0.4, 0.5).
        """
        stripped = text.strip()
        if stripped.startswith(('{', '[')):
            try:
                obj = json.loads(stripped)
            except (ValueError, RecursionError):
                # json.JSONDecodeError is a ValueError; RecursionError covers
                # pathologically nested input. Either way, treat as free text.
                pass
            else:
                # NOTE(review): the original also tested status == 'ok', but any
                # dict carrying a status key is already non-empty, so that test
                # never changed the outcome. A payload such as
                # {"status": "error"} therefore still scores 0.9 — confirm
                # whether that is intended.
                non_empty = isinstance(obj, (dict, list)) and len(obj) > 0
                if non_empty:
                    return 0.9, 0.7
                return 0.7, 0.6

        # Path 2: free text — judged by length and presence of a digit
        if len(text) >= _LLM_FREE_TEXT_MIN:
            if _NUMBER_PATTERN.search(text):
                return 0.65, 0.55
            return 0.55, 0.5

        # Too short: 0.4 is below the Stage 1 threshold
        return 0.4, 0.5


# ─────────────────────────────────────────────────────────────────────────────
# Learning Pipeline 主入口（enqueue + 整合 Distiller）
# ─────────────────────────────────────────────────────────────────────────────
class LearningPipeline:
    """Single entry point that distills content and queues it as an episode.

    Example:
        from services.learning_pipeline import learning_pipeline
        learning_pipeline.enqueue(
            episode_type='llm_response',
            raw_content=response_text,
            source_table='ai_calls', source_id=ai_call_id,
        )
    """

    def __init__(self):
        self.distiller = Distiller()

    def enqueue(
        self,
        episode_type: str,
        raw_content: str,
        source_table: Optional[str] = None,
        source_id: Optional[int] = None,
        user_feedback_score: Optional[int] = None,
    ) -> Optional[int]:
        """Distill the content and insert it into learning_episodes as 'pending'.

        Returns:
            The new learning_episodes.id, or None when distillation yields
            nothing or the DB write fails (the failure is logged, not raised).
        """
        candidate = self.distiller.distill(
            episode_type=episode_type,
            raw_content=raw_content,
            source_table=source_table,
            source_id=source_id,
            user_feedback_score=user_feedback_score,
        )
        if not candidate:
            return None

        try:
            new_id = self._insert_pending(candidate)
            # Queue an embedding job so Stage 3 dedup has a vector to compare.
            if new_id and candidate.distilled_text:
                self._schedule_embedding(new_id, candidate.distilled_text)
            return new_id
        except Exception as exc:
            logger.warning("[LearningPipeline] enqueue failed: %s", exc)
            return None

    @staticmethod
    def _insert_pending(candidate: DistillResult) -> Optional[int]:
        """Insert one pending row and return its id; rolls back and re-raises on error."""
        from sqlalchemy import text as sa_text
        from database.manager import get_session

        db = get_session()
        try:
            bind_values = {
                'episode_type':   candidate.episode_type,
                'source_table':   candidate.source_table,
                'source_id':      candidate.source_id,
                'distilled_text': candidate.distilled_text,
                'quality_score':  candidate.quality_score,
                'weight':         candidate.weight,
            }
            inserted = db.execute(
                    sa_text("""
                        INSERT INTO learning_episodes (
                            episode_type, source_table, source_id,
                            distilled_text, quality_score, weight,
                            promotion_status
                        ) VALUES (
                            :episode_type, :source_table, :source_id,
                            :distilled_text, :quality_score, :weight,
                            'pending'
                        )
                        RETURNING id
                    """),
                    bind_values,
            ).fetchone()
            db.commit()
            if inserted:
                return int(inserted[0])
            return None
        except Exception:
            db.rollback()
            raise
        finally:
            db.close()

    @staticmethod
    def _schedule_embedding(episode_id: int, distilled_text: str) -> None:
        """Best-effort hand-off to the embedding queue; never raises.

        A failure only means Stage 3 dedup will skip this episode (its
        embedding stays NULL), so it is logged at debug level and ignored.
        """
        try:
            from services.openclaw_learning_service import _enqueue_embedding
            _enqueue_embedding(
                target_table='learning_episodes',
                target_id=episode_id,
                # Cap the queued text at 4000 characters to keep the queue table small.
                text_content=distilled_text[:4000],
            )
        except Exception as embed_err:
            logger.debug("[LearningPipeline] embedding enqueue failed (non-blocking): %s", embed_err)


# ─────────────────────────────────────────────────────────────────────────────
# PromotionGate (v5.0 核心護欄 #1)
# ─────────────────────────────────────────────────────────────────────────────
class PromotionGate:
    """learning_episodes → ai_insights 4 階段晉升閘。

    Owen 強調：高權重必經人工驗收，避免幻覺污染 RAG。

    使用範例:
        gate = PromotionGate()
        decision = gate.can_promote(episode_id)
        if decision.can_promote:
            insight_id = gate.promote(episode_id)
        elif decision.reason == 'awaiting_review':
            # 推 Telegram 等 👍/👎
        else:
            gate.reject(episode_id, decision.reason, decision.detail)
    """

    # ──────────────────────────────────────────────────────────────────────
    # Stage 1: quality_score 自動門檻
    # ──────────────────────────────────────────────────────────────────────
    def _stage_1_quality(self, episode: Dict[str, Any]) -> Optional[GateDecision]:
        """quality_score < 0.7 → rejected_quality。"""
        q = float(episode.get('quality_score') or 0)
        if q < STAGE_1_AUTO_QUALITY:
            return GateDecision(
                can_promote=False,
                reason='rejected_quality',
                detail=f'quality_score={q:.3f} < {STAGE_1_AUTO_QUALITY}',
            )
        return None

    # ──────────────────────────────────────────────────────────────────────
    # Stage 2: 幻覺檢測（規則引擎）
    # ──────────────────────────────────────────────────────────────────────
    def _stage_2_hallucination(self, episode: Dict[str, Any]) -> Optional[GateDecision]:
        """規則：
            R1. 含 hedge words（'我猜' / '可能' / '也許'）+ 無具體數字 → suspect
            R2. 文本中含「A 是 X」又含「A 是 Y」（同主詞矛盾，rule-light） → suspect
            R3. （可擴充）引用 SKU 不在 product_master → suspect

        Returns: rejected_hallucination 或 None（通過）。
        """
        text = (episode.get('distilled_text') or '').strip()
        if not text:
            return None

        # R1: hedge + 無數字
        hedge_hits = [w for w in _HALLUCINATION_HEDGE_WORDS if w in text]
        if hedge_hits and not _NUMBER_PATTERN.search(text):
            return GateDecision(
                can_promote=False,
                reason='rejected_hallucination',
                detail=f'hedge words {hedge_hits} 但缺具體數字',
            )

        # R2: 簡單矛盾偵測 — 找「X 是 A」與「X 是 B」（A != B）
        contradiction = _detect_simple_contradiction(text)
        if contradiction:
            return GateDecision(
                can_promote=False,
                reason='rejected_hallucination',
                detail=f'自相矛盾偵測: {contradiction}',
            )

        return None

    # ──────────────────────────────────────────────────────────────────────
    # Stage 3: 去重（cosine similarity vs ai_insights）
    # ──────────────────────────────────────────────────────────────────────
    def _stage_3_dedup(self, episode: Dict[str, Any]) -> Optional[GateDecision]:
        """與既有 ai_insights cosine similarity >= 0.95 → rejected_duplicate。

        若 episode.embedding 為 NULL（蒸餾時尚未 embed）：略過此 stage（warning）。
        """
        embedding = episode.get('embedding')
        if not embedding:
            logger.debug(
                "[PromotionGate] episode_id=%s embedding 為 NULL，略過 Stage 3 去重",
                episode.get('id'),
            )
            return None

        try:
            from sqlalchemy import text as sa_text
            from database.manager import get_session

            session = get_session()
            try:
                row = session.execute(
                    sa_text("""
                        SELECT id, content,
                               1.0 - (embedding <=> CAST(:vec AS vector)) AS similarity
                          FROM ai_insights
                         WHERE embedding IS NOT NULL
                           AND status IN ('approved','active','executed')
                         ORDER BY embedding <=> CAST(:vec AS vector) ASC
                         LIMIT 1
                    """),
                    {'vec': str(embedding)},
                ).fetchone()
            finally:
                session.close()

            if row and float(row.similarity or 0) >= STAGE_3_DEDUP_THRESHOLD:
                return GateDecision(
                    can_promote=False,
                    reason='rejected_duplicate',
                    detail=f'similarity={row.similarity:.4f} >= {STAGE_3_DEDUP_THRESHOLD}',
                    similar_insight_id=int(row.id),
                )
            return None
        except Exception as exc:
            logger.warning(
                "[PromotionGate] Stage 3 dedup query failed (episode_id=%s): %s — 視為通過",
                episode.get('id'), exc,
            )
            return None

    # ──────────────────────────────────────────────────────────────────────
    # Stage 4: 高權重強制人工驗收
    # ──────────────────────────────────────────────────────────────────────
    def _stage_4_review(self, episode: Dict[str, Any]) -> GateDecision:
        """weight >= 0.8 → awaiting_review；否則自動晉升 approved。"""
        weight = float(episode.get('weight') or 0)
        if weight >= STAGE_4_HUMAN_REVIEW_WEIGHT:
            return GateDecision(
                can_promote=False,
                reason='awaiting_review',
                detail=f'weight={weight:.3f} >= {STAGE_4_HUMAN_REVIEW_WEIGHT} 強制人工驗收',
            )
        return GateDecision(can_promote=True, reason='approved')

    # ──────────────────────────────────────────────────────────────────────
    # 主入口
    # ──────────────────────────────────────────────────────────────────────
    def can_promote(self, episode_id: int) -> GateDecision:
        """執行 4 階段檢查，回 GateDecision。

        - approved          → 可呼叫 promote()
        - awaiting_review   → 不晉升，caller 負責推 Telegram 等 👍/👎
        - rejected_*        → 不晉升，caller 應呼叫 reject() 標狀態
        """
        episode = self._load_episode(episode_id)
        if not episode:
            return GateDecision(
                can_promote=False,
                reason='rejected_quality',
                detail=f'episode_id={episode_id} not found',
            )

        for stage_fn in (self._stage_1_quality, self._stage_2_hallucination, self._stage_3_dedup):
            verdict = stage_fn(episode)
            if verdict:
                return verdict

        return self._stage_4_review(episode)

    def promote(self, episode_id: int) -> Optional[int]:
        """執行晉升：寫 ai_insights + 更新 learning_episodes.{insight_id, promotion_status}。

        Returns:
            ai_insights.id 或 None（晉升失敗）。

        注意：呼叫前 caller 必須先 can_promote() 確認 reason='approved'。
        本函式不重複跑 4 stage（避免 race condition + 雙重檢查浪費 query）。
        """
        episode = self._load_episode(episode_id)
        if not episode:
            logger.warning("[PromotionGate] promote skipped: episode_id=%s not found", episode_id)
            return None

        try:
            from sqlalchemy import text as sa_text
            from database.manager import get_session

            session = get_session()
            try:
                # 1. 寫入 ai_insights（content + insight_type 從 episode 推斷）
                inferred_type = _infer_insight_type(episode)
                row = session.execute(
                    sa_text("""
                        INSERT INTO ai_insights (
                            insight_type, content, avg_quality, status,
                            confidence, created_by
                        ) VALUES (
                            :insight_type, :content, :quality, 'approved',
                            :confidence, 'learning_pipeline'
                        )
                        RETURNING id
                    """),
                    {
                        'insight_type': inferred_type,
                        'content':      episode['distilled_text'],
                        'quality':      float(episode.get('quality_score') or 0.5),
                        'confidence':   float(episode.get('weight') or 0.5),
                    },
                ).fetchone()
                insight_id = int(row[0]) if row else None

                if not insight_id:
                    raise RuntimeError('ai_insights INSERT 未回 id')

                # 2. 更新 learning_episodes（approved + insight_id 回填，CHECK chk_le_approved_consistent 強制一致）
                session.execute(
                    sa_text("""
                        UPDATE learning_episodes
                           SET promotion_status = 'approved',
                               insight_id = :insight_id,
                               reviewed_at = NOW()
                         WHERE id = :id
                    """),
                    {'insight_id': insight_id, 'id': episode_id},
                )
                session.commit()
                logger.info(
                    "[PromotionGate] episode_id=%s promoted → insight_id=%s",
                    episode_id, insight_id,
                )
                return insight_id
            except Exception:
                session.rollback()
                raise
            finally:
                session.close()
        except Exception as exc:
            logger.error("[PromotionGate] promote failed (episode_id=%s): %s", episode_id, exc)
            return None

    def reject(self, episode_id: int, reason: str, detail: Optional[str] = None) -> bool:
        """拒絕晉升：標 promotion_status='rejected_*' + rejected_reason。

        Args:
            reason: rejected_quality / rejected_hallucination / rejected_duplicate / rejected_human
            detail: 補充說明（會與 reason 拼成 rejected_reason 文本）
        """
        valid_statuses = (
            'rejected_quality',
            'rejected_hallucination',
            'rejected_duplicate',
            'rejected_human',
        )
        if reason not in valid_statuses:
            logger.warning("[PromotionGate] invalid reject reason=%s", reason)
            return False

        try:
            from sqlalchemy import text as sa_text
            from database.manager import get_session

            session = get_session()
            try:
                full_reason = detail or reason
                session.execute(
                    sa_text("""
                        UPDATE learning_episodes
                           SET promotion_status = :status,
                               rejected_reason = :rej_reason,
                               reviewed_at = NOW()
                         WHERE id = :id
                    """),
                    {'status': reason, 'rej_reason': full_reason, 'id': episode_id},
                )
                session.commit()
                return True
            except Exception:
                session.rollback()
                raise
            finally:
                session.close()
        except Exception as exc:
            logger.warning(
                "[PromotionGate] reject failed (episode_id=%s, reason=%s): %s",
                episode_id, reason, exc,
            )
            return False

    def mark_awaiting_review(self, episode_id: int) -> bool:
        """進入 awaiting_review 狀態（caller 推 Telegram 後呼叫，準備等 👍/👎）。"""
        try:
            from sqlalchemy import text as sa_text
            from database.manager import get_session

            session = get_session()
            try:
                session.execute(
                    sa_text("""
                        UPDATE learning_episodes
                           SET promotion_status = 'awaiting_review'
                         WHERE id = :id
                           AND promotion_status = 'pending'
                    """),
                    {'id': episode_id},
                )
                session.commit()
                return True
            except Exception:
                session.rollback()
                raise
            finally:
                session.close()
        except Exception as exc:
            logger.warning(
                "[PromotionGate] mark_awaiting_review failed (episode_id=%s): %s",
                episode_id, exc,
            )
            return False

    # ──────────────────────────────────────────────────────────────────────
    # 內部
    # ──────────────────────────────────────────────────────────────────────
    @staticmethod
    def _load_episode(episode_id: int) -> Optional[Dict[str, Any]]:
        """Fetch one learning_episodes row as a plain dict.

        Returning a dict keeps the stage functions easy to exercise with
        hand-built input. Returns None when the row does not exist or when
        the lookup fails (the failure is logged as a warning).
        """
        try:
            from sqlalchemy import text as sa_text
            from database.manager import get_session

            db = get_session()
            try:
                query = sa_text("""
                        SELECT id, episode_type, distilled_text,
                               quality_score, weight, embedding,
                               promotion_status
                          FROM learning_episodes
                         WHERE id = :id
                    """)
                record = db.execute(query, {'id': int(episode_id)}).fetchone()
                if not record:
                    return None
                loaded: Dict[str, Any] = {}
                loaded['id'] = int(record.id)
                loaded['episode_type'] = record.episode_type
                loaded['distilled_text'] = record.distilled_text
                # NULL numeric columns are normalised to 0.0
                loaded['quality_score'] = float(record.quality_score or 0)
                loaded['weight'] = float(record.weight or 0)
                loaded['embedding'] = record.embedding
                loaded['promotion_status'] = record.promotion_status
                return loaded
            finally:
                db.close()
        except Exception as exc:
            logger.warning("[PromotionGate] load_episode failed (id=%s): %s", episode_id, exc)
            return None


# ─────────────────────────────────────────────────────────────────────────────
# 24h 自動降級
# ─────────────────────────────────────────────────────────────────────────────
def expire_stale_reviews(hours: int = HUMAN_REVIEW_TIMEOUT_HOURS) -> int:
    """Expire awaiting_review episodes that have gone unanswered.

    Rows in 'awaiting_review' whose created_at is older than ``hours`` are
    set to status 'expired', their weight is lowered to
    EXPIRED_FALLBACK_WEIGHT, reviewed_at is stamped, and a note is appended to
    rejected_reason. The note now reflects the ``hours`` actually used (it
    previously always said "24h"); with the default it is unchanged.

    NOTE(review): age is measured from created_at, not from the moment the row
    entered awaiting_review, so an episode that sat in 'pending' for a long
    time can expire soon after being queued for review — confirm intended.

    Args:
        hours: Age threshold in hours (truncated to an int).

    Returns:
        Number of rows expired; 0 on any failure (logged, not raised).
    """
    try:
        from sqlalchemy import text as sa_text
        from database.manager import get_session

        whole_hours = int(hours)
        session = get_session()
        try:
            result = session.execute(
                sa_text("""
                    UPDATE learning_episodes
                       SET promotion_status = 'expired',
                           weight = :fallback_weight,
                           rejected_reason = COALESCE(rejected_reason, '')
                                          || CAST(:note AS TEXT),
                           reviewed_at = NOW()
                     WHERE promotion_status = 'awaiting_review'
                       AND created_at < NOW() - (:hours || ' hours')::INTERVAL
                """),
                {
                    'fallback_weight': EXPIRED_FALLBACK_WEIGHT,
                    'hours':          str(whole_hours),
                    'note':           f'{whole_hours}h 無人工反饋自動降級',
                },
            )
            session.commit()
            count = result.rowcount or 0
            if count:
                logger.info(
                    "[PromotionGate] expire_stale_reviews: %d episodes 降級 weight=%.2f",
                    count, EXPIRED_FALLBACK_WEIGHT,
                )
            return count
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()
    except Exception as exc:
        logger.warning("[PromotionGate] expire_stale_reviews failed: %s", exc)
        return 0


# ─────────────────────────────────────────────────────────────────────────────
# 工具函式
# ─────────────────────────────────────────────────────────────────────────────
def _detect_simple_contradiction(text: str) -> Optional[str]:
    """偵測「X 是 A」與「X 是 B」（A != B）的同主詞矛盾。

    純規則 light 偵測；不深做語意，避免誤殺合理推論。
    Returns: 矛盾描述 或 None。
    """
    pattern = re.compile(r'([一-龥A-Za-z0-9]{1,8})\s*[是為]\s*([一-龥A-Za-z0-9]{1,12})')
    matches = pattern.findall(text)
    if len(matches) < 2:
        return None
    seen: Dict[str, str] = {}
    for subject, value in matches:
        if subject in seen and seen[subject] != value:
            return f'"{subject}" 同時被指為 "{seen[subject]}" 與 "{value}"'
        seen[subject] = value
    return None


def _infer_insight_type(episode: Dict[str, Any]) -> str:
    """從 episode 推斷 ai_insights.insight_type。

    規則：
      - episode_type=user_feedback → 'human_review'
      - episode_type=manual_curated → 'manual_curated'
      - episode_type=mcp_result → 'mcp_grounding'
      - episode_type=llm_response → 'llm_distilled'
    """
    et = episode.get('episode_type') or ''
    return {
        'user_feedback':   'human_review',
        'manual_curated':  'manual_curated',
        'mcp_result':      'mcp_grounding',
        'llm_response':    'llm_distilled',
    }.get(et, 'llm_distilled')


def hash_human_approver(username: str) -> str:
    """Return the first 8 hex characters of SHA-1(username), or '' for empty input.

    The original note says this mirrors the migration 028 schema comment and
    exists so the raw username is not stored.
    """
    if username:
        digest = hashlib.sha1(username.encode('utf-8'))
        return digest.hexdigest()[:8]
    return ''


# ─────────────────────────────────────────────────────────────────────────────
# Worker functions (scheduled from run_scheduler.py) — Phase 11+ wrap-up
# ─────────────────────────────────────────────────────────────────────────────

# Default batch sizes: process N rows per run so a worker run stays short.
# Both are read from the environment at import time; a non-numeric value makes
# int() raise and therefore fails the module import.
PENDING_BATCH_SIZE = int(os.environ.get('PROMOTION_PENDING_BATCH_SIZE', '50'))
AWAITING_REVIEW_PUSH_BATCH = int(os.environ.get('AWAITING_REVIEW_PUSH_BATCH', '5'))


def process_pending_episodes(batch_size: int = PENDING_BATCH_SIZE) -> Dict[str, int]:
    """Run the promotion gate over a batch of pending episodes.

    Selects up to ``batch_size`` of the oldest 'pending' rows and, for each,
    calls promotion_gate.can_promote() followed by promote() /
    mark_awaiting_review() / reject() according to the decision. Intended to
    be run periodically by the scheduler (the original note says every
    5 minutes and cites ADR-032; the module header cites ADR-033 — confirm).

    This function never raises: session acquisition and the SELECT are both
    guarded, and per-episode failures are counted in ``errors``.

    Returns:
        {'pending_seen': N, 'promoted': X, 'rejected': Y, 'awaiting': Z, 'errors': E}
    """
    stats = {'pending_seen': 0, 'promoted': 0, 'rejected': 0, 'awaiting': 0, 'errors': 0}

    try:
        from sqlalchemy import text as sa_text
        from database.manager import get_session
    except Exception as exc:
        logger.warning('[PromotionWorker] DB import failed: %s', exc)
        return stats

    try:
        # get_session() sits inside the guard so a connection failure is
        # logged here instead of propagating to the scheduler.
        session = get_session()
        try:
            rows = session.execute(
                sa_text("""
                SELECT id FROM learning_episodes
                WHERE promotion_status = 'pending'
                ORDER BY created_at ASC
                LIMIT :n
            """),
                {'n': batch_size},
            ).fetchall()
        finally:
            session.close()
        episode_ids = [int(r[0]) for r in rows]
    except Exception as exc:
        logger.error('[PromotionWorker] SELECT pending failed: %s', exc)
        return stats

    stats['pending_seen'] = len(episode_ids)
    if not episode_ids:
        return stats

    for ep_id in episode_ids:
        try:
            decision = promotion_gate.can_promote(ep_id)
            if decision.reason == 'approved':
                succeeded, bucket = bool(promotion_gate.promote(ep_id)), 'promoted'
            elif decision.reason == 'awaiting_review':
                succeeded, bucket = promotion_gate.mark_awaiting_review(ep_id), 'awaiting'
            elif decision.reason.startswith('rejected_'):
                succeeded = promotion_gate.reject(ep_id, decision.reason, decision.detail)
                bucket = 'rejected'
            else:
                # Unknown reason: count it rather than dropping it silently.
                logger.warning(
                    '[PromotionWorker] episode_id=%s unexpected decision reason=%s',
                    ep_id, decision.reason,
                )
                succeeded, bucket = False, 'errors'
            stats[bucket if succeeded else 'errors'] += 1
        except Exception as exc:
            logger.warning('[PromotionWorker] episode_id=%s failed: %s', ep_id, exc)
            stats['errors'] += 1

    logger.info(
        '[PromotionWorker] batch done: pending=%d promoted=%d rejected=%d awaiting=%d errors=%d',
        stats['pending_seen'], stats['promoted'], stats['rejected'],
        stats['awaiting'], stats['errors']
    )
    return stats


def push_awaiting_reviews_to_telegram(batch: int = AWAITING_REVIEW_PUSH_BATCH,
                                       chat_id: Optional[str] = None) -> int:
    """Push awaiting_review episodes to Telegram with an approve/reject keyboard.

    Selects up to ``batch`` of the oldest rows with status 'awaiting_review'
    and reviewed_at IS NULL, and sends one message per row. Intended to be run
    periodically by the scheduler alongside expire_stale_reviews.

    NOTE(review): nothing in this function records that an episode has been
    pushed, so the same rows match the query again on the next run until they
    are reviewed or expired. If repeat pushes are unwanted, a dedicated
    "pushed" marker is needed — confirm against the schema.

    Args:
        batch:   Maximum number of episodes to push in this run.
        chat_id: Target chat; defaults to the TELEGRAM_ADMIN_CHAT_ID env var.

    Returns:
        Number of messages for which the send call did not raise; 0 when the
        chat id is unset or any setup step fails (logged, not raised).
    """
    pushed = 0
    try:
        from sqlalchemy import text as sa_text
        from database.manager import get_session
    except Exception as exc:
        logger.warning('[AwaitingReviewPush] DB import failed: %s', exc)
        return 0

    # Resolve the chat id (defaults to the admin chat)
    if chat_id is None:
        chat_id = os.environ.get('TELEGRAM_ADMIN_CHAT_ID', '').strip() or None
    if not chat_id:
        logger.info('[AwaitingReviewPush] TELEGRAM_ADMIN_CHAT_ID 未設，跳過推送')
        return 0

    try:
        # get_session() sits inside the guard so a connection failure is
        # logged here instead of propagating to the scheduler.
        session = get_session()
        try:
            rows = session.execute(
                sa_text("""
                SELECT id, distilled_text, weight, quality_score
                FROM learning_episodes
                WHERE promotion_status = 'awaiting_review'
                  AND reviewed_at IS NULL
                ORDER BY created_at ASC
                LIMIT :n
            """),
                {'n': batch},
            ).fetchall()
        finally:
            session.close()
    except Exception as exc:
        logger.error('[AwaitingReviewPush] SELECT failed: %s', exc)
        return 0

    if not rows:
        return 0

    try:
        from services.telegram_templates import promotion_review_keyboard, _send_telegram_raw
    except Exception as exc:
        logger.warning('[AwaitingReviewPush] template import failed: %s', exc)
        return 0

    from html import escape as html_escape

    for r in rows:
        ep_id, text_, weight, quality = r[0], r[1], float(r[2] or 0), float(r[3] or 0)
        # The message carries HTML markup (<b>), so the episode text is escaped
        # after truncation to keep '<', '>' and '&' from breaking the markup.
        # Assumes _send_telegram_raw sends with HTML parse mode — confirm.
        excerpt = html_escape((text_ or '')[:600], quote=False)
        msg = (
            f"🧠 <b>RAG 學習晉升審核</b>\n"
            f"━━━━━━━━━━━━━━━━━━━━\n"
            f"📋 episode #{ep_id} (weight={weight:.2f} quality={quality:.2f})\n\n"
            f"{excerpt}\n\n"
            f"審核：通過 → 寫入 ai_insights 供 RAG 檢索；拒絕 → 永不晉升"
        )
        try:
            _send_telegram_raw(msg, chat_id=chat_id, reply_markup=promotion_review_keyboard(ep_id))
            pushed += 1
        except Exception as exc:
            logger.warning('[AwaitingReviewPush] episode_id=%s push failed: %s', ep_id, exc)

    logger.info('[AwaitingReviewPush] pushed %d awaiting_review episodes to chat=%s',
                pushed, chat_id)
    return pushed


# ─────────────────────────────────────────────────────────────────────────────
# Module-level singletons
# ─────────────────────────────────────────────────────────────────────────────
# The worker functions above resolve `promotion_gate` at call time, so defining
# it after them is fine.
distiller = Distiller()
learning_pipeline = LearningPipeline()
promotion_gate = PromotionGate()


# Public API of this module.
__all__ = [
    'Distiller',
    'DistillResult',
    'GateDecision',
    'LearningPipeline',
    'PromotionGate',
    'distiller',
    'learning_pipeline',
    'promotion_gate',
    'expire_stale_reviews',
    'process_pending_episodes',
    'push_awaiting_reviews_to_telegram',
    'hash_human_approver',
    'STAGE_1_AUTO_QUALITY',
    'STAGE_3_DEDUP_THRESHOLD',
    'STAGE_4_HUMAN_REVIEW_WEIGHT',
    'HUMAN_REVIEW_TIMEOUT_HOURS',
]