#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
services/learning_pipeline.py

Operation Ollama-First v5.0 / Phase 11 - autonomous learning pipeline.

Two core components:
    1. Distiller     - LLM/MCP results -> learning_episodes (pending)
    2. PromotionGate - learning_episodes -> ai_insights through a 4-stage
                       gate, plus expire_stale_reviews() for the 24h
                       automatic downgrade

v5.0 guardrail #1 (ADR-033): the feedback button is upgraded from
"optional" to a "mandatory promotion threshold".
    Stage 1: quality_score >= 0.7
    Stage 2: rule-engine hallucination detection
    Stage 3: cosine similarity against existing insights < 0.95 (dedup)
    Stage 4: weight >= 0.8 must pass Telegram thumbs-up/down human review
             (downgraded to 0.5 after 24h without a response)

Related:
    - migrations/028_create_learning_episodes.sql
    - docs/adr/ADR-033 (Promotion Gate)
"""

from __future__ import annotations

import hashlib
import json
import logging
import os
import re
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Constants (no magic numbers)
# ---------------------------------------------------------------------------
STAGE_1_AUTO_QUALITY = 0.7          # lower bound for quality_score
STAGE_3_DEDUP_THRESHOLD = 0.95      # cosine similarity treated as duplicate
STAGE_4_HUMAN_REVIEW_WEIGHT = 0.8   # weight >= this forces human review
HUMAN_REVIEW_TIMEOUT_HOURS = 24     # awaiting_review expiry threshold
EXPIRED_FALLBACK_WEIGHT = 0.5       # weight applied after expiry
DISTILLED_TEXT_MAX_BYTES = 16384    # aligned with the 028 CHECK octet_length<=16384

# Distillation quality rules
_MCP_MIN_LEN = 200                  # minimum length of an MCP result
_LLM_FREE_TEXT_MIN = 500            # minimum length of LLM free text
_LLM_KEYWORDS_HINT = ['結論', '建議', '分析', '預測', '趨勢', '威脅', '機會']

# Hallucination rules (Stage 2)
_HALLUCINATION_HEDGE_WORDS = ['我猜', '可能是', '也許', '大概是', '應該是', '我猜測']
# "Concrete number" heuristic: at least one digit.
_NUMBER_PATTERN = re.compile(r'\d')
# "Self-contradiction" heuristic: the same subject is assigned two different
# values ("X is A" ... "X is B"). Rule-only; no semantic analysis in v5.0.
# Compiled once here instead of on every call.
_CONTRADICTION_PATTERN = re.compile(
    r'([一-龥A-Za-z0-9]{1,8})\s*[是為]\s*([一-龥A-Za-z0-9]{1,12})'
)

# Default weight after distilling user_feedback (high weight, so it always
# reaches the Stage 4 human review).
_USER_FEEDBACK_DEFAULT_WEIGHT = 0.9
_MANUAL_CURATED_DEFAULT_WEIGHT = 1.0

# Neutral feedback score used when the caller passes none (or an unusable one).
_DEFAULT_FEEDBACK_SCORE = 3
_VALID_EPISODE_TYPES = ('mcp_result', 'llm_response', 'user_feedback', 'manual_curated')


# ---------------------------------------------------------------------------
# Containers
# ---------------------------------------------------------------------------
@dataclass
class DistillResult:
    """Candidate row produced by Distiller (not yet written to the DB).

    The caller may adjust it further or enqueue it directly.
    """
    episode_type: str  # mcp_result / llm_response / user_feedback / manual_curated
    distilled_text: str
    quality_score: float
    weight: float
    source_table: Optional[str] = None
    source_id: Optional[int] = None


@dataclass
class GateDecision:
    """Outcome of the 4-stage PromotionGate check."""
    can_promote: bool
    reason: str  # approved / awaiting_review / rejected_*
    detail: Optional[str] = None  # human-readable reason on failure
    similar_insight_id: Optional[int] = None  # similar insight id hit in Stage 3
    # False when the episode row could not be loaded (missing row OR a DB read
    # error). Callers must not persist a rejection in that case.
    episode_found: bool = True


# ---------------------------------------------------------------------------
# Distiller
# ---------------------------------------------------------------------------
class Distiller:
    """Rule-based distiller for LLM/MCP results.

    Pure rule engine: it never calls an LLM (avoids a costly feedback loop).
    It returns a DistillResult and leaves the enqueue decision to the caller.
    """

    def distill(
        self,
        episode_type: str,
        raw_content: str,
        source_table: Optional[str] = None,
        source_id: Optional[int] = None,
        user_feedback_score: Optional[int] = None,
    ) -> Optional[DistillResult]:
        """Score one piece of raw content and wrap it in a DistillResult.

        Args:
            episode_type: mcp_result / llm_response / user_feedback / manual_curated
            raw_content: raw text (MCP result / LLM response / user message)
            source_table: ai_calls / mcp_calls / None
            source_id: id of the row in source_table
            user_feedback_score: 1-5, used when episode_type=user_feedback
                (5 -> quality 1.0; 1 -> 0.0, a negative sample that is not
                promoted). Missing or unusable values count as 3.

        Returns:
            A DistillResult, or None when the content cannot be distilled
            (empty text or unknown episode_type).
        """
        if not raw_content or not raw_content.strip():
            return None

        cleaned = raw_content.strip()
        # Truncate by UTF-8 byte length (aligned with the 028 CHECK
        # octet_length<=16384); errors='ignore' drops a multi-byte character
        # cut in half at the boundary.
        encoded = cleaned.encode('utf-8', errors='replace')
        if len(encoded) > DISTILLED_TEXT_MAX_BYTES:
            cleaned = encoded[:DISTILLED_TEXT_MAX_BYTES].decode('utf-8', errors='ignore')

        et = (episode_type or '').strip()
        if et not in _VALID_EPISODE_TYPES:
            logger.warning("[Distiller] unknown episode_type=%s", et)
            return None

        # Per-type quality rules
        if et == 'manual_curated':
            quality, weight = 1.0, _MANUAL_CURATED_DEFAULT_WEIGHT
        elif et == 'user_feedback':
            quality = self._feedback_quality(user_feedback_score)
            # Facts stated by the user still require human review.
            weight = _USER_FEEDBACK_DEFAULT_WEIGHT
        elif et == 'mcp_result':
            # MCP: >= 200 chars with 2+ keywords -> 0.8; >= 200 chars -> 0.65;
            # otherwise 0.5.
            keyword_hits = sum(1 for kw in _LLM_KEYWORDS_HINT if kw in cleaned)
            if len(cleaned) >= _MCP_MIN_LEN and keyword_hits >= 2:
                quality = 0.8
            elif len(cleaned) >= _MCP_MIN_LEN:
                quality = 0.65
            else:
                quality = 0.5
            weight = 0.6  # MCP fact, not user-confirmed: medium weight
        else:  # llm_response
            quality, weight = self._distill_llm_response(cleaned)

        return DistillResult(
            episode_type=et,
            distilled_text=cleaned,
            quality_score=round(quality, 3),
            weight=round(weight, 3),
            source_table=source_table,
            source_id=source_id,
        )

    @staticmethod
    def _feedback_quality(user_feedback_score: Any) -> float:
        """Map a 1-5 feedback score onto a 0.0-1.0 quality value.

        Falsy or non-numeric scores fall back to the neutral score instead of
        raising, so a malformed value cannot escape distill().
        """
        try:
            score = int(user_feedback_score or _DEFAULT_FEEDBACK_SCORE)
        except (TypeError, ValueError, OverflowError):
            logger.warning(
                "[Distiller] invalid user_feedback_score=%r, using %d",
                user_feedback_score, _DEFAULT_FEEDBACK_SCORE,
            )
            score = _DEFAULT_FEEDBACK_SCORE
        score = max(1, min(score, 5))
        # 5 -> 1.0 (high quality); 1 -> 0.0 (negative sample)
        return round((score - 1) / 4.0, 3)

    @staticmethod
    def _distill_llm_response(text: str) -> Tuple[float, float]:
        """Score an LLM response; returns (quality, weight).

        Path 1 - structured JSON:
            dict with status == 'ok', or a non-empty dict/list that carries no
            'status' key -> (0.9, 0.7); anything else that parses (empty
            container, or an explicit non-ok status) -> (0.7, 0.6).
        Path 2 - free text:
            >= 500 chars with a digit -> (0.65, 0.55); >= 500 chars without a
            digit -> (0.55, 0.5); shorter -> (0.4, 0.5).
        """
        stripped = text.strip()
        if stripped.startswith(('{', '[')):
            try:
                obj = json.loads(stripped)
            except (ValueError, RecursionError):
                # Not valid JSON (or absurdly nested): score it as free text.
                pass
            else:
                if isinstance(obj, dict) and 'status' in obj:
                    # An explicit status must actually be 'ok'. Previously any
                    # non-empty dict scored 0.9, so error payloads were rated
                    # as high quality.
                    structured_ok = obj['status'] == 'ok'
                else:
                    structured_ok = bool(obj)
                # NOTE(review): 0.7 still meets STAGE_1_AUTO_QUALITY; confirm
                # whether non-ok payloads should be scored lower.
                return (0.9, 0.7) if structured_ok else (0.7, 0.6)

        if len(text) >= _LLM_FREE_TEXT_MIN:
            if _NUMBER_PATTERN.search(text):
                return 0.65, 0.55
            return 0.55, 0.5
        # Too short: 0.4 never passes Stage 1.
        return 0.4, 0.5


# ---------------------------------------------------------------------------
# Learning pipeline entry point (enqueue + Distiller integration)
# ---------------------------------------------------------------------------
class LearningPipeline:
    """Single entry point for distill + enqueue.

    Example:
        from services.learning_pipeline import learning_pipeline
        learning_pipeline.enqueue(
            episode_type='llm_response',
            raw_content=response_text,
            source_table='ai_calls',
            source_id=ai_call_id,
        )
    """

    def __init__(self):
        self.distiller = Distiller()

    def enqueue(
        self,
        episode_type: str,
        raw_content: str,
        source_table: Optional[str] = None,
        source_id: Optional[int] = None,
        user_feedback_score: Optional[int] = None,
    ) -> Optional[int]:
        """Distill the content and insert it into learning_episodes as pending.

        Returns:
            learning_episodes.id, or None when distillation yields nothing or
            the DB write fails.
        """
        result = self.distiller.distill(
            episode_type=episode_type,
            raw_content=raw_content,
            source_table=source_table,
            source_id=source_id,
            user_feedback_score=user_feedback_score,
        )
        if not result:
            return None

        try:
            from sqlalchemy import text as sa_text
            from database.manager import get_session

            session = get_session()
            try:
                row = session.execute(
                    sa_text("""
                        INSERT INTO learning_episodes (
                            episode_type, source_table, source_id,
                            distilled_text, quality_score, weight,
                            promotion_status
                        ) VALUES (
                            :episode_type, :source_table, :source_id,
                            :distilled_text, :quality_score, :weight,
                            'pending'
                        )
                        RETURNING id
                    """),
                    {
                        'episode_type': result.episode_type,
                        'source_table': result.source_table,
                        'source_id': result.source_id,
                        'distilled_text': result.distilled_text,
                        'quality_score': result.quality_score,
                        'weight': result.weight,
                    },
                ).fetchone()
                session.commit()
                episode_id = int(row[0]) if row else None
            except Exception:
                session.rollback()
                raise
            finally:
                session.close()

            # Phase 11.5: queue an embedding job so Stage 3 dedup has a vector
            # to compare. Best-effort: a failure here only means Stage 3 is
            # skipped for this episode.
            if episode_id and result.distilled_text:
                try:
                    from services.openclaw_learning_service import _enqueue_embedding
                    _enqueue_embedding(
                        target_table='learning_episodes',
                        target_id=episode_id,
                        # Cap the text so the queue table does not bloat.
                        text_content=result.distilled_text[:4000],
                    )
                except Exception as embed_err:
                    logger.debug(
                        "[LearningPipeline] embedding enqueue failed (non-blocking): %s",
                        embed_err,
                    )
            return episode_id
        except Exception as exc:
            logger.warning("[LearningPipeline] enqueue failed: %s", exc)
            return None


# ---------------------------------------------------------------------------
# PromotionGate (v5.0 core guardrail #1)
# ---------------------------------------------------------------------------
class PromotionGate:
    """4-stage promotion gate from learning_episodes to ai_insights.

    High-weight episodes must pass human review so hallucinations do not
    pollute RAG.

    Example:
        gate = PromotionGate()
        decision = gate.can_promote(episode_id)
        if decision.can_promote:
            insight_id = gate.promote(episode_id)
        elif decision.reason == 'awaiting_review':
            ...  # push to Telegram and wait for thumbs-up/down
        elif decision.episode_found:
            gate.reject(episode_id, decision.reason, decision.detail)
    """

    # -- Stage 1: automatic quality_score threshold --------------------------
    def _stage_1_quality(self, episode: Dict[str, Any]) -> Optional[GateDecision]:
        """Return rejected_quality when quality_score < 0.7, else None."""
        q = float(episode.get('quality_score') or 0)
        if q < STAGE_1_AUTO_QUALITY:
            return GateDecision(
                can_promote=False,
                reason='rejected_quality',
                detail=f'quality_score={q:.3f} < {STAGE_1_AUTO_QUALITY}',
            )
        return None

    # -- Stage 2: hallucination detection (rule engine) ----------------------
    def _stage_2_hallucination(self, episode: Dict[str, Any]) -> Optional[GateDecision]:
        """Apply the rule-based hallucination checks.

        Rules:
            R1. Contains a hedge word from _HALLUCINATION_HEDGE_WORDS and no
                digit -> suspect.
            R2. Contains both "A is X" and "A is Y" (same subject, different
                value; rule-light) -> suspect.

        Returns:
            A rejected_hallucination decision, or None when the text passes
            (empty text also passes).
        """
        text = (episode.get('distilled_text') or '').strip()
        if not text:
            return None

        # R1: hedging without any concrete number
        hedge_hits = [w for w in _HALLUCINATION_HEDGE_WORDS if w in text]
        if hedge_hits and not _NUMBER_PATTERN.search(text):
            return GateDecision(
                can_promote=False,
                reason='rejected_hallucination',
                detail=f'hedge words {hedge_hits} 但缺具體數字',
            )

        # R2: simple same-subject contradiction
        contradiction = _detect_simple_contradiction(text)
        if contradiction:
            return GateDecision(
                can_promote=False,
                reason='rejected_hallucination',
                detail=f'自相矛盾偵測: {contradiction}',
            )
        return None

    # -- Stage 3: dedup (cosine similarity vs ai_insights) -------------------
    @staticmethod
    def _vector_literal(embedding: Any) -> Optional[str]:
        """Render an embedding as a '[v1,v2,...]' text literal.

        Accepts the text form as-is, or any sequence / array-like of numbers
        (objects exposing tolist() are converted first). Returns None for a
        missing or empty embedding.
        """
        if embedding is None:
            return None
        if isinstance(embedding, str):
            return embedding.strip() or None
        if hasattr(embedding, 'tolist'):
            # Array-likes: truthiness and str() are not usable on them.
            embedding = embedding.tolist()
        values = list(embedding)
        if not values:
            return None
        return '[' + ','.join(repr(float(v)) for v in values) + ']'

    def _stage_3_dedup(self, episode: Dict[str, Any]) -> Optional[GateDecision]:
        """Reject when the nearest ai_insights row has similarity >= 0.95.

        The stage is skipped (returns None) when the episode has no embedding
        yet, and also when the query fails: a dedup failure is treated as a
        pass rather than blocking promotion.
        """
        try:
            vec = PromotionGate._vector_literal(episode.get('embedding'))
            if vec is None:
                logger.debug(
                    "[PromotionGate] episode_id=%s embedding 為 NULL,略過 Stage 3 去重",
                    episode.get('id'),
                )
                return None

            from sqlalchemy import text as sa_text
            from database.manager import get_session

            session = get_session()
            try:
                row = session.execute(
                    sa_text("""
                        SELECT id, content,
                               1.0 - (embedding <=> CAST(:vec AS vector)) AS similarity
                        FROM ai_insights
                        WHERE embedding IS NOT NULL
                          AND status IN ('approved','active','executed')
                        ORDER BY embedding <=> CAST(:vec AS vector) ASC
                        LIMIT 1
                    """),
                    {'vec': vec},
                ).fetchone()
            finally:
                session.close()

            if row and float(row.similarity or 0) >= STAGE_3_DEDUP_THRESHOLD:
                return GateDecision(
                    can_promote=False,
                    reason='rejected_duplicate',
                    detail=f'similarity={row.similarity:.4f} >= {STAGE_3_DEDUP_THRESHOLD}',
                    similar_insight_id=int(row.id),
                )
            return None
        except Exception as exc:
            logger.warning(
                "[PromotionGate] Stage 3 dedup query failed (episode_id=%s): %s — 視為通過",
                episode.get('id'), exc,
            )
            return None

    # -- Stage 4: forced human review for high weight ------------------------
    def _stage_4_review(self, episode: Dict[str, Any]) -> GateDecision:
        """Return awaiting_review when weight >= 0.8, otherwise approved."""
        weight = float(episode.get('weight') or 0)
        if weight >= STAGE_4_HUMAN_REVIEW_WEIGHT:
            return GateDecision(
                can_promote=False,
                reason='awaiting_review',
                detail=f'weight={weight:.3f} >= {STAGE_4_HUMAN_REVIEW_WEIGHT} 強制人工驗收',
            )
        return GateDecision(can_promote=True, reason='approved')

    # -- Public API -----------------------------------------------------------
    def can_promote(self, episode_id: int) -> GateDecision:
        """Run the 4 stages in order and return the first verdict.

        - approved        -> promote() may be called
        - awaiting_review -> not promoted; the caller pushes to Telegram
        - rejected_*      -> not promoted; the caller should call reject()

        When the episode cannot be loaded the decision is rejected_quality
        with episode_found=False. _load_episode cannot tell a missing row from
        a DB read error, so callers must not persist that rejection.
        """
        episode = self._load_episode(episode_id)
        if not episode:
            return GateDecision(
                can_promote=False,
                reason='rejected_quality',
                detail=f'episode_id={episode_id} not found',
                episode_found=False,
            )

        for stage_fn in (self._stage_1_quality,
                         self._stage_2_hallucination,
                         self._stage_3_dedup):
            verdict = stage_fn(episode)
            if verdict:
                return verdict
        return self._stage_4_review(episode)

    def promote(self, episode_id: int) -> Optional[int]:
        """Insert the episode into ai_insights and mark it approved.

        The caller must have confirmed reason='approved' via can_promote();
        the 4 stages are not re-run here. An episode that is already approved
        is not inserted again: its existing insight_id is returned instead.

        Returns:
            ai_insights.id, or None when promotion fails.
        """
        episode = self._load_episode(episode_id)
        if not episode:
            logger.warning("[PromotionGate] promote skipped: episode_id=%s not found", episode_id)
            return None

        if episode.get('promotion_status') == 'approved':
            # Idempotency guard: a repeated call must not create a second
            # ai_insights row for the same episode.
            existing = episode.get('insight_id')
            logger.info(
                "[PromotionGate] promote skipped: episode_id=%s already approved (insight_id=%s)",
                episode_id, existing,
            )
            return int(existing) if existing is not None else None

        try:
            from sqlalchemy import text as sa_text
            from database.manager import get_session

            session = get_session()
            try:
                # 1. Insert into ai_insights (insight_type inferred from the episode).
                inferred_type = _infer_insight_type(episode)
                row = session.execute(
                    sa_text("""
                        INSERT INTO ai_insights (
                            insight_type, content, avg_quality,
                            status, confidence, created_by
                        ) VALUES (
                            :insight_type, :content, :quality,
                            'approved', :confidence, 'learning_pipeline'
                        )
                        RETURNING id
                    """),
                    {
                        'insight_type': inferred_type,
                        'content': episode['distilled_text'],
                        'quality': float(episode.get('quality_score') or 0.5),
                        'confidence': float(episode.get('weight') or 0.5),
                    },
                ).fetchone()
                insight_id = int(row[0]) if row else None
                if not insight_id:
                    raise RuntimeError('ai_insights INSERT 未回 id')

                # 2. Back-fill learning_episodes in the same transaction so the
                #    approved status and insight_id stay consistent.
                session.execute(
                    sa_text("""
                        UPDATE learning_episodes
                        SET promotion_status = 'approved',
                            insight_id = :insight_id,
                            reviewed_at = NOW()
                        WHERE id = :id
                    """),
                    {'insight_id': insight_id, 'id': episode_id},
                )
                session.commit()
                logger.info(
                    "[PromotionGate] episode_id=%s promoted → insight_id=%s",
                    episode_id, insight_id,
                )
                return insight_id
            except Exception:
                session.rollback()
                raise
            finally:
                session.close()
        except Exception as exc:
            logger.error("[PromotionGate] promote failed (episode_id=%s): %s", episode_id, exc)
            return None

    def reject(self, episode_id: int, reason: str, detail: Optional[str] = None) -> bool:
        """Mark an episode as rejected.

        Args:
            episode_id: learning_episodes.id
            reason: rejected_quality / rejected_hallucination /
                rejected_duplicate / rejected_human; stored as promotion_status
            detail: optional explanation; stored as rejected_reason when
                given, otherwise `reason` is stored there

        Returns:
            True when the UPDATE was committed, False for an invalid reason or
            a DB failure.
        """
        valid_statuses = (
            'rejected_quality', 'rejected_hallucination',
            'rejected_duplicate', 'rejected_human',
        )
        if reason not in valid_statuses:
            logger.warning("[PromotionGate] invalid reject reason=%s", reason)
            return False

        try:
            from sqlalchemy import text as sa_text
            from database.manager import get_session

            session = get_session()
            try:
                full_reason = detail or reason
                session.execute(
                    sa_text("""
                        UPDATE learning_episodes
                        SET promotion_status = :status,
                            rejected_reason = :rej_reason,
                            reviewed_at = NOW()
                        WHERE id = :id
                    """),
                    {'status': reason, 'rej_reason': full_reason, 'id': episode_id},
                )
                session.commit()
                return True
            except Exception:
                session.rollback()
                raise
            finally:
                session.close()
        except Exception as exc:
            logger.warning(
                "[PromotionGate] reject failed (episode_id=%s, reason=%s): %s",
                episode_id, reason, exc,
            )
            return False

    def mark_awaiting_review(self, episode_id: int) -> bool:
        """Move a pending episode to awaiting_review.

        Only rows still in 'pending' are updated. Returns True when the
        statement was committed (even if no row matched), False on DB failure.
        """
        try:
            from sqlalchemy import text as sa_text
            from database.manager import get_session

            session = get_session()
            try:
                session.execute(
                    sa_text("""
                        UPDATE learning_episodes
                        SET promotion_status = 'awaiting_review'
                        WHERE id = :id AND promotion_status = 'pending'
                    """),
                    {'id': episode_id},
                )
                session.commit()
                return True
            except Exception:
                session.rollback()
                raise
            finally:
                session.close()
        except Exception as exc:
            logger.warning(
                "[PromotionGate] mark_awaiting_review failed (episode_id=%s): %s",
                episode_id, exc,
            )
            return False

    # -- Internal -------------------------------------------------------------
    @staticmethod
    def _load_episode(episode_id: int) -> Optional[Dict[str, Any]]:
        """Load one learning_episodes row as a plain dict.

        A dict keeps the stage functions easy to unit-test. Returns None both
        when the row does not exist and when the query fails.
        """
        try:
            from sqlalchemy import text as sa_text
            from database.manager import get_session

            session = get_session()
            try:
                row = session.execute(
                    sa_text("""
                        SELECT id, episode_type, distilled_text,
                               quality_score, weight, embedding,
                               promotion_status, insight_id
                        FROM learning_episodes
                        WHERE id = :id
                    """),
                    {'id': int(episode_id)},
                ).fetchone()
                if not row:
                    return None
                return {
                    'id': int(row.id),
                    'episode_type': row.episode_type,
                    'distilled_text': row.distilled_text,
                    'quality_score': float(row.quality_score or 0),
                    'weight': float(row.weight or 0),
                    'embedding': row.embedding,
                    'promotion_status': row.promotion_status,
                    'insight_id': row.insight_id,
                }
            finally:
                session.close()
        except Exception as exc:
            logger.warning("[PromotionGate] load_episode failed (id=%s): %s", episode_id, exc)
            return None


# ---------------------------------------------------------------------------
# 24h automatic downgrade
# ---------------------------------------------------------------------------
def expire_stale_reviews(hours: int = HUMAN_REVIEW_TIMEOUT_HOURS) -> int:
    """Expire awaiting_review episodes older than `hours`.

    Matching rows (by created_at) get promotion_status='expired', weight set
    to EXPIRED_FALLBACK_WEIGHT, a note appended to rejected_reason, and
    reviewed_at set to NOW(). Intended to be run periodically by the scheduler.

    Returns:
        Number of rows downgraded; 0 on failure.
    """
    try:
        from sqlalchemy import text as sa_text
        from database.manager import get_session

        session = get_session()
        try:
            result = session.execute(
                sa_text("""
                    UPDATE learning_episodes
                    SET promotion_status = 'expired',
                        weight = :fallback_weight,
                        rejected_reason = COALESCE(rejected_reason, '') || '24h 無人工反饋自動降級',
                        reviewed_at = NOW()
                    WHERE promotion_status = 'awaiting_review'
                      AND created_at < NOW() - (:hours || ' hours')::INTERVAL
                """),
                {
                    'fallback_weight': EXPIRED_FALLBACK_WEIGHT,
                    'hours': str(int(hours)),
                },
            )
            session.commit()
            count = result.rowcount or 0
            if count:
                logger.info(
                    "[PromotionGate] expire_stale_reviews: %d episodes 降級 weight=%.2f",
                    count, EXPIRED_FALLBACK_WEIGHT,
                )
            return count
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()
    except Exception as exc:
        logger.warning("[PromotionGate] expire_stale_reviews failed: %s", exc)
        return 0


# ---------------------------------------------------------------------------
# Utility functions
# ---------------------------------------------------------------------------
def _detect_simple_contradiction(text: str) -> Optional[str]:
    """Detect "X is A" followed by "X is B" (A != B) for the same subject.

    Light, purely rule-based detection with no semantic analysis, to avoid
    rejecting reasonable inference.

    Returns:
        A description of the first contradiction found, or None.
    """
    matches = _CONTRADICTION_PATTERN.findall(text)
    if len(matches) < 2:
        return None
    seen: Dict[str, str] = {}
    for subject, value in matches:
        if subject in seen and seen[subject] != value:
            return f'"{subject}" 同時被指為 "{seen[subject]}" 與 "{value}"'
        seen[subject] = value
    return None


def _infer_insight_type(episode: Dict[str, Any]) -> str:
    """Map episode_type to ai_insights.insight_type.

    Mapping:
        user_feedback  -> 'human_review'
        manual_curated -> 'manual_curated'
        mcp_result     -> 'mcp_grounding'
        llm_response   -> 'llm_distilled' (also the fallback for unknown types)
    """
    et = episode.get('episode_type') or ''
    return {
        'user_feedback': 'human_review',
        'manual_curated': 'manual_curated',
        'mcp_result': 'mcp_grounding',
        'llm_response': 'llm_distilled',
    }.get(et, 'llm_distilled')


def hash_human_approver(username: str) -> str:
    """Return the first 8 hex chars of SHA1(username); '' for an empty name.

    Used so the raw Telegram username (PII) is not stored.
    """
    if not username:
        return ''
    return hashlib.sha1(username.encode('utf-8')).hexdigest()[:8]


# ---------------------------------------------------------------------------
# Worker functions (scheduled from run_scheduler.py) - Phase 11+
# ---------------------------------------------------------------------------
# Default batch sizes: process N rows per run so a worker never blocks the
# scheduler for long.
PENDING_BATCH_SIZE = int(os.environ.get('PROMOTION_PENDING_BATCH_SIZE', '50'))
AWAITING_REVIEW_PUSH_BATCH = int(os.environ.get('AWAITING_REVIEW_PUSH_BATCH', '5'))


def process_pending_episodes(batch_size: int = PENDING_BATCH_SIZE) -> Dict[str, int]:
    """Run the oldest pending episodes through the PromotionGate.

    Each episode goes through can_promote() and is then promoted, rejected,
    or moved to awaiting_review. Episodes that could not be loaded are counted
    as errors and left pending, so a transient DB read failure never turns
    into a permanent rejection.

    Returns:
        {'pending_seen': N, 'promoted': X, 'rejected': Y, 'awaiting': Z, 'errors': E}
    """
    stats = {'pending_seen': 0, 'promoted': 0, 'rejected': 0, 'awaiting': 0, 'errors': 0}
    try:
        from sqlalchemy import text as sa_text
        from database.manager import get_session
    except Exception as exc:
        logger.warning('[PromotionWorker] DB import failed: %s', exc)
        return stats

    try:
        session = get_session()
        try:
            rows = session.execute(
                sa_text("""
                    SELECT id FROM learning_episodes
                    WHERE promotion_status = 'pending'
                    ORDER BY created_at ASC
                    LIMIT :n
                """),
                {'n': batch_size},
            ).fetchall()
        finally:
            # Closed exactly once, on both the success and the error path.
            session.close()
        episode_ids = [int(r[0]) for r in rows]
    except Exception as exc:
        logger.error('[PromotionWorker] SELECT pending failed: %s', exc)
        return stats

    stats['pending_seen'] = len(episode_ids)
    if not episode_ids:
        return stats

    for ep_id in episode_ids:
        try:
            decision = promotion_gate.can_promote(ep_id)
            if not decision.episode_found:
                # Load failed: do not persist a rejection for it.
                logger.warning(
                    '[PromotionWorker] episode_id=%s could not be loaded, left pending',
                    ep_id,
                )
                stats['errors'] += 1
            elif decision.reason == 'approved':
                if promotion_gate.promote(ep_id):
                    stats['promoted'] += 1
                else:
                    stats['errors'] += 1
            elif decision.reason == 'awaiting_review':
                if promotion_gate.mark_awaiting_review(ep_id):
                    stats['awaiting'] += 1
                else:
                    stats['errors'] += 1
            elif decision.reason.startswith('rejected_'):
                if promotion_gate.reject(ep_id, decision.reason, decision.detail):
                    stats['rejected'] += 1
                else:
                    stats['errors'] += 1
        except Exception as exc:
            logger.warning('[PromotionWorker] episode_id=%s failed: %s', ep_id, exc)
            stats['errors'] += 1

    logger.info(
        '[PromotionWorker] batch done: pending=%d promoted=%d rejected=%d awaiting=%d errors=%d',
        stats['pending_seen'], stats['promoted'], stats['rejected'],
        stats['awaiting'], stats['errors']
    )
    return stats


def push_awaiting_reviews_to_telegram(batch: int = AWAITING_REVIEW_PUSH_BATCH,
                                      chat_id: Optional[str] = None) -> int:
    """Push awaiting_review episodes to Telegram with a review keyboard.

    Selects the oldest awaiting_review rows whose reviewed_at is NULL and
    sends one message per episode. When chat_id is None it is read from the
    TELEGRAM_ADMIN_CHAT_ID environment variable; without a chat id nothing is
    sent.

    NOTE(review): nothing in this function records that a push happened, and
    reviewed_at stays NULL until a human decision or expiry, so the same rows
    are selected again on every run. Confirm whether a "pushed" marker column
    exists or should be added.

    Returns:
        Number of messages sent.
    """
    pushed = 0
    try:
        from sqlalchemy import text as sa_text
        from database.manager import get_session
    except Exception as exc:
        logger.warning('[AwaitingReviewPush] DB import failed: %s', exc)
        return 0

    # Resolve chat_id (defaults to the admin chat).
    if chat_id is None:
        chat_id = os.environ.get('TELEGRAM_ADMIN_CHAT_ID', '').strip() or None
    if not chat_id:
        logger.info('[AwaitingReviewPush] TELEGRAM_ADMIN_CHAT_ID 未設,跳過推送')
        return 0

    try:
        session = get_session()
        try:
            rows = session.execute(
                sa_text("""
                    SELECT id, distilled_text, weight, quality_score
                    FROM learning_episodes
                    WHERE promotion_status = 'awaiting_review'
                      AND reviewed_at IS NULL
                    ORDER BY created_at ASC
                    LIMIT :n
                """),
                {'n': batch},
            ).fetchall()
        finally:
            # Closed exactly once, on both the success and the error path.
            session.close()
    except Exception as exc:
        logger.error('[AwaitingReviewPush] SELECT failed: %s', exc)
        return 0

    if not rows:
        return 0

    try:
        from services.telegram_templates import promotion_review_keyboard, _send_telegram_raw
    except Exception as exc:
        logger.warning('[AwaitingReviewPush] template import failed: %s', exc)
        return 0

    for r in rows:
        ep_id, text_, weight, quality = r[0], r[1], float(r[2] or 0), float(r[3] or 0)
        msg = (
            f"🧠 RAG 學習晉升審核\n"
            f"━━━━━━━━━━━━━━━━━━━━\n"
            f"📋 episode #{ep_id} (weight={weight:.2f} quality={quality:.2f})\n\n"
            f"{(text_ or '')[:600]}\n\n"
            f"審核:通過 → 寫入 ai_insights 供 RAG 檢索;拒絕 → 永不晉升"
        )
        try:
            _send_telegram_raw(msg, chat_id=chat_id,
                               reply_markup=promotion_review_keyboard(ep_id))
            pushed += 1
        except Exception as exc:
            # One failed send must not stop the rest of the batch.
            logger.warning('[AwaitingReviewPush] episode_id=%s push failed: %s', ep_id, exc)

    logger.info('[AwaitingReviewPush] pushed %d awaiting_review episodes to chat=%s',
                pushed, chat_id)
    return pushed


# ---------------------------------------------------------------------------
# Module-level singletons
# ---------------------------------------------------------------------------
distiller = Distiller()
learning_pipeline = LearningPipeline()
promotion_gate = PromotionGate()


__all__ = [
    'Distiller', 'DistillResult', 'GateDecision',
    'LearningPipeline', 'PromotionGate',
    'distiller', 'learning_pipeline', 'promotion_gate',
    'expire_stale_reviews',
    'process_pending_episodes', 'push_awaiting_reviews_to_telegram',
    'hash_human_approver',
    'STAGE_1_AUTO_QUALITY', 'STAGE_3_DEDUP_THRESHOLD',
    'STAGE_4_HUMAN_REVIEW_WEIGHT', 'HUMAN_REVIEW_TIMEOUT_HOURS',
]