""" Knowledge Extractor Service — KB Phase 2-A ========================================== Incident resolved 後自動萃取 KB 草稿。 設計原則: - 使用 `settings.OLLAMA_TOOL_MODEL`,依全域順序 GCP-A → GCP-B → 111 嘗試 - fire-and-forget:失敗不影響 resolve 主流程 - logger.exception 保留完整 Stack Trace 供 Prompt 調優 2026-04-03 ogt: KB Phase 2-A 初始實作 """ import structlog logger = structlog.get_logger(__name__) # 2026-05-19 Codex: 統帥校正,全 Ollama workload 固定 GCP-A → GCP-B → 111。 def _get_ollama_endpoints(): from src.services.ollama_endpoint_circuit_breaker import ( resolve_ollama_order_with_cooldown, ) return resolve_ollama_order_with_cooldown("hermes") def _get_extract_model() -> str: from src.core.config import settings return str(getattr(settings, "OLLAMA_TOOL_MODEL", "hermes3:latest") or "hermes3:latest") _EXTRACT_TIMEOUT = 30.0 # 秒,容忍慢速 # Linear / Nothing.tech 風格的 SRE KB Prompt _PROMPT_TEMPLATE = """你是一位資深 SRE 工程師,請用**繁體中文**撰寫一份知識庫條目(Markdown 格式)。 ## 事件資訊 - 事件 ID:{incident_id} - 嚴重度:{severity} - 發生時間:{created_at} - 解決時間:{resolved_at} ## 觸發信號 {signals} ## 請輸出以下結構的 Markdown(只輸出 Markdown,不要其他說明文字): # [一句話摘要標題] ## 問題描述 (簡述發生了什麼問題,2-3 句) ## 根本原因 (分析可能的根本原因,條列式) ## 解決方法 (列出實際採取的解決步驟,條列式) ## 預防措施 (如何避免未來再發生,條列式) ## 相關標籤 `{severity}` `ai_extracted` """ # 信號關鍵字 → KB 分類映射 _CATEGORY_KEYWORDS: dict[str, list[str]] = { "infrastructure": ["k8s", "pod", "node", "deploy", "container", "namespace", "kubectl", "memory", "cpu", "disk", "oom", "evict", "crashloop"], "application": ["api", "http", "latency", "5xx", "4xx", "error rate", "timeout", "connection", "database", "redis", "postgres", "slow"], "ai_system": ["ai", "llm", "openclaw", "nemo", "ollama", "gemini", "claude", "router", "provider", "inference", "token"], "security": ["ssl", "cert", "auth", "permission", "scan", "vuln", "exploit", "unauthorized", "403", "401"], } class KnowledgeExtractorService: """ Incident → KB 草稿自動萃取器 使用目前配置的 Ollama tool model 產生 Markdown 格式的 SRE 知識條目。 """ async def extract_from_incident(self, incident) -> bool: """ 從已解決的 Incident 萃取 KB 草稿。 Args: incident: Incident 物件(需有 incident_id, severity, signals, created_at) Returns: True = 萃取成功,False = 失敗(已記錄 Stack Trace) """ try: # 1. 組 Prompt # 2026-04-16 ogt: Signal 無 description 欄位,用 alert_name + annotations.summary signals_text = "\n".join( f"- {s.alert_name}: {s.annotations.get('summary', s.annotations.get('description', ''))}" for s in (incident.signals or []) ) or "(無信號記錄)" prompt = _PROMPT_TEMPLATE.format( incident_id=incident.incident_id, severity=incident.severity.value, created_at=str(getattr(incident, "created_at", "未知"))[:19], resolved_at=str(getattr(incident, "resolved_at", "未知"))[:19], signals=signals_text, ) # 2. 呼叫 Ollama(直接 HTTP,不走 AIRouter 避免路由邏輯開銷) markdown_content = await self._call_ollama(prompt) model = _get_extract_model() if not markdown_content: logger.warning( "kb_extract_empty_response", incident_id=incident.incident_id, model=model, ) return False # 3. 萃取標題(第一行 `# 標題`) title = self._extract_title(markdown_content, incident) # 4. 推斷分類 category = self._infer_category(incident) # 5. 建立 KB 條目 from src.models.knowledge import ( EntrySource, EntryType, KnowledgeEntryCreate, ) from src.services.knowledge_service import get_knowledge_service entry_data = KnowledgeEntryCreate( title=title, content=markdown_content, entry_type=EntryType.INCIDENT_CASE, category=category, tags=[incident.severity.value, "ai_extracted", category], source=EntrySource.AI_EXTRACTED, related_incident_id=incident.incident_id, created_by="openclaw_ai", ) await get_knowledge_service().create_entry(entry_data) logger.info( "kb_extract_success", incident_id=incident.incident_id, title=title, category=category, model=model, ) return True except Exception: # 統帥指示:保留完整 Stack Trace 供初期 Prompt 調優 logger.exception( "kb_extract_failed", incident_id=getattr(incident, "incident_id", "unknown"), ) return False async def _call_ollama(self, prompt: str) -> str | None: """ 直接呼叫 Ollama REST API。 不走 AIRouter 是刻意設計: - KB 萃取是背景工作,不需要完整的路由/閘門/Cache 邏輯 - Ollama endpoint 固定依 GCP-A → GCP-B → 111 嘗試 """ import httpx endpoints = _get_ollama_endpoints() model = _get_extract_model() async with httpx.AsyncClient(timeout=_EXTRACT_TIMEOUT) as client: for endpoint in endpoints: if not endpoint.url: continue try: r = await client.post( f"{endpoint.url}/api/generate", json={ "model": model, "prompt": prompt, "stream": False, "options": { "temperature": 0.3, # 低溫:減少幻覺 "num_predict": 800, # 控制長度 "stop": ["\n\n\n"], # 防止無限生成 }, }, ) r.raise_for_status() text = r.json().get("response", "").strip() if text: logger.info( "kb_ollama_call_success", model=model, provider=endpoint.provider_name, base=endpoint.url, ) from src.services.ollama_endpoint_circuit_breaker import ( record_ollama_endpoint_success, ) record_ollama_endpoint_success(endpoint.url) return text except Exception as e: from src.services.ollama_endpoint_circuit_breaker import ( record_ollama_endpoint_failure, ) record_ollama_endpoint_failure(endpoint.url) logger.warning( "kb_ollama_call_failed", model=model, provider=endpoint.provider_name, base=endpoint.url, error=str(e), ) logger.error( "kb_ollama_all_endpoints_failed", model=model, attempted=[endpoint.provider_name for endpoint in endpoints], ) return None def _extract_title(self, markdown: str, incident) -> str: """ 從 Markdown 第一行 `# 標題` 萃取標題。 Fallback:使用 incident_id + 第一個 signal 描述。 """ for line in markdown.splitlines(): stripped = line.strip() if stripped.startswith("# "): title = stripped[2:].strip() if title: return title[:200] # DB column max 255 # Fallback # 2026-04-16 ogt: Signal 無 description 欄位,改用 alert_name signals = incident.signals or [] desc = signals[0].alert_name[:60] if signals else "未知事件" return f"[AI 萃取] {incident.incident_id}: {desc}" def _infer_category(self, incident) -> str: """ 依 signals 關鍵字推斷 KB 分類。 依序比對,第一個匹配的分類獲勝。 """ # 2026-04-24 ogt: Signal 無 description 欄位,改用 alert_name + annotations.summary text = " ".join( ( (s.alert_name or "") + " " + (s.annotations.get("summary", "") if s.annotations else "") ).lower() for s in (incident.signals or []) ) for category, keywords in _CATEGORY_KEYWORDS.items(): if any(k in text for k in keywords): return category # 保守 fallback return "infrastructure" # ============================================================================= # Singleton # ============================================================================= _extractor: KnowledgeExtractorService | None = None def get_knowledge_extractor() -> KnowledgeExtractorService: global _extractor if _extractor is None: _extractor = KnowledgeExtractorService() return _extractor