Files
awoooi/apps/api/src/services/knowledge_extractor_service.py
Your Name c4854bb355
All checks were successful
CD Pipeline / tests (push) Successful in 54s
Code Review / ai-code-review (push) Successful in 10s
CD Pipeline / build-and-deploy (push) Successful in 3m19s
CD Pipeline / post-deploy-checks (push) Successful in 3m12s
fix(ai): isolate heavy Ollama workloads from GCP alert lane
2026-05-05 23:06:07 +08:00

245 lines
8.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
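Knowledge Extractor Service — KB Phase 2-A
==========================================
Automatically extracts a KB draft once an incident is resolved.
Design principles:
- Always use Ollama llama3.2:3b (local inference, per the Phase 24 D7 privacy rule)
- Fire-and-forget: a failure must not affect the main resolve flow
- logger.exception keeps the full stack trace for prompt tuning
2026-04-03 ogt: KB Phase 2-A initial implementation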
"""
import structlog
logger = structlog.get_logger(__name__)
# 2026-05-05 Codex: KB extraction runs on the 111 lane so it does not pollute the GCP alert-fast lane
def _get_ollama_base() -> str:
    """Return the Ollama base URL resolved for the ``deep_rca`` workload."""
    # The resolver is imported at call time rather than at module import time.
    from src.services.ollama_endpoint_resolver import resolve_ollama_endpoint

    endpoint = resolve_ollama_endpoint("deep_rca")
    return endpoint
# Ollama model tag sent in the "model" field of the generate request.
_EXTRACT_MODEL = "llama3.2:3b"
# Timeout (seconds) handed to the httpx client; generous on purpose to tolerate slow inference.
_EXTRACT_TIMEOUT = 30.0
# SRE KB prompt in the Linear / Nothing.tech style.
# Filled with str.format(); placeholders: incident_id, severity, created_at, resolved_at, signals.
# NOTE(review): a few lines end with a closing fullwidth parenthesis that has no opening
# counterpart — it looks like some punctuation was lost; confirm against the repository copy.
_PROMPT_TEMPLATE = """你是一位資深 SRE 工程師,請用**繁體中文**撰寫一份知識庫條目Markdown 格式)。
## 事件資訊
- 事件 ID{incident_id}
- 嚴重度:{severity}
- 發生時間:{created_at}
- 解決時間:{resolved_at}
## 觸發信號
{signals}
## 請輸出以下結構的 Markdown只輸出 Markdown不要其他說明文字
# [一句話摘要標題]
## 問題描述
簡述發生了什麼問題2-3 句)
## 根本原因
(分析可能的根本原因,條列式)
## 解決方法
(列出實際採取的解決步驟,條列式)
## 預防措施
(如何避免未來再發生,條列式)
## 相關標籤
`{severity}` `ai_extracted`
"""
# Signal keyword → KB category mapping.
# Insertion order matters: _infer_category walks this dict in order and returns the
# first category that has any keyword contained in the lower-cased signal text.
_CATEGORY_KEYWORDS: dict[str, list[str]] = {
    "infrastructure": ["k8s", "pod", "node", "deploy", "container", "namespace", "kubectl",
                       "memory", "cpu", "disk", "oom", "evict", "crashloop"],
    "application": ["api", "http", "latency", "5xx", "4xx", "error rate", "timeout",
                    "connection", "database", "redis", "postgres", "slow"],
    "ai_system": ["ai", "llm", "openclaw", "nemo", "ollama", "gemini", "claude",
                  "router", "provider", "inference", "token"],
    "security": ["ssl", "cert", "auth", "permission", "scan", "vuln", "exploit",
                 "unauthorized", "403", "401"],
}
class KnowledgeExtractorService:
"""
Incident → KB 草稿自動萃取器
使用 Ollama llama3.2:3b 本地推理,產生 Markdown 格式的 SRE 知識條目。
"""
async def extract_from_incident(self, incident) -> bool:
"""
從已解決的 Incident 萃取 KB 草稿。
Args:
incident: Incident 物件(需有 incident_id, severity, signals, created_at
Returns:
True = 萃取成功False = 失敗(已記錄 Stack Trace
"""
try:
# 1. 組 Prompt
# 2026-04-16 ogt: Signal 無 description 欄位,用 alert_name + annotations.summary
signals_text = "\n".join(
f"- {s.alert_name}: {s.annotations.get('summary', s.annotations.get('description', ''))}"
for s in (incident.signals or [])
) or "(無信號記錄)"
prompt = _PROMPT_TEMPLATE.format(
incident_id=incident.incident_id,
severity=incident.severity.value,
created_at=str(getattr(incident, "created_at", "未知"))[:19],
resolved_at=str(getattr(incident, "resolved_at", "未知"))[:19],
signals=signals_text,
)
# 2. 呼叫 Ollama直接 HTTP不走 AIRouter 避免路由邏輯開銷)
markdown_content = await self._call_ollama(prompt)
if not markdown_content:
logger.warning(
"kb_extract_empty_response",
incident_id=incident.incident_id,
model=_EXTRACT_MODEL,
)
return False
# 3. 萃取標題(第一行 `# 標題`
title = self._extract_title(markdown_content, incident)
# 4. 推斷分類
category = self._infer_category(incident)
# 5. 建立 KB 條目
from src.models.knowledge import (
EntrySource,
EntryType,
KnowledgeEntryCreate,
)
from src.services.knowledge_service import get_knowledge_service
entry_data = KnowledgeEntryCreate(
title=title,
content=markdown_content,
entry_type=EntryType.INCIDENT_CASE,
category=category,
tags=[incident.severity.value, "ai_extracted", category],
source=EntrySource.AI_EXTRACTED,
related_incident_id=incident.incident_id,
created_by="openclaw_ai",
)
await get_knowledge_service().create_entry(entry_data)
logger.info(
"kb_extract_success",
incident_id=incident.incident_id,
title=title,
category=category,
model=_EXTRACT_MODEL,
)
return True
except Exception:
# 統帥指示:保留完整 Stack Trace 供初期 Prompt 調優
logger.exception(
"kb_extract_failed",
incident_id=getattr(incident, "incident_id", "unknown"),
)
return False
async def _call_ollama(self, prompt: str) -> str | None:
"""
直接呼叫 Ollama REST API。
不走 AIRouter 是刻意設計:
- KB 萃取是背景工作,不需要完整的路由/閘門/Cache 邏輯
- 強制本地,不允許 fallback 到 cloud provider
"""
import httpx
try:
async with httpx.AsyncClient(timeout=_EXTRACT_TIMEOUT) as client:
r = await client.post(
f"{_get_ollama_base()}/api/generate",
json={
"model": _EXTRACT_MODEL,
"prompt": prompt,
"stream": False,
"options": {
"temperature": 0.3, # 低溫:減少幻覺
"num_predict": 800, # 控制長度
"stop": ["\n\n\n"], # 防止無限生成
},
},
)
r.raise_for_status()
text = r.json().get("response", "").strip()
return text or None
except Exception:
logger.exception(
"kb_ollama_call_failed",
model=_EXTRACT_MODEL,
base=_get_ollama_base(),
)
return None
def _extract_title(self, markdown: str, incident) -> str:
"""
從 Markdown 第一行 `# 標題` 萃取標題。
Fallback使用 incident_id + 第一個 signal 描述。
"""
for line in markdown.splitlines():
stripped = line.strip()
if stripped.startswith("# "):
title = stripped[2:].strip()
if title:
return title[:200] # DB column max 255
# Fallback
# 2026-04-16 ogt: Signal 無 description 欄位,改用 alert_name
signals = incident.signals or []
desc = signals[0].alert_name[:60] if signals else "未知事件"
return f"[AI 萃取] {incident.incident_id}: {desc}"
def _infer_category(self, incident) -> str:
"""
依 signals 關鍵字推斷 KB 分類。
依序比對,第一個匹配的分類獲勝。
"""
# 2026-04-24 ogt: Signal 無 description 欄位,改用 alert_name + annotations.summary
text = " ".join(
(
(s.alert_name or "") + " " +
(s.annotations.get("summary", "") if s.annotations else "")
).lower()
for s in (incident.signals or [])
)
for category, keywords in _CATEGORY_KEYWORDS.items():
if any(k in text for k in keywords):
return category
# 保守 fallback
return "infrastructure"
# =============================================================================
# Singleton
# =============================================================================
_extractor: KnowledgeExtractorService | None = None


def get_knowledge_extractor() -> KnowledgeExtractorService:
    """Return the module-level KnowledgeExtractorService, creating it on first use."""
    global _extractor
    if _extractor is not None:
        return _extractor
    _extractor = KnowledgeExtractorService()
    return _extractor