feat(mcp-phase4c): AI 動態規則生成 — 新 alertname 自動產 Playbook 草稿
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 8m29s
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 8m29s
_generate_playbook_draft_if_new():
- Playbook 無命中時非同步觸發(不阻塞決策主流程)
- 先用 semantic_search(threshold=0.92) 確認 KM 無同名 Playbook
- 呼叫 qwen2.5:7b-instruct (Ollama 188) 生成五段結構化草稿
(症狀/根因/診斷步驟/修復動作/驗收條件)
- 寫入 KnowledgeEntry(type=PLAYBOOK, status=DRAFT, source=AI_EXTRACTED)
- 寫入 AlertOperationLog PLAYBOOK_DRAFT_CREATED 事件
- 失敗靜默 debug log
完成 MCP Phase 4 全三項:
4a NemoClaw second opinion (信心 < 0.7)
4b K8s 狀態快照 k8s_state_after
4c AI 動態 Playbook 草稿生成
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -326,6 +326,104 @@ async def _nemoclaw_second_opinion(incident: "Incident", primary_result: dict) -
|
||||
return None
|
||||
|
||||
|
||||
# Alertnames whose draft generation is currently running in this process.
# The KM existence check and the KM write are separated by a slow LLM call, so
# without this guard several incidents firing the same alert at once would all
# pass the check and each write their own draft.
# NOTE(review): this only de-duplicates within one process/event loop; if the
# service runs multiple workers, a KM-side uniqueness rule is still needed.
_PLAYBOOK_DRAFTS_IN_FLIGHT = set()


async def _generate_playbook_draft_if_new(incident: "Incident") -> None:
    """
    MCP Phase 4c: when no Playbook matched, generate an AI draft Playbook in KM.

    Steps:
    - Read ``alertname`` (and ``severity``) from the labels of the first signal;
      do nothing when there is no signal or no alertname.
    - Skip when a draft for the same alertname is already being generated in
      this process, or when ``semantic_search(alertname, threshold=0.92)``
      returns any existing KM entry.
    - Ask ``qwen2.5:7b-instruct`` via the Ollama ``/api/generate`` endpoint for
      a five-section draft (symptoms / root-cause hypotheses / diagnostic
      steps / remediation / acceptance criteria).
    - Store the text as a ``KnowledgeEntry`` with ``entry_type=PLAYBOOK``,
      ``status=DRAFT`` and ``source=AI_EXTRACTED`` (the draft is meant to be
      reviewed by a human before being promoted).
    - Append a ``PLAYBOOK_DRAFT_CREATED`` event to the alert operation log.

    This is best-effort: every ``Exception`` is caught and reported at debug
    level only, so a failure here never propagates to the caller.

    Args:
        incident: The incident whose first signal supplies the alert labels;
            ``affected_services`` and ``incident_id`` are also read.

    Returns:
        None. The result is only observable through the KM entry, the
        operation log event and the structured log line.

    2026-04-11 Claude Sonnet 4.6 Asia/Taipei
    """
    alertname = ""
    claimed = False
    try:
        signals = incident.signals
        labels = signals[0].labels if signals else {}
        alertname = labels.get("alertname", "")
        if not alertname or alertname in _PLAYBOOK_DRAFTS_IN_FLIGHT:
            return
        # No await between the membership test above and this add, so the
        # claim cannot be interleaved with another task on the same loop.
        _PLAYBOOK_DRAFTS_IN_FLIGHT.add(alertname)
        claimed = True

        # Imported lazily, after the cheap guards, and inside the try so an
        # import problem is handled like any other failure of this feature.
        import httpx as _httpx
        import structlog as _sl
        from src.core.config import settings
        from src.models.knowledge import (
            EntrySource, EntryStatus, EntryType, KnowledgeEntryCreate,
        )
        from src.repositories.alert_operation_log_repository import get_alert_operation_log_repository
        from src.services.knowledge_service import get_knowledge_service

        # Skip when KM already holds a close semantic match for this alertname.
        knowledge_svc = get_knowledge_service()
        existing = await knowledge_svc.semantic_search(alertname, limit=1, threshold=0.92)
        if existing:
            return

        # Build the prompt for qwen2.5:7b-instruct.
        severity = labels.get("severity", "warning")
        services = ", ".join(incident.affected_services or ["unknown"])
        prompt = (
            "你是資深 SRE,請為以下告警生成一份結構化 Playbook 草稿(繁體中文)。\n"
            f"告警名稱: {alertname}\n"
            f"嚴重度: {severity}\n"
            f"受影響服務: {services}\n\n"
            "請按以下格式輸出(不超過 300 字):\n"
            "## 症狀\n(描述此告警代表什麼)\n"
            "## 根因假設\n(最常見的 2-3 個原因)\n"
            "## 診斷步驟\n(kubectl 或 shell 指令)\n"
            "## 修復動作\n(具體修復指令,含 kubectl rollout restart 等)\n"
            "## 驗收條件\n(如何確認修復成功)"
        )

        # Fall back to the default when the setting is missing or empty, and
        # strip a trailing slash so the request path never becomes
        # "//api/generate".
        ollama_url = str(
            getattr(settings, "OLLAMA_URL", None) or "http://192.168.0.188:11434"
        ).rstrip("/")
        async with _httpx.AsyncClient(timeout=45.0) as client:
            resp = await client.post(
                f"{ollama_url}/api/generate",
                json={"model": "qwen2.5:7b-instruct", "prompt": prompt, "stream": False},
            )
            resp.raise_for_status()
            # "response" may be absent or null; normalise both to "".
            content = (resp.json().get("response") or "").strip()

        # Anything shorter than 50 characters is treated as not a usable draft.
        if len(content) < 50:
            return

        # Persist to KM with status=DRAFT.
        entry = await knowledge_svc.create_entry(
            KnowledgeEntryCreate(
                title=f"[AI草稿] {alertname} Playbook",
                content=content,
                entry_type=EntryType.PLAYBOOK,
                category="auto_generated",
                tags=[alertname, severity, "ai_draft", "mcp_phase4c"],
                source=EntrySource.AI_EXTRACTED,
                status=EntryStatus.DRAFT,
                related_incident_id=incident.incident_id,
            )
        )

        # Record the operation log event.
        op_repo = get_alert_operation_log_repository()
        await op_repo.append(
            event_type="PLAYBOOK_DRAFT_CREATED",
            incident_id=incident.incident_id,
            actor="mcp_phase4c",
            action_detail=f"AI 草稿 Playbook: {entry.entry_id}",
            success=True,
            context={"alertname": alertname, "km_entry_id": entry.entry_id},
        )

        _sl.get_logger(__name__).info(
            "playbook_draft_created",
            incident_id=incident.incident_id,
            alertname=alertname,
            entry_id=entry.entry_id,
        )

    except Exception as e:
        # Deliberately quiet (debug level): draft generation is optional.
        # Re-imported here because the failure may have happened before the
        # import inside the try block was reached.
        import structlog as _sl
        _sl.get_logger(__name__).debug(
            "playbook_draft_failed",
            incident_id=getattr(incident, "incident_id", None),
            alertname=alertname,
            error_type=type(e).__name__,
            error=str(e),
        )
    finally:
        # Only the task that claimed the alertname releases it.
        if claimed:
            _PLAYBOOK_DRAFTS_IN_FLIGHT.discard(alertname)
|
||||
|
||||
|
||||
async def _fetch_metrics_snapshot(incident: Incident) -> dict:
|
||||
"""
|
||||
ADR-071-I: 從 Prometheus 抓取與此 incident 相關的指標快照
|
||||
@@ -1052,6 +1150,9 @@ class DecisionManager:
|
||||
if playbook_result:
|
||||
return playbook_result
|
||||
|
||||
# MCP Phase 4c: Playbook 無命中 → 非同步產生 AI 草稿 Playbook (2026-04-11 Claude Sonnet 4.6)
|
||||
asyncio.create_task(_generate_playbook_draft_if_new(incident))
|
||||
|
||||
# Expert System 同步執行 (立即可用)
|
||||
expert_result = expert_analyze(incident)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user