From f846000c8c05633f6d1ae2e33947e3ab3913e478 Mon Sep 17 00:00:00 2001 From: OG T Date: Sat, 4 Apr 2026 11:48:57 +0800 Subject: [PATCH] =?UTF-8?q?fix(knowledge):=20C1=20=E9=A6=96=E5=B8=AD?= =?UTF-8?q?=E6=9E=B6=E6=A7=8B=E5=B8=AB=E5=BF=85=E4=BF=AE=20=E2=80=94=20=5F?= =?UTF-8?q?query=5Fkb=5Fcontext=205=E7=A7=92=20hard=20timeout?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit C1 修復 (首席架構師 Review 74/100 → 條件通過): - 抽出 _query_kb_context_inner 含實際查詢邏輯 - _query_kb_context 用 asyncio.wait_for(timeout=5.0) 包裝 - Ollama hang/慢響應最多消耗 5s,保護 30s 決策 SLA - timeout 時 logger.warning("kb_rag_timeout") 靜默降級 同步移除 LLM prompt 中的 emoji (## 📚 → ## Knowledge Base) Co-Authored-By: Claude Sonnet 4.6 --- apps/api/src/services/decision_manager.py | 64 +++++++++++++---------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/apps/api/src/services/decision_manager.py b/apps/api/src/services/decision_manager.py index 469c4c3a..db316dbe 100644 --- a/apps/api/src/services/decision_manager.py +++ b/apps/api/src/services/decision_manager.py @@ -548,41 +548,51 @@ class DecisionManager: _push_decision_to_telegram(incident, token.proposal_data) ) + async def _query_kb_context_inner(self, incident: Incident) -> str: + """KB RAG 實際查詢邏輯,由 _query_kb_context 包裝 timeout 後呼叫""" + from src.services.knowledge_service import get_knowledge_service + query_parts = list(incident.affected_services) + if incident.signals: + query_parts.insert(0, getattr(incident.signals[0], "alert_name", "")) + query = " ".join(filter(None, query_parts)) + + svc = get_knowledge_service() + results = await svc.semantic_search(query, limit=3, threshold=0.4) + if not results: + return "" + + lines = ["## Knowledge Base Related Entries (KB RAG)"] + for entry, score in results: + lines.append( + f"\n### [{entry.entry_type}] {entry.title} (similarity={score:.2f})" + ) + lines.append(entry.content[:500]) + if len(entry.content) > 500: + lines.append("... (truncated)") + + logger.info( + "kb_rag_context_injected", + incident_id=incident.incident_id, + kb_hits=len(results), + ) + return "\n".join(lines) + async def _query_kb_context(self, incident: Incident) -> str: """ KB Phase 2: 語意搜尋相關 KB 條目,組裝為 LLM context 字串 2026-04-04 Claude Code: KB RAG 整合 - 失敗時靜默降級,不影響主分析流程 + C1 修復 (首席架構師審查): 5 秒 hard timeout,防止 Ollama 慢響應威脅 30s SLA + 失敗/timeout 時靜默降級,不影響主分析流程 """ try: - from src.services.knowledge_service import get_knowledge_service - query_parts = list(incident.affected_services) - if incident.signals: - query_parts.insert(0, getattr(incident.signals[0], "alert_name", "")) - query = " ".join(filter(None, query_parts)) - - svc = get_knowledge_service() - results = await svc.semantic_search(query, limit=3, threshold=0.4) - if not results: - return "" - - lines = ["## 📚 Knowledge Base 相關條目 (KB RAG)"] - for entry, score in results: - lines.append( - f"\n### [{entry.entry_type}] {entry.title} (similarity={score:.2f})" - ) - lines.append(entry.content[:500]) - if len(entry.content) > 500: - lines.append("... (truncated)") - - logger.info( - "kb_rag_context_injected", - incident_id=incident.incident_id, - kb_hits=len(results), + return await asyncio.wait_for( + self._query_kb_context_inner(incident), + timeout=5.0, ) - return "\n".join(lines) - + except asyncio.TimeoutError: + logger.warning("kb_rag_timeout", incident_id=incident.incident_id) + return "" except Exception as e: logger.warning("kb_rag_failed", incident_id=incident.incident_id, error=str(e)) return ""