Files
awoooi/apps/api/src/services/intent_classifier.py
Your Name 35fe37c82a
All checks were successful
Code Review / ai-code-review (push) Successful in 23s
CD Pipeline / tests (push) Successful in 5m51s
CD Pipeline / build-and-deploy (push) Successful in 3m29s
CD Pipeline / post-deploy-checks (push) Successful in 1m14s
fix(api): route direct ollama callers through ordered fallback
2026-05-19 12:56:13 +08:00

801 lines
25 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Intent Classifier - Phase 13.3 #85
===================================
K8s 操作意圖分類器,用於智能路由模型選擇
目標: < 100ms 延遲 (規則引擎 < 10ms)
策略: 方案 A (規則引擎) → 方案 B (LLM 備援)
版本: v2.0
建立: 2026-03-26 (台北時區)
建立者: Claude Code
最後修改: 2026-03-26 (台北時區)
修改者: Claude Code
變更紀錄:
| 版本 | 日期 | 執行者 | 變更內容 |
|------|------|--------|----------|
| v1.0 | 2026-03-26 | Claude Code | 初始實作 (舊版 IntentType) |
| v2.0 | 2026-03-26 | Claude Code | Phase 13.3 #85 升級 (四大核心+輔助意圖) |
"""
from __future__ import annotations
import json
import re
import time
from dataclasses import dataclass, field
from enum import Enum
from typing import Protocol, runtime_checkable
import httpx
import structlog
from src.services.model_registry import get_model_registry
from src.services.ollama_endpoint_resolver import resolve_ollama_order
logger = structlog.get_logger(__name__)
# LLM classification prompt template (Phase 13.4).
# The {text} placeholder is filled in with str.format(); the doubled braces on
# the last line are format escapes that render as single literal braces.
_LLM_CLASSIFY_PROMPT = """你是 K8s 操作意圖分類專家。根據以下輸入,判斷用戶的操作意圖。
可選意圖類型:
- restart: 重啟 Pod/Deployment/StatefulSet
- scale: 擴縮容、HPA 調整
- config: ConfigMap/Secret/ENV 變更
- diagnose: 日誌查詢、健康檢查、RCA
- delete: 刪除資源(高風險)
- rollback: 回滾版本
- unknown: 無法判斷
輸入: {text}
請以 JSON 格式回答,只輸出 JSON:
{{"intent": "<類型>", "confidence": <0.0-1.0>, "reasoning": "<判斷依據>"}}"""
# =============================================================================
# 意圖類型定義 (Phase 13.3 #85)
# =============================================================================
class IntentType(Enum):
    """
    Intent types for K8s operations.

    Four core intents:
    - RESTART: restart a Pod/Deployment/StatefulSet
    - SCALE: scale out/in, HPA adjustment
    - CONFIG: ConfigMap/Secret/ENV changes
    - DIAGNOSE: log queries, health checks, RCA

    Auxiliary intents:
    - DELETE: delete resources (high risk)
    - ROLLBACK: roll back to a previous version
    - UNKNOWN: intent cannot be determined

    Legacy members (deprecated, mapped onto the new intents):
    - ALERT_TRIAGE -> DIAGNOSE
    - DEPLOYMENT -> CONFIG
    - QUERY -> DIAGNOSE
    - MAINTENANCE -> RESTART
    - CODE_REVIEW -> DIAGNOSE
    """

    # Four core intents
    RESTART = "restart"  # restart Pod/Deployment/StatefulSet
    SCALE = "scale"  # scale out/in, HPA adjustment
    CONFIG = "config"  # ConfigMap/Secret/ENV changes
    DIAGNOSE = "diagnose"  # log queries, health checks, RCA
    # Auxiliary intents
    DELETE = "delete"  # delete resources (high risk)
    ROLLBACK = "rollback"  # roll back to a previous version
    UNKNOWN = "unknown"  # intent cannot be determined
    # Legacy members (deprecated, kept for backward compatibility)
    ALERT_TRIAGE = "alert_triage"  # -> DIAGNOSE
    DEPLOYMENT = "deployment"  # -> CONFIG
    QUERY = "query"  # -> DIAGNOSE
    MAINTENANCE = "maintenance"  # -> RESTART
    CODE_REVIEW = "code_review"  # -> DIAGNOSE
# Mapping from deprecated legacy intents to their current equivalents.
# Intents that are not keys here are already current.
LEGACY_INTENT_MAP: dict[IntentType, IntentType] = {
    IntentType.ALERT_TRIAGE: IntentType.DIAGNOSE,
    IntentType.DEPLOYMENT: IntentType.CONFIG,
    IntentType.QUERY: IntentType.DIAGNOSE,
    IntentType.MAINTENANCE: IntentType.RESTART,
    IntentType.CODE_REVIEW: IntentType.DIAGNOSE,
}
def normalize_intent(intent: IntentType) -> IntentType:
    """
    Normalize an intent by translating legacy members to their replacements.

    Args:
        intent: The intent as originally produced.

    Returns:
        The mapped intent when ``intent`` is a legacy member listed in
        ``LEGACY_INTENT_MAP``; otherwise ``intent`` itself, unchanged.
    """
    if intent in LEGACY_INTENT_MAP:
        return LEGACY_INTENT_MAP[intent]
    return intent
# =============================================================================
# 風險等級定義
# =============================================================================
class RiskLevel(Enum):
    """Risk level associated with an intent."""

    LOW = "low"  # read-only operations (DIAGNOSE)
    MEDIUM = "medium"  # reversible operations (RESTART, SCALE, ROLLBACK)
    HIGH = "high"  # configuration changes (CONFIG)
    CRITICAL = "critical"  # irreversible operations (DELETE)
# Risk level assigned to each intent.
INTENT_RISK_MAP: dict[IntentType, RiskLevel] = {
    IntentType.DIAGNOSE: RiskLevel.LOW,
    IntentType.RESTART: RiskLevel.MEDIUM,
    IntentType.SCALE: RiskLevel.MEDIUM,
    IntentType.ROLLBACK: RiskLevel.MEDIUM,
    IntentType.CONFIG: RiskLevel.HIGH,
    IntentType.DELETE: RiskLevel.CRITICAL,
    IntentType.UNKNOWN: RiskLevel.MEDIUM,
    # Legacy members (backward compatibility)
    IntentType.ALERT_TRIAGE: RiskLevel.LOW,
    IntentType.DEPLOYMENT: RiskLevel.HIGH,
    IntentType.QUERY: RiskLevel.LOW,
    IntentType.MAINTENANCE: RiskLevel.MEDIUM,
    IntentType.CODE_REVIEW: RiskLevel.LOW,
}
# =============================================================================
# 關鍵字規則引擎 (方案 A, < 10ms)
# =============================================================================
# Keyword table for the rule engine: each intent maps to the keywords that
# vote for it. Keywords are tested against lower-cased input text.
INTENT_KEYWORDS: dict[IntentType, list[str]] = {
    # Four core intents
    IntentType.RESTART: [
        # English
        "restart",
        "reboot",
        "recreate",
        "kill",
        "delete pod",
        "rollout restart",
        # Chinese
        "重啟",
        "重新啟動",
        "重建",
        "刪除 pod",
        "殺掉",
    ],
    IntentType.SCALE: [
        # English
        "scale",
        "replica",
        "hpa",
        "autoscale",
        "scale up",
        "scale down",
        "horizontal pod autoscaler",
        # Chinese
        "擴容",
        "縮容",
        "擴縮",
        "副本",
        "水平擴展",
    ],
    IntentType.CONFIG: [
        # English
        "configmap",
        "secret",
        "env",
        "environment",
        "config",
        "setting",
        "configuration",
        "kubectl apply",
        "helm upgrade",
        # Chinese
        "配置",
        "設定",
        "環境變數",
        "部署",
        "更新配置",
    ],
    IntentType.DIAGNOSE: [
        # English
        "log",
        "logs",
        "describe",
        "get",
        "status",
        "health",
        "check",
        "debug",
        "trace",
        "diagnose",
        "rca",
        "root cause",
        "investigate",
        "why",
        "what happened",
        # Chinese
        "日誌",
        "查看",
        "檢查",
        "狀態",
        "健康",
        "診斷",
        "原因",
        "為什麼",
        "什麼問題",
        "分析",
    ],
    # Auxiliary intents
    IntentType.DELETE: [
        # English
        "delete",
        "remove",
        "destroy",
        "kubectl delete",
        "helm uninstall",
        "drop",
        # Chinese
        "刪除",
        "移除",
        "銷毀",
        "清除",
    ],
    IntentType.ROLLBACK: [
        # English
        "rollback",
        "rollout undo",
        "revert",
        "previous version",
        "last version",
        # Chinese
        "回滾",
        "回復",
        "還原",
        "上一版",
        "前一版",
    ],
}
# Alert keywords (reinforce DIAGNOSE classification): when any of these occurs
# in the text and no DIAGNOSE keyword matched, the rule engine still gives
# DIAGNOSE a minimal score.
ALERT_KEYWORDS: list[str] = [
    "alert",
    "alerting",
    "firing",
    "告警",
    "警報",
    "異常",
    "error",
    "critical",
    "warning",
    "high cpu",
    "high memory",
    "oom",
    "crash",
    "down",
    "timeout",
    "failed",
    "unhealthy",
]
# Resource-type keywords (used for context detection): canonical resource
# name -> the spellings and short aliases that indicate it.
RESOURCE_KEYWORDS: dict[str, list[str]] = {
    "pod": ["pod", "pods", "po"],
    "deployment": ["deployment", "deployments", "deploy"],
    "statefulset": ["statefulset", "statefulsets", "sts"],
    "daemonset": ["daemonset", "daemonsets", "ds"],
    "service": ["service", "services", "svc"],
    "configmap": ["configmap", "configmaps", "cm"],
    "secret": ["secret", "secrets"],
    "ingress": ["ingress", "ingresses", "ing"],
    "namespace": ["namespace", "namespaces", "ns"],
}
# =============================================================================
# 分類結果
# =============================================================================
@dataclass
class IntentResult:
    """
    Outcome of a single intent classification.

    ``risk_level`` is always derived from ``intent`` after construction, so a
    value supplied by the caller is overwritten.
    """

    intent: IntentType  # classified intent
    confidence: float  # confidence score (0.0-1.0)
    method: str  # classification method (keyword/llm)
    risk_level: RiskLevel = RiskLevel.MEDIUM
    matched_keywords: list[str] = field(default_factory=list)
    detected_resources: list[str] = field(default_factory=list)
    reasoning: str = ""

    def __post_init__(self):
        """Derive the risk level from the intent (MEDIUM when unmapped)."""
        if self.intent in INTENT_RISK_MAP:
            self.risk_level = INTENT_RISK_MAP[self.intent]
        else:
            self.risk_level = RiskLevel.MEDIUM
# =============================================================================
# Protocol 介面 (支援 DI)
# =============================================================================
@runtime_checkable
class IIntentClassifier(Protocol):
    """Structural interface for intent classifiers (dependency injection)."""

    async def classify(self, text: str) -> IntentResult:
        """Classify the intent of ``text`` asynchronously."""

    def classify_sync(self, text: str) -> IntentResult:
        """Classify the intent of ``text`` synchronously."""
# =============================================================================
# 實作
# =============================================================================
class IntentClassifier:
    """
    Classifier for K8s operation intents.

    Two-stage strategy:
    1. Plan A: rule engine (keyword matching, target < 10 ms).
    2. Plan B: lightweight LLM served by Ollama (target < 100 ms) as a backup.

    Keyword results always carry confidence 0.0, so ``classify`` consults the
    LLM for every non-empty input and keeps the keyword result unless the LLM
    answer has a strictly higher confidence.

    Usage:
        classifier = get_intent_classifier()
        result = await classifier.classify("重啟 api-server pod")
        # result.method is "llm" when the LLM answer was used, else "keyword"
    """

    # Name of the LLM backup model; resolved lazily from the ModelRegistry.
    _llm_model: str | None = None

    # Inputs longer than this are classified but never cached, which bounds
    # the memory held by cache keys now that the full text is the key.
    _cache_key_max_chars: int = 1024

    def __init__(self):
        self._keyword_cache: dict[str, IntentResult] = {}
        self._cache_max_size = 1000  # maximum number of cached entries

    @property
    def llm_model(self) -> str:
        """Return the LLM backup model name, resolving it on first access."""
        if self._llm_model is None:
            try:
                registry = get_model_registry()
                self._llm_model = registry.get_model("ollama", "intent")
            except Exception as e:
                # Best-effort lookup: record why it failed, then use the
                # built-in default so classification can still proceed.
                logger.warning(
                    "intent_llm_model_lookup_failed",
                    error=str(e),
                    error_type=type(e).__name__,
                )
                self._llm_model = "qwen2.5:1b"  # fallback
        return self._llm_model

    async def classify(self, text: str) -> IntentResult:
        """
        Classify the intent of ``text`` (asynchronous).

        Args:
            text: User input or alert content.

        Returns:
            IntentResult: The LLM result when its confidence is strictly
            higher than the keyword result's, otherwise the keyword result.
            Empty or whitespace-only input returns the keyword result
            (UNKNOWN) without contacting the LLM.
        """
        text_lower = text.lower().strip()

        # Stage 1: fast rule-engine match (< 10 ms)
        result = self._keyword_classify(text_lower)

        if not text_lower:
            # Nothing to classify; do not spend an LLM round-trip on it.
            logger.debug(
                "intent_classified_fallback",
                intent=result.intent.value,
                confidence=result.confidence,
                text_preview=text[:50],
            )
            return result

        # Confidence threshold. Keyword results currently always report 0.0,
        # so this shortcut only fires if that policy changes.
        if result.confidence >= 0.7:
            logger.debug(
                "intent_classified_by_keyword",
                intent=result.intent.value,
                confidence=result.confidence,
                matched_keywords=result.matched_keywords,
                text_preview=text[:50],
            )
            return result

        # Stage 2: LLM classification (< 100 ms)
        llm_result = await self._llm_classify(text_lower)
        if llm_result.confidence > result.confidence:
            logger.debug(
                "intent_classified_by_llm",
                intent=llm_result.intent.value,
                confidence=llm_result.confidence,
                text_preview=text[:50],
            )
            return llm_result

        # Keep the rule-engine result
        logger.debug(
            "intent_classified_fallback",
            intent=result.intent.value,
            confidence=result.confidence,
            text_preview=text[:50],
        )
        return result

    def classify_sync(self, text: str) -> IntentResult:
        """
        Classify the intent of ``text`` synchronously (keyword matching only).

        Args:
            text: User input or alert content.

        Returns:
            IntentResult: The rule-engine result.
        """
        return self._keyword_classify(text.lower().strip())

    @staticmethod
    def _has_token(text: str, token: str) -> bool:
        """Return True when ``token`` occurs in ``text`` as a standalone token.

        A match must not be directly adjacent to an ASCII letter or digit, so
        short aliases such as "ing" or "ds" no longer fire inside words like
        "restarting" or "pods". CJK neighbours do not block a match. ``text``
        is expected to be lower-cased already.
        """
        pattern = rf"(?<![a-z0-9]){re.escape(token)}(?![a-z0-9])"
        return re.search(pattern, text) is not None

    def _keyword_classify(self, text: str) -> IntentResult:
        """
        Rule-engine classification (plan A), target latency < 10 ms.

        Args:
            text: Input text, already lower-cased.

        Returns:
            IntentResult: The best-scoring intent with confidence 0.0, or
            UNKNOWN when nothing matched. Results may be served from the
            internal cache; the returned object is shared with that cache, so
            callers should treat it as read-only.
        """
        # The cache is keyed on the full text. Keying on a truncated prefix
        # would hand one input's result to any other input sharing that prefix.
        cacheable = len(text) <= self._cache_key_max_chars
        if cacheable:
            cached = self._keyword_cache.get(text)
            if cached is not None:
                return cached

        # Score every intent: +1 per keyword found as a substring, +1 more
        # when the keyword also appears as a whole word.
        scores: dict[IntentType, tuple[int, list[str]]] = {}
        for intent, keywords in INTENT_KEYWORDS.items():
            score = 0
            matched: list[str] = []
            for keyword in keywords:
                if keyword in text:
                    score += 1
                    matched.append(keyword)
                    # Whole-word bonus
                    if re.search(rf"\b{re.escape(keyword)}\b", text):
                        score += 1
            if score > 0:
                scores[intent] = (score, matched)

        # Alert content reinforces DIAGNOSE when no DIAGNOSE keyword matched.
        is_alert = any(kw in text for kw in ALERT_KEYWORDS)
        if is_alert and IntentType.DIAGNOSE not in scores:
            scores[IntentType.DIAGNOSE] = (1, ["(alert_detected)"])

        # Detect the resource types mentioned (token match, not substring).
        detected_resources: list[str] = [
            resource_type
            for resource_type, aliases in RESOURCE_KEYWORDS.items()
            if any(self._has_token(text, alias) for alias in aliases)
        ]

        # Pick the highest-scoring intent; on ties the intent listed first in
        # INTENT_KEYWORDS wins because max() keeps the first maximum.
        if not scores:
            result = IntentResult(
                intent=IntentType.UNKNOWN,
                confidence=0.0,
                method="keyword",
                matched_keywords=[],
                detected_resources=detected_resources,
                reasoning="無匹配關鍵字",
            )
        else:
            best_intent = max(scores, key=lambda k: scores[k][0])
            _best_score, matched_keywords = scores[best_intent]
            # 2026-03-29 fix: keyword matching is not an AI judgement, so the
            # confidence is reported as 0 (per feedback_confidence_truthfulness.md).
            confidence = 0.0
            result = IntentResult(
                intent=best_intent,
                confidence=confidence,
                method="keyword",
                matched_keywords=matched_keywords,
                detected_resources=detected_resources,
                reasoning=f"匹配關鍵字: {', '.join(matched_keywords)}",
            )

        if cacheable:
            # Simple FIFO eviction: once full, drop the oldest half (dicts
            # preserve insertion order).
            if len(self._keyword_cache) >= self._cache_max_size:
                keys = list(self._keyword_cache.keys())
                for k in keys[: len(keys) // 2]:
                    del self._keyword_cache[k]
            self._keyword_cache[text] = result
        return result

    def _degraded_keyword_result(
        self, text: str, method: str, reason_prefix: str
    ) -> IntentResult:
        """Return the keyword result for ``text`` relabelled as an LLM degradation.

        Args:
            text: Input text, already lower-cased.
            method: Marker stored in ``IntentResult.method``.
            reason_prefix: Text placed before the keyword reasoning.
        """
        kw_result = self._keyword_classify(text)
        return IntentResult(
            intent=kw_result.intent,
            confidence=kw_result.confidence,
            method=method,
            # Copy the lists so the cached keyword result is not aliased.
            matched_keywords=list(kw_result.matched_keywords),
            detected_resources=list(kw_result.detected_resources),
            reasoning=f"{reason_prefix}{kw_result.reasoning}",
        )

    async def _request_llm_completion(
        self, prompt: str, model_name: str
    ) -> tuple[str, Exception | None]:
        """Ask each Ollama endpoint in fallback order until one responds.

        Returns:
            ``(response_text, last_error)``. ``response_text`` is empty when
            no endpoint produced text; ``last_error`` is the most recent
            per-endpoint failure, or None when none occurred.
        """
        result_text = ""
        last_error: Exception | None = None
        async with httpx.AsyncClient() as client:
            for endpoint in resolve_ollama_order("hermes"):
                if not endpoint.url:
                    continue
                try:
                    response = await client.post(
                        f"{endpoint.url}/api/generate",
                        json={
                            "model": model_name,
                            "prompt": prompt,
                            "stream": False,
                            "format": "json",
                            "options": {
                                "num_predict": 128,  # intent classification needs only a short reply
                                "temperature": 0.0,  # deterministic output
                                "top_p": 0.9,
                            },
                        },
                        timeout=httpx.Timeout(5.0, connect=2.0),  # strict timeout
                    )
                    response.raise_for_status()
                    data = response.json()
                    result_text = data.get("response", "")
                    break
                except Exception as e:
                    # Any failure (including a timeout) moves on to the next
                    # endpoint in the fallback order.
                    last_error = e
                    logger.warning(
                        "intent_llm_endpoint_failed",
                        provider=endpoint.provider_name,
                        error=str(e),
                        error_type=type(e).__name__,
                    )
        return result_text, last_error

    async def _llm_classify(self, text: str) -> IntentResult:
        """
        LLM classification (plan B) - Phase 13.4.

        Target latency: < 100 ms (qwen2.5:1b or the configured intent model).

        Args:
            text: Input text, already lower-cased.

        Returns:
            IntentResult: The LLM's answer with ``method="llm"`` and a
            confidence clamped to [0.0, 1.0]. On any failure this never
            raises; it returns the keyword result relabelled with one of
            ``llm_unavailable_keyword``, ``llm_parse_failed_keyword``,
            ``llm_timeout_keyword`` or ``llm_error_keyword``.
        """
        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # wall-clock adjustments.
        start_time = time.perf_counter()
        try:
            prompt = _LLM_CLASSIFY_PROMPT.format(text=text)
            model_name = self.llm_model  # qwen2.5:1b or the configured value

            result_text, last_error = await self._request_llm_completion(
                prompt, model_name
            )
            if not result_text:
                error_name = type(last_error).__name__ if last_error else "no_endpoint"
                return self._degraded_keyword_result(
                    text,
                    "llm_unavailable_keyword",
                    f"LLM 全端點不可用({error_name})→ ",
                )

            # Parse the JSON reply
            elapsed_ms = (time.perf_counter() - start_time) * 1000
            try:
                parsed = json.loads(result_text)
                if not isinstance(parsed, dict):
                    raise ValueError("LLM response is not a JSON object")
                intent_str = str(parsed.get("intent") or "unknown").lower()
                confidence = float(parsed.get("confidence", 0.5))
                if confidence != confidence:  # NaN carries no usable signal
                    confidence = 0.0
                # Keep the value inside the documented 0.0-1.0 range.
                confidence = min(1.0, max(0.0, confidence))
                reasoning = str(parsed.get("reasoning") or "")

                intent = self._parse_intent_type(intent_str)
                logger.info(
                    "intent_llm_classified",
                    intent=intent.value,
                    confidence=confidence,
                    elapsed_ms=round(elapsed_ms, 1),
                    model=model_name,
                )
                return IntentResult(
                    intent=intent,
                    confidence=confidence,
                    method="llm",
                    matched_keywords=[],
                    detected_resources=[],
                    reasoning=reasoning,
                )
            except (json.JSONDecodeError, KeyError, TypeError, ValueError) as e:
                logger.warning(
                    "intent_llm_parse_failed",
                    error=str(e),
                    response_preview=result_text[:100],
                )
                # A malformed reply also degrades to the keyword result.
                return self._degraded_keyword_result(
                    text,
                    "llm_parse_failed_keyword",
                    "LLM JSON 解析失敗降級 → ",
                )
        except httpx.TimeoutException:
            # Per-endpoint timeouts are absorbed in _request_llm_completion;
            # this handler covers a timeout raised anywhere else in the try
            # body. The distinct method marker is kept for observability.
            elapsed_ms = (time.perf_counter() - start_time) * 1000
            logger.warning(
                "intent_llm_timeout",
                elapsed_ms=round(elapsed_ms, 1),
            )
            return self._degraded_keyword_result(
                text,
                "llm_timeout_keyword",
                f"LLM 超時降級({round(elapsed_ms, 0):.0f}ms)→ ",
            )
        except Exception as e:
            logger.warning(
                "intent_llm_error",
                error=str(e),
                error_type=type(e).__name__,
            )
            # Any other LLM error likewise degrades to the keyword result.
            return self._degraded_keyword_result(
                text,
                "llm_error_keyword",
                f"LLM 錯誤降級({type(e).__name__})→ ",
            )

    def _parse_intent_type(self, intent_str: str) -> IntentType:
        """Map an intent string to an IntentType (UNKNOWN when unrecognized).

        Only the current intents are accepted; legacy names map to UNKNOWN.
        Surrounding whitespace and letter case are ignored.
        """
        intent_map = {
            "restart": IntentType.RESTART,
            "scale": IntentType.SCALE,
            "config": IntentType.CONFIG,
            "diagnose": IntentType.DIAGNOSE,
            "delete": IntentType.DELETE,
            "rollback": IntentType.ROLLBACK,
            "unknown": IntentType.UNKNOWN,
        }
        return intent_map.get(intent_str.strip().lower(), IntentType.UNKNOWN)

    def _llm_fallback_result(self, reason: str) -> IntentResult:
        """Build an UNKNOWN / confidence-0.0 result for an LLM failure.

        Not called from within this class; retained so any existing caller
        keeps working.
        """
        return IntentResult(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            method="llm",
            matched_keywords=[],
            detected_resources=[],
            reasoning=reason,
        )

    def get_supported_intents(self) -> list[dict]:
        """
        List the supported intents.

        Returns:
            One dict per current intent with the keys ``intent``,
            ``description``, ``risk_level`` and ``keywords_sample`` (at most
            the first five keywords; empty for UNKNOWN).
        """
        descriptions = (
            (IntentType.RESTART, "重啟 Pod/Deployment/StatefulSet"),
            (IntentType.SCALE, "擴縮容、HPA 調整"),
            (IntentType.CONFIG, "ConfigMap/Secret/ENV 變更"),
            (IntentType.DIAGNOSE, "日誌查詢、健康檢查、RCA"),
            (IntentType.DELETE, "刪除資源(高風險)"),
            (IntentType.ROLLBACK, "回滾版本"),
            (IntentType.UNKNOWN, "無法判斷意圖"),
        )
        # Risk levels come from INTENT_RISK_MAP so this listing cannot drift
        # from the levels attached to classification results.
        return [
            {
                "intent": intent.value,
                "description": description,
                "risk_level": INTENT_RISK_MAP.get(intent, RiskLevel.MEDIUM).value,
                "keywords_sample": INTENT_KEYWORDS.get(intent, [])[:5],
            }
            for intent, description in descriptions
        ]
# =============================================================================
# Singleton
# =============================================================================
# Process-wide IntentClassifier instance, created on first request.
_classifier: IntentClassifier | None = None


def get_intent_classifier() -> IntentClassifier:
    """Return the shared IntentClassifier, creating it on first use."""
    global _classifier
    if _classifier is not None:
        return _classifier
    _classifier = IntentClassifier()
    return _classifier
def reset_intent_classifier() -> None:
    """Reset the singleton (intended for tests).

    Clears the module-level instance so the next call to
    ``get_intent_classifier`` builds a fresh classifier.
    """
    global _classifier
    _classifier = None
# =============================================================================
# Convenience Functions
# =============================================================================
async def classify_intent(text: str) -> IntentResult:
    """Convenience wrapper: classify ``text`` with the shared classifier (async)."""
    classifier = get_intent_classifier()
    return await classifier.classify(text)
def classify_intent_sync(text: str) -> IntentResult:
    """Convenience wrapper: classify ``text`` with the shared classifier (sync)."""
    classifier = get_intent_classifier()
    return classifier.classify_sync(text)
def get_intent_risk(intent: IntentType) -> RiskLevel:
    """Convenience wrapper: return the risk level of ``intent`` (MEDIUM when unmapped)."""
    if intent in INTENT_RISK_MAP:
        return INTENT_RISK_MAP[intent]
    return RiskLevel.MEDIUM