# awoooi/apps/api/tests/test_intent_classifier.py

"""
Intent Classifier Tests - Phase 13.4 Ollama 整合
================================================
2026-03-31 Claude Code (Phase 22 P1 修復): 移除 Mock，使用真實 Ollama

測試範圍:
- _llm_classify: LLM 分類邏輯 (需要真實 Ollama)
- _parse_intent_type: 意圖解析
- _llm_fallback_result: 失敗回退
- classify: 完整分類流程

遵循規範:
- feedback_no_mock_testing.md: 禁止 MagicMock/AsyncMock/patch
"""

import pytest

from src.services.intent_classifier import (
    IntentClassifier,
    IntentType,
    IntentResult,
    RiskLevel,
    get_intent_classifier,
)


# =============================================================================
# Test Markers
# =============================================================================

# Skip marker for tests that talk to a real Ollama service. The condition is a
# string expression that references ``config``, so it is left for pytest to
# evaluate instead of being computed when this module is imported; a decorated
# test is skipped unless the --run-ollama option is truthy (the lookup falls
# back to False).
# NOTE(review): the --run-ollama option is presumably registered in a
# conftest.py that is not visible from this file - confirm.
requires_ollama = pytest.mark.skipif(
    "not config.getoption('--run-ollama', default=False)",
    reason="Need --run-ollama option to run Ollama integration tests",
)


# =============================================================================
# Fixtures
# =============================================================================


@pytest.fixture
def classifier():
    """Provide an IntentClassifier built with no constructor arguments.

    No ``scope`` is passed to the fixture decorator, so pytest's default
    fixture scope applies.
    """
    instance = IntentClassifier()
    return instance


# =============================================================================
# Test Cases - Pure Unit Tests (No External Dependencies)
# =============================================================================


class TestParseIntentType:
    """Checks for ``_parse_intent_type``.

    These tests call the method directly with plain strings and carry no
    Ollama marker.
    """

    def test_parse_restart(self, classifier):
        """Lower- and upper-case spellings of 'restart' both map to RESTART."""
        for raw in ("restart", "RESTART"):
            assert classifier._parse_intent_type(raw) == IntentType.RESTART

    def test_parse_scale(self, classifier):
        """The text 'scale' maps to IntentType.SCALE."""
        parsed = classifier._parse_intent_type("scale")
        assert parsed == IntentType.SCALE

    def test_parse_config(self, classifier):
        """The text 'config' maps to IntentType.CONFIG."""
        parsed = classifier._parse_intent_type("config")
        assert parsed == IntentType.CONFIG

    def test_parse_diagnose(self, classifier):
        """The text 'diagnose' maps to IntentType.DIAGNOSE."""
        parsed = classifier._parse_intent_type("diagnose")
        assert parsed == IntentType.DIAGNOSE

    def test_parse_delete(self, classifier):
        """The text 'delete' maps to IntentType.DELETE."""
        parsed = classifier._parse_intent_type("delete")
        assert parsed == IntentType.DELETE

    def test_parse_rollback(self, classifier):
        """The text 'rollback' maps to IntentType.ROLLBACK."""
        parsed = classifier._parse_intent_type("rollback")
        assert parsed == IntentType.ROLLBACK

    def test_parse_unknown(self, classifier):
        """'unknown', an unrecognised word and the empty string all map to UNKNOWN."""
        for raw in ("unknown", "invalid", ""):
            assert classifier._parse_intent_type(raw) == IntentType.UNKNOWN


class TestLlmFallbackResult:
    """Checks the result object produced by ``_llm_fallback_result``."""

    def test_fallback_result(self, classifier):
        """The fallback has UNKNOWN intent, zero confidence and empty lists.

        The message handed to ``_llm_fallback_result`` is expected back
        unchanged in ``reasoning``, and ``method`` is expected to be "llm".
        """
        error_text = "test error"
        fallback = classifier._llm_fallback_result(error_text)

        assert fallback.intent == IntentType.UNKNOWN
        assert fallback.confidence == 0.0
        assert fallback.method == "llm"
        assert fallback.reasoning == error_text
        assert fallback.matched_keywords == []
        assert fallback.detected_resources == []


class TestKeywordClassify:
    """Keyword-classification checks made through ``classify_sync``.

    None of these tests carries the Ollama marker.
    """

    def test_restart_keywords(self, classifier):
        """A restart request is recognised through keyword matching."""
        utterance = "重啟 api-server pod"
        outcome = classifier.classify_sync(utterance)

        assert outcome.intent == IntentType.RESTART
        assert outcome.method == "keyword"
        # A keyword match reports a confidence of 0 (it is not an AI analysis).
        assert outcome.confidence == 0.0
        assert "重啟" in outcome.matched_keywords

    def test_scale_keywords(self, classifier):
        """A scaling request is recognised through keyword matching."""
        utterance = "擴展 deployment 副本數到 5"
        outcome = classifier.classify_sync(utterance)

        assert outcome.intent == IntentType.SCALE
        assert outcome.method == "keyword"

    def test_diagnose_keywords(self, classifier):
        """A log-viewing request is recognised as DIAGNOSE by keyword."""
        utterance = "查看 pod 日誌"
        outcome = classifier.classify_sync(utterance)

        assert outcome.intent == IntentType.DIAGNOSE
        assert outcome.method == "keyword"

    def test_delete_keywords(self, classifier):
        """A deletion request is recognised through keyword matching."""
        utterance = "刪除這個 pod"
        outcome = classifier.classify_sync(utterance)

        assert outcome.intent == IntentType.DELETE
        assert outcome.method == "keyword"

    def test_unknown_text(self, classifier):
        """Text unrelated to any operation is classified as UNKNOWN."""
        utterance = "今天天氣真好"
        outcome = classifier.classify_sync(utterance)

        assert outcome.intent == IntentType.UNKNOWN


class TestGetIntentClassifier:
    """Singleton check for ``get_intent_classifier``."""

    def test_singleton(self):
        """Two consecutive calls hand back the very same object."""
        first, second = get_intent_classifier(), get_intent_classifier()
        assert first is second


class TestIntentResult:
    """Checks on ``IntentResult`` instances constructed directly."""

    def test_dataclass_fields(self):
        """Fields passed to the constructor are readable back unchanged."""
        field_values = {
            "intent": IntentType.RESTART,
            "confidence": 0.9,
            "method": "llm",
            "matched_keywords": ["重啟", "pod"],
            "detected_resources": ["api-server"],
            "reasoning": "匹配重啟關鍵字",
        }
        built = IntentResult(**field_values)

        assert built.intent == IntentType.RESTART
        assert built.confidence == 0.9
        assert built.method == "llm"
        # risk_level is not passed in above; per the original author's note it
        # is filled in automatically by __post_init__.
        assert built.risk_level == RiskLevel.MEDIUM
        assert "重啟" in built.matched_keywords
        assert "api-server" in built.detected_resources

    def test_risk_level_auto_set(self):
        """The risk level is derived from the intent when none is supplied."""
        # DELETE is expected to be CRITICAL, DIAGNOSE is expected to be LOW.
        expectations = (
            (IntentType.DELETE, RiskLevel.CRITICAL),
            (IntentType.DIAGNOSE, RiskLevel.LOW),
        )
        for intent, expected_risk in expectations:
            built = IntentResult(
                intent=intent,
                confidence=0.8,
                method="llm",
            )
            assert built.risk_level == expected_risk


# =============================================================================
# Integration Tests - Require Real Ollama
# =============================================================================


class TestLlmClassifyIntegration:
    """
    Integration tests for ``_llm_classify`` - a real Ollama service is required.

    Phase 22 P1 fix: mocks removed, a real Ollama is used instead.
    2026-03-31 Claude Code (chief architect)
    """

    @pytest.mark.asyncio
    @requires_ollama
    async def test_llm_success(self, classifier):
        """
        The LLM path classifies a restart request successfully.

        Run against a real Ollama service, this checks that:
        - the intent is parsed correctly
        - a non-zero confidence comes back
        - ``method`` is "llm"
        """
        utterance = "重啟 api 服務的 pod"
        verdict = await classifier._llm_classify(utterance)

        # The LLM is expected to recognise the RESTART intent.
        assert verdict.intent == IntentType.RESTART
        assert verdict.method == "llm"
        # A real LLM answer should carry a confidence above zero.
        assert verdict.confidence > 0.0

    @pytest.mark.asyncio
    @requires_ollama
    async def test_llm_scale_intent(self, classifier):
        """The LLM path recognises a scaling request."""
        utterance = "把 deployment 擴展到 5 個副本"
        verdict = await classifier._llm_classify(utterance)

        assert verdict.intent == IntentType.SCALE
        assert verdict.method == "llm"

    @pytest.mark.asyncio
    @requires_ollama
    async def test_llm_diagnose_intent(self, classifier):
        """The LLM path recognises a diagnosis request."""
        utterance = "幫我分析一下為什麼 pod 一直重啟"
        verdict = await classifier._llm_classify(utterance)

        assert verdict.intent == IntentType.DIAGNOSE
        assert verdict.method == "llm"

    @pytest.mark.asyncio
    @requires_ollama
    async def test_llm_delete_intent(self, classifier):
        """The LLM path recognises a deletion request (high risk)."""
        utterance = "刪除這個有問題的 pod"
        verdict = await classifier._llm_classify(utterance)

        assert verdict.intent == IntentType.DELETE
        assert verdict.method == "llm"


class TestClassifyIntegration:
    """
    Integration tests for the full ``classify`` flow - a real Ollama service is required.

    Phase 22 P1 fix: mocks removed, a real Ollama is used instead.
    2026-03-31 Claude Code (chief architect)
    """

    @pytest.mark.asyncio
    @requires_ollama
    async def test_classify_with_real_llm(self, classifier):
        """
        The full classification flow (keyword + LLM) yields RESTART.

        Flow as described by the original author (``classify`` itself is not
        visible in this file):
        1. keyword matching is tried first
        2. if a keyword matches but its confidence is 0, the LLM is tried
        3. the result with the higher confidence is chosen
        """
        utterance = "重啟 api pod"
        verdict = await classifier.classify(utterance)

        # The intent should be RESTART.
        assert verdict.intent == IntentType.RESTART
        # Either path may win, depending on which confidence is higher.
        assert verdict.method in ("keyword", "llm")

    @pytest.mark.asyncio
    @requires_ollama
    async def test_classify_complex_query(self, classifier):
        """A complex query that needs the LLM to understand the context."""
        utterance = "API 回應很慢，幫我看一下是不是需要增加副本"
        verdict = await classifier.classify(utterance)

        # Either DIAGNOSE or SCALE is an acceptable reading of this request.
        acceptable_intents = (IntentType.DIAGNOSE, IntentType.SCALE)
        assert verdict.intent in acceptable_intents
        # NOTE(review): the original comment only says a complex query is
        # "more likely" to use the LLM, yet the assertion requires it - confirm
        # that this strictness is intended.
        assert verdict.method == "llm"

    @pytest.mark.asyncio
    @requires_ollama
    async def test_classify_ambiguous_query(self, classifier):
        """An ambiguous query is classified as UNKNOWN or DIAGNOSE."""
        utterance = "幫我處理一下這個服務"
        verdict = await classifier.classify(utterance)

        # An ambiguous request may come back as UNKNOWN or as DIAGNOSE.
        acceptable_intents = (IntentType.UNKNOWN, IntentType.DIAGNOSE)
        assert verdict.intent in acceptable_intents