awoooi/apps/api/tests/test_ollama_health_monitor.py

# apps/api/tests/test_ollama_health_monitor.py | 2026-04-25 @ Asia/Taipei
# Created 2026-04-25 P1.1c by Claude Engineer-C
# 2026-05-03 ogt: ADR-110 GCP 三層容災，HOST 更新為 GCP-A Primary
"""
OllamaHealthMonitor 單元測試 - P1.1c
=====================================
測試覆蓋：
- 4 種健康狀態（HEALTHY / SLOW / DEGRADED / OFFLINE）
- 連通性失敗 → OFFLINE
- 推理超時（asyncio.TimeoutError / httpx.TimeoutException）→ DEGRADED
- 推理回傳非 200 → DEGRADED
- Redis 快取命中（from_cache=True）
- Redis 快取失敗時降級直接執行（不 crash）
- is_usable() 邏輯

測試分類：unit（mock httpx，無 DB / Redis 依賴）
"""

from __future__ import annotations

import asyncio
from unittest.mock import AsyncMock, MagicMock, patch

import httpx
import pytest

from src.services.ollama_endpoint_circuit_breaker import (
    is_ollama_endpoint_blocked,
    record_ollama_endpoint_failure,
    reset_ollama_endpoint_cooldown_for_tests,
)
from src.services.ollama_health_monitor import (
    HealthReport,
    HealthStatus,
    OllamaHealthMonitor,
    get_ollama_health_monitor,
    reset_ollama_health_monitor,
)


# =============================================================================
# Fixtures
# =============================================================================

HOST = "http://34.143.170.20:11434"      # GCP-A primary endpoint (ADR-110, 2026-05-03)
HOST_LOCAL = "http://192.168.0.111:11434"  # Local fallback (already moved out of the 188 primary route)


@pytest.fixture(autouse=True)
def reset_singleton():
    """Isolate shared state around every test (autouse).

    Before the test body runs, the endpoint cooldown state is cleared via
    ``reset_ollama_endpoint_cooldown_for_tests``. After the test finishes,
    the health-monitor singleton is reset and the cooldown state is cleared
    once more.
    """
    # Setup: clear the cooldown state before the test body runs.
    reset_ollama_endpoint_cooldown_for_tests()
    yield
    # Teardown: reset the singleton, then clear the cooldown state again.
    reset_ollama_health_monitor()
    reset_ollama_endpoint_cooldown_for_tests()


@pytest.fixture
def monitor():
    """Provide a directly constructed ``OllamaHealthMonitor`` for the test."""
    fresh_monitor = OllamaHealthMonitor()
    return fresh_monitor


def _mock_tags_ok() -> MagicMock:
    """Return a response mock for /api/tags whose ``status_code`` is 200."""
    return MagicMock(status_code=200)


def _mock_generate_ok(latency_s: float = 0.5) -> MagicMock:
    """Return a response mock for a successful /api/generate call.

    The mock has ``status_code == 200`` and its ``json()`` returns
    ``{"response": "ok"}``.

    Args:
        latency_s: Accepted for call-site compatibility only; it is not used
            here. Tests that need a specific latency control it themselves by
            patching ``time.perf_counter``.

    Returns:
        The response mock on its own. The earlier annotation and docstring
        advertised a ``(response_mock, latency_s)`` tuple, but a tuple was
        never returned; the annotation now matches the actual return value.
    """
    resp = MagicMock()
    resp.status_code = 200
    resp.json.return_value = {"response": "ok"}
    return resp


# =============================================================================
# 層 1：連通性
# =============================================================================


class TestConnectivity:
    """Connectivity layer: `_check_connectivity` outcomes and cooldown handling in `_run_checks`."""

    @staticmethod
    def _client_with_get(*, response=None, error=None) -> AsyncMock:
        """Build an async-context-manager client mock.

        Its ``get`` either returns ``response`` or, when ``error`` is given,
        raises ``error``.
        """
        client = AsyncMock()
        if error is not None:
            client.get = AsyncMock(side_effect=error)
        else:
            client.get = AsyncMock(return_value=response)
        client.__aenter__ = AsyncMock(return_value=client)
        client.__aexit__ = AsyncMock(return_value=False)
        return client

    @pytest.mark.asyncio
    async def test_connectivity_success(self, monitor):
        """/api/tags answering 200 -> connectivity check passes."""
        client = self._client_with_get(response=_mock_tags_ok())

        with patch("httpx.AsyncClient", return_value=client):
            reachable = await monitor._check_connectivity(HOST)

        assert reachable is True

    @pytest.mark.asyncio
    async def test_connectivity_non_200(self, monitor):
        """/api/tags answering a non-200 status -> connectivity check fails."""
        unavailable = MagicMock(status_code=503)
        client = self._client_with_get(response=unavailable)

        with patch("httpx.AsyncClient", return_value=client):
            reachable = await monitor._check_connectivity(HOST)

        assert reachable is False

    @pytest.mark.asyncio
    async def test_run_checks_records_connectivity_failure_cooldown(self, monitor, monkeypatch):
        """A connectivity failure leaves the endpoint blocked, so the same upstream is not hit repeatedly."""
        failing_probe = AsyncMock(return_value=False)
        monkeypatch.setattr(monitor, "_check_connectivity", failing_probe)

        outcome = await monitor._run_checks(HOST)

        assert outcome.status == HealthStatus.OFFLINE
        assert is_ollama_endpoint_blocked(HOST)

    @pytest.mark.asyncio
    async def test_run_checks_respects_existing_failure_cooldown(self, monitor, monkeypatch):
        """While a cooldown is active, OFFLINE is reported without awaiting the connectivity probe."""
        record_ollama_endpoint_failure(HOST)
        probe = AsyncMock(return_value=True)
        monkeypatch.setattr(monitor, "_check_connectivity", probe)

        outcome = await monitor._run_checks(HOST)

        assert outcome.status == HealthStatus.OFFLINE
        assert "cooldown" in outcome.reason
        probe.assert_not_awaited()

    @pytest.mark.asyncio
    async def test_connectivity_timeout(self, monitor):
        """A connection timeout -> returns False instead of raising."""
        client = self._client_with_get(error=httpx.TimeoutException("timeout"))

        with patch("httpx.AsyncClient", return_value=client):
            reachable = await monitor._check_connectivity(HOST)

        assert reachable is False

    @pytest.mark.asyncio
    async def test_connectivity_connect_error(self, monitor):
        """A refused connection -> returns False instead of raising."""
        client = self._client_with_get(error=httpx.ConnectError("refused"))

        with patch("httpx.AsyncClient", return_value=client):
            reachable = await monitor._check_connectivity(HOST)

        assert reachable is False


# =============================================================================
# 層 2：推理測試分級
# =============================================================================


class TestInference:
    """Inference layer: how `_check_inference` grades latency and failures."""

    def _make_mock_client(self, status_code: int = 200) -> AsyncMock:
        """Build a client mock whose ``post`` returns a response with ``status_code``."""
        reply = MagicMock()
        reply.status_code = status_code
        reply.json.return_value = {"response": "ok"}
        client = AsyncMock()
        client.post = AsyncMock(return_value=reply)
        client.__aenter__ = AsyncMock(return_value=client)
        client.__aexit__ = AsyncMock(return_value=False)
        return client

    @staticmethod
    def _failing_client(error: BaseException) -> AsyncMock:
        """Build a client mock whose ``post`` raises ``error``."""
        client = AsyncMock()
        client.post = AsyncMock(side_effect=error)
        client.__aenter__ = AsyncMock(return_value=client)
        client.__aexit__ = AsyncMock(return_value=False)
        return client

    @staticmethod
    def _clock_jumping_to(end_s: float):
        """Return a ``perf_counter`` stand-in: 0.0 on the first call, ``end_s`` on every later call."""
        calls = 0

        def _tick():
            nonlocal calls
            calls += 1
            return 0.0 if calls == 1 else end_s

        return _tick

    @pytest.mark.asyncio
    async def test_inference_healthy(self, monitor):
        """Inference latency under 10s -> HEALTHY."""
        # Simulated elapsed time is 0.5s, below the 10s threshold.
        client_patch = patch("httpx.AsyncClient", return_value=self._make_mock_client())
        clock_patch = patch(
            "src.services.ollama_health_monitor.time.perf_counter",
            self._clock_jumping_to(0.5),
        )

        with client_patch, clock_patch:
            report = await monitor._check_inference(HOST)

        assert report.status == HealthStatus.HEALTHY
        assert report.latency_ms == pytest.approx(500.0, abs=10)

    @pytest.mark.asyncio
    async def test_inference_slow(self, monitor):
        """Inference latency between 10s and 30s -> SLOW."""
        # Simulated elapsed time is 15s, inside the SLOW zone.
        client_patch = patch("httpx.AsyncClient", return_value=self._make_mock_client())
        clock_patch = patch(
            "src.services.ollama_health_monitor.time.perf_counter",
            self._clock_jumping_to(15.0),
        )

        with client_patch, clock_patch:
            report = await monitor._check_inference(HOST)

        assert report.status == HealthStatus.SLOW
        assert report.latency_ms == pytest.approx(15_000.0, abs=10)

    @pytest.mark.asyncio
    async def test_inference_degraded_by_latency(self, monitor):
        """Inference latency above 30s -> DEGRADED."""
        # Simulated elapsed time is 32s.
        client_patch = patch("httpx.AsyncClient", return_value=self._make_mock_client())
        clock_patch = patch(
            "src.services.ollama_health_monitor.time.perf_counter",
            self._clock_jumping_to(32.0),
        )

        with client_patch, clock_patch:
            report = await monitor._check_inference(HOST)

        assert report.status == HealthStatus.DEGRADED

    @pytest.mark.asyncio
    async def test_inference_timeout_degraded(self, monitor):
        """httpx.TimeoutException during inference -> DEGRADED (no crash, not OFFLINE)."""
        client = self._failing_client(httpx.TimeoutException("timeout"))

        with patch("httpx.AsyncClient", return_value=client):
            report = await monitor._check_inference(HOST)

        assert report.status == HealthStatus.DEGRADED
        reason = report.reason
        assert "超時" in reason or "timeout" in reason.lower()

    @pytest.mark.asyncio
    async def test_inference_asyncio_timeout_degraded(self, monitor):
        """asyncio.TimeoutError during inference -> DEGRADED."""
        client = self._failing_client(asyncio.TimeoutError())

        with patch("httpx.AsyncClient", return_value=client):
            report = await monitor._check_inference(HOST)

        assert report.status == HealthStatus.DEGRADED

    @pytest.mark.asyncio
    async def test_inference_connect_error_degraded(self, monitor):
        """ConnectError during inference -> DEGRADED.

        Connectivity is taken to have passed already, so this is treated as
        a transient socket drop.
        """
        client = self._failing_client(httpx.ConnectError("refused"))

        with patch("httpx.AsyncClient", return_value=client):
            report = await monitor._check_inference(HOST)

        assert report.status == HealthStatus.DEGRADED

    @pytest.mark.asyncio
    async def test_inference_non_200_degraded(self, monitor):
        """A non-200 inference response -> DEGRADED, with the status code in the reason."""
        ticks = 0

        def _fake_perf_counter():
            # Advances by 0.1 per call: 0.0, 0.1, 0.2, ...
            nonlocal ticks
            ticks += 1
            return float(ticks - 1) * 0.1

        client_patch = patch(
            "httpx.AsyncClient",
            return_value=self._make_mock_client(status_code=503),
        )
        clock_patch = patch(
            "src.services.ollama_health_monitor.time.perf_counter",
            _fake_perf_counter,
        )

        with client_patch, clock_patch:
            report = await monitor._check_inference(HOST)

        assert report.status == HealthStatus.DEGRADED
        assert "503" in report.reason


# =============================================================================
# check() 整合（含 Redis 快取）
# =============================================================================


class TestCheckWithCache:
    """`check()` end to end, with the cache and audit-log hooks patched out."""

    @pytest.mark.asyncio
    async def test_check_offline_when_connectivity_fails(self, monitor):
        """Connectivity failure -> final status is OFFLINE."""
        no_connectivity = patch.object(monitor, "_check_connectivity", return_value=False)
        cache_miss = patch.object(monitor, "_get_cached", return_value=None)
        cache_write = patch.object(monitor, "_set_cached", return_value=None)
        audit_log = patch.object(monitor, "_write_audit_log", return_value=None)

        with no_connectivity, cache_miss, cache_write, audit_log:
            report = await monitor.check(HOST)

        assert report.status == HealthStatus.OFFLINE
        assert report.host == HOST

    @pytest.mark.asyncio
    async def test_check_healthy_when_inference_fast(self, monitor):
        """Connectivity passes and inference is fast -> HEALTHY."""
        fast_result = HealthReport(status=HealthStatus.HEALTHY, latency_ms=500.0)
        connectivity_ok = patch.object(monitor, "_check_connectivity", return_value=True)
        inference = patch.object(monitor, "_check_inference", return_value=fast_result)
        cache_miss = patch.object(monitor, "_get_cached", return_value=None)
        cache_write = patch.object(monitor, "_set_cached", return_value=None)
        audit_log = patch.object(monitor, "_write_audit_log", return_value=None)

        with connectivity_ok, inference, cache_miss, cache_write, audit_log:
            report = await monitor.check(HOST)

        assert report.status == HealthStatus.HEALTHY
        assert report.host == HOST

    @pytest.mark.asyncio
    async def test_check_returns_cached(self, monitor):
        """Cache hit -> the cached result is returned with from_cache=True."""
        cached_report = HealthReport(
            status=HealthStatus.HEALTHY,
            host=HOST,
            latency_ms=300.0,
        )

        with patch.object(monitor, "_get_cached", return_value=cached_report):
            report = await monitor.check(HOST)

        assert report.from_cache is True
        assert report.status == HealthStatus.HEALTHY

    @pytest.mark.asyncio
    async def test_check_proceeds_when_cache_get_fails(self, monitor):
        """Redis get failure -> fall back to running the check directly (no crash).

        NOTE(review): the failure is represented only by `_get_cached`
        returning None, which is the same setup as a plain cache miss.
        Confirm whether a raising cache backend should be exercised instead.
        """
        cache_miss = patch.object(monitor, "_get_cached", return_value=None)
        no_connectivity = patch.object(monitor, "_check_connectivity", return_value=False)
        cache_write = patch.object(monitor, "_set_cached", return_value=None)
        audit_log = patch.object(monitor, "_write_audit_log", return_value=None)

        with cache_miss, no_connectivity, cache_write, audit_log:
            report = await monitor.check(HOST)

        # Degraded normally, without crashing.
        assert report.status == HealthStatus.OFFLINE

    @pytest.mark.asyncio
    async def test_cache_set_failure_does_not_crash(self, monitor):
        """Redis set failure -> swallowed silently; the result is still returned."""
        fast_result = HealthReport(status=HealthStatus.HEALTHY, latency_ms=200.0)
        cache_miss = patch.object(monitor, "_get_cached", return_value=None)
        connectivity_ok = patch.object(monitor, "_check_connectivity", return_value=True)
        inference = patch.object(monitor, "_check_inference", return_value=fast_result)
        broken_cache_write = patch.object(
            monitor, "_set_cached", side_effect=RuntimeError("Redis down")
        )
        audit_log = patch.object(monitor, "_write_audit_log", return_value=None)

        with cache_miss, connectivity_ok, inference, broken_cache_write, audit_log:
            # Must not raise.
            report = await monitor.check(HOST)

        assert report.status == HealthStatus.HEALTHY


# =============================================================================
# HealthReport 輔助方法
# =============================================================================


class TestHealthReport:
    """Behaviour of the `HealthReport` helpers `is_usable()` and `to_dict()`."""

    def test_is_usable_healthy(self):
        """HEALTHY is usable."""
        report = HealthReport(status=HealthStatus.HEALTHY)
        assert report.is_usable() is True

    def test_is_usable_slow(self):
        """SLOW is usable."""
        report = HealthReport(status=HealthStatus.SLOW)
        assert report.is_usable() is True

    def test_is_usable_degraded(self):
        """DEGRADED is usable."""
        report = HealthReport(status=HealthStatus.DEGRADED)
        assert report.is_usable() is True

    def test_is_usable_offline(self):
        """OFFLINE is not usable."""
        report = HealthReport(status=HealthStatus.OFFLINE)
        assert report.is_usable() is False

    def test_to_dict_structure(self):
        """`to_dict()` exposes status, host, latency_ms and reason with the expected values."""
        expected = {
            "status": "slow",
            "host": HOST,
            "latency_ms": 15500.0,
            "reason": "slow zone",
        }
        payload = HealthReport(
            status=HealthStatus.SLOW,
            host=HOST,
            latency_ms=15500.0,
            reason="slow zone",
        ).to_dict()

        for key, value in expected.items():
            assert payload[key] == value


# =============================================================================
# Singleton
# =============================================================================


def test_singleton_returns_same_instance():
    """Two consecutive accessor calls hand back the very same object."""
    first, second = get_ollama_health_monitor(), get_ollama_health_monitor()
    assert first is second


def test_reset_singleton_gives_new_instance():
    """After a reset, the accessor returns a different object than before."""
    before_reset = get_ollama_health_monitor()

    reset_ollama_health_monitor()

    after_reset = get_ollama_health_monitor()
    assert before_reset is not after_reset