429 lines
16 KiB
Python
429 lines
16 KiB
Python
# apps/api/tests/test_ollama_health_monitor.py | 2026-04-25 @ Asia/Taipei
# Created 2026-04-25 P1.1c by Claude Engineer-C
# 2026-05-03 ogt: ADR-110 GCP three-tier disaster recovery; HOST updated to GCP-A Primary
"""
OllamaHealthMonitor unit tests - P1.1c
======================================
Coverage:
- The four health states (HEALTHY / SLOW / DEGRADED / OFFLINE)
- Connectivity failure -> OFFLINE
- Inference timeout (asyncio.TimeoutError / httpx.TimeoutException) -> DEGRADED
- Inference returning non-200 -> DEGRADED
- Redis cache hit (from_cache=True)
- Falling back to a direct check when the Redis cache fails (no crash)
- is_usable() logic

Test category: unit (httpx is mocked; no DB / Redis dependency)
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
from unittest.mock import AsyncMock, MagicMock, patch
|
||
|
||
import httpx
|
||
import pytest
|
||
|
||
from src.services.ollama_endpoint_circuit_breaker import (
|
||
is_ollama_endpoint_blocked,
|
||
record_ollama_endpoint_failure,
|
||
reset_ollama_endpoint_cooldown_for_tests,
|
||
)
|
||
from src.services.ollama_health_monitor import (
|
||
HealthReport,
|
||
HealthStatus,
|
||
OllamaHealthMonitor,
|
||
get_ollama_health_monitor,
|
||
reset_ollama_health_monitor,
|
||
)
|
||
|
||
|
||
# =============================================================================
|
||
# Fixtures
|
||
# =============================================================================
|
||
|
||
# Endpoint URLs used as test inputs; all HTTP traffic in this module is mocked.
HOST = "http://34.143.170.20:11434"  # GCP-A Primary (ADR-110, 2026-05-03)
HOST_LOCAL = "http://192.168.0.111:11434"  # Local fallback (moved out of the 188 main route)
|
||
|
||
|
||
@pytest.fixture(autouse=True)
def reset_singleton():
    """Wrap every test with resets of the shared module-level state.

    Before the test: calls ``reset_ollama_endpoint_cooldown_for_tests()``.
    After the test: calls ``reset_ollama_health_monitor()`` and then
    ``reset_ollama_endpoint_cooldown_for_tests()`` again, so neither the
    monitor singleton nor a recorded endpoint failure carries over into the
    next test.
    """
    reset_ollama_endpoint_cooldown_for_tests()
    yield
    reset_ollama_health_monitor()
    reset_ollama_endpoint_cooldown_for_tests()
|
||
|
||
|
||
@pytest.fixture
def monitor():
    """Provide a new ``OllamaHealthMonitor`` constructed with no arguments."""
    return OllamaHealthMonitor()
|
||
|
||
|
||
def _mock_tags_ok() -> MagicMock:
|
||
"""/api/tags 回傳 200"""
|
||
resp = MagicMock()
|
||
resp.status_code = 200
|
||
return resp
|
||
|
||
|
||
def _mock_generate_ok(latency_s: float = 0.5) -> tuple[MagicMock, float]:
|
||
"""
|
||
/api/generate 回傳 200,模擬給定延遲。
|
||
返回 (response_mock, latency_s),由 test 自行控制 time.perf_counter patch。
|
||
"""
|
||
resp = MagicMock()
|
||
resp.status_code = 200
|
||
resp.json.return_value = {"response": "ok"}
|
||
return resp
|
||
|
||
|
||
# =============================================================================
|
||
# 層 1:連通性
|
||
# =============================================================================
|
||
|
||
|
||
class TestConnectivity:
    """Connectivity layer: ``_check_connectivity`` and its use by ``_run_checks``."""

    @staticmethod
    def _client_for_get(response=None, error=None) -> AsyncMock:
        """Build an ``httpx.AsyncClient`` stand-in for a single ``GET``.

        Args:
            response: Object returned by ``await client.get(...)``.
            error: Exception instance raised by ``client.get`` instead of
                returning; takes effect whenever it is not ``None``.

        Returns:
            An ``AsyncMock`` that also works as ``async with`` target and
            yields itself from ``__aenter__``.
        """
        client = AsyncMock()
        client.get = AsyncMock(return_value=response, side_effect=error)
        client.__aenter__ = AsyncMock(return_value=client)
        # Returning False from __aexit__ lets exceptions propagate normally.
        client.__aexit__ = AsyncMock(return_value=False)
        return client

    @pytest.mark.asyncio
    async def test_connectivity_success(self, monitor):
        """``/api/tags`` answering 200 means the connectivity check passes."""
        client = self._client_for_get(response=_mock_tags_ok())

        with patch("httpx.AsyncClient", return_value=client):
            reachable = await monitor._check_connectivity(HOST)

        assert reachable is True

    @pytest.mark.asyncio
    async def test_connectivity_non_200(self, monitor):
        """``/api/tags`` answering a non-200 status (503) fails the check."""
        unavailable = MagicMock()
        unavailable.status_code = 503
        client = self._client_for_get(response=unavailable)

        with patch("httpx.AsyncClient", return_value=client):
            reachable = await monitor._check_connectivity(HOST)

        assert reachable is False

    @pytest.mark.asyncio
    async def test_run_checks_records_connectivity_failure_cooldown(self, monitor, monkeypatch):
        """A failed connectivity check reports OFFLINE and blocks the endpoint.

        The cooldown exists so the same upstream is not hit repeatedly.
        """
        monkeypatch.setattr(monitor, "_check_connectivity", AsyncMock(return_value=False))

        report = await monitor._run_checks(HOST)

        assert report.status == HealthStatus.OFFLINE
        assert is_ollama_endpoint_blocked(HOST)

    @pytest.mark.asyncio
    async def test_run_checks_respects_existing_failure_cooldown(self, monitor, monkeypatch):
        """While a cooldown is active, OFFLINE is reported without probing ``/api/tags``."""
        record_ollama_endpoint_failure(HOST)
        # Would report "reachable" if called; the test proves it is never awaited.
        connectivity = AsyncMock(return_value=True)
        monkeypatch.setattr(monitor, "_check_connectivity", connectivity)

        report = await monitor._run_checks(HOST)

        assert report.status == HealthStatus.OFFLINE
        assert "cooldown" in report.reason
        connectivity.assert_not_awaited()

    @pytest.mark.asyncio
    async def test_connectivity_timeout(self, monitor):
        """A connection timeout yields ``False`` rather than raising."""
        client = self._client_for_get(error=httpx.TimeoutException("timeout"))

        with patch("httpx.AsyncClient", return_value=client):
            reachable = await monitor._check_connectivity(HOST)

        assert reachable is False

    @pytest.mark.asyncio
    async def test_connectivity_connect_error(self, monitor):
        """A refused connection yields ``False`` rather than raising."""
        client = self._client_for_get(error=httpx.ConnectError("refused"))

        with patch("httpx.AsyncClient", return_value=client):
            reachable = await monitor._check_connectivity(HOST)

        assert reachable is False
|
||
|
||
|
||
# =============================================================================
|
||
# 層 2:推理測試分級
|
||
# =============================================================================
|
||
|
||
|
||
class TestInference:
    """Latency grading and failure handling of ``_check_inference``."""

    # Patch target for the clock read by the module under test.
    _PERF_COUNTER = "src.services.ollama_health_monitor.time.perf_counter"

    def _make_mock_client(self, status_code: int = 200, error: Exception | None = None) -> AsyncMock:
        """Build an ``httpx.AsyncClient`` stand-in for a single ``POST``.

        Args:
            status_code: Status code of the response returned by ``post``.
            error: Exception instance raised by ``post`` instead of returning
                a response; takes effect whenever it is not ``None``.

        Returns:
            An ``AsyncMock`` usable in ``async with`` that yields itself.
        """
        response = MagicMock()
        response.status_code = status_code
        response.json.return_value = {"response": "ok"}

        client = AsyncMock()
        client.post = AsyncMock(return_value=response, side_effect=error)
        client.__aenter__ = AsyncMock(return_value=client)
        client.__aexit__ = AsyncMock(return_value=False)
        return client

    @staticmethod
    def _two_point_clock(end_s: float):
        """Return a ``perf_counter`` replacement for a fixed elapsed time.

        The first call yields ``0.0`` (start); every later call yields
        ``end_s``, so the measured duration is ``end_s`` seconds.
        """
        start_reading = iter((0.0,))
        return lambda: next(start_reading, end_s)

    @pytest.mark.asyncio
    async def test_inference_healthy(self, monitor):
        """A 0.5 s inference is graded HEALTHY (original note: under 10 s)."""
        client = self._make_mock_client()

        with patch("httpx.AsyncClient", return_value=client), \
                patch(self._PERF_COUNTER, self._two_point_clock(0.5)):
            report = await monitor._check_inference(HOST)

        assert report.status == HealthStatus.HEALTHY
        assert report.latency_ms == pytest.approx(500.0, abs=10)

    @pytest.mark.asyncio
    async def test_inference_slow(self, monitor):
        """A 15 s inference is graded SLOW (original note: 10-30 s)."""
        client = self._make_mock_client()

        with patch("httpx.AsyncClient", return_value=client), \
                patch(self._PERF_COUNTER, self._two_point_clock(15.0)):
            report = await monitor._check_inference(HOST)

        assert report.status == HealthStatus.SLOW
        assert report.latency_ms == pytest.approx(15_000.0, abs=10)

    @pytest.mark.asyncio
    async def test_inference_degraded_by_latency(self, monitor):
        """A 32 s inference is graded DEGRADED (original note: over 30 s)."""
        client = self._make_mock_client()

        with patch("httpx.AsyncClient", return_value=client), \
                patch(self._PERF_COUNTER, self._two_point_clock(32.0)):
            report = await monitor._check_inference(HOST)

        assert report.status == HealthStatus.DEGRADED

    @pytest.mark.asyncio
    async def test_inference_timeout_degraded(self, monitor):
        """``httpx.TimeoutException`` is reported as DEGRADED, not OFFLINE or a crash."""
        client = self._make_mock_client(error=httpx.TimeoutException("timeout"))

        with patch("httpx.AsyncClient", return_value=client):
            report = await monitor._check_inference(HOST)

        assert report.status == HealthStatus.DEGRADED
        assert "超時" in report.reason or "timeout" in report.reason.lower()

    @pytest.mark.asyncio
    async def test_inference_asyncio_timeout_degraded(self, monitor):
        """``asyncio.TimeoutError`` is reported as DEGRADED."""
        client = self._make_mock_client(error=asyncio.TimeoutError())

        with patch("httpx.AsyncClient", return_value=client):
            report = await monitor._check_inference(HOST)

        assert report.status == HealthStatus.DEGRADED

    @pytest.mark.asyncio
    async def test_inference_connect_error_degraded(self, monitor):
        """``httpx.ConnectError`` during inference is reported as DEGRADED.

        Connectivity has already passed by the time inference runs, so the
        error is treated as a transient socket drop.
        """
        client = self._make_mock_client(error=httpx.ConnectError("refused"))

        with patch("httpx.AsyncClient", return_value=client):
            report = await monitor._check_inference(HOST)

        assert report.status == HealthStatus.DEGRADED

    @pytest.mark.asyncio
    async def test_inference_non_200_degraded(self, monitor):
        """A non-200 inference response (503) is DEGRADED and named in the reason."""
        client = self._make_mock_client(status_code=503)
        reads = 0

        def _ticking_clock() -> float:
            # Advances 0.1 s per read: 0.0, 0.1, 0.2, ...
            nonlocal reads
            reads += 1
            return float(reads - 1) * 0.1

        with patch("httpx.AsyncClient", return_value=client), \
                patch(self._PERF_COUNTER, _ticking_clock):
            report = await monitor._check_inference(HOST)

        assert report.status == HealthStatus.DEGRADED
        assert "503" in report.reason
|
||
|
||
|
||
# =============================================================================
|
||
# check() 整合(含 Redis 快取)
|
||
# =============================================================================
|
||
|
||
|
||
class TestCheckWithCache:
    """``check()`` end to end, with the cache and audit-log hooks stubbed out."""

    @pytest.mark.asyncio
    async def test_check_offline_when_connectivity_fails(self, monitor):
        """A cache miss plus failed connectivity produces an OFFLINE report for the host."""
        with patch.object(monitor, "_check_connectivity", return_value=False), \
                patch.object(monitor, "_get_cached", return_value=None), \
                patch.object(monitor, "_set_cached", return_value=None), \
                patch.object(monitor, "_write_audit_log", return_value=None):
            report = await monitor.check(HOST)

        assert report.status == HealthStatus.OFFLINE
        assert report.host == HOST

    @pytest.mark.asyncio
    async def test_check_healthy_when_inference_fast(self, monitor):
        """Connectivity passing plus a HEALTHY inference report gives HEALTHY.

        The stubbed inference report carries no host, so the final assertion
        also checks that ``check()`` returns the report with ``host`` set.
        """
        healthy_report = HealthReport(
            status=HealthStatus.HEALTHY,
            latency_ms=500.0,
        )

        with patch.object(monitor, "_check_connectivity", return_value=True), \
                patch.object(monitor, "_check_inference", return_value=healthy_report), \
                patch.object(monitor, "_get_cached", return_value=None), \
                patch.object(monitor, "_set_cached", return_value=None), \
                patch.object(monitor, "_write_audit_log", return_value=None):
            report = await monitor.check(HOST)

        assert report.status == HealthStatus.HEALTHY
        assert report.host == HOST

    @pytest.mark.asyncio
    async def test_check_returns_cached(self, monitor):
        """A cache hit is returned directly and flagged with ``from_cache=True``."""
        cached = HealthReport(
            status=HealthStatus.HEALTHY,
            host=HOST,
            latency_ms=300.0,
        )

        with patch.object(monitor, "_get_cached", return_value=cached):
            report = await monitor.check(HOST)

        assert report.from_cache is True
        assert report.status == HealthStatus.HEALTHY

    @pytest.mark.asyncio
    async def test_check_proceeds_when_cache_get_fails(self, monitor):
        """With no cached value available, ``check()`` runs the live checks.

        NOTE(review): the cache read is stubbed to return ``None`` rather than
        to raise, so this covers the outcome of a failed read, not an
        exception path. This presumably relies on ``_get_cached`` swallowing
        Redis errors itself - confirm against the implementation.
        """
        with patch.object(monitor, "_get_cached", return_value=None), \
                patch.object(monitor, "_check_connectivity", return_value=False), \
                patch.object(monitor, "_set_cached", return_value=None), \
                patch.object(monitor, "_write_audit_log", return_value=None):
            report = await monitor.check(HOST)

        # Graceful fallback: the checks ran and nothing raised.
        assert report.status == HealthStatus.OFFLINE

    @pytest.mark.asyncio
    async def test_cache_set_failure_does_not_crash(self, monitor):
        """An exception from the cache write is swallowed; the report is still returned."""
        healthy = HealthReport(status=HealthStatus.HEALTHY, latency_ms=200.0)

        with patch.object(monitor, "_get_cached", return_value=None), \
                patch.object(monitor, "_check_connectivity", return_value=True), \
                patch.object(monitor, "_check_inference", return_value=healthy), \
                patch.object(monitor, "_set_cached", side_effect=RuntimeError("Redis down")), \
                patch.object(monitor, "_write_audit_log", return_value=None):
            # Must not raise even though the cache write fails.
            report = await monitor.check(HOST)

        assert report.status == HealthStatus.HEALTHY
|
||
|
||
|
||
# =============================================================================
|
||
# HealthReport 輔助方法
|
||
# =============================================================================
|
||
|
||
|
||
class TestHealthReport:
    """Helper methods on ``HealthReport``: ``is_usable()`` and ``to_dict()``."""

    def test_is_usable_healthy(self):
        """HEALTHY reports are usable."""
        report = HealthReport(status=HealthStatus.HEALTHY)
        assert report.is_usable() is True

    def test_is_usable_slow(self):
        """SLOW reports are usable."""
        report = HealthReport(status=HealthStatus.SLOW)
        assert report.is_usable() is True

    def test_is_usable_degraded(self):
        """DEGRADED reports are still usable."""
        report = HealthReport(status=HealthStatus.DEGRADED)
        assert report.is_usable() is True

    def test_is_usable_offline(self):
        """OFFLINE is the one status checked here that is not usable."""
        report = HealthReport(status=HealthStatus.OFFLINE)
        assert report.is_usable() is False

    def test_to_dict_structure(self):
        """``to_dict()`` exposes status (as the string "slow"), host, latency and reason."""
        report = HealthReport(
            status=HealthStatus.SLOW,
            host=HOST,
            latency_ms=15500.0,
            reason="slow zone",
        )

        payload = report.to_dict()

        assert payload["status"] == "slow"
        assert payload["host"] == HOST
        assert payload["latency_ms"] == 15500.0
        assert payload["reason"] == "slow zone"
|
||
|
||
|
||
# =============================================================================
|
||
# Singleton
|
||
# =============================================================================
|
||
|
||
|
||
def test_singleton_returns_same_instance():
    """Two consecutive ``get_ollama_health_monitor()`` calls return the same object."""
    first = get_ollama_health_monitor()
    second = get_ollama_health_monitor()
    assert first is second
|
||
|
||
|
||
def test_reset_singleton_gives_new_instance():
    """After ``reset_ollama_health_monitor()``, the getter returns a different object."""
    before_reset = get_ollama_health_monitor()
    reset_ollama_health_monitor()
    after_reset = get_ollama_health_monitor()
    assert before_reset is not after_reset
|