Files
awoooi/apps/api/tests/test_ollama_health_monitor.py
Your Name 9ccf230a5f
Some checks failed
CD Pipeline / tests (push) Successful in 1m24s
Code Review / ai-code-review (push) Successful in 17s
CD Pipeline / build-and-deploy (push) Successful in 3m37s
CD Pipeline / post-deploy-checks (push) Has been cancelled
fix(ollama): cooldown provider health probes
2026-05-25 12:25:32 +08:00

429 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# apps/api/tests/test_ollama_health_monitor.py | 2026-04-25 @ Asia/Taipei
# Created 2026-04-25 P1.1c by Claude Engineer-C
# 2026-05-03 ogt: ADR-110 GCP 三層容災，HOST 更新為 GCP-A Primary
"""
OllamaHealthMonitor 單元測試 - P1.1c
=====================================
測試覆蓋:
- 4 種健康狀態：HEALTHY / SLOW / DEGRADED / OFFLINE
- 連通性失敗 → OFFLINE
- 推理超時（asyncio.TimeoutError / httpx.TimeoutException）→ DEGRADED
- 推理回傳非 200 → DEGRADED
- Redis 快取命中（from_cache=True）
- Redis 快取失敗時降級直接執行（不 crash）
- is_usable() 邏輯
測試分類：unit（mock httpx，無 DB / Redis 依賴）
"""
from __future__ import annotations
import asyncio
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
from src.services.ollama_endpoint_circuit_breaker import (
is_ollama_endpoint_blocked,
record_ollama_endpoint_failure,
reset_ollama_endpoint_cooldown_for_tests,
)
from src.services.ollama_health_monitor import (
HealthReport,
HealthStatus,
OllamaHealthMonitor,
get_ollama_health_monitor,
reset_ollama_health_monitor,
)
# =============================================================================
# Fixtures
# =============================================================================
HOST = "http://34.143.170.20:11434" # GCP-A primary (ADR-110, 2026-05-03)
HOST_LOCAL = "http://192.168.0.111:11434" # Local fallback (moved out of the 188 main route)
@pytest.fixture(autouse=True)
def reset_singleton():
    """Autouse fixture that keeps shared state from leaking between tests.

    Setup calls ``reset_ollama_endpoint_cooldown_for_tests()``; teardown calls
    ``reset_ollama_health_monitor()`` followed by a second
    ``reset_ollama_endpoint_cooldown_for_tests()``.
    """
    # Setup: start every test from a clean cooldown state.
    reset_ollama_endpoint_cooldown_for_tests()

    yield

    # Teardown: discard the monitor singleton, then clear any cooldown the
    # test itself recorded.
    reset_ollama_health_monitor()
    reset_ollama_endpoint_cooldown_for_tests()
@pytest.fixture
def monitor():
    """Provide a freshly constructed ``OllamaHealthMonitor`` for a test."""
    fresh_monitor = OllamaHealthMonitor()
    return fresh_monitor
def _mock_tags_ok() -> MagicMock:
"""/api/tags 回傳 200"""
resp = MagicMock()
resp.status_code = 200
return resp
def _mock_generate_ok(latency_s: float = 0.5) -> tuple[MagicMock, float]:
"""
/api/generate 回傳 200模擬給定延遲。
返回 (response_mock, latency_s),由 test 自行控制 time.perf_counter patch。
"""
resp = MagicMock()
resp.status_code = 200
resp.json.return_value = {"response": "ok"}
return resp
# =============================================================================
# 層 1連通性
# =============================================================================
class TestConnectivity:
    """Layer 1: ``_check_connectivity`` outcomes and cooldown handling in ``_run_checks``."""

    @staticmethod
    def _client_for_get(*, response=None, error=None) -> AsyncMock:
        """Return an async-context-manager client mock for ``httpx.AsyncClient``.

        ``get`` raises *error* when one is given, otherwise it returns
        *response*. Entering the context yields the client itself.
        """
        client = AsyncMock()
        if error is not None:
            client.get = AsyncMock(side_effect=error)
        else:
            client.get = AsyncMock(return_value=response)
        client.__aenter__ = AsyncMock(return_value=client)
        client.__aexit__ = AsyncMock(return_value=False)
        return client

    @pytest.mark.asyncio
    async def test_connectivity_success(self, monitor):
        """A 200 from ``/api/tags`` means the connectivity check passes."""
        client = self._client_for_get(response=_mock_tags_ok())

        with patch("httpx.AsyncClient", return_value=client):
            reachable = await monitor._check_connectivity(HOST)

        assert reachable is True

    @pytest.mark.asyncio
    async def test_connectivity_non_200(self, monitor):
        """A non-200 (here 503) from ``/api/tags`` means the check fails."""
        unavailable = MagicMock(status_code=503)
        client = self._client_for_get(response=unavailable)

        with patch("httpx.AsyncClient", return_value=client):
            reachable = await monitor._check_connectivity(HOST)

        assert reachable is False

    @pytest.mark.asyncio
    async def test_run_checks_records_connectivity_failure_cooldown(self, monitor, monkeypatch):
        """A failed connectivity check leaves the host blocked by the cooldown."""
        always_down = AsyncMock(return_value=False)
        monkeypatch.setattr(monitor, "_check_connectivity", always_down)

        report = await monitor._run_checks(HOST)

        assert report.status == HealthStatus.OFFLINE
        assert is_ollama_endpoint_blocked(HOST)

    @pytest.mark.asyncio
    async def test_run_checks_respects_existing_failure_cooldown(self, monitor, monkeypatch):
        """During a cooldown ``_run_checks`` reports OFFLINE without probing connectivity."""
        record_ollama_endpoint_failure(HOST)
        # Would report "reachable" if it were awaited; the test asserts it is not.
        probe = AsyncMock(return_value=True)
        monkeypatch.setattr(monitor, "_check_connectivity", probe)

        report = await monitor._run_checks(HOST)

        assert report.status == HealthStatus.OFFLINE
        assert "cooldown" in report.reason
        probe.assert_not_awaited()

    @pytest.mark.asyncio
    async def test_connectivity_timeout(self, monitor):
        """A request timeout yields ``False`` instead of raising."""
        client = self._client_for_get(error=httpx.TimeoutException("timeout"))

        with patch("httpx.AsyncClient", return_value=client):
            reachable = await monitor._check_connectivity(HOST)

        assert reachable is False

    @pytest.mark.asyncio
    async def test_connectivity_connect_error(self, monitor):
        """A refused connection yields ``False`` instead of raising."""
        client = self._client_for_get(error=httpx.ConnectError("refused"))

        with patch("httpx.AsyncClient", return_value=client):
            reachable = await monitor._check_connectivity(HOST)

        assert reachable is False
# =============================================================================
# 層 2推理測試分級
# =============================================================================
class TestInference:
    """Layer 2: how ``_check_inference`` grades latency and handles failures."""

    @staticmethod
    def _client_with_post(post: AsyncMock) -> AsyncMock:
        """Wrap *post* in an async-context-manager client mock that yields itself."""
        client = AsyncMock()
        client.post = post
        client.__aenter__ = AsyncMock(return_value=client)
        client.__aexit__ = AsyncMock(return_value=False)
        return client

    def _make_mock_client(self, status_code: int = 200) -> AsyncMock:
        """Return a client mock whose ``post`` answers with *status_code* and ``{"response": "ok"}``."""
        reply = MagicMock(status_code=status_code)
        reply.json.return_value = {"response": "ok"}
        return self._client_with_post(AsyncMock(return_value=reply))

    @staticmethod
    def _stepped_clock(end_s: float):
        """Return a ``perf_counter`` stand-in: 0.0 on the first call, *end_s* on every later call."""
        calls = 0

        def _read() -> float:
            nonlocal calls
            calls += 1
            return 0.0 if calls == 1 else end_s

        return _read

    @staticmethod
    async def _probe(monitor, client, clock=None):
        """Run ``_check_inference(HOST)`` with ``httpx.AsyncClient`` (and optionally the clock) patched."""
        client_patch = patch("httpx.AsyncClient", return_value=client)
        if clock is None:
            with client_patch:
                return await monitor._check_inference(HOST)

        clock_patch = patch("src.services.ollama_health_monitor.time.perf_counter", clock)
        with client_patch, clock_patch:
            return await monitor._check_inference(HOST)

    @pytest.mark.asyncio
    async def test_inference_healthy(self, monitor):
        """A simulated 0.5 s inference is graded HEALTHY and reported as ~500 ms."""
        report = await self._probe(
            monitor,
            self._make_mock_client(),
            clock=self._stepped_clock(0.5),
        )

        assert report.status == HealthStatus.HEALTHY
        assert report.latency_ms == pytest.approx(500.0, abs=10)

    @pytest.mark.asyncio
    async def test_inference_slow(self, monitor):
        """A simulated 15 s inference is graded SLOW and reported as ~15 000 ms."""
        report = await self._probe(
            monitor,
            self._make_mock_client(),
            clock=self._stepped_clock(15.0),
        )

        assert report.status == HealthStatus.SLOW
        assert report.latency_ms == pytest.approx(15_000.0, abs=10)

    @pytest.mark.asyncio
    async def test_inference_degraded_by_latency(self, monitor):
        """A simulated 32 s inference is graded DEGRADED."""
        report = await self._probe(
            monitor,
            self._make_mock_client(),
            clock=self._stepped_clock(32.0),
        )

        assert report.status == HealthStatus.DEGRADED

    @pytest.mark.asyncio
    async def test_inference_timeout_degraded(self, monitor):
        """An ``httpx.TimeoutException`` is reported as DEGRADED with a timeout reason."""
        failing_post = AsyncMock(side_effect=httpx.TimeoutException("timeout"))

        report = await self._probe(monitor, self._client_with_post(failing_post))

        assert report.status == HealthStatus.DEGRADED
        # The reason may be worded in Chinese or in English.
        assert "超時" in report.reason or "timeout" in report.reason.lower()

    @pytest.mark.asyncio
    async def test_inference_asyncio_timeout_degraded(self, monitor):
        """An ``asyncio.TimeoutError`` is reported as DEGRADED."""
        failing_post = AsyncMock(side_effect=asyncio.TimeoutError())

        report = await self._probe(monitor, self._client_with_post(failing_post))

        assert report.status == HealthStatus.DEGRADED

    @pytest.mark.asyncio
    async def test_inference_connect_error_degraded(self, monitor):
        """An ``httpx.ConnectError`` during inference is reported as DEGRADED."""
        failing_post = AsyncMock(side_effect=httpx.ConnectError("refused"))

        report = await self._probe(monitor, self._client_with_post(failing_post))

        assert report.status == HealthStatus.DEGRADED

    @pytest.mark.asyncio
    async def test_inference_non_200_degraded(self, monitor):
        """A 503 from the inference call is DEGRADED and the reason names the status code."""
        ticks = 0

        def _rising_clock() -> float:
            # 0.0, 0.1, 0.2, ... — advances 0.1 s on every reading.
            nonlocal ticks
            ticks += 1
            return float(ticks - 1) * 0.1

        report = await self._probe(
            monitor,
            self._make_mock_client(status_code=503),
            clock=_rising_clock,
        )

        assert report.status == HealthStatus.DEGRADED
        assert "503" in report.reason
# =============================================================================
# check() 整合(含 Redis 快取)
# =============================================================================
class TestCheckWithCache:
    """``check()`` end to end, with the cache and audit-log helpers patched out."""

    @pytest.mark.asyncio
    async def test_check_offline_when_connectivity_fails(self, monitor):
        """A failed connectivity check makes ``check()`` report OFFLINE for the host."""
        conn_down = patch.object(monitor, "_check_connectivity", return_value=False)
        cache_miss = patch.object(monitor, "_get_cached", return_value=None)
        skip_store = patch.object(monitor, "_set_cached", return_value=None)
        skip_audit = patch.object(monitor, "_write_audit_log", return_value=None)

        with conn_down, cache_miss, skip_store, skip_audit:
            report = await monitor.check(HOST)

        assert report.status == HealthStatus.OFFLINE
        assert report.host == HOST

    @pytest.mark.asyncio
    async def test_check_healthy_when_inference_fast(self, monitor):
        """Connectivity OK plus a HEALTHY inference report gives HEALTHY for the host."""
        healthy_report = HealthReport(
            status=HealthStatus.HEALTHY,
            latency_ms=500.0,
        )
        conn_up = patch.object(monitor, "_check_connectivity", return_value=True)
        fast_inference = patch.object(monitor, "_check_inference", return_value=healthy_report)
        cache_miss = patch.object(monitor, "_get_cached", return_value=None)
        skip_store = patch.object(monitor, "_set_cached", return_value=None)
        skip_audit = patch.object(monitor, "_write_audit_log", return_value=None)

        with conn_up, fast_inference, cache_miss, skip_store, skip_audit:
            report = await monitor.check(HOST)

        assert report.status == HealthStatus.HEALTHY
        assert report.host == HOST

    @pytest.mark.asyncio
    async def test_check_returns_cached(self, monitor):
        """A cache hit is returned with ``from_cache`` set to True."""
        cached = HealthReport(
            status=HealthStatus.HEALTHY,
            host=HOST,
            latency_ms=300.0,
        )
        cache_hit = patch.object(monitor, "_get_cached", return_value=cached)

        with cache_hit:
            report = await monitor.check(HOST)

        assert report.from_cache is True
        assert report.status == HealthStatus.HEALTHY

    @pytest.mark.asyncio
    async def test_check_proceeds_when_cache_get_fails(self, monitor):
        """With no usable cache entry, ``check()`` still runs the probes and does not crash.

        NOTE(review): the failed cache read is modelled as ``_get_cached``
        returning ``None`` rather than raising — confirm that this matches how
        a real Redis read failure surfaces.
        """
        cache_miss = patch.object(monitor, "_get_cached", return_value=None)
        conn_down = patch.object(monitor, "_check_connectivity", return_value=False)
        skip_store = patch.object(monitor, "_set_cached", return_value=None)
        skip_audit = patch.object(monitor, "_write_audit_log", return_value=None)

        with cache_miss, conn_down, skip_store, skip_audit:
            report = await monitor.check(HOST)

        # Normal degradation path: a result came back, nothing raised.
        assert report.status == HealthStatus.OFFLINE

    @pytest.mark.asyncio
    async def test_cache_set_failure_does_not_crash(self, monitor):
        """An error raised by ``_set_cached`` is tolerated; the result is still returned."""
        healthy = HealthReport(status=HealthStatus.HEALTHY, latency_ms=200.0)
        cache_miss = patch.object(monitor, "_get_cached", return_value=None)
        conn_up = patch.object(monitor, "_check_connectivity", return_value=True)
        fast_inference = patch.object(monitor, "_check_inference", return_value=healthy)
        broken_store = patch.object(monitor, "_set_cached", side_effect=RuntimeError("Redis down"))
        skip_audit = patch.object(monitor, "_write_audit_log", return_value=None)

        with cache_miss, conn_up, fast_inference, broken_store, skip_audit:
            # Must not raise even though the cache write fails.
            report = await monitor.check(HOST)

        assert report.status == HealthStatus.HEALTHY
# =============================================================================
# HealthReport 輔助方法
# =============================================================================
class TestHealthReport:
    """``HealthReport`` helpers: ``is_usable()`` per status and ``to_dict()``."""

    def test_is_usable_healthy(self):
        """HEALTHY reports are usable."""
        report = HealthReport(status=HealthStatus.HEALTHY)
        assert report.is_usable() is True

    def test_is_usable_slow(self):
        """SLOW reports are usable."""
        report = HealthReport(status=HealthStatus.SLOW)
        assert report.is_usable() is True

    def test_is_usable_degraded(self):
        """DEGRADED reports are usable."""
        report = HealthReport(status=HealthStatus.DEGRADED)
        assert report.is_usable() is True

    def test_is_usable_offline(self):
        """OFFLINE reports are not usable."""
        report = HealthReport(status=HealthStatus.OFFLINE)
        assert report.is_usable() is False

    def test_to_dict_structure(self):
        """``to_dict()`` exposes status, host, latency and reason under those keys."""
        serialized = HealthReport(
            status=HealthStatus.SLOW,
            host=HOST,
            latency_ms=15500.0,
            reason="slow zone",
        ).to_dict()

        expected_fields = {
            "status": "slow",
            "host": HOST,
            "latency_ms": 15500.0,
            "reason": "slow zone",
        }
        # Only these four keys are checked; any other keys are left alone.
        for key, value in expected_fields.items():
            assert serialized[key] == value
# =============================================================================
# Singleton
# =============================================================================
def test_singleton_returns_same_instance():
    """Two consecutive ``get_ollama_health_monitor()`` calls return the same object."""
    first = get_ollama_health_monitor()
    assert get_ollama_health_monitor() is first
def test_reset_singleton_gives_new_instance():
    """After ``reset_ollama_health_monitor()`` the getter returns a different object."""
    # Keep a reference to the old instance so the identity comparison is meaningful.
    before_reset = get_ollama_health_monitor()

    reset_ollama_health_monitor()

    after_reset = get_ollama_health_monitor()
    assert after_reset is not before_reset