Files
awoooi/apps/api/tests/test_ollama_health_monitor.py
Your Name b1ef05fa8c
Some checks failed
Code Review / ai-code-review (push) Successful in 50s
CD Pipeline / tests (push) Failing after 1m14s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
feat(ollama): ADR-110 GCP 三層容災架構(GCP-A → GCP-B → Local → Gemini)
## 變更摘要
- Primary: http://34.143.170.20:11434 (GCP-A SSD, 9x 載速 + 2x 推理)
- Secondary: http://34.21.145.224:11434 (GCP-B SSD)
- Fallback: http://192.168.0.111:11434 (M1 Pro Local HDD,最後防線)
- 廢止 ADR-105「111 唯一鐵律」,新建 ADR-110

## 核心改動
- config.py: 新增 OLLAMA_SECONDARY_URL;validator 加 GCP IP 白名單(34.143.170.20, 34.21.145.224)
- ollama_failover_manager.py: 三層 Ollama 決策矩陣;並行健康檢查三台;health_111 → health_gcp_a
- ollama_health_monitor.py: host label 萃取改為通用版(支援 GCP 公網 IP)
- failover_alerter.py: 故障/恢復主機動態顯示,不再硬編碼「Ollama 111 (GPU)」
- ollama_auto_recovery.py: notify_recovery 改為 ollama_gcp_a;recovered_host 動態
- k8s/awoooi-prod: configmap + deployment + network-policy 同步更新(egress 加 GCP /32)
- 服務層: 10 個服務檔案硬編碼 192.168.0.111 改為讀 settings.OLLAMA_URL
- 測試: URL 常數更新,新增三層容災場景,GCP IP 白名單驗證測試

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 22:49:23 +08:00

404 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# apps/api/tests/test_ollama_health_monitor.py | 2026-04-25 @ Asia/Taipei
# Created 2026-04-25 P1.1c by Claude Engineer-C
# 2026-05-03 ogt: ADR-110 GCP three-tier failover; HOST updated to the GCP-A primary
"""
OllamaHealthMonitor unit tests - P1.1c
======================================
Coverage:
- The four health states (HEALTHY / SLOW / DEGRADED / OFFLINE)
- Connectivity failure → OFFLINE
- Inference timeout (asyncio.TimeoutError / httpx.TimeoutException) → DEGRADED
- Inference returning non-200 → DEGRADED
- Redis cache hit (from_cache=True)
- Redis cache failure degrades to a direct check (no crash)
- is_usable() logic
Test category: unit (httpx is mocked; no DB / Redis dependency)
"""
from __future__ import annotations
import asyncio
import json
import time
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
from src.services.ollama_health_monitor import (
LATENCY_HEALTHY_THRESHOLD_MS,
LATENCY_SLOW_THRESHOLD_MS,
HealthReport,
HealthStatus,
OllamaHealthMonitor,
get_ollama_health_monitor,
reset_ollama_health_monitor,
)
# =============================================================================
# Fixtures
# =============================================================================
HOST = "http://34.143.170.20:11434" # GCP-A primary (ADR-110, 2026-05-03)
HOST_188 = "http://192.168.0.188:11434" # Legacy reference constant (removed from the main routing path)
@pytest.fixture(autouse=True)
def reset_singleton():
    """Autouse fixture: reset the monitor singleton once each test has run.

    Nothing happens before the test; the reset call is placed after the
    ``yield`` so it executes as teardown.
    """
    yield
    # Teardown phase: drop whatever instance the test may have created.
    reset_ollama_health_monitor()
@pytest.fixture
def monitor():
    """Provide a directly constructed ``OllamaHealthMonitor`` to the test."""
    instance = OllamaHealthMonitor()
    return instance
def _mock_tags_ok() -> MagicMock:
    """Return a response mock for ``/api/tags`` whose ``status_code`` is 200."""
    # Keyword arguments to MagicMock become attributes on the mock.
    return MagicMock(status_code=200)
def _mock_generate_ok(latency_s: float = 0.5) -> MagicMock:
    """Return a response mock for a successful ``/api/generate`` call.

    The mock reports ``status_code == 200`` and its ``json()`` returns
    ``{"response": "ok"}``.

    Args:
        latency_s: Accepted for call-site compatibility only. This helper
            does not use it; a test that wants to simulate latency has to
            patch ``time.perf_counter`` itself.

    Returns:
        The response mock on its own. The previous annotation and docstring
        advertised a ``(response_mock, latency_s)`` tuple, which the function
        never returned; they now match the actual return value.
    """
    resp = MagicMock()
    resp.status_code = 200
    resp.json.return_value = {"response": "ok"}
    return resp
# =============================================================================
# Layer 1: connectivity
# =============================================================================
class TestConnectivity:
    """Outcomes of ``_check_connectivity`` for each kind of GET result."""

    @staticmethod
    def _client_for_get(*, response=None, error=None) -> AsyncMock:
        """Build an async-context-manager client mock for a single GET.

        When ``error`` is given, awaiting ``get`` raises it; otherwise
        ``get`` resolves to ``response``.
        """
        client = AsyncMock()
        if error is None:
            client.get = AsyncMock(return_value=response)
        else:
            client.get = AsyncMock(side_effect=error)
        # ``async with httpx.AsyncClient(...) as c`` must hand back this mock.
        client.__aenter__ = AsyncMock(return_value=client)
        client.__aexit__ = AsyncMock(return_value=False)
        return client

    @pytest.mark.asyncio
    async def test_connectivity_success(self, monitor):
        """A 200 response from the GET makes the connectivity check pass."""
        client = self._client_for_get(response=_mock_tags_ok())
        with patch("httpx.AsyncClient", return_value=client):
            outcome = await monitor._check_connectivity(HOST)
        assert outcome is True

    @pytest.mark.asyncio
    async def test_connectivity_non_200(self, monitor):
        """A non-200 response (503 here) makes the connectivity check fail."""
        unavailable = MagicMock()
        unavailable.status_code = 503
        client = self._client_for_get(response=unavailable)
        with patch("httpx.AsyncClient", return_value=client):
            outcome = await monitor._check_connectivity(HOST)
        assert outcome is False

    @pytest.mark.asyncio
    async def test_connectivity_timeout(self, monitor):
        """A timeout during the GET returns False instead of raising."""
        client = self._client_for_get(error=httpx.TimeoutException("timeout"))
        with patch("httpx.AsyncClient", return_value=client):
            outcome = await monitor._check_connectivity(HOST)
        assert outcome is False

    @pytest.mark.asyncio
    async def test_connectivity_connect_error(self, monitor):
        """A refused connection returns False instead of raising."""
        client = self._client_for_get(error=httpx.ConnectError("refused"))
        with patch("httpx.AsyncClient", return_value=client):
            outcome = await monitor._check_connectivity(HOST)
        assert outcome is False
# =============================================================================
# Layer 2: inference test (latency grading)
# =============================================================================
class TestInference:
    """Latency grading and error mapping performed by ``_check_inference``.

    Elapsed time is simulated by patching ``time.perf_counter`` as seen from
    the monitor module, so no test actually waits.
    """

    # Patch target for the clock that the monitor module reads.
    _PERF_COUNTER = "src.services.ollama_health_monitor.time.perf_counter"

    @staticmethod
    def _as_async_client(post: AsyncMock) -> AsyncMock:
        """Wrap ``post`` in a client mock usable with ``async with``."""
        mock_client = AsyncMock()
        mock_client.post = post
        mock_client.__aenter__ = AsyncMock(return_value=mock_client)
        mock_client.__aexit__ = AsyncMock(return_value=False)
        return mock_client

    def _make_mock_client(self, status_code: int = 200) -> AsyncMock:
        """Return a client whose POST resolves to a response with ``status_code``.

        The response's ``json()`` returns ``{"response": "ok"}``.
        """
        resp = MagicMock()
        resp.status_code = status_code
        resp.json.return_value = {"response": "ok"}
        return self._as_async_client(AsyncMock(return_value=resp))

    def _make_failing_client(self, exc: BaseException) -> AsyncMock:
        """Return a client whose POST raises ``exc`` when awaited."""
        return self._as_async_client(AsyncMock(side_effect=exc))

    @staticmethod
    def _fake_clock(end_s: float):
        """Return a ``perf_counter`` stand-in simulating ``end_s`` seconds.

        The first call returns 0.0 (the start mark); every later call
        returns ``end_s``.
        """
        calls = 0

        def _tick() -> float:
            nonlocal calls
            calls += 1
            return 0.0 if calls == 1 else end_s

        return _tick

    async def _run_with_latency(self, monitor, end_s: float):
        """Run ``_check_inference`` against a 200 client with a simulated latency."""
        mock_client = self._make_mock_client()
        with patch("httpx.AsyncClient", return_value=mock_client), \
                patch(self._PERF_COUNTER, self._fake_clock(end_s)):
            return await monitor._check_inference(HOST)

    @pytest.mark.asyncio
    async def test_inference_healthy(self, monitor):
        """A simulated 0.5 s inference is graded HEALTHY with ~500 ms latency."""
        report = await self._run_with_latency(monitor, 0.5)
        assert report.status == HealthStatus.HEALTHY
        assert report.latency_ms == pytest.approx(500.0, abs=10)

    @pytest.mark.asyncio
    async def test_inference_slow(self, monitor):
        """A simulated 15 s inference is graded SLOW with ~15 000 ms latency."""
        report = await self._run_with_latency(monitor, 15.0)
        assert report.status == HealthStatus.SLOW
        assert report.latency_ms == pytest.approx(15_000.0, abs=10)

    @pytest.mark.asyncio
    async def test_inference_degraded_by_latency(self, monitor):
        """A simulated 32 s inference is graded DEGRADED."""
        report = await self._run_with_latency(monitor, 32.0)
        assert report.status == HealthStatus.DEGRADED

    @pytest.mark.asyncio
    async def test_inference_timeout_degraded(self, monitor):
        """``httpx.TimeoutException`` maps to DEGRADED (no crash, not OFFLINE)."""
        mock_client = self._make_failing_client(httpx.TimeoutException("timeout"))
        with patch("httpx.AsyncClient", return_value=mock_client):
            report = await monitor._check_inference(HOST)
        assert report.status == HealthStatus.DEGRADED
        # The reason must mention the timeout, in either language.
        assert "超時" in report.reason or "timeout" in report.reason.lower()

    @pytest.mark.asyncio
    async def test_inference_asyncio_timeout_degraded(self, monitor):
        """``asyncio.TimeoutError`` maps to DEGRADED."""
        mock_client = self._make_failing_client(asyncio.TimeoutError())
        with patch("httpx.AsyncClient", return_value=mock_client):
            report = await monitor._check_inference(HOST)
        assert report.status == HealthStatus.DEGRADED

    @pytest.mark.asyncio
    async def test_inference_connect_error_offline(self, monitor):
        """``httpx.ConnectError`` during inference maps to OFFLINE."""
        mock_client = self._make_failing_client(httpx.ConnectError("refused"))
        with patch("httpx.AsyncClient", return_value=mock_client):
            report = await monitor._check_inference(HOST)
        assert report.status == HealthStatus.OFFLINE

    @pytest.mark.asyncio
    async def test_inference_non_200_degraded(self, monitor):
        """A non-200 inference response maps to DEGRADED and names the status code."""
        mock_client = self._make_mock_client(status_code=503)
        call_count = [0]

        def _fake_perf_counter():
            # Monotonic clock advancing 0.1 s per call: 0.0, 0.1, 0.2, ...
            call_count[0] += 1
            return float(call_count[0] - 1) * 0.1

        with patch("httpx.AsyncClient", return_value=mock_client), \
                patch(self._PERF_COUNTER, _fake_perf_counter):
            report = await monitor._check_inference(HOST)
        assert report.status == HealthStatus.DEGRADED
        assert "503" in report.reason
# =============================================================================
# check() 整合(含 Redis 快取)
# =============================================================================
class TestCheckWithCache:
    """Behaviour of ``check()`` with probe, cache and audit hooks stubbed.

    Each test replaces the monitor's private collaborators with
    ``patch.object`` so only the orchestration inside ``check()`` runs.
    """

    @pytest.mark.asyncio
    async def test_check_offline_when_connectivity_fails(self, monitor):
        """A failed connectivity probe yields an OFFLINE report for the host."""
        with patch.object(monitor, "_check_connectivity", return_value=False), \
                patch.object(monitor, "_get_cached", return_value=None), \
                patch.object(monitor, "_set_cached", return_value=None), \
                patch.object(monitor, "_write_audit_log", return_value=None):
            result = await monitor.check(HOST)
        assert result.status == HealthStatus.OFFLINE
        assert result.host == HOST

    @pytest.mark.asyncio
    async def test_check_healthy_when_inference_fast(self, monitor):
        """Connectivity OK plus a HEALTHY inference report yields HEALTHY."""
        fast_inference = HealthReport(
            status=HealthStatus.HEALTHY,
            latency_ms=500.0,
        )
        with patch.object(monitor, "_check_connectivity", return_value=True), \
                patch.object(monitor, "_check_inference", return_value=fast_inference), \
                patch.object(monitor, "_get_cached", return_value=None), \
                patch.object(monitor, "_set_cached", return_value=None), \
                patch.object(monitor, "_write_audit_log", return_value=None):
            result = await monitor.check(HOST)
        assert result.status == HealthStatus.HEALTHY
        assert result.host == HOST

    @pytest.mark.asyncio
    async def test_check_returns_cached(self, monitor):
        """A cache hit is returned with ``from_cache`` set to True."""
        stored = HealthReport(
            status=HealthStatus.HEALTHY,
            host=HOST,
            latency_ms=300.0,
        )
        with patch.object(monitor, "_get_cached", return_value=stored):
            result = await monitor.check(HOST)
        assert result.from_cache is True
        assert result.status == HealthStatus.HEALTHY

    @pytest.mark.asyncio
    async def test_check_proceeds_when_cache_get_fails(self, monitor):
        """With no cached value available, ``check()`` still runs and reports OFFLINE.

        NOTE(review): ``_get_cached`` is stubbed to return None, i.e. a plain
        cache miss; no exception is injected, so an actual Redis read failure
        is not exercised here.
        """
        with patch.object(monitor, "_get_cached", return_value=None), \
                patch.object(monitor, "_check_connectivity", return_value=False), \
                patch.object(monitor, "_set_cached", return_value=None), \
                patch.object(monitor, "_write_audit_log", return_value=None):
            result = await monitor.check(HOST)
        # Degraded path completed without raising.
        assert result.status == HealthStatus.OFFLINE

    @pytest.mark.asyncio
    async def test_cache_set_failure_does_not_crash(self, monitor):
        """An exception from ``_set_cached`` does not stop the report being returned."""
        fast_inference = HealthReport(status=HealthStatus.HEALTHY, latency_ms=200.0)
        with patch.object(monitor, "_get_cached", return_value=None), \
                patch.object(monitor, "_check_connectivity", return_value=True), \
                patch.object(monitor, "_check_inference", return_value=fast_inference), \
                patch.object(monitor, "_set_cached", side_effect=RuntimeError("Redis down")), \
                patch.object(monitor, "_write_audit_log", return_value=None):
            # Must not raise even though the cache write blows up.
            result = await monitor.check(HOST)
        assert result.status == HealthStatus.HEALTHY
# =============================================================================
# HealthReport 輔助方法
# =============================================================================
class TestHealthReport:
    """Checks on ``HealthReport.is_usable()`` and ``HealthReport.to_dict()``."""

    def test_is_usable_healthy(self):
        """HEALTHY counts as usable."""
        report = HealthReport(status=HealthStatus.HEALTHY)
        assert report.is_usable() is True

    def test_is_usable_slow(self):
        """SLOW counts as usable."""
        report = HealthReport(status=HealthStatus.SLOW)
        assert report.is_usable() is True

    def test_is_usable_degraded(self):
        """DEGRADED counts as usable."""
        report = HealthReport(status=HealthStatus.DEGRADED)
        assert report.is_usable() is True

    def test_is_usable_offline(self):
        """OFFLINE is the one status asserted here as not usable."""
        report = HealthReport(status=HealthStatus.OFFLINE)
        assert report.is_usable() is False

    def test_to_dict_structure(self):
        """``to_dict()`` exposes status (as the string "slow"), host, latency and reason."""
        serialised = HealthReport(
            status=HealthStatus.SLOW,
            host=HOST,
            latency_ms=15500.0,
            reason="slow zone",
        ).to_dict()
        expected_fields = {
            "status": "slow",
            "host": HOST,
            "latency_ms": 15500.0,
            "reason": "slow zone",
        }
        # Only these four keys are checked; extra keys in the dict are ignored.
        for key, value in expected_fields.items():
            assert serialised[key] == value
# =============================================================================
# Singleton
# =============================================================================
def test_singleton_returns_same_instance():
    """Two consecutive accessor calls return the very same object."""
    first = get_ollama_health_monitor()
    second = get_ollama_health_monitor()
    assert second is first
def test_reset_singleton_gives_new_instance():
    """After a reset, the accessor returns a different object than before."""
    before_reset = get_ollama_health_monitor()
    reset_ollama_health_monitor()
    after_reset = get_ollama_health_monitor()
    # ``before_reset`` stays referenced so the identity comparison is meaningful.
    assert after_reset is not before_reset