#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
ADR-027 Phase 2 — verification of Ollama host resolution / health probe /
unhealthy marks.

Covers:
  B3 — _is_reachable HTTP probe (replacing the plain TCP check)
  B4 — mark_unhealthy / _is_unhealthy / cache invalidation interplay
  B1/B2 — lazy behaviour of config.get_ollama_host / get_embedding_host /
          get_hermes_url
"""
import os
import time
import importlib
from unittest.mock import patch, MagicMock

import pytest


# Example hosts handed explicitly to resolve_ollama_host() by most tests.
PRIMARY_HOST = 'http://primary.example:11434'
SECONDARY_HOST = 'http://secondary.example:11434'
FALLBACK_HOST = 'http://fallback.example:11434'

# Patch targets: the HTTP calls as looked up through services.ollama_service.
REQUESTS_GET = 'services.ollama_service.requests.get'
REQUESTS_POST = 'services.ollama_service.requests.post'

# Flipped by _reload_config(); tells _restore_config_module() that the shared
# ``config`` module was reloaded while the environment was monkeypatched.
_config_reload_state = {'dirty': False}


def _clear_ollama_state(oss):
    """Reset the resolved-host cache and the unhealthy marks of *oss*."""
    oss._resolved_host_cache['host'] = None
    oss._resolved_host_cache['ts'] = 0
    oss._unhealthy_marks.clear()


def _resolve_example_hosts(oss):
    """Call ``oss.resolve_ollama_host`` with the three example hosts."""
    return oss.resolve_ollama_host(
        primary=PRIMARY_HOST,
        secondary=SECONDARY_HOST,
        fallback=FALLBACK_HOST,
    )


def _reload_config():
    """Reload ``config`` so it re-reads the (monkeypatched) environment.

    Returns the reloaded module and records that a reload happened so that
    ``_restore_config_module`` can undo it once the test has finished.
    """
    import config
    # Flag first: even a reload that raises half-way leaves the module dirty.
    _config_reload_state['dirty'] = True
    return importlib.reload(config)


def _fake_generate_response():
    """Build a mocked HTTP 200 response carrying a minimal generate payload."""
    fake_resp = MagicMock(status_code=200)
    fake_resp.json.return_value = {
        "response": "ok",
        "prompt_eval_count": 3,
        "eval_count": 2,
        "total_duration": 1_000_000_000,
    }
    return fake_resp


def _apply_111_limits(monkeypatch, oss):
    """Pin the FALLBACK_111_* limits shared by the 111-fallback tests."""
    monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
    monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 20)
    monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
    monkeypatch.setattr(oss, "FALLBACK_111_NUM_PREDICT", 512)


# ─────────────────────────────────────────────────────────────────────────────
# Required: clear ollama_service's module-level caches around every test to
# avoid cross-test pollution.
# ─────────────────────────────────────────────────────────────────────────────
@pytest.fixture(autouse=True)
def _reset_ollama_caches():
    """Clear the ollama_service caches before and after each test."""
    from services import ollama_service as oss
    _clear_ollama_state(oss)
    yield
    _clear_ollama_state(oss)


@pytest.fixture(autouse=True)
def _restore_config_module():
    """Reload ``config`` after a test that reloaded it under a patched env.

    ``monkeypatch`` restores environment variables, but the ``config`` module
    object reloaded by a test would otherwise keep the values it computed
    from the test environment and leak them into later tests.

    This fixture does not depend on ``monkeypatch`` and is autouse, so pytest
    sets it up before ``monkeypatch`` and tears it down after it; the reload
    below therefore sees the restored environment.
    """
    yield
    if _config_reload_state['dirty']:
        _config_reload_state['dirty'] = False
        import config
        importlib.reload(config)


# ═══════════════════════════════════════════════════════════════════════════
# B3 — HTTP probe replaces TCP
# ═══════════════════════════════════════════════════════════════════════════

def test_resolve_picks_primary_when_http_200():
    from services import ollama_service as oss
    fake_resp = MagicMock(status_code=200)
    with patch(REQUESTS_GET, return_value=fake_resp) as mock_get:
        host = _resolve_example_hosts(oss)
    assert host == PRIMARY_HOST
    # Verify that /api/version was the endpoint probed.
    called_url = mock_get.call_args[0][0]
    assert called_url == f'{PRIMARY_HOST}/api/version'


def test_resolve_falls_back_when_http_500():
    """B3 key case: TCP connects but HTTP returns 500 (hung process) —
    the host is still treated as unreachable."""
    from services import ollama_service as oss
    fake_resp = MagicMock(status_code=500)
    with patch(REQUESTS_GET, return_value=fake_resp):
        host = _resolve_example_hosts(oss)
    assert host == FALLBACK_HOST


def test_resolve_falls_back_on_request_exception():
    from services import ollama_service as oss
    with patch(REQUESTS_GET, side_effect=Exception('connect timeout')):
        host = _resolve_example_hosts(oss)
    assert host == FALLBACK_HOST


def test_resolve_uses_primary_proxy_rescue_before_secondary():
    """When the production host cannot reach GCP-A directly, go through the
    110 primary proxy first, before considering GCP-B."""
    from services import ollama_service as oss

    fake_ok = MagicMock(status_code=200)
    seen_urls = []

    def fake_get(url, timeout=None):
        seen_urls.append(url)
        if url == f"{oss.OLLAMA_HOST_PRIMARY}/api/version":
            raise Exception("primary direct timeout")
        if url == f"{oss.OLLAMA_HOST_PRIMARY_PROXY}/api/version":
            return fake_ok
        raise AssertionError(f"should not reach {url}")

    with patch(REQUESTS_GET, side_effect=fake_get):
        host = oss.resolve_ollama_host()

    assert host == oss.OLLAMA_HOST_PRIMARY_PROXY
    assert seen_urls == [
        f"{oss.OLLAMA_HOST_PRIMARY}/api/version",
        f"{oss.OLLAMA_HOST_PRIMARY_PROXY}/api/version",
    ]


def test_resolve_skips_recent_unhealthy_direct_primary_and_uses_proxy(monkeypatch):
    """When host_health already reports GCP-A direct as unhealthy, do not
    wait for the direct timeout again."""
    from datetime import datetime
    from services import ollama_service as oss

    class FakeResult:
        def fetchone(self):
            return (False, "ConnectTimeout", datetime.now())

    class FakeSession:
        def execute(self, *args, **kwargs):
            return FakeResult()

        def close(self):
            pass

    fake_ok = MagicMock(status_code=200)
    seen_urls = []

    def fake_get(url, timeout=None):
        seen_urls.append(url)
        if url == f"{oss.OLLAMA_HOST_PRIMARY}/api/version":
            raise AssertionError("recent unhealthy direct host should be skipped")
        if url == f"{oss.OLLAMA_HOST_PRIMARY_PROXY}/api/version":
            return fake_ok
        raise AssertionError(f"should not reach {url}")

    monkeypatch.setenv("OLLAMA_RESOLVE_HOST_HEALTH_SKIP_ENABLED", "true")
    monkeypatch.setenv("OLLAMA_RESOLVE_HOST_HEALTH_SKIP_WINDOW_MINUTES", "20")
    monkeypatch.setattr("database.manager.get_session", lambda: FakeSession())

    with patch(REQUESTS_GET, side_effect=fake_get):
        host = oss.resolve_ollama_host()

    assert host == oss.OLLAMA_HOST_PRIMARY_PROXY
    assert seen_urls == [f"{oss.OLLAMA_HOST_PRIMARY_PROXY}/api/version"]


def test_recent_direct_host_unhealthy_matches_actual_host_url(monkeypatch):
    """Unhealthy records of the old GCP-A must not wrongly block the new
    GCP-A."""
    from datetime import datetime
    from services import ollama_service as oss

    seen_params = []

    class FakeResult:
        def __init__(self, row):
            self.row = row

        def fetchone(self):
            return self.row

    class FakeSession:
        def execute(self, _statement, params):
            # Record a copy of the bound parameters for the assertion below.
            seen_params.append(dict(params))
            if params.get("host_url") == oss.OLLAMA_HOST_PRIMARY:
                return FakeResult((False, "ConnectTimeout", datetime.now()))
            return FakeResult(None)

        def close(self):
            pass

    monkeypatch.setenv("OLLAMA_RESOLVE_HOST_HEALTH_SKIP_ENABLED", "true")
    monkeypatch.setattr("database.manager.get_session", lambda: FakeSession())

    assert oss._recent_direct_host_unhealthy(oss.OLLAMA_HOST_PRIMARY) is True
    assert seen_params == [
        {"host_label": "Primary (GCP)", "host_url": oss.OLLAMA_HOST_PRIMARY}
    ]


def test_retired_gcp_a_host_is_not_approved(monkeypatch):
    """The retired GCP-A host must no longer be accepted through the env
    allow-list."""
    from services import ollama_service as oss

    monkeypatch.setenv("OLLAMA_HOST_PRIMARY", "http://34.143.170.20:11434")

    assert oss.is_approved_ollama_host("http://34.143.170.20:11434") is False
    assert (
        oss.approved_ollama_env("OLLAMA_HOST_PRIMARY", oss.OLLAMA_HOST_PRIMARY)
        == oss.OLLAMA_HOST_PRIMARY
    )


# ═══════════════════════════════════════════════════════════════════════════
# B4 — mark_unhealthy behaviour
# ═══════════════════════════════════════════════════════════════════════════

def test_mark_unhealthy_triggers_immediate_fallback():
    """B4: once primary is marked unhealthy, the three-host setup should move
    to secondary (not fallback)."""
    from services import ollama_service as oss
    oss.mark_unhealthy(PRIMARY_HOST)
    # Even though the HTTP probe would return 200, a host marked unhealthy
    # must still be skipped.
    fake_resp = MagicMock(status_code=200)
    with patch(REQUESTS_GET, return_value=fake_resp):
        host = _resolve_example_hosts(oss)
    # Three-host setup: primary unavailable → secondary (not fallback).
    assert host == SECONDARY_HOST


def test_mark_unhealthy_primary_and_secondary_falls_to_fallback():
    """B4 three-host cascade: fallback is used only when both primary and
    secondary are down."""
    from services import ollama_service as oss
    oss.mark_unhealthy(PRIMARY_HOST)
    oss.mark_unhealthy(SECONDARY_HOST)
    fake_resp = MagicMock(status_code=200)
    with patch(REQUESTS_GET, return_value=fake_resp):
        host = _resolve_example_hosts(oss)
    assert host == FALLBACK_HOST


def test_unhealthy_mark_expires_after_ttl():
    from services import ollama_service as oss
    oss.mark_unhealthy(PRIMARY_HOST)
    # Backdate the mark so it looks like it was set 31 seconds ago.
    oss._unhealthy_marks[PRIMARY_HOST] = time.time() - 31
    fake_resp = MagicMock(status_code=200)
    with patch(REQUESTS_GET, return_value=fake_resp):
        host = _resolve_example_hosts(oss)
    assert host == PRIMARY_HOST


def test_mark_unhealthy_invalidates_resolved_cache():
    """B4 key point: after being marked unhealthy, a stale cache entry must
    not hand back primary."""
    from services import ollama_service as oss
    # First let primary get cached.
    fake_resp_ok = MagicMock(status_code=200)
    with patch(REQUESTS_GET, return_value=fake_resp_ok):
        first = _resolve_example_hosts(oss)
    assert first == PRIMARY_HOST

    # Simulate a failed generate call: mark primary unhealthy.
    oss.mark_unhealthy(PRIMARY_HOST)

    # Even with the cache still inside its TTL (120s), the next resolve must
    # skip primary; in the three-host setup it should move to secondary.
    fake_resp_again = MagicMock(status_code=200)
    with patch(REQUESTS_GET, return_value=fake_resp_again):
        second = _resolve_example_hosts(oss)
    assert second == SECONDARY_HOST


def test_mark_unhealthy_handles_trailing_slash():
    from services import ollama_service as oss
    oss.mark_unhealthy(f'{PRIMARY_HOST}/')
    assert oss._is_unhealthy(PRIMARY_HOST) is True
    assert oss._is_unhealthy(f'{PRIMARY_HOST}/') is True


def test_mark_unhealthy_ignores_empty():
    from services import ollama_service as oss
    oss.mark_unhealthy('')
    oss.mark_unhealthy(None)
    assert len(oss._unhealthy_marks) == 0


# ═══════════════════════════════════════════════════════════════════════════
# B1/B2 — config lazy getters
# ═══════════════════════════════════════════════════════════════════════════

def test_get_ollama_host_uses_approved_env_when_set(monkeypatch):
    monkeypatch.setenv('OLLAMA_HOST', 'http://34.21.145.224:11434')
    # Reload so the env change takes effect.
    config = _reload_config()
    assert config.get_ollama_host() == 'http://34.21.145.224:11434'


def test_get_ollama_host_rejects_unapproved_env(monkeypatch):
    monkeypatch.setenv('OLLAMA_HOST', 'http://192.168.0.188:11434')
    fake_resp = MagicMock(status_code=200)
    with patch(REQUESTS_GET, return_value=fake_resp):
        config = _reload_config()
        host = config.get_ollama_host()
    assert host == 'http://34.87.90.216:11434'


def test_get_ollama_host_falls_back_to_resolve_without_env(monkeypatch):
    monkeypatch.delenv('OLLAMA_HOST', raising=False)
    fake_resp = MagicMock(status_code=200)
    with patch(REQUESTS_GET, return_value=fake_resp):
        # Reload to ensure the env is honored.
        config = _reload_config()
        host = config.get_ollama_host()
    # The primary URL is controlled by env OLLAMA_HOST_PRIMARY
    # (default: GCP-SSD 34.87.90.216).
    assert host.startswith('http://')


def test_config_ollama_compat_constants_do_not_probe_network(monkeypatch):
    monkeypatch.setenv('OLLAMA_HOST', 'http://192.168.0.188:11434')
    monkeypatch.setenv('HERMES_URL', 'http://192.168.0.188:11434')
    monkeypatch.setenv('EMBEDDING_HOST', 'http://192.168.0.188:11434')
    monkeypatch.setenv('OLLAMA_HOST_PRIMARY', 'http://34.87.90.216:11434')

    with patch(REQUESTS_GET) as mock_get:
        config = _reload_config()

        mock_get.assert_not_called()
        assert config.OLLAMA_HOST == 'http://34.87.90.216:11434'
        assert config.HERMES_URL == 'http://34.87.90.216:11434'
        assert config.EMBEDDING_HOST == 'http://34.87.90.216:11434'


def test_get_embedding_host_prefers_env(monkeypatch):
    monkeypatch.setenv('EMBEDDING_HOST', 'http://192.168.0.111:11434')
    config = _reload_config()
    assert config.get_embedding_host() == 'http://192.168.0.111:11434'


def test_get_hermes_url_prefers_env(monkeypatch):
    monkeypatch.setenv('HERMES_URL', 'http://34.87.90.216:11434')
    config = _reload_config()
    assert config.get_hermes_url() == 'http://34.87.90.216:11434'


# ═══════════════════════════════════════════════════════════════════════════
# Cache TTL behaviour
# ═══════════════════════════════════════════════════════════════════════════

def test_resolve_uses_cache_within_ttl():
    """A second resolve inside the same TTL must not issue another HTTP
    call."""
    from services import ollama_service as oss
    fake_resp = MagicMock(status_code=200)
    with patch(REQUESTS_GET, return_value=fake_resp) as mock_get:
        _resolve_example_hosts(oss)
        _resolve_example_hosts(oss)
    # Cache hit → the second resolve does not touch HTTP.
    assert mock_get.call_count == 1


# ═══════════════════════════════════════════════════════════════════════════
# 111 final-fallback limits
# ═══════════════════════════════════════════════════════════════════════════

def test_111_fallback_downgrades_heavy_model_and_shortens_keep_alive(monkeypatch):
    """111 is the final fallback; 14B+ models must not stay resident on this
    16 GB Mac."""
    from services import ollama_service as oss

    monkeypatch.setattr(oss, "FALLBACK_111_MODEL", "qwen2.5:7b-instruct")
    _apply_111_limits(monkeypatch, oss)
    monkeypatch.setattr(oss, "FALLBACK_111_MODEL_PATTERNS", ("qwen3:14b",))

    fake_resp = _fake_generate_response()
    svc = oss.OllamaService(host="http://192.168.0.111:11434", model="qwen3:14b")

    with patch(REQUESTS_POST, return_value=fake_resp) as mock_post:
        resp = svc.generate(
            "hi",
            timeout=120,
            keep_alive="24h",
            options={"num_ctx": 131072, "num_predict": 4096},
        )

    payload = mock_post.call_args.kwargs["json"]
    assert payload["model"] == "qwen2.5:7b-instruct"
    assert payload["keep_alive"] == "5m"
    assert payload["options"]["num_ctx"] == 4096
    assert payload["options"]["num_predict"] == 512
    assert mock_post.call_args.kwargs["timeout"] == 20
    assert resp.model == "qwen2.5:7b-instruct"


def test_111_fallback_keeps_light_model_but_caps_timeout(monkeypatch):
    from requests import Timeout
    from services import ollama_service as oss

    _apply_111_limits(monkeypatch, oss)

    svc = oss.OllamaService(host="http://192.168.0.111:11434", model="llama3.2:latest")

    with patch(REQUESTS_POST, side_effect=Timeout):
        resp = svc.generate("hi", timeout=120, keep_alive="24h")

    assert resp.success is False
    assert "timeout (20s)" in resp.error


def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
    from services import ollama_service as oss

    monkeypatch.setattr(oss, "FALLBACK_111_MODEL", "llama3.2:latest")
    _apply_111_limits(monkeypatch, oss)
    monkeypatch.setattr(oss, "FALLBACK_111_MODEL_PATTERNS", ("hermes3:*",))

    fake_resp = _fake_generate_response()
    svc = oss.OllamaService(host="http://192.168.0.111:11434", model="hermes3:latest")

    with patch(REQUESTS_POST, return_value=fake_resp) as mock_post:
        resp = svc.generate("hi", timeout=120, keep_alive="24h")

    payload = mock_post.call_args.kwargs["json"]
    assert payload["model"] == "llama3.2:latest"
    assert payload["keep_alive"] == "5m"
    assert payload["options"]["num_ctx"] == 4096
    assert payload["options"]["num_predict"] == 512
    assert resp.model == "llama3.2:latest"