Files
ewoooc/tests/test_ollama_resolve.py
OoO ba5fe06b13
Some checks failed
CD Pipeline / deploy (push) Has been cancelled
fix: update ollama primary host
2026-06-18 14:24:55 +08:00

443 lines
19 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
ADR-027 Phase 2 — verification of Ollama host resolve / health probe / unhealthy mark.

Covers:
    B3 — _is_reachable HTTP probe (replaces the plain TCP check)
    B4 — mark_unhealthy / _is_unhealthy / cache-invalidation interplay
    B1/B2 — lazy behavior of config.get_ollama_host / get_embedding_host / get_hermes_url
"""
import os
import time
import importlib
from unittest.mock import patch, MagicMock
import pytest
# ─────────────────────────────────────────────────────────────────────────────
# Required: clear ollama_service's module-level caches before every test to avoid cross-test pollution.
# ─────────────────────────────────────────────────────────────────────────────
@pytest.fixture(autouse=True)
def _reset_ollama_caches():
    """Reset ollama_service's resolved-host cache and unhealthy marks before and after each test."""
    from services import ollama_service as oss

    def _wipe():
        # Same three resets are applied on setup and on teardown.
        oss._resolved_host_cache['host'] = None
        oss._resolved_host_cache['ts'] = 0
        oss._unhealthy_marks.clear()

    _wipe()
    yield
    _wipe()
# ═══════════════════════════════════════════════════════════════════════════
# B3 — HTTP probe 取代 TCP
# ═══════════════════════════════════════════════════════════════════════════
def test_resolve_picks_primary_when_http_200():
    """With every probe answering HTTP 200, the primary host is the one resolved."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response) as probe:
        resolved = oss.resolve_ollama_host(**hosts)

    assert resolved == 'http://primary.example:11434'
    # Verify that the probe hit the /api/version endpoint.
    probed_url = probe.call_args[0][0]
    assert probed_url == 'http://primary.example:11434/api/version'
def test_resolve_falls_back_when_http_500():
    """B3 key case: TCP connects but HTTP returns 500 (process hung) — also treated as unreachable."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    server_error = MagicMock(status_code=500)
    with patch('services.ollama_service.requests.get', return_value=server_error):
        resolved = oss.resolve_ollama_host(**hosts)

    assert resolved == 'http://fallback.example:11434'
def test_resolve_falls_back_on_request_exception():
    """When every probe raises, resolution ends on the fallback host."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    probe_failure = Exception('connect timeout')
    with patch('services.ollama_service.requests.get', side_effect=probe_failure):
        resolved = oss.resolve_ollama_host(**hosts)

    assert resolved == 'http://fallback.example:11434'
def test_resolve_uses_primary_proxy_rescue_before_secondary():
    """When the production host cannot reach GCP-A directly, use the 110 primary proxy first, before considering GCP-B."""
    from services import ollama_service as oss

    direct_probe = f"{oss.OLLAMA_HOST_PRIMARY}/api/version"
    proxy_probe = f"{oss.OLLAMA_HOST_PRIMARY_PROXY}/api/version"
    proxy_ok = MagicMock(status_code=200)
    probed = []

    def fake_get(url, timeout=None):
        # Record every probe so the exact order can be asserted below.
        probed.append(url)
        if url == direct_probe:
            raise Exception("primary direct timeout")
        if url != proxy_probe:
            raise AssertionError(f"should not reach {url}")
        return proxy_ok

    with patch('services.ollama_service.requests.get', side_effect=fake_get):
        resolved = oss.resolve_ollama_host()

    assert resolved == oss.OLLAMA_HOST_PRIMARY_PROXY
    assert probed == [direct_probe, proxy_probe]
def test_resolve_skips_recent_unhealthy_direct_primary_and_uses_proxy(monkeypatch):
    """When host_health already judged GCP-A direct unhealthy, do not wait for the direct timeout again."""
    from datetime import datetime
    from services import ollama_service as oss

    class _UnhealthyRowResult:
        """Stub query result whose single row reports an unhealthy host just now."""

        def fetchone(self):
            return (False, "ConnectTimeout", datetime.now())

    class _StubSession:
        """Stub DB session that answers every query with the unhealthy row."""

        def execute(self, *args, **kwargs):
            return _UnhealthyRowResult()

        def close(self):
            pass

    direct_probe = f"{oss.OLLAMA_HOST_PRIMARY}/api/version"
    proxy_probe = f"{oss.OLLAMA_HOST_PRIMARY_PROXY}/api/version"
    proxy_ok = MagicMock(status_code=200)
    probed = []

    def fake_get(url, timeout=None):
        probed.append(url)
        if url == direct_probe:
            raise AssertionError("recent unhealthy direct host should be skipped")
        if url != proxy_probe:
            raise AssertionError(f"should not reach {url}")
        return proxy_ok

    monkeypatch.setenv("OLLAMA_RESOLVE_HOST_HEALTH_SKIP_ENABLED", "true")
    monkeypatch.setenv("OLLAMA_RESOLVE_HOST_HEALTH_SKIP_WINDOW_MINUTES", "20")
    monkeypatch.setattr("database.manager.get_session", lambda: _StubSession())

    with patch('services.ollama_service.requests.get', side_effect=fake_get):
        resolved = oss.resolve_ollama_host()

    assert resolved == oss.OLLAMA_HOST_PRIMARY_PROXY
    assert probed == [proxy_probe]
def test_recent_direct_host_unhealthy_matches_actual_host_url(monkeypatch):
    """An unhealthy record for the old GCP-A must not wrongly block the new GCP-A."""
    from datetime import datetime
    from services import ollama_service as oss

    recorded_params = []

    class _RowResult:
        """Stub query result that hands back a pre-set row (or None)."""

        def __init__(self, row):
            self.row = row

        def fetchone(self):
            return self.row

    class _StubSession:
        """Stub DB session: only a query bound to the current primary URL yields a row."""

        def execute(self, _statement, params):
            recorded_params.append(dict(params))
            matches_primary = params.get("host_url") == oss.OLLAMA_HOST_PRIMARY
            row = (False, "ConnectTimeout", datetime.now()) if matches_primary else None
            return _RowResult(row)

        def close(self):
            pass

    monkeypatch.setenv("OLLAMA_RESOLVE_HOST_HEALTH_SKIP_ENABLED", "true")
    monkeypatch.setattr("database.manager.get_session", lambda: _StubSession())

    assert oss._recent_direct_host_unhealthy(oss.OLLAMA_HOST_PRIMARY) is True
    expected_params = {"host_label": "Primary (GCP)", "host_url": oss.OLLAMA_HOST_PRIMARY}
    assert recorded_params == [expected_params]
def test_retired_gcp_a_host_is_not_approved(monkeypatch):
    """The retired GCP-A host must no longer be accepted through the env allowlist."""
    from services import ollama_service as oss

    retired_host = "http://34.143.170.20:11434"
    monkeypatch.setenv("OLLAMA_HOST_PRIMARY", retired_host)

    assert oss.is_approved_ollama_host(retired_host) is False
    # With the retired URL in the env, the lookup must hand back the supplied default.
    effective = oss.approved_ollama_env("OLLAMA_HOST_PRIMARY", oss.OLLAMA_HOST_PRIMARY)
    assert effective == oss.OLLAMA_HOST_PRIMARY
# ═══════════════════════════════════════════════════════════════════════════
# B4 — mark_unhealthy 行為
# ═══════════════════════════════════════════════════════════════════════════
def test_mark_unhealthy_triggers_immediate_fallback():
    """B4: once primary is marked unhealthy, the three-host setup should move to secondary (not fallback)."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    oss.mark_unhealthy('http://primary.example:11434')

    # Even though the HTTP probe would return 200, a host marked unhealthy must still be skipped.
    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response):
        resolved = oss.resolve_ollama_host(**hosts)

    # Three-host setup: primary unavailable -> secondary (not fallback).
    assert resolved == 'http://secondary.example:11434'
def test_mark_unhealthy_primary_and_secondary_falls_to_fallback():
    """B4 three-host cascade: fallback is used only when both primary and secondary are down."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    for down_host in ('http://primary.example:11434', 'http://secondary.example:11434'):
        oss.mark_unhealthy(down_host)

    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response):
        resolved = oss.resolve_ollama_host(**hosts)

    assert resolved == 'http://fallback.example:11434'
def test_unhealthy_mark_expires_after_ttl():
    """An unhealthy mark backdated by 31 seconds no longer keeps primary from being resolved."""
    from services import ollama_service as oss

    primary_url = 'http://primary.example:11434'
    oss.mark_unhealthy(primary_url)
    # Forge the timestamp so the mark looks like it was set 31 seconds ago
    # (presumably just past the unhealthy-mark TTL — confirm against ollama_service).
    oss._unhealthy_marks[primary_url] = time.time() - 31

    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response):
        resolved = oss.resolve_ollama_host(
            primary=primary_url,
            secondary='http://secondary.example:11434',
            fallback='http://fallback.example:11434',
        )

    assert resolved == 'http://primary.example:11434'
def test_mark_unhealthy_invalidates_resolved_cache():
    """B4 key point: after being marked unhealthy, the stale cache must not return primary again."""
    from services import ollama_service as oss

    def _resolve_with_all_probes_ok():
        # Each call gets its own 200 response mock, as two separate resolve rounds.
        with patch(
            'services.ollama_service.requests.get',
            return_value=MagicMock(status_code=200),
        ):
            return oss.resolve_ollama_host(
                primary='http://primary.example:11434',
                secondary='http://secondary.example:11434',
                fallback='http://fallback.example:11434',
            )

    # First let primary get cached.
    first = _resolve_with_all_probes_ok()
    assert first == 'http://primary.example:11434'

    # Simulate a generate failure: mark primary unhealthy.
    oss.mark_unhealthy('http://primary.example:11434')

    # Even while the cache is still within its TTL (120s), the next resolve must skip primary.
    # In the three-host setup it should move to secondary.
    second = _resolve_with_all_probes_ok()
    assert second == 'http://secondary.example:11434'
def test_mark_unhealthy_handles_trailing_slash():
    """A mark set with a trailing slash is seen for both the slashed and unslashed URL."""
    from services import ollama_service as oss

    oss.mark_unhealthy('http://primary.example:11434/')

    without_slash = oss._is_unhealthy('http://primary.example:11434')
    with_slash = oss._is_unhealthy('http://primary.example:11434/')
    assert without_slash is True
    assert with_slash is True
def test_mark_unhealthy_ignores_empty():
    """Marking an empty string or None leaves the unhealthy-mark table empty."""
    from services import ollama_service as oss

    for blank in ('', None):
        oss.mark_unhealthy(blank)

    assert len(oss._unhealthy_marks) == 0
# ═══════════════════════════════════════════════════════════════════════════
# B1/B2 — config lazy getters
# ═══════════════════════════════════════════════════════════════════════════
def test_get_ollama_host_uses_approved_env_when_set(monkeypatch):
    """get_ollama_host returns the OLLAMA_HOST env value when it is set to this URL."""
    approved_url = 'http://34.21.145.224:11434'
    monkeypatch.setenv('OLLAMA_HOST', approved_url)

    import config
    # Reload so the env change takes effect.
    importlib.reload(config)

    assert config.get_ollama_host() == 'http://34.21.145.224:11434'
def test_get_ollama_host_rejects_unapproved_env(monkeypatch):
    """With OLLAMA_HOST set to 192.168.0.188, get_ollama_host still returns 34.87.90.216."""
    monkeypatch.setenv('OLLAMA_HOST', 'http://192.168.0.188:11434')

    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response):
        import config
        importlib.reload(config)
        resolved = config.get_ollama_host()

    assert resolved == 'http://34.87.90.216:11434'
def test_get_ollama_host_falls_back_to_resolve_without_env(monkeypatch):
    """Without OLLAMA_HOST in the env, get_ollama_host still yields an http:// URL."""
    monkeypatch.delenv('OLLAMA_HOST', raising=False)

    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response):
        # Reload to ensure the env is honored.
        import config
        importlib.reload(config)
        resolved = config.get_ollama_host()

    # The primary URL is controlled by env OLLAMA_HOST_PRIMARY (default GCP-SSD 34.87.90.216),
    # so only the scheme is checked here.
    assert resolved.startswith('http://')
def test_config_ollama_compat_constants_do_not_probe_network(monkeypatch):
    """Reloading config must not issue any HTTP probe; the constants all equal the primary URL."""
    lan_url = 'http://192.168.0.188:11434'
    for env_name in ('OLLAMA_HOST', 'HERMES_URL', 'EMBEDDING_HOST'):
        monkeypatch.setenv(env_name, lan_url)
    monkeypatch.setenv('OLLAMA_HOST_PRIMARY', 'http://34.87.90.216:11434')

    with patch('services.ollama_service.requests.get') as probe:
        import config
        importlib.reload(config)
        probe.assert_not_called()

    assert config.OLLAMA_HOST == 'http://34.87.90.216:11434'
    assert config.HERMES_URL == 'http://34.87.90.216:11434'
    assert config.EMBEDDING_HOST == 'http://34.87.90.216:11434'
def test_get_embedding_host_prefers_env(monkeypatch):
    """get_embedding_host returns the EMBEDDING_HOST env value set here."""
    embedding_url = 'http://192.168.0.111:11434'
    monkeypatch.setenv('EMBEDDING_HOST', embedding_url)

    import config
    importlib.reload(config)

    assert config.get_embedding_host() == 'http://192.168.0.111:11434'
def test_get_hermes_url_prefers_env(monkeypatch):
    """get_hermes_url returns the HERMES_URL env value set here."""
    hermes_url = 'http://34.87.90.216:11434'
    monkeypatch.setenv('HERMES_URL', hermes_url)

    import config
    importlib.reload(config)

    assert config.get_hermes_url() == 'http://34.87.90.216:11434'
# ═══════════════════════════════════════════════════════════════════════════
# Cache TTL 行為
# ═══════════════════════════════════════════════════════════════════════════
def test_resolve_uses_cache_within_ttl():
    """A second resolve within the same TTL must not issue another HTTP call."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response) as probe:
        for _ in range(2):
            oss.resolve_ollama_host(**hosts)

    # Cache hit -> the second resolve does not touch HTTP.
    assert probe.call_count == 1
def test_111_fallback_downgrades_heavy_model_and_shortens_keep_alive(monkeypatch):
    """111 is the final fallback; 14B+ models must not stay resident on this 16GB Mac."""
    from services import ollama_service as oss

    fallback_settings = {
        "FALLBACK_111_MODEL": "qwen2.5:7b-instruct",
        "FALLBACK_111_KEEP_ALIVE": "5m",
        "FALLBACK_111_MAX_TIMEOUT": 20,
        "FALLBACK_111_NUM_CTX": 4096,
        "FALLBACK_111_NUM_PREDICT": 512,
        "FALLBACK_111_MODEL_PATTERNS": ("qwen3:14b",),
    }
    for attr_name, attr_value in fallback_settings.items():
        monkeypatch.setattr(oss, attr_name, attr_value)

    generate_reply = MagicMock(status_code=200)
    generate_reply.json.return_value = {
        "response": "ok",
        "prompt_eval_count": 3,
        "eval_count": 2,
        "total_duration": 1_000_000_000,
    }

    service = oss.OllamaService(host="http://192.168.0.111:11434", model="qwen3:14b")
    with patch("services.ollama_service.requests.post", return_value=generate_reply) as mock_post:
        result = service.generate(
            "hi",
            timeout=120,
            keep_alive="24h",
            options={"num_ctx": 131072, "num_predict": 4096},
        )

    # Inspect what was actually sent to the 111 host.
    sent_kwargs = mock_post.call_args.kwargs
    payload = sent_kwargs["json"]
    assert payload["model"] == "qwen2.5:7b-instruct"
    assert payload["keep_alive"] == "5m"
    assert payload["options"]["num_ctx"] == 4096
    assert payload["options"]["num_predict"] == 512
    assert sent_kwargs["timeout"] == 20
    assert result.model == "qwen2.5:7b-instruct"
def test_111_fallback_keeps_light_model_but_caps_timeout(monkeypatch):
    """On the 111 host a Timeout yields a failed response whose error reports the 20s cap."""
    from requests import Timeout
    from services import ollama_service as oss

    fallback_settings = {
        "FALLBACK_111_KEEP_ALIVE": "5m",
        "FALLBACK_111_MAX_TIMEOUT": 20,
        "FALLBACK_111_NUM_CTX": 4096,
        "FALLBACK_111_NUM_PREDICT": 512,
    }
    for attr_name, attr_value in fallback_settings.items():
        monkeypatch.setattr(oss, attr_name, attr_value)

    service = oss.OllamaService(host="http://192.168.0.111:11434", model="llama3.2:latest")
    with patch("services.ollama_service.requests.post", side_effect=Timeout):
        result = service.generate("hi", timeout=120, keep_alive="24h")

    assert result.success is False
    assert "timeout (20s)" in result.error
def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
    """On the 111 host a model matching the "hermes3:*" pattern is swapped for the fallback model."""
    from services import ollama_service as oss

    fallback_settings = {
        "FALLBACK_111_MODEL": "llama3.2:latest",
        "FALLBACK_111_KEEP_ALIVE": "5m",
        "FALLBACK_111_MAX_TIMEOUT": 20,
        "FALLBACK_111_NUM_CTX": 4096,
        "FALLBACK_111_NUM_PREDICT": 512,
        "FALLBACK_111_MODEL_PATTERNS": ("hermes3:*",),
    }
    for attr_name, attr_value in fallback_settings.items():
        monkeypatch.setattr(oss, attr_name, attr_value)

    generate_reply = MagicMock(status_code=200)
    generate_reply.json.return_value = {
        "response": "ok",
        "prompt_eval_count": 3,
        "eval_count": 2,
        "total_duration": 1_000_000_000,
    }

    service = oss.OllamaService(host="http://192.168.0.111:11434", model="hermes3:latest")
    with patch("services.ollama_service.requests.post", return_value=generate_reply) as mock_post:
        result = service.generate("hi", timeout=120, keep_alive="24h")

    payload = mock_post.call_args.kwargs["json"]
    assert payload["model"] == "llama3.2:latest"
    assert payload["keep_alive"] == "5m"
    assert payload["options"]["num_ctx"] == 4096
    assert payload["options"]["num_predict"] == 512
    assert result.model == "llama3.2:latest"