443 lines
19 KiB
Python
443 lines
19 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
ADR-027 Phase 2 — verification of Ollama host resolve / health probe / unhealthy mark

Covers:
  B3    — _is_reachable HTTP probe (replaces the plain TCP check)
  B4    — mark_unhealthy / _is_unhealthy / cache-invalidation interaction
  B1/B2 — lazy behaviour of config.get_ollama_host / get_embedding_host / get_hermes_url
|
||
"""
|
||
|
||
import os
|
||
import time
|
||
import importlib
|
||
from unittest.mock import patch, MagicMock
|
||
|
||
import pytest
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
# Required: clear ollama_service's module-level caches before every test to
# avoid cross-test pollution.
# ─────────────────────────────────────────────────────────────────────────────
|
||
@pytest.fixture(autouse=True)
def _reset_ollama_caches():
    """Reset ollama_service's module-level state around every test.

    The resolved-host cache entries ('host', 'ts') are blanked and the
    unhealthy-mark table is emptied both before the test runs and again
    after it finishes.
    """
    from services import ollama_service as oss

    def _wipe():
        # Restore the cache entries to their "nothing resolved yet" values.
        for key, blank in (('host', None), ('ts', 0)):
            oss._resolved_host_cache[key] = blank
        oss._unhealthy_marks.clear()

    _wipe()
    yield
    _wipe()
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# B3 — HTTP probe replaces TCP
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_resolve_picks_primary_when_http_200():
    """A 200 response to the probe makes resolve return the primary host."""
    from services import ollama_service as oss

    primary_url = 'http://primary.example:11434'
    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response) as probe:
        resolved = oss.resolve_ollama_host(
            primary=primary_url,
            secondary='http://secondary.example:11434',
            fallback='http://fallback.example:11434',
        )

    assert resolved == primary_url
    # Verify that the probe hit /api/version on the primary.
    probed_url = probe.call_args.args[0]
    assert probed_url == 'http://primary.example:11434/api/version'
|
||
|
||
|
||
def test_resolve_falls_back_when_http_500():
    """B3 key case: TCP connects but HTTP answers 500 (hung process) — still treated as unreachable."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    broken_response = MagicMock(status_code=500)
    with patch('services.ollama_service.requests.get', return_value=broken_response):
        resolved = oss.resolve_ollama_host(**hosts)

    # Every probe returns 500, so resolve ends up on the fallback host.
    assert resolved == hosts['fallback']
|
||
|
||
|
||
def test_resolve_falls_back_on_request_exception():
    """When every probe raises, resolve returns the fallback host."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    probe_failure = Exception('connect timeout')
    with patch('services.ollama_service.requests.get', side_effect=probe_failure):
        resolved = oss.resolve_ollama_host(**hosts)

    assert resolved == hosts['fallback']
|
||
|
||
|
||
def test_resolve_uses_primary_proxy_rescue_before_secondary():
    """When the production host cannot reach GCP-A directly, try the 110 primary proxy before considering GCP-B."""
    from services import ollama_service as oss

    direct_probe = f"{oss.OLLAMA_HOST_PRIMARY}/api/version"
    proxy_probe = f"{oss.OLLAMA_HOST_PRIMARY_PROXY}/api/version"
    ok_response = MagicMock(status_code=200)
    probed = []

    def fake_get(url, timeout=None):
        # Record every probe so the exact order can be asserted afterwards.
        probed.append(url)
        if url == direct_probe:
            raise Exception("primary direct timeout")
        if url == proxy_probe:
            return ok_response
        raise AssertionError(f"should not reach {url}")

    with patch('services.ollama_service.requests.get', side_effect=fake_get):
        resolved = oss.resolve_ollama_host()

    assert resolved == oss.OLLAMA_HOST_PRIMARY_PROXY
    # Direct primary is tried first, then the proxy; nothing else is probed.
    assert probed == [direct_probe, proxy_probe]
|
||
|
||
|
||
def test_resolve_skips_recent_unhealthy_direct_primary_and_uses_proxy(monkeypatch):
    """When host_health already reports GCP-A direct as unhealthy, do not wait for a direct timeout."""
    from datetime import datetime
    from services import ollama_service as oss

    class StubResult:
        """Query result whose single row is a fresh ConnectTimeout record."""

        def fetchone(self):
            return (False, "ConnectTimeout", datetime.now())

    class StubSession:
        """Session stand-in: every execute() yields the unhealthy row."""

        def execute(self, *args, **kwargs):
            return StubResult()

        def close(self):
            pass

    def open_session():
        return StubSession()

    direct_probe = f"{oss.OLLAMA_HOST_PRIMARY}/api/version"
    proxy_probe = f"{oss.OLLAMA_HOST_PRIMARY_PROXY}/api/version"
    ok_response = MagicMock(status_code=200)
    probed = []

    def fake_get(url, timeout=None):
        probed.append(url)
        if url == direct_probe:
            raise AssertionError("recent unhealthy direct host should be skipped")
        if url == proxy_probe:
            return ok_response
        raise AssertionError(f"should not reach {url}")

    monkeypatch.setenv("OLLAMA_RESOLVE_HOST_HEALTH_SKIP_ENABLED", "true")
    monkeypatch.setenv("OLLAMA_RESOLVE_HOST_HEALTH_SKIP_WINDOW_MINUTES", "20")
    monkeypatch.setattr("database.manager.get_session", open_session)

    with patch('services.ollama_service.requests.get', side_effect=fake_get):
        resolved = oss.resolve_ollama_host()

    assert resolved == oss.OLLAMA_HOST_PRIMARY_PROXY
    # Only the proxy was probed; the direct primary was never contacted.
    assert probed == [proxy_probe]
|
||
|
||
|
||
def test_recent_direct_host_unhealthy_matches_actual_host_url(monkeypatch):
    """An unhealthy record for the old GCP-A must not wrongly block the new GCP-A."""
    from datetime import datetime
    from services import ollama_service as oss

    captured_params = []

    class StubResult:
        """Query result wrapping one pre-chosen row (or None)."""

        def __init__(self, row):
            self.row = row

        def fetchone(self):
            return self.row

    class StubSession:
        """Session stand-in that records query params and matches on host_url."""

        def execute(self, _statement, params):
            captured_params.append(dict(params))
            # Only a lookup for the current primary URL yields an unhealthy row.
            is_primary = params.get("host_url") == oss.OLLAMA_HOST_PRIMARY
            row = (False, "ConnectTimeout", datetime.now()) if is_primary else None
            return StubResult(row)

        def close(self):
            pass

    def open_session():
        return StubSession()

    monkeypatch.setenv("OLLAMA_RESOLVE_HOST_HEALTH_SKIP_ENABLED", "true")
    monkeypatch.setattr("database.manager.get_session", open_session)

    verdict = oss._recent_direct_host_unhealthy(oss.OLLAMA_HOST_PRIMARY)

    assert verdict is True
    # Exactly one query, keyed by both the label and the actual host URL.
    expected_query = {"host_label": "Primary (GCP)", "host_url": oss.OLLAMA_HOST_PRIMARY}
    assert captured_params == [expected_query]
|
||
|
||
|
||
def test_retired_gcp_a_host_is_not_approved(monkeypatch):
    """The retired GCP-A host must no longer be accepted by the env allowlist."""
    from services import ollama_service as oss

    retired_host = "http://34.143.170.20:11434"
    monkeypatch.setenv("OLLAMA_HOST_PRIMARY", retired_host)

    assert oss.is_approved_ollama_host(retired_host) is False
    # With the env pointing at the retired host, the supplied default is returned.
    effective = oss.approved_ollama_env("OLLAMA_HOST_PRIMARY", oss.OLLAMA_HOST_PRIMARY)
    assert effective == oss.OLLAMA_HOST_PRIMARY
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# B4 — mark_unhealthy behaviour
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_mark_unhealthy_triggers_immediate_fallback():
    """B4: after primary is marked unhealthy, the three-host setup moves to secondary (not fallback)."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    oss.mark_unhealthy(hosts['primary'])

    # Even though the HTTP probe would return 200, a host marked unhealthy
    # must still be skipped.
    healthy_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_response):
        resolved = oss.resolve_ollama_host(**hosts)

    # Three-host setup: primary unavailable -> secondary (not fallback).
    assert resolved == hosts['secondary']
|
||
|
||
|
||
def test_mark_unhealthy_primary_and_secondary_falls_to_fallback():
    """B4 three-host cascade: fallback is used only when both primary and secondary are down."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    for tier in ('primary', 'secondary'):
        oss.mark_unhealthy(hosts[tier])

    healthy_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_response):
        resolved = oss.resolve_ollama_host(**hosts)

    assert resolved == hosts['fallback']
|
||
|
||
|
||
def test_unhealthy_mark_expires_after_ttl():
    """A mark backdated by 31 seconds no longer keeps primary from being chosen."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    primary_url = hosts['primary']
    oss.mark_unhealthy(primary_url)
    # Forge the timestamp so the mark looks like it was set 31 seconds ago.
    oss._unhealthy_marks[primary_url] = time.time() - 31

    healthy_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_response):
        resolved = oss.resolve_ollama_host(**hosts)

    assert resolved == primary_url
|
||
|
||
|
||
def test_mark_unhealthy_invalidates_resolved_cache():
    """B4 key point: once primary is marked unhealthy, the stale cache must not return it."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }

    # First get primary into the resolve cache.
    with patch('services.ollama_service.requests.get', return_value=MagicMock(status_code=200)):
        before_mark = oss.resolve_ollama_host(**hosts)
    assert before_mark == hosts['primary']

    # Simulate a failed generate call by marking primary unhealthy.
    oss.mark_unhealthy(hosts['primary'])

    # Even if the cache is still within its TTL (120s per the original note),
    # the next resolve must skip primary; the three-host setup moves to secondary.
    with patch('services.ollama_service.requests.get', return_value=MagicMock(status_code=200)):
        after_mark = oss.resolve_ollama_host(**hosts)
    assert after_mark == hosts['secondary']
|
||
|
||
|
||
def test_mark_unhealthy_handles_trailing_slash():
    """A mark set with a trailing slash is seen both with and without the slash."""
    from services import ollama_service as oss

    bare_url = 'http://primary.example:11434'
    slashed_url = 'http://primary.example:11434/'
    oss.mark_unhealthy(slashed_url)

    assert oss._is_unhealthy(bare_url) is True
    assert oss._is_unhealthy(slashed_url) is True
|
||
|
||
|
||
def test_mark_unhealthy_ignores_empty():
    """Marking an empty string or None leaves the unhealthy table empty."""
    from services import ollama_service as oss

    for blank_host in ('', None):
        oss.mark_unhealthy(blank_host)

    assert not oss._unhealthy_marks
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
|
||
# B1/B2 — config lazy getters
|
||
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_get_ollama_host_uses_approved_env_when_set(monkeypatch):
    """With OLLAMA_HOST set to this address, get_ollama_host returns it unchanged."""
    approved_host = 'http://34.21.145.224:11434'
    monkeypatch.setenv('OLLAMA_HOST', approved_host)

    import config
    # Reload so the env change takes effect.
    importlib.reload(config)

    assert config.get_ollama_host() == approved_host
|
||
|
||
|
||
def test_get_ollama_host_rejects_unapproved_env(monkeypatch):
    """An OLLAMA_HOST value of 192.168.0.188 is not returned; 34.87.90.216 is."""
    monkeypatch.setenv('OLLAMA_HOST', 'http://192.168.0.188:11434')

    healthy_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_response):
        import config
        importlib.reload(config)
        resolved = config.get_ollama_host()

    assert resolved == 'http://34.87.90.216:11434'
|
||
|
||
|
||
def test_get_ollama_host_falls_back_to_resolve_without_env(monkeypatch):
    """Without OLLAMA_HOST in the environment, get_ollama_host still yields an http:// URL."""
    monkeypatch.delenv('OLLAMA_HOST', raising=False)

    healthy_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_response):
        # Reload to ensure the env is honored.
        import config
        importlib.reload(config)
        resolved = config.get_ollama_host()

    # The primary URL is controlled by env OLLAMA_HOST_PRIMARY (default noted
    # in the original as GCP-SSD 34.87.90.216), so only the scheme is checked.
    assert resolved.startswith('http://')
|
||
|
||
|
||
def test_config_ollama_compat_constants_do_not_probe_network(monkeypatch):
    """Reloading config issues no HTTP probe, and the three constants equal OLLAMA_HOST_PRIMARY."""
    lan_host = 'http://192.168.0.188:11434'
    primary_host = 'http://34.87.90.216:11434'

    for env_name in ('OLLAMA_HOST', 'HERMES_URL', 'EMBEDDING_HOST'):
        monkeypatch.setenv(env_name, lan_host)
    monkeypatch.setenv('OLLAMA_HOST_PRIMARY', primary_host)

    with patch('services.ollama_service.requests.get') as probe:
        import config
        importlib.reload(config)

    probe.assert_not_called()
    assert config.OLLAMA_HOST == primary_host
    assert config.HERMES_URL == primary_host
    assert config.EMBEDDING_HOST == primary_host
|
||
|
||
|
||
def test_get_embedding_host_prefers_env(monkeypatch):
    """get_embedding_host returns the EMBEDDING_HOST value set in the environment."""
    embedding_host = 'http://192.168.0.111:11434'
    monkeypatch.setenv('EMBEDDING_HOST', embedding_host)

    import config
    importlib.reload(config)

    assert config.get_embedding_host() == embedding_host
|
||
|
||
|
||
def test_get_hermes_url_prefers_env(monkeypatch):
    """get_hermes_url returns the HERMES_URL value set in the environment."""
    hermes_url = 'http://34.87.90.216:11434'
    monkeypatch.setenv('HERMES_URL', hermes_url)

    import config
    importlib.reload(config)

    assert config.get_hermes_url() == hermes_url
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# Cache TTL behaviour
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_resolve_uses_cache_within_ttl():
    """A second resolve within the same TTL must not issue another HTTP call."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    healthy_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_response) as probe:
        for _ in range(2):
            oss.resolve_ollama_host(**hosts)

    # Cache hit -> the second resolve does not touch HTTP.
    assert probe.call_count == 1
|
||
|
||
|
||
def test_111_fallback_downgrades_heavy_model_and_shortens_keep_alive(monkeypatch):
    """111 is the final fallback; 14B+ models must not stay resident on this 16GB Mac."""
    from services import ollama_service as oss

    fallback_settings = {
        "FALLBACK_111_MODEL": "qwen2.5:7b-instruct",
        "FALLBACK_111_KEEP_ALIVE": "5m",
        "FALLBACK_111_MAX_TIMEOUT": 20,
        "FALLBACK_111_NUM_CTX": 4096,
        "FALLBACK_111_NUM_PREDICT": 512,
        "FALLBACK_111_MODEL_PATTERNS": ("qwen3:14b",),
    }
    for attr_name, attr_value in fallback_settings.items():
        monkeypatch.setattr(oss, attr_name, attr_value)

    canned = MagicMock(status_code=200)
    canned.json.return_value = {
        "response": "ok",
        "prompt_eval_count": 3,
        "eval_count": 2,
        "total_duration": 1_000_000_000,
    }
    service = oss.OllamaService(host="http://192.168.0.111:11434", model="qwen3:14b")

    with patch("services.ollama_service.requests.post", return_value=canned) as post_spy:
        result = service.generate(
            "hi",
            timeout=120,
            keep_alive="24h",
            options={"num_ctx": 131072, "num_predict": 4096},
        )

    sent = post_spy.call_args.kwargs
    body = sent["json"]
    # The request must carry the patched fallback values, not the caller's.
    assert body["model"] == "qwen2.5:7b-instruct"
    assert body["keep_alive"] == "5m"
    assert body["options"]["num_ctx"] == 4096
    assert body["options"]["num_predict"] == 512
    assert sent["timeout"] == 20
    assert result.model == "qwen2.5:7b-instruct"
|
||
|
||
|
||
def test_111_fallback_keeps_light_model_but_caps_timeout(monkeypatch):
    """On the 111 host a Timeout yields a failed response reporting the 20s cap."""
    from requests import Timeout
    from services import ollama_service as oss

    fallback_settings = {
        "FALLBACK_111_KEEP_ALIVE": "5m",
        "FALLBACK_111_MAX_TIMEOUT": 20,
        "FALLBACK_111_NUM_CTX": 4096,
        "FALLBACK_111_NUM_PREDICT": 512,
    }
    for attr_name, attr_value in fallback_settings.items():
        monkeypatch.setattr(oss, attr_name, attr_value)

    service = oss.OllamaService(host="http://192.168.0.111:11434", model="llama3.2:latest")

    with patch("services.ollama_service.requests.post", side_effect=Timeout):
        result = service.generate("hi", timeout=120, keep_alive="24h")

    assert result.success is False
    # The error text reports the patched 20s cap, not the requested 120s.
    assert "timeout (20s)" in result.error
|
||
|
||
|
||
def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
    """A model matching the "hermes3:*" pattern is swapped for the fallback model on the 111 host."""
    from services import ollama_service as oss

    fallback_settings = {
        "FALLBACK_111_MODEL": "llama3.2:latest",
        "FALLBACK_111_KEEP_ALIVE": "5m",
        "FALLBACK_111_MAX_TIMEOUT": 20,
        "FALLBACK_111_NUM_CTX": 4096,
        "FALLBACK_111_NUM_PREDICT": 512,
        "FALLBACK_111_MODEL_PATTERNS": ("hermes3:*",),
    }
    for attr_name, attr_value in fallback_settings.items():
        monkeypatch.setattr(oss, attr_name, attr_value)

    canned = MagicMock(status_code=200)
    canned.json.return_value = {
        "response": "ok",
        "prompt_eval_count": 3,
        "eval_count": 2,
        "total_duration": 1_000_000_000,
    }
    service = oss.OllamaService(host="http://192.168.0.111:11434", model="hermes3:latest")

    with patch("services.ollama_service.requests.post", return_value=canned) as post_spy:
        result = service.generate("hi", timeout=120, keep_alive="24h")

    body = post_spy.call_args.kwargs["json"]
    assert body["model"] == "llama3.2:latest"
    assert body["keep_alive"] == "5m"
    assert body["options"]["num_ctx"] == 4096
    assert body["options"]["num_predict"] == 512
    assert result.model == "llama3.2:latest"
|