312 lines
14 KiB
Python
312 lines
14 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
ADR-027 Phase 2 — verification of Ollama host resolution, health probing, and unhealthy marking

Covers:
B3 — _is_reachable HTTP probe (replaces the plain TCP check)
B4 — mark_unhealthy / _is_unhealthy / interaction with cache invalidation
B1/B2 — lazy behaviour of config.get_ollama_host / get_embedding_host / get_hermes_url
|
||
"""
|
||
|
||
import os
|
||
import time
|
||
import importlib
|
||
from unittest.mock import patch, MagicMock
|
||
|
||
import pytest
|
||
|
||
|
||
# ─────────────────────────────────────────────────────────────────────────────
# Required: clear ollama_service's module-level caches before every test to
# avoid cross-test pollution.
# ─────────────────────────────────────────────────────────────────────────────
|
||
@pytest.fixture(autouse=True)
def _reset_ollama_caches():
    """Reset ollama_service's module-level state around every test.

    The 'host' and 'ts' entries of the resolved-host cache are reset and the
    unhealthy marks are cleared, once before the test body runs and once
    again after it finishes.
    """
    from services import ollama_service as oss

    def _wipe():
        # Look the containers up on each call so we always touch the live objects.
        cache = oss._resolved_host_cache
        cache['host'] = None
        cache['ts'] = 0
        oss._unhealthy_marks.clear()

    _wipe()
    yield
    _wipe()
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# B3 — HTTP probe replaces the TCP check
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_resolve_picks_primary_when_http_200():
    """A probe answering HTTP 200 makes resolve return the primary host.

    Also checks that the recorded probe call targeted `<primary>/api/version`.
    """
    from services import ollama_service as oss

    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response) as probe:
        chosen = oss.resolve_ollama_host(
            primary='http://primary.example:11434',
            secondary='http://secondary.example:11434',
            fallback='http://fallback.example:11434',
        )

    assert chosen == 'http://primary.example:11434'

    # Verify that the probe went to /api/version (first positional argument).
    probed_url = probe.call_args[0][0]
    assert probed_url == 'http://primary.example:11434/api/version'
|
||
|
||
|
||
def test_resolve_falls_back_when_http_500():
    """B3 key case: TCP connects but HTTP 500 (hung process) also counts as unreachable."""
    from services import ollama_service as oss

    candidates = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    # Every probe, whichever host it targets, answers 500.
    failing_response = MagicMock(status_code=500)
    with patch('services.ollama_service.requests.get', return_value=failing_response):
        chosen = oss.resolve_ollama_host(**candidates)

    assert chosen == 'http://fallback.example:11434'
|
||
|
||
|
||
def test_resolve_falls_back_on_request_exception():
    """When every probe raises, resolve returns the fallback host."""
    from services import ollama_service as oss

    probe_error = Exception('connect timeout')
    probe_patch = patch('services.ollama_service.requests.get', side_effect=probe_error)
    with probe_patch:
        chosen = oss.resolve_ollama_host(
            primary='http://primary.example:11434',
            secondary='http://secondary.example:11434',
            fallback='http://fallback.example:11434',
        )

    assert chosen == 'http://fallback.example:11434'
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# B4 — mark_unhealthy behaviour
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_mark_unhealthy_triggers_immediate_fallback():
    """B4: once primary is marked unhealthy, resolve moves on to secondary.

    In the three-host setup the next choice is secondary, not fallback.
    """
    from services import ollama_service as oss

    primary_url = 'http://primary.example:11434'
    secondary_url = 'http://secondary.example:11434'
    oss.mark_unhealthy(primary_url)

    # Even though the HTTP probe would answer 200, a host marked unhealthy
    # must still be skipped.
    healthy_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_response):
        picked = oss.resolve_ollama_host(
            primary=primary_url,
            secondary=secondary_url,
            fallback='http://fallback.example:11434',
        )

    # Three-host setup: primary unavailable -> secondary (not fallback).
    assert picked == secondary_url
|
||
|
||
|
||
def test_mark_unhealthy_primary_and_secondary_falls_to_fallback():
    """B4 three-host cascade: fallback is used only when primary and secondary are both down."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    # Mark the first two tiers unhealthy, in priority order.
    for tier in ('primary', 'secondary'):
        oss.mark_unhealthy(hosts[tier])

    healthy_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_response):
        picked = oss.resolve_ollama_host(**hosts)

    assert picked == 'http://fallback.example:11434'
|
||
|
||
|
||
def test_unhealthy_mark_expires_after_ttl():
    """An unhealthy mark backdated by 31 seconds no longer excludes primary.

    NOTE(review): presumably the unhealthy TTL is 30 s — confirm against
    ollama_service.
    """
    from services import ollama_service as oss

    primary_url = 'http://primary.example:11434'
    oss.mark_unhealthy(primary_url)
    # Backdate the mark so it looks as if it was set 31 seconds ago.
    oss._unhealthy_marks[primary_url] = time.time() - 31

    healthy_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_response):
        picked = oss.resolve_ollama_host(
            primary=primary_url,
            secondary='http://secondary.example:11434',
            fallback='http://fallback.example:11434',
        )

    assert picked == primary_url
|
||
|
||
|
||
def test_mark_unhealthy_invalidates_resolved_cache():
    """B4 key point: after a host is marked unhealthy, a stale cache must not return it."""
    from services import ollama_service as oss

    primary_url = 'http://primary.example:11434'
    secondary_url = 'http://secondary.example:11434'

    def resolve_with_healthy_probe():
        # A fresh 200 response mock per resolve, patched only for that call.
        with patch('services.ollama_service.requests.get',
                   return_value=MagicMock(status_code=200)):
            return oss.resolve_ollama_host(
                primary=primary_url,
                secondary=secondary_url,
                fallback='http://fallback.example:11434',
            )

    # First get primary into the cache.
    first = resolve_with_healthy_probe()
    assert first == primary_url

    # Simulate a failed generate call: mark primary unhealthy.
    oss.mark_unhealthy(primary_url)

    # Even with the cache still inside its TTL (120s), the next resolve must
    # skip primary; in the three-host setup it should land on secondary.
    second = resolve_with_healthy_probe()
    assert second == secondary_url
|
||
|
||
|
||
def test_mark_unhealthy_handles_trailing_slash():
    """A mark set with a trailing slash is seen for both spellings of the URL."""
    from services import ollama_service as oss

    oss.mark_unhealthy('http://primary.example:11434/')

    # Check the bare form first, then the trailing-slash form.
    for spelling in ('http://primary.example:11434', 'http://primary.example:11434/'):
        assert oss._is_unhealthy(spelling) is True
|
||
|
||
|
||
def test_mark_unhealthy_ignores_empty():
    """Marking '' or None must leave the unhealthy-mark table empty."""
    from services import ollama_service as oss

    for blank in ('', None):
        oss.mark_unhealthy(blank)

    remaining_marks = len(oss._unhealthy_marks)
    assert remaining_marks == 0
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
|
||
# B1/B2 — config lazy getters
|
||
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_get_ollama_host_uses_approved_env_when_set(monkeypatch):
    """With OLLAMA_HOST set to http://34.21.145.224:11434, the getter returns that value."""
    env_host = 'http://34.21.145.224:11434'
    monkeypatch.setenv('OLLAMA_HOST', env_host)

    import config as cfg
    importlib.reload(cfg)  # make sure the env change takes effect

    assert cfg.get_ollama_host() == env_host
|
||
|
||
|
||
def test_get_ollama_host_rejects_unapproved_env(monkeypatch):
    """An OLLAMA_HOST value that is not accepted is ignored in favour of resolve.

    OLLAMA_HOST is set to http://192.168.0.188:11434 and every HTTP probe is
    mocked to answer 200; the getter is expected to return
    http://34.143.170.20:11434 instead of the env value.
    """
    monkeypatch.setenv('OLLAMA_HOST', 'http://192.168.0.188:11434')
    # The expected host below is the built-in default primary. Assumes the
    # primary URL can be overridden through OLLAMA_HOST_PRIMARY — TODO confirm
    # against config. Clearing it keeps the assertion independent of whatever
    # the developer or CI environment exports.
    monkeypatch.delenv('OLLAMA_HOST_PRIMARY', raising=False)

    fake_resp = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=fake_resp):
        import config
        importlib.reload(config)  # re-read the patched environment
        host = config.get_ollama_host()

    assert host == 'http://34.143.170.20:11434'
|
||
|
||
|
||
def test_get_ollama_host_falls_back_to_resolve_without_env(monkeypatch):
    """Without OLLAMA_HOST in the environment the getter still yields an http:// URL."""
    monkeypatch.delenv('OLLAMA_HOST', raising=False)

    healthy_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_response):
        # Reload so the module sees the environment as patched above.
        import config as cfg
        importlib.reload(cfg)
        resolved = cfg.get_ollama_host()

    # The primary URL is controlled by env OLLAMA_HOST_PRIMARY
    # (default GCP-SSD 34.143.170.20).
    assert resolved.startswith('http://')
|
||
|
||
|
||
def test_get_embedding_host_prefers_env(monkeypatch):
    """With EMBEDDING_HOST set, get_embedding_host returns that value."""
    env_host = 'http://192.168.0.111:11434'
    monkeypatch.setenv('EMBEDDING_HOST', env_host)

    import config as cfg
    importlib.reload(cfg)

    assert cfg.get_embedding_host() == env_host
|
||
|
||
|
||
def test_get_hermes_url_prefers_env(monkeypatch):
    """With HERMES_URL set, get_hermes_url returns that value."""
    env_url = 'http://34.143.170.20:11434'
    monkeypatch.setenv('HERMES_URL', env_url)

    import config as cfg
    importlib.reload(cfg)

    assert cfg.get_hermes_url() == env_url
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# Cache TTL behaviour
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_resolve_uses_cache_within_ttl():
    """A second resolve within the same TTL must not issue another HTTP call."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    healthy_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_response) as probe:
        # Resolve twice back to back with identical arguments.
        for _ in range(2):
            oss.resolve_ollama_host(**hosts)

        # Cache hit -> the second resolve performs no HTTP request.
        assert probe.call_count == 1
|
||
|
||
|
||
def test_111_fallback_downgrades_heavy_model_and_shortens_keep_alive(monkeypatch):
    """111 is the final fallback; 14B+ models must not stay resident on this 16GB Mac."""
    from services import ollama_service as oss

    # Pin the fallback policy knobs so the test does not depend on module defaults.
    overrides = {
        "FALLBACK_111_MODEL": "qwen2.5:7b-instruct",
        "FALLBACK_111_KEEP_ALIVE": "5m",
        "FALLBACK_111_MAX_TIMEOUT": 45,
        "FALLBACK_111_NUM_CTX": 4096,
        "FALLBACK_111_MODEL_PATTERNS": ("qwen3:14b",),
    }
    for attr_name, attr_value in overrides.items():
        monkeypatch.setattr(oss, attr_name, attr_value)

    generate_body = {
        "response": "ok",
        "prompt_eval_count": 3,
        "eval_count": 2,
        "total_duration": 1_000_000_000,
    }
    ok_response = MagicMock(status_code=200)
    ok_response.json.return_value = generate_body
    service = oss.OllamaService(host="http://192.168.0.111:11434", model="qwen3:14b")

    with patch("services.ollama_service.requests.post", return_value=ok_response) as post_mock:
        result = service.generate("hi", timeout=120, keep_alive="24h")

    # Inspect what was actually sent on the wire.
    sent_kwargs = post_mock.call_args.kwargs
    sent_payload = sent_kwargs["json"]
    assert sent_payload["model"] == "qwen2.5:7b-instruct"
    assert sent_payload["keep_alive"] == "5m"
    assert sent_payload["options"]["num_ctx"] == 4096
    assert sent_kwargs["timeout"] == 45
    assert result.model == "qwen2.5:7b-instruct"
|
||
|
||
|
||
def test_111_fallback_keeps_light_model_but_caps_timeout(monkeypatch):
    """On the 111 host a light model is sent unchanged while the timeout is capped.

    requests.post is mocked to raise Timeout. The returned response must be a
    failure whose error text reports the capped 45 s timeout rather than the
    requested 120 s, and the payload must still name the requested model.
    """
    from requests import Timeout
    from services import ollama_service as oss

    monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
    monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 45)
    monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
    # Pin the heavy-model patterns, as the sibling 111 tests do, so the
    # "light model" premise does not depend on the module's default patterns.
    monkeypatch.setattr(oss, "FALLBACK_111_MODEL_PATTERNS", ("qwen3:14b",))
    svc = oss.OllamaService(host="http://192.168.0.111:11434", model="llama3.2:latest")

    with patch("services.ollama_service.requests.post", side_effect=Timeout) as mock_post:
        resp = svc.generate("hi", timeout=120, keep_alive="24h")

    assert resp.success is False
    assert "timeout (45s)" in resp.error
    # The name promises the light model is kept: check what was sent.
    # Assumes the payload is passed as the `json` keyword, as the sibling
    # 111 tests read it — TODO confirm.
    assert mock_post.call_args.kwargs["json"]["model"] == "llama3.2:latest"
|
||
|
||
|
||
def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
    """A model matching the "hermes3:*" pattern is swapped for the fallback model on 111.

    The payload sent must carry the fallback model, the short keep_alive and
    the reduced num_ctx, and the response must report the fallback model.
    """
    from services import ollama_service as oss

    fallback_model = "llama3.2:latest"
    for attr_name, attr_value in (
        ("FALLBACK_111_MODEL", fallback_model),
        ("FALLBACK_111_KEEP_ALIVE", "5m"),
        ("FALLBACK_111_MAX_TIMEOUT", 45),
        ("FALLBACK_111_NUM_CTX", 4096),
        ("FALLBACK_111_MODEL_PATTERNS", ("hermes3:*",)),
    ):
        monkeypatch.setattr(oss, attr_name, attr_value)

    ok_response = MagicMock(status_code=200)
    ok_response.json.return_value = {
        "response": "ok",
        "prompt_eval_count": 3,
        "eval_count": 2,
        "total_duration": 1_000_000_000,
    }
    service = oss.OllamaService(host="http://192.168.0.111:11434", model="hermes3:latest")

    with patch("services.ollama_service.requests.post", return_value=ok_response) as post_mock:
        result = service.generate("hi", timeout=120, keep_alive="24h")

    sent_payload = post_mock.call_args.kwargs["json"]
    assert sent_payload["model"] == fallback_model
    assert sent_payload["keep_alive"] == "5m"
    assert sent_payload["options"]["num_ctx"] == 4096
    assert result.model == fallback_model
|