Files
ewoooc/tests/test_ollama_resolve.py
OoO 106c1935f4
Some checks failed
CD Pipeline / deploy (push) Failing after 11m7s
收緊 111 Ollama fallback 資源上限
2026-05-21 18:13:50 +08:00

322 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
ADR-027 Phase 2 — verification of Ollama host resolve / health probe / unhealthy mark
Covers:
B3 — _is_reachable uses an HTTP probe instead of a bare TCP check
B4 — interaction between mark_unhealthy / _is_unhealthy / cache invalidation
B1/B2 — lazy behaviour of config.get_ollama_host / get_embedding_host / get_hermes_url
"""
import os
import time
import importlib
from unittest.mock import patch, MagicMock
import pytest
# ─────────────────────────────────────────────────────────────────────────────
# Required: clear ollama_service's module-level caches before every test to avoid cross-test pollution
# ─────────────────────────────────────────────────────────────────────────────
@pytest.fixture(autouse=True)
def _reset_ollama_caches():
    """Wipe ollama_service's module-level state before and after each test.

    Resets the cached resolved host and its timestamp, and drops every
    unhealthy mark, so that one test cannot observe state left by another.
    """
    from services import ollama_service as oss

    def _wipe():
        # Same three resets on both sides of the test run.
        oss._resolved_host_cache['host'] = None
        oss._resolved_host_cache['ts'] = 0
        oss._unhealthy_marks.clear()

    _wipe()
    yield
    _wipe()
# ═══════════════════════════════════════════════════════════════════════════
# B3 — HTTP probe replaces TCP
# ═══════════════════════════════════════════════════════════════════════════
def test_resolve_picks_primary_when_http_200():
    """Primary is chosen when the probe answers HTTP 200 on /api/version."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response) as probe:
        chosen = oss.resolve_ollama_host(**hosts)

    assert chosen == 'http://primary.example:11434'
    # Verify that the probe was sent to /api/version.
    probed_url = probe.call_args.args[0]
    assert probed_url == 'http://primary.example:11434/api/version'
def test_resolve_falls_back_when_http_500():
    """B3 key point: TCP reachable but HTTP 500 (hung process) also counts as unreachable."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    # Every probe in this test answers with HTTP 500.
    server_error = MagicMock(status_code=500)
    with patch('services.ollama_service.requests.get', return_value=server_error):
        chosen = oss.resolve_ollama_host(**hosts)

    assert chosen == 'http://fallback.example:11434'
def test_resolve_falls_back_on_request_exception():
    """When every probe raises, resolve is expected to return the fallback host."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    probe_failure = Exception('connect timeout')
    with patch('services.ollama_service.requests.get', side_effect=probe_failure):
        chosen = oss.resolve_ollama_host(**hosts)

    assert chosen == 'http://fallback.example:11434'
# ═══════════════════════════════════════════════════════════════════════════
# B4 — mark_unhealthy behaviour
# ═══════════════════════════════════════════════════════════════════════════
def test_mark_unhealthy_triggers_immediate_fallback():
    """B4: once primary is marked unhealthy, the three-host setup moves to secondary (not fallback)."""
    from services import ollama_service as oss

    primary = 'http://primary.example:11434'
    secondary = 'http://secondary.example:11434'
    oss.mark_unhealthy(primary)

    # Even though the HTTP probe would answer 200, a host marked unhealthy
    # must still be skipped.
    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response):
        chosen = oss.resolve_ollama_host(
            primary=primary,
            secondary=secondary,
            fallback='http://fallback.example:11434',
        )

    # Three-host setup: primary unavailable -> secondary (not fallback).
    assert chosen == secondary
def test_mark_unhealthy_primary_and_secondary_falls_to_fallback():
    """B4 three-host cascade: fallback is used only when primary and secondary are both down."""
    from services import ollama_service as oss

    primary = 'http://primary.example:11434'
    secondary = 'http://secondary.example:11434'
    fallback = 'http://fallback.example:11434'
    for down_host in (primary, secondary):
        oss.mark_unhealthy(down_host)

    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response):
        chosen = oss.resolve_ollama_host(
            primary=primary,
            secondary=secondary,
            fallback=fallback,
        )

    assert chosen == fallback
def test_unhealthy_mark_expires_after_ttl():
    """A mark back-dated by 31 seconds no longer keeps primary from being chosen."""
    from services import ollama_service as oss

    primary = 'http://primary.example:11434'
    oss.mark_unhealthy(primary)
    # Fake a mark that was placed 31 seconds ago.
    oss._unhealthy_marks[primary] = time.time() - 31

    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response):
        chosen = oss.resolve_ollama_host(
            primary=primary,
            secondary='http://secondary.example:11434',
            fallback='http://fallback.example:11434',
        )

    assert chosen == primary
def test_mark_unhealthy_invalidates_resolved_cache():
    """B4 focus: after being marked unhealthy, a stale cache must not return primary again."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }

    # First let primary get cached.
    with patch('services.ollama_service.requests.get',
               return_value=MagicMock(status_code=200)):
        before_mark = oss.resolve_ollama_host(**hosts)
    assert before_mark == 'http://primary.example:11434'

    # Simulate a failed generate call by marking primary unhealthy.
    oss.mark_unhealthy('http://primary.example:11434')

    # Even with the cache still inside its TTL (120s), the next resolve must
    # skip primary; in the three-host setup it should move to secondary.
    with patch('services.ollama_service.requests.get',
               return_value=MagicMock(status_code=200)):
        after_mark = oss.resolve_ollama_host(**hosts)
    assert after_mark == 'http://secondary.example:11434'
def test_mark_unhealthy_handles_trailing_slash():
    """Marking a URL with a trailing slash flags both spellings as unhealthy."""
    from services import ollama_service as oss

    oss.mark_unhealthy('http://primary.example:11434/')

    # Check the URL without and with the trailing slash.
    for spelling in ('http://primary.example:11434', 'http://primary.example:11434/'):
        assert oss._is_unhealthy(spelling) is True
def test_mark_unhealthy_ignores_empty():
    """Empty-string and None hosts must not create any unhealthy mark."""
    from services import ollama_service as oss

    for blank_host in ('', None):
        oss.mark_unhealthy(blank_host)

    assert not oss._unhealthy_marks
# ═══════════════════════════════════════════════════════════════════════════
# B1/B2 — config lazy getters
# ═══════════════════════════════════════════════════════════════════════════
def test_get_ollama_host_uses_approved_env_when_set(monkeypatch):
    """config.get_ollama_host() returns the OLLAMA_HOST value set for this test."""
    env_host = 'http://34.21.145.224:11434'
    monkeypatch.setenv('OLLAMA_HOST', env_host)

    import config
    importlib.reload(config)  # make sure the env change takes effect

    assert config.get_ollama_host() == env_host
def test_get_ollama_host_rejects_unapproved_env(monkeypatch):
    """With OLLAMA_HOST set to 192.168.0.188, the getter is expected to return 34.143.170.20 instead."""
    monkeypatch.setenv('OLLAMA_HOST', 'http://192.168.0.188:11434')

    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response):
        import config
        importlib.reload(config)
        resolved = config.get_ollama_host()

    assert resolved == 'http://34.143.170.20:11434'
def test_get_ollama_host_falls_back_to_resolve_without_env(monkeypatch):
    """Without OLLAMA_HOST in the environment, the getter still yields an http:// URL."""
    monkeypatch.delenv('OLLAMA_HOST', raising=False)

    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response):
        # Reload so the module sees the environment without OLLAMA_HOST.
        import config
        importlib.reload(config)
        resolved = config.get_ollama_host()

    # The primary URL is controlled by env OLLAMA_HOST_PRIMARY
    # (default: GCP-SSD 34.143.170.20), so only the scheme is checked.
    assert resolved.startswith('http://')
def test_get_embedding_host_prefers_env(monkeypatch):
    """config.get_embedding_host() returns the EMBEDDING_HOST value set for this test."""
    env_host = 'http://192.168.0.111:11434'
    monkeypatch.setenv('EMBEDDING_HOST', env_host)

    import config
    importlib.reload(config)

    assert config.get_embedding_host() == env_host
def test_get_hermes_url_prefers_env(monkeypatch):
    """config.get_hermes_url() returns the HERMES_URL value set for this test."""
    env_url = 'http://34.143.170.20:11434'
    monkeypatch.setenv('HERMES_URL', env_url)

    import config
    importlib.reload(config)

    assert config.get_hermes_url() == env_url
# ═══════════════════════════════════════════════════════════════════════════
# Cache TTL behaviour
# ═══════════════════════════════════════════════════════════════════════════
def test_resolve_uses_cache_within_ttl():
    """A second resolve inside the same TTL must not issue another HTTP call."""
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response) as probe:
        for _ in range(2):
            oss.resolve_ollama_host(**hosts)

    # Cache hit: the second resolve sends no HTTP request.
    assert probe.call_count == 1
def test_111_fallback_downgrades_heavy_model_and_shortens_keep_alive(monkeypatch):
    """111 is the final fallback; 14B+ models must not stay resident on this 16GB Mac."""
    from services import ollama_service as oss

    # Pin every fallback limit the assertions below depend on.
    pinned_limits = {
        "FALLBACK_111_MODEL": "qwen2.5:7b-instruct",
        "FALLBACK_111_KEEP_ALIVE": "5m",
        "FALLBACK_111_MAX_TIMEOUT": 20,
        "FALLBACK_111_NUM_CTX": 4096,
        "FALLBACK_111_NUM_PREDICT": 512,
        "FALLBACK_111_MODEL_PATTERNS": ("qwen3:14b",),
    }
    for attr_name, attr_value in pinned_limits.items():
        monkeypatch.setattr(oss, attr_name, attr_value)

    fake_resp = MagicMock(status_code=200)
    fake_resp.json.return_value = {
        "response": "ok",
        "prompt_eval_count": 3,
        "eval_count": 2,
        "total_duration": 1_000_000_000,
    }
    svc = oss.OllamaService(host="http://192.168.0.111:11434", model="qwen3:14b")
    with patch("services.ollama_service.requests.post", return_value=fake_resp) as mock_post:
        resp = svc.generate(
            "hi",
            timeout=120,
            keep_alive="24h",
            options={"num_ctx": 131072, "num_predict": 4096},
        )

    # Inspect what was actually handed to requests.post.
    sent_kwargs = mock_post.call_args.kwargs
    payload = sent_kwargs["json"]
    assert payload["model"] == "qwen2.5:7b-instruct"
    assert payload["keep_alive"] == "5m"
    assert payload["options"]["num_ctx"] == 4096
    assert payload["options"]["num_predict"] == 512
    assert sent_kwargs["timeout"] == 20
    assert resp.model == "qwen2.5:7b-instruct"
def test_111_fallback_keeps_light_model_but_caps_timeout(monkeypatch):
    """A light model on the 111 host is sent unchanged, but its timeout is capped.

    The downgrade target and the heavy-model patterns are pinned here so the
    test does not depend on the module's ambient defaults, and the outgoing
    request is inspected so that "keeps light model" is actually verified.
    """
    from requests import Timeout
    from services import ollama_service as oss

    # Pin a downgrade target that differs from the requested model, and
    # patterns that do not match it, so an unwanted downgrade is detectable.
    monkeypatch.setattr(oss, "FALLBACK_111_MODEL", "qwen2.5:7b-instruct")
    monkeypatch.setattr(oss, "FALLBACK_111_MODEL_PATTERNS", ("qwen3:14b",))
    monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
    monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 20)
    monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
    monkeypatch.setattr(oss, "FALLBACK_111_NUM_PREDICT", 512)

    svc = oss.OllamaService(host="http://192.168.0.111:11434", model="llama3.2:latest")
    with patch("services.ollama_service.requests.post", side_effect=Timeout) as mock_post:
        resp = svc.generate("hi", timeout=120, keep_alive="24h")

    assert resp.success is False
    assert "timeout (20s)" in resp.error
    # The requested 120s timeout must have been capped on the actual request.
    assert mock_post.call_args.kwargs["timeout"] == 20
    # The light model must reach the server as requested, not downgraded.
    assert mock_post.call_args.kwargs["json"]["model"] == "llama3.2:latest"
def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
    """A model matching the "hermes3:*" pattern is downgraded on the 111 host.

    Checks the model, keep_alive, context/predict limits and the capped
    timeout of the outgoing request, plus the model reported on the response.
    """
    from services import ollama_service as oss

    monkeypatch.setattr(oss, "FALLBACK_111_MODEL", "llama3.2:latest")
    monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
    monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 20)
    monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
    monkeypatch.setattr(oss, "FALLBACK_111_NUM_PREDICT", 512)
    monkeypatch.setattr(oss, "FALLBACK_111_MODEL_PATTERNS", ("hermes3:*",))

    fake_resp = MagicMock(status_code=200)
    fake_resp.json.return_value = {
        "response": "ok",
        "prompt_eval_count": 3,
        "eval_count": 2,
        "total_duration": 1_000_000_000,
    }
    svc = oss.OllamaService(host="http://192.168.0.111:11434", model="hermes3:latest")
    with patch("services.ollama_service.requests.post", return_value=fake_resp) as mock_post:
        resp = svc.generate("hi", timeout=120, keep_alive="24h")

    payload = mock_post.call_args.kwargs["json"]
    assert payload["model"] == "llama3.2:latest"
    assert payload["keep_alive"] == "5m"
    assert payload["options"]["num_ctx"] == 4096
    assert payload["options"]["num_predict"] == 512
    # FALLBACK_111_MAX_TIMEOUT is pinned above; verify the 120s request was capped.
    assert mock_post.call_args.kwargs["timeout"] == 20
    assert resp.model == "llama3.2:latest"