This commit is contained in:
@@ -243,8 +243,9 @@ def test_111_fallback_downgrades_heavy_model_and_shortens_keep_alive(monkeypatch
|
||||
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MODEL", "qwen2.5:7b-instruct")
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 45)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 20)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_NUM_PREDICT", 512)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MODEL_PATTERNS", ("qwen3:14b",))
|
||||
|
||||
fake_resp = MagicMock(status_code=200)
|
||||
@@ -257,13 +258,19 @@ def test_111_fallback_downgrades_heavy_model_and_shortens_keep_alive(monkeypatch
|
||||
svc = oss.OllamaService(host="http://192.168.0.111:11434", model="qwen3:14b")
|
||||
|
||||
with patch("services.ollama_service.requests.post", return_value=fake_resp) as mock_post:
|
||||
resp = svc.generate("hi", timeout=120, keep_alive="24h")
|
||||
resp = svc.generate(
|
||||
"hi",
|
||||
timeout=120,
|
||||
keep_alive="24h",
|
||||
options={"num_ctx": 131072, "num_predict": 4096},
|
||||
)
|
||||
|
||||
payload = mock_post.call_args.kwargs["json"]
|
||||
assert payload["model"] == "qwen2.5:7b-instruct"
|
||||
assert payload["keep_alive"] == "5m"
|
||||
assert payload["options"]["num_ctx"] == 4096
|
||||
assert mock_post.call_args.kwargs["timeout"] == 45
|
||||
assert payload["options"]["num_predict"] == 512
|
||||
assert mock_post.call_args.kwargs["timeout"] == 20
|
||||
assert resp.model == "qwen2.5:7b-instruct"
|
||||
|
||||
|
||||
@@ -272,15 +279,16 @@ def test_111_fallback_keeps_light_model_but_caps_timeout(monkeypatch):
|
||||
from services import ollama_service as oss
|
||||
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 45)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 20)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_NUM_PREDICT", 512)
|
||||
svc = oss.OllamaService(host="http://192.168.0.111:11434", model="llama3.2:latest")
|
||||
|
||||
with patch("services.ollama_service.requests.post", side_effect=Timeout):
|
||||
resp = svc.generate("hi", timeout=120, keep_alive="24h")
|
||||
|
||||
assert resp.success is False
|
||||
assert "timeout (45s)" in resp.error
|
||||
assert "timeout (20s)" in resp.error
|
||||
|
||||
|
||||
def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
|
||||
@@ -288,8 +296,9 @@ def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
|
||||
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MODEL", "llama3.2:latest")
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 45)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 20)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_NUM_PREDICT", 512)
|
||||
monkeypatch.setattr(oss, "FALLBACK_111_MODEL_PATTERNS", ("hermes3:*",))
|
||||
|
||||
fake_resp = MagicMock(status_code=200)
|
||||
@@ -308,4 +317,5 @@ def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
|
||||
assert payload["model"] == "llama3.2:latest"
|
||||
assert payload["keep_alive"] == "5m"
|
||||
assert payload["options"]["num_ctx"] == 4096
|
||||
assert payload["options"]["num_predict"] == 512
|
||||
assert resp.model == "llama3.2:latest"
|
||||
|
||||
Reference in New Issue
Block a user