收緊 111 Ollama fallback 資源上限
Some checks failed
CD Pipeline / deploy (push) Failing after 11m7s

This commit is contained in:
OoO
2026-05-21 18:06:09 +08:00
committed by AiderHeal Bot
parent 9ada32477c
commit 106c1935f4
6 changed files with 32 additions and 13 deletions

View File

@@ -243,8 +243,9 @@ def test_111_fallback_downgrades_heavy_model_and_shortens_keep_alive(monkeypatch
monkeypatch.setattr(oss, "FALLBACK_111_MODEL", "qwen2.5:7b-instruct")
monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
-monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 45)
+monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 20)
monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
+monkeypatch.setattr(oss, "FALLBACK_111_NUM_PREDICT", 512)
monkeypatch.setattr(oss, "FALLBACK_111_MODEL_PATTERNS", ("qwen3:14b",))
fake_resp = MagicMock(status_code=200)
@@ -257,13 +258,19 @@ def test_111_fallback_downgrades_heavy_model_and_shortens_keep_alive(monkeypatch
svc = oss.OllamaService(host="http://192.168.0.111:11434", model="qwen3:14b")
with patch("services.ollama_service.requests.post", return_value=fake_resp) as mock_post:
-resp = svc.generate("hi", timeout=120, keep_alive="24h")
+resp = svc.generate(
+"hi",
+timeout=120,
+keep_alive="24h",
+options={"num_ctx": 131072, "num_predict": 4096},
+)
payload = mock_post.call_args.kwargs["json"]
assert payload["model"] == "qwen2.5:7b-instruct"
assert payload["keep_alive"] == "5m"
assert payload["options"]["num_ctx"] == 4096
-assert mock_post.call_args.kwargs["timeout"] == 45
+assert payload["options"]["num_predict"] == 512
+assert mock_post.call_args.kwargs["timeout"] == 20
assert resp.model == "qwen2.5:7b-instruct"
@@ -272,15 +279,16 @@ def test_111_fallback_keeps_light_model_but_caps_timeout(monkeypatch):
from services import ollama_service as oss
monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
-monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 45)
+monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 20)
monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
+monkeypatch.setattr(oss, "FALLBACK_111_NUM_PREDICT", 512)
svc = oss.OllamaService(host="http://192.168.0.111:11434", model="llama3.2:latest")
with patch("services.ollama_service.requests.post", side_effect=Timeout):
resp = svc.generate("hi", timeout=120, keep_alive="24h")
assert resp.success is False
-assert "timeout (45s)" in resp.error
+assert "timeout (20s)" in resp.error
def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
@@ -288,8 +296,9 @@ def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
monkeypatch.setattr(oss, "FALLBACK_111_MODEL", "llama3.2:latest")
monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
-monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 45)
+monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 20)
monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
+monkeypatch.setattr(oss, "FALLBACK_111_NUM_PREDICT", 512)
monkeypatch.setattr(oss, "FALLBACK_111_MODEL_PATTERNS", ("hermes3:*",))
fake_resp = MagicMock(status_code=200)
@@ -308,4 +317,5 @@ def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
assert payload["model"] == "llama3.2:latest"
assert payload["keep_alive"] == "5m"
assert payload["options"]["num_ctx"] == 4096
+assert payload["options"]["num_predict"] == 512
assert resp.model == "llama3.2:latest"