Files
ewoooc/tests/test_ollama_retry_chain.py
OoO ba5fe06b13
Some checks failed
CD Pipeline / deploy (push) Has been cancelled
fix: update ollama primary host
2026-06-18 14:24:55 +08:00

611 lines
26 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
tests/test_ollama_retry_chain.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 13 補強 — 三主機 retry 鏈驗證
驗證 hotfix e862a90 (generate retry) + 6572d52 (embed retry) 的邏輯:
1. self.host @property lazy resolve
2. generate / generate_embedding 失敗自動 retry 下一台主機(最多 3 次)
3. mark_unhealthy 後下次 self.host 取新主機
4. caller 顯式 host=... 凍結不 retry
5. 三主機都失敗 → return failure(不無限迴圈)
6. Phase 13 補強:OllamaResponse.input_tokens/output_tokens 解析
紀律:不打真實 Ollama,全 mock requests.post。
"""
from unittest.mock import patch, MagicMock
import pytest
# ═══════════════════════════════════════════════════════════════════════════
# Fixtures
# ═══════════════════════════════════════════════════════════════════════════
def _clear_ollama_module_state():
    """Reset the module-level state kept by ``services.ollama_service``.

    Empties the unhealthy-host marks, blanks the resolved-host cache and
    rewrites both circuit dicts with the baseline values used by this suite.
    """
    import services.ollama_service as oss

    oss._unhealthy_marks.clear()
    oss._resolved_host_cache['host'] = None
    oss._resolved_host_cache['ts'] = 0
    oss._fallback_111_circuit_cache.update({'blocked': False, 'reason': '', 'ts': 0})
    oss._embedding_gcp_failure_circuit.update({'blocked_until': 0.0, 'notice_ts': 0.0, 'tried': ()})


@pytest.fixture(autouse=True)
def _reset_state():
    """Run every test against clean module state and clean up afterwards.

    The same reset is applied before and after the test body so that state
    written by one test cannot be observed by the next one.
    """
    _clear_ollama_module_state()
    yield
    _clear_ollama_module_state()
# ═══════════════════════════════════════════════════════════════════════════
# T1: self.host 改 @property,不再凍結 instance
# ═══════════════════════════════════════════════════════════════════════════
def test_self_host_is_lazy_property_not_frozen():
    """``OllamaService.host`` is resolved on access instead of being pinned at construction."""
    from services.ollama_service import OllamaService

    service = OllamaService()
    # Without a host argument nothing is pinned on the instance.
    assert service._explicit_host is None

    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        return_value='http://test:11434',
    )
    with resolver_patch as mock_resolve:
        first_read = service.host
        second_read = service.host
    assert first_read == second_read == 'http://test:11434'
    # Each read of .host is expected to go through the resolver.
    assert mock_resolve.call_count >= 2
def test_explicit_host_freezes():
    """A host passed by the caller is pinned; the lazy resolver is never consulted."""
    from services.ollama_service import OllamaService

    service = OllamaService(host='http://explicit:11434')
    assert service._explicit_host == 'http://explicit:11434'

    with patch('services.ollama_service.resolve_ollama_host') as mock_resolve:
        observed = service.host
    assert observed == 'http://explicit:11434'
    mock_resolve.assert_not_called()
# ═══════════════════════════════════════════════════════════════════════════
# T2: generate 三主機 retry 鏈(核心 hotfix e862a90)
# ═══════════════════════════════════════════════════════════════════════════
def test_generate_retries_on_first_host_timeout():
    """The first POST times out and the retry against the next resolved host succeeds."""
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()
    ok_response = MagicMock(status_code=200)
    ok_response.json.return_value = {
        'response': 'OK from secondary',
        'prompt_eval_count': 100,
        'eval_count': 50,
    }
    # The resolver hands out a different host on each call.
    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=['http://host1:11434', 'http://host2:11434'],
    )
    # First POST raises a timeout, second one returns the 200 response.
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=[requests.Timeout('first host'), ok_response],
    )
    with resolver_patch, post_patch as mock_post:
        result = service.generate('test prompt')

    assert result.success is True
    assert result.content == 'OK from secondary'
    assert result.input_tokens == 100
    assert result.output_tokens == 50
    assert mock_post.call_count == 2  # one timeout, then one success
def test_generate_all_three_hosts_fail_returns_failure():
    """When all three hosts time out, ``generate`` returns a failure instead of looping."""
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()
    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=['http://h1:11434', 'http://h2:11434', 'http://h3:11434'],
    )
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=requests.Timeout('all timeout'),
    )
    with resolver_patch, post_patch as mock_post:
        result = service.generate('test')

    assert result.success is False
    assert mock_post.call_count == 3  # one attempt per host
    assert 'all 3 hosts failed' in (result.error or '')
def test_generate_same_host_breaks_loop():
    """If the resolver keeps returning the same host, ``generate`` stops after one POST."""
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()
    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        return_value='http://stuck:11434',
    )
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=requests.Timeout('stuck'),
    )
    with resolver_patch, post_patch as mock_post:
        result = service.generate('test')

    assert result.success is False
    # The host fails once; the next lookup yields the same host again, so the
    # retry loop is expected to break rather than hit it a second time.
    assert mock_post.call_count == 1
def test_generate_forces_final_fallback_when_unhealthy_ttl_expires_mid_request():
    """The third attempt must hit the 111 fallback even if the resolver picks primary again.

    The resolver sequence ends with the primary host a second time, which
    simulates its unhealthy mark expiring during a long timeout.
    """
    import requests
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    service = OllamaService()
    resolver_sequence = [
        oss.OLLAMA_HOST_PRIMARY,
        oss.OLLAMA_HOST_SECONDARY,
        oss.OLLAMA_HOST_PRIMARY,  # resolver selects primary again on the third lookup
    ]
    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=resolver_sequence,
    )
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=requests.Timeout('all timeout'),
    )
    with resolver_patch, post_patch as mock_post:
        result = service.generate('test')

    # Recover the host part of every URL that was POSTed to.
    posted_hosts = [
        recorded.args[0].split('/api/generate')[0]
        for recorded in mock_post.call_args_list
    ]
    assert result.success is False
    assert mock_post.call_count == 3
    assert posted_hosts == [oss.OLLAMA_HOST_PRIMARY, oss.OLLAMA_HOST_SECONDARY, oss.OLLAMA_HOST_FALLBACK]
    assert 'all 3 hosts failed' in (result.error or '')
def test_generate_uses_secondary_model_fallback_before_111():
    """A coder-model request on the secondary host is served there with the fallback model.

    The request asks for ``qwen2.5-coder:7b``; the POST payload and the
    response are both expected to carry ``gemma3:4b`` while the host stays
    the secondary one.
    """
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    service = OllamaService()
    ok_response = MagicMock(status_code=200)
    ok_response.json.return_value = {
        'response': 'OK from secondary fallback',
        'prompt_eval_count': 10,
        'eval_count': 5,
    }
    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        return_value=oss.OLLAMA_HOST_SECONDARY,
    )
    post_patch = patch('services.ollama_service.requests.post', return_value=ok_response)
    with resolver_patch, post_patch as mock_post:
        result = service.generate('test prompt', model='qwen2.5-coder:7b')

    assert result.success is True
    assert result.host == oss.OLLAMA_HOST_SECONDARY
    assert result.model == 'gemma3:4b'
    sent_payload = mock_post.call_args.kwargs['json']
    assert sent_payload['model'] == 'gemma3:4b'
def test_generate_model_404_does_not_mark_host_unhealthy():
    """A 404 for a missing model must not put the answering host on the unhealthy list."""
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    service = OllamaService()
    not_found = MagicMock(status_code=404, text='{"error":"model not found"}')
    ok_response = MagicMock(status_code=200)
    ok_response.json.return_value = {'response': 'OK', 'prompt_eval_count': 3, 'eval_count': 2}

    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=[oss.OLLAMA_HOST_SECONDARY, oss.OLLAMA_HOST_FALLBACK],
    )
    # The first POST answers 404, the second one succeeds.
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=[not_found, ok_response],
    )
    with resolver_patch, post_patch:
        result = service.generate('test prompt', model='tiny-missing-model')

    assert result.success is True
    assert oss.OLLAMA_HOST_SECONDARY not in oss._unhealthy_marks
def test_generate_can_disable_111_fallback_for_batch_llm_work():
    """With ``allow_111_fallback=False`` no request may reach the 111 fallback host.

    The resolver offers the secondary host and then the fallback host; the
    POSTs are expected to go to secondary and primary instead.
    """
    import requests
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    service = OllamaService()
    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=[oss.OLLAMA_HOST_SECONDARY, oss.OLLAMA_HOST_FALLBACK],
    )
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=requests.Timeout('secondary timeout'),
    )
    with resolver_patch, post_patch as mock_post:
        result = service.generate('test', allow_111_fallback=False)

    posted_hosts = [
        recorded.args[0].split('/api/generate')[0]
        for recorded in mock_post.call_args_list
    ]
    assert result.success is False
    assert posted_hosts == [oss.OLLAMA_HOST_SECONDARY, oss.OLLAMA_HOST_PRIMARY]
    assert oss.OLLAMA_HOST_FALLBACK not in posted_hosts
    assert 'timeout' in (result.error or '')
def test_generate_skips_111_when_circuit_breaker_blocks_fallback():
    """When the 111 circuit breaker reports blocked, the third attempt is not sent to 111."""
    import requests
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    service = OllamaService()

    def fake_111_circuit(host):
        # Only the 111 fallback host is reported as blocked.
        if host != oss.OLLAMA_HOST_FALLBACK:
            return False, ''
        return True, '111 circuit breaker active'

    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=[
            oss.OLLAMA_HOST_PRIMARY,
            oss.OLLAMA_HOST_SECONDARY,
            oss.OLLAMA_HOST_PRIMARY,
        ],
    )
    circuit_patch = patch(
        'services.ollama_service._fallback_111_block_reason',
        side_effect=fake_111_circuit,
    )
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=requests.Timeout('gcp timeout'),
    )
    with resolver_patch, circuit_patch, post_patch as mock_post:
        result = service.generate('test')

    posted_hosts = [
        recorded.args[0].split('/api/generate')[0]
        for recorded in mock_post.call_args_list
    ]
    assert result.success is False
    assert posted_hosts == [oss.OLLAMA_HOST_PRIMARY, oss.OLLAMA_HOST_SECONDARY]
    assert oss.OLLAMA_HOST_FALLBACK not in posted_hosts
    assert '111 circuit breaker active' in (result.error or '')
def test_111_circuit_breaker_blocks_when_recent_share_is_high(monkeypatch):
    """The 111 circuit breaker reports blocked when the stubbed DB row exceeds the thresholds.

    The fake session returns the row ``(100, 12)`` — presumably total calls
    and calls served by 111; confirm against the query in the service.
    """
    from services import ollama_service as oss

    class FakeResult:
        def fetchone(self):
            return (100, 12)

    class FakeSession:
        def execute(self, *args, **kwargs):
            return FakeResult()

        def close(self):
            pass

    def fake_get_session():
        return FakeSession()

    circuit_env = {
        'OLLAMA_111_CIRCUIT_CACHE_SEC': '60',
        'OLLAMA_111_CIRCUIT_PCT': '5',
        'OLLAMA_111_CIRCUIT_MIN_TOTAL': '20',
        'OLLAMA_111_CIRCUIT_MIN_111': '5',
    }
    for env_name, env_value in circuit_env.items():
        monkeypatch.setenv(env_name, env_value)
    monkeypatch.setattr('database.manager.get_session', fake_get_session)
    # Start from an unblocked cache entry with a zero timestamp.
    oss._fallback_111_circuit_cache.update({'blocked': False, 'reason': '', 'ts': 0})

    blocked, reason = oss._fallback_111_block_reason(oss.OLLAMA_HOST_FALLBACK)

    assert blocked is True
    assert '111 circuit breaker active' in reason
def test_111_circuit_breaker_fails_open_when_db_is_unavailable(monkeypatch):
    """A failing DB lookup must leave the 111 fallback unblocked (fail open)."""
    from services import ollama_service as oss

    def broken_get_session():
        # Stand-in for a database that cannot hand out a session.
        raise RuntimeError('db down')

    monkeypatch.setattr('database.manager.get_session', broken_get_session)
    oss._fallback_111_circuit_cache.update({'blocked': False, 'reason': '', 'ts': 0})

    blocked, reason = oss._fallback_111_block_reason(oss.OLLAMA_HOST_FALLBACK)

    assert blocked is False
    assert reason == ''
def test_generate_token_parsing_phase13():
    """Phase 13: ``generate`` exposes Ollama's token counters and duration.

    The mocked payload carries ``prompt_eval_count``, ``eval_count`` and a
    ``total_duration`` in nanoseconds; the response object is expected to
    report them as ``input_tokens``, ``output_tokens`` and ``total_duration``
    in seconds.
    """
    from services.ollama_service import OllamaService

    svc = OllamaService()
    fake_resp = MagicMock(status_code=200)
    fake_resp.json.return_value = {
        'response': 'hello',
        'prompt_eval_count': 250,
        'eval_count': 80,
        'total_duration': 1500000000,  # 1.5 s expressed in nanoseconds
    }
    with patch('services.ollama_service.resolve_ollama_host', return_value='http://x:11434'), \
            patch('services.ollama_service.requests.post', return_value=fake_resp):
        resp = svc.generate('test')

    assert resp.success is True
    assert resp.input_tokens == 250
    assert resp.output_tokens == 80
    # Compare with a tolerance: the value comes out of a ns -> s float
    # conversion, and exact equality would depend on how it is computed.
    assert resp.total_duration == pytest.approx(1.5)
# ═══════════════════════════════════════════════════════════════════════════
# T3: generate_embedding 三主機 retry(hotfix 6572d52)
# ═══════════════════════════════════════════════════════════════════════════
def test_embedding_retries_on_first_host_timeout():
    """The embedding POST to the first host times out; the second host returns the vector."""
    import os

    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()
    ok_response = MagicMock(status_code=200)
    ok_response.json.return_value = {'embeddings': [[0.1, 0.2, 0.3]]}

    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=['http://h1:11434', 'http://h2:11434'],
    )
    env_patch = patch.dict('os.environ', {}, clear=False)
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=[requests.Timeout('h1'), ok_response],
    )
    with resolver_patch, env_patch, post_patch as mock_post:
        # Keep an EMBEDDING_HOST override from the environment out of the way.
        os.environ.pop('EMBEDDING_HOST', None)
        vector = service.generate_embedding('test text')

    assert vector == [0.1, 0.2, 0.3]
    assert mock_post.call_count == 2  # the second host answered
def test_embedding_explicit_host_no_retry():
    """An explicit ``host=`` pins the embedding call: failure returns ``[]`` without host retry."""
    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=requests.Timeout('explicit host fail'),
    )
    with post_patch as mock_post:
        vector = service.generate_embedding('test', host='http://explicit:11434')

    assert vector == []
    # A pinned host may be tried on /api/embed and possibly once more on the
    # legacy /api/embeddings route, but the multi-host retry chain must not
    # start. Hence at most two POSTs are tolerated.
    assert mock_post.call_count <= 2
def test_embedding_all_three_hosts_fail_returns_empty():
    """When all three hosts time out, ``generate_embedding`` returns an empty list."""
    import os

    import requests
    from services.ollama_service import OllamaService

    service = OllamaService()
    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=['http://h1:11434', 'http://h2:11434', 'http://h3:11434'],
    )
    env_patch = patch.dict('os.environ', {}, clear=False)
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=requests.Timeout('all fail'),
    )
    with resolver_patch, env_patch, post_patch as mock_post:
        os.environ.pop('EMBEDDING_HOST', None)
        vector = service.generate_embedding('test')

    assert vector == []
    # Main route plus legacy route on three hosts could add up to 6 POSTs, but
    # per the original author's note a timeout marks the host unhealthy
    # without trying the legacy route, so 3 POSTs are expected.
    assert mock_post.call_count == 3
def test_embedding_can_disable_111_fallback_for_background_rag_work():
    """With ``allow_111_fallback=False`` embedding requests never reach the 111 fallback host.

    The resolver offers the secondary host and then the fallback host; the
    POSTs are expected to go to secondary and primary instead.
    """
    import os

    import requests
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    service = OllamaService()
    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=[oss.OLLAMA_HOST_SECONDARY, oss.OLLAMA_HOST_FALLBACK],
    )
    env_patch = patch.dict('os.environ', {}, clear=False)
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=requests.Timeout('secondary timeout'),
    )
    with resolver_patch, env_patch, post_patch as mock_post:
        os.environ.pop('EMBEDDING_HOST', None)
        vector = service.generate_embedding('test text', allow_111_fallback=False)

    posted_hosts = [
        recorded.args[0].split('/api/embed')[0]
        for recorded in mock_post.call_args_list
    ]
    assert vector == []
    assert posted_hosts == [oss.OLLAMA_HOST_SECONDARY, oss.OLLAMA_HOST_PRIMARY]
    assert oss.OLLAMA_HOST_FALLBACK not in posted_hosts
def test_embedding_fallback_disabled_uses_gcp_chain_when_resolver_returns_111():
    """Even if the resolver answers with the 111 host, primary and secondary are still tried."""
    import os

    import requests
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    service = OllamaService()
    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=[oss.OLLAMA_HOST_FALLBACK, oss.OLLAMA_HOST_FALLBACK],
    )
    env_patch = patch.dict('os.environ', {}, clear=False)
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=requests.Timeout('gcp timeout'),
    )
    with resolver_patch, env_patch, post_patch as mock_post:
        os.environ.pop('EMBEDDING_HOST', None)
        vector = service.generate_embedding('test text', allow_111_fallback=False)

    posted_hosts = [
        recorded.args[0].split('/api/embed')[0]
        for recorded in mock_post.call_args_list
    ]
    assert vector == []
    assert posted_hosts == [oss.OLLAMA_HOST_PRIMARY, oss.OLLAMA_HOST_SECONDARY]
    assert oss.OLLAMA_HOST_FALLBACK not in posted_hosts
def test_embedding_fallback_disabled_opens_short_gcp_failure_circuit():
    """After both GCP hosts fail, a follow-up embedding call is short-circuited.

    Two calls are made back to back; only the first may produce POSTs (one
    per GCP host), and the failure circuit must report itself open afterwards.
    """
    import os

    import requests
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    service = OllamaService()
    cooldown_patch = patch('services.ollama_service.EMBED_GCP_FAILURE_COOLDOWN_SEC', 60)
    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=[
            oss.OLLAMA_HOST_PRIMARY,
            oss.OLLAMA_HOST_SECONDARY,
            oss.OLLAMA_HOST_PRIMARY,
        ],
    )
    env_patch = patch.dict('os.environ', {}, clear=False)
    post_patch = patch(
        'services.ollama_service.requests.post',
        side_effect=requests.Timeout('gcp timeout'),
    )
    with cooldown_patch, resolver_patch, env_patch, post_patch as mock_post:
        os.environ.pop('EMBEDDING_HOST', None)
        first = service.generate_embedding('test text', allow_111_fallback=False)
        second = service.generate_embedding('another text', allow_111_fallback=False)

        posted_hosts = [
            recorded.args[0].split('/api/embed')[0]
            for recorded in mock_post.call_args_list
        ]
        assert first == []
        assert second == []
        # The second call added no POSTs on top of the first call's two.
        assert posted_hosts == [oss.OLLAMA_HOST_PRIMARY, oss.OLLAMA_HOST_SECONDARY]
        assert oss._embedding_gcp_failure_circuit['blocked_until'] > 0
        assert oss.is_embedding_gcp_circuit_open() is True
        assert oss.embedding_gcp_circuit_remaining_seconds() > 0
def test_embedding_health_label_maps_direct_and_proxy_gcp_hosts():
    """Host URLs map to the expected host-health label; the 111 host maps to an empty label."""
    from services import ollama_service as oss

    # URL -> label expected from _host_label_for_embedding_health.
    expected_labels = {
        "http://34.87.90.216:11434": "Primary (GCP)",
        "http://192.168.0.110:11435": "Primary (GCP)",
        "http://34.21.145.224:11434": "Secondary (GCP)",
        "http://192.168.0.110:11436": "Secondary (GCP)",
        "http://192.168.0.111:11434": "",
    }
    for host_url, label in expected_labels.items():
        assert oss._host_label_for_embedding_health(host_url) == label
def test_recent_embedding_host_unhealthy_reads_fresh_host_health_probe(monkeypatch):
    """A fresh probe row flagged as not healthy makes the secondary host count as unhealthy.

    The fake session records the bind parameters it is queried with, so the
    test can also check that label and URL of the secondary host are passed.
    """
    from datetime import datetime
    from services import ollama_service as oss

    recorded_params = []

    class FakeResult:
        def fetchone(self):
            # NOTE(review): presumably (is_healthy, error text, probe time) —
            # confirm against the query in the service.
            return (False, "EmbedProbe ReadTimeout", datetime.now())

    class FakeSession:
        def execute(self, _statement, params):
            recorded_params.append(dict(params))
            return FakeResult()

        def close(self):
            pass

    def fake_get_session():
        return FakeSession()

    monkeypatch.setenv("OLLAMA_EMBED_HOST_HEALTH_SKIP_ENABLED", "true")
    monkeypatch.setenv("OLLAMA_EMBED_HOST_HEALTH_SKIP_WINDOW_MINUTES", "20")
    monkeypatch.setattr("database.manager.get_session", fake_get_session)

    assert oss._recent_embedding_host_unhealthy(oss.OLLAMA_HOST_SECONDARY) is True
    expected_params = {"host_label": "Secondary (GCP)", "host_url": oss.OLLAMA_HOST_SECONDARY}
    assert recorded_params == [expected_params]
def test_recent_embedding_host_unhealthy_fails_open_when_db_is_unavailable(monkeypatch):
    """A failing host-health lookup must not report the host as unhealthy (fail open)."""
    from services import ollama_service as oss

    def broken_get_session():
        # Stand-in for a database that cannot hand out a session.
        raise RuntimeError("db down")

    monkeypatch.setattr("database.manager.get_session", broken_get_session)

    assert oss._recent_embedding_host_unhealthy(oss.OLLAMA_HOST_SECONDARY) is False
def test_embedding_fallback_disabled_skips_recent_unhealthy_gcp_hosts():
    """GCP hosts recently reported unhealthy are skipped without any POST, and 111 is not used.

    Both GCP hosts are stubbed as recently unhealthy; the call must return
    ``[]``, issue no HTTP request, and leave the failure circuit armed with
    both hosts recorded as tried.
    """
    import os

    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    service = OllamaService()
    unhealthy_hosts = frozenset((oss.OLLAMA_HOST_PRIMARY, oss.OLLAMA_HOST_SECONDARY))

    def fake_recent_unhealthy(host):
        return host in unhealthy_hosts

    cooldown_patch = patch('services.ollama_service.EMBED_GCP_FAILURE_COOLDOWN_SEC', 60)
    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        side_effect=[
            oss.OLLAMA_HOST_PRIMARY,
            oss.OLLAMA_HOST_PRIMARY,
            oss.OLLAMA_HOST_FALLBACK,
        ],
    )
    health_patch = patch(
        'services.ollama_service._recent_embedding_host_unhealthy',
        side_effect=fake_recent_unhealthy,
    )
    env_patch = patch.dict('os.environ', {}, clear=False)
    post_patch = patch('services.ollama_service.requests.post')
    with cooldown_patch, resolver_patch, health_patch, env_patch, post_patch as mock_post:
        os.environ.pop('EMBEDDING_HOST', None)
        vector = service.generate_embedding('test text', allow_111_fallback=False)

        assert vector == []
        mock_post.assert_not_called()
        assert oss._embedding_gcp_failure_circuit['blocked_until'] > 0
        assert oss._embedding_gcp_failure_circuit['tried'] == (
            oss.OLLAMA_HOST_PRIMARY,
            oss.OLLAMA_HOST_SECONDARY,
        )
def test_embedding_ignores_111_embedding_host_when_fallback_disabled():
    """An ``EMBEDDING_HOST`` pointing at 111 is ignored when the 111 fallback is disabled.

    The request is expected to go to the host returned by the resolver
    (secondary here) and to yield the mocked vector.
    """
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    service = OllamaService()
    ok_response = MagicMock(status_code=200)
    ok_response.json.return_value = {'embeddings': [[0.7, 0.8]]}

    resolver_patch = patch(
        'services.ollama_service.resolve_ollama_host',
        return_value=oss.OLLAMA_HOST_SECONDARY,
    )
    env_patch = patch.dict('os.environ', {'EMBEDDING_HOST': oss.OLLAMA_HOST_FALLBACK}, clear=False)
    post_patch = patch('services.ollama_service.requests.post', return_value=ok_response)
    with resolver_patch, env_patch, post_patch as mock_post:
        vector = service.generate_embedding('test text', allow_111_fallback=False)

    posted_hosts = [
        recorded.args[0].split('/api/embed')[0]
        for recorded in mock_post.call_args_list
    ]
    assert vector == [0.7, 0.8]
    assert posted_hosts == [oss.OLLAMA_HOST_SECONDARY]
# ═══════════════════════════════════════════════════════════════════════════
# T4: mark_unhealthy 觸發 cache 失效(驗 self.host 取新主機)
# ═══════════════════════════════════════════════════════════════════════════
def test_mark_unhealthy_invalidates_cache_for_next_host():
    """``mark_unhealthy`` must drop the cached resolved host.

    The cache is seeded with a host and a current timestamp; after that host
    is marked unhealthy the cached ``host`` entry is expected to be ``None``,
    so that the next lookup has to resolve again.
    """
    import time

    import services.ollama_service as oss

    cached_host = 'http://primary:11434'
    # Seed the cache as if this host had just been resolved.
    oss._resolved_host_cache['host'] = cached_host
    oss._resolved_host_cache['ts'] = time.time()

    oss.mark_unhealthy(cached_host)

    assert oss._resolved_host_cache['host'] is None