"""
tests/test_ollama_retry_chain.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 13 hardening — three-host retry chain verification

Verifies the logic of hotfix e862a90 (generate retry) + 6572d52 (embed retry):
1. self.host is a lazily resolved @property
2. generate / generate_embedding automatically retry on the next host after a failure (at most 3 attempts)
3. after mark_unhealthy, the next self.host access picks a new host
4. an explicit caller-supplied host=... is frozen and never retried
5. all three hosts failing → return a failure (no infinite loop)
6. Phase 13 hardening: OllamaResponse.input_tokens/output_tokens parsing

Discipline: never hit a real Ollama; requests.post is always mocked.
"""
|
||
|
||
from unittest.mock import patch, MagicMock
|
||
|
||
import pytest
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# Fixtures
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
@pytest.fixture(autouse=True)
def _reset_state():
    """Reset the module-level host-selection state around every test.

    Clears the unhealthy marks, the resolved-host cache, the 111 fallback
    circuit cache and the embedding GCP failure circuit both before and
    after the test body runs, so no test observes state left by another.
    """
    import services.ollama_service as oss

    def _reset():
        # Same reset is applied on setup and teardown.
        oss._unhealthy_marks.clear()
        oss._resolved_host_cache['host'] = None
        oss._resolved_host_cache['ts'] = 0
        oss._fallback_111_circuit_cache.update({'blocked': False, 'reason': '', 'ts': 0})
        oss._embedding_gcp_failure_circuit.update({'blocked_until': 0.0, 'notice_ts': 0.0, 'tried': ()})

    _reset()
    yield
    _reset()
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# T1: self.host is now a @property (no longer frozen on the instance)
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_self_host_is_lazy_property_not_frozen():
    """OllamaService.host is a property that calls resolve_ollama_host() on each access."""
    from services.ollama_service import OllamaService

    svc = OllamaService()
    # No host was passed in, so nothing is frozen in __init__.
    assert svc._explicit_host is None

    # Reading .host goes through the resolver.
    with patch('services.ollama_service.resolve_ollama_host', return_value='http://test:11434') as mock_resolve:
        h1 = svc.host
        h2 = svc.host
        assert h1 == h2 == 'http://test:11434'
        assert mock_resolve.call_count >= 2  # resolved on every access
|
||
|
||
|
||
def test_explicit_host_freezes():
    """An explicit host passed by the caller is frozen and bypasses lazy resolution."""
    from services.ollama_service import OllamaService

    svc = OllamaService(host='http://explicit:11434')
    assert svc._explicit_host == 'http://explicit:11434'

    with patch('services.ollama_service.resolve_ollama_host') as mock_resolve:
        h = svc.host
        assert h == 'http://explicit:11434'
        mock_resolve.assert_not_called()
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# T2: generate three-host retry chain (core hotfix e862a90)
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_generate_retries_on_first_host_timeout():
    """First host times out, then the second host succeeds."""
    import requests
    from services.ollama_service import OllamaService

    svc = OllamaService()

    # Sequence: the first post times out, the second one succeeds.
    fake_ok = MagicMock(status_code=200)
    fake_ok.json.return_value = {
        'response': 'OK from secondary',
        'prompt_eval_count': 100, 'eval_count': 50,
    }
    call_results = [requests.Timeout('first host'), fake_ok]

    # The resolver hands out two different hosts on successive calls.
    hosts = ['http://host1:11434', 'http://host2:11434']

    with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \
         patch('services.ollama_service.requests.post', side_effect=call_results) as mock_post:
        resp = svc.generate('test prompt')

    assert resp.success is True
    assert resp.content == 'OK from secondary'
    assert resp.input_tokens == 100
    assert resp.output_tokens == 50
    assert mock_post.call_count == 2  # first timed out, second succeeded
|
||
|
||
|
||
def test_generate_all_three_hosts_fail_returns_failure():
    """All three hosts fail: the response has success=False and the loop terminates."""
    import requests
    from services.ollama_service import OllamaService

    svc = OllamaService()
    hosts = ['http://h1:11434', 'http://h2:11434', 'http://h3:11434']

    with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \
         patch('services.ollama_service.requests.post',
               side_effect=requests.Timeout('all timeout')) as mock_post:
        resp = svc.generate('test')

    assert resp.success is False
    assert mock_post.call_count == 3  # three attempts were made
    assert 'all 3 hosts failed' in (resp.error or '')
|
||
|
||
|
||
def test_generate_same_host_breaks_loop():
    """The resolver keeps returning the same host (cache not expired): no endless loop."""
    import requests
    from services.ollama_service import OllamaService

    svc = OllamaService()
    same_host = 'http://stuck:11434'

    with patch('services.ollama_service.resolve_ollama_host', return_value=same_host), \
         patch('services.ollama_service.requests.post',
               side_effect=requests.Timeout('stuck')) as mock_post:
        resp = svc.generate('test')

    assert resp.success is False
    # First attempt hits "stuck" and fails; the second self.host is still
    # "stuck" (cache still present), so the retry loop breaks.
    assert mock_post.call_count == 1
|
||
|
||
|
||
def test_generate_forces_final_fallback_when_unhealthy_ttl_expires_mid_request():
    """If the unhealthy TTL expires during a long timeout, round three still hits the 111 fallback."""
    import requests
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    svc = OllamaService()
    hosts = [
        oss.OLLAMA_HOST_PRIMARY,
        oss.OLLAMA_HOST_SECONDARY,
        # Simulates the resolver picking primary again once its unhealthy mark expired.
        oss.OLLAMA_HOST_PRIMARY,
    ]

    with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \
         patch('services.ollama_service.requests.post',
               side_effect=requests.Timeout('all timeout')) as mock_post:
        resp = svc.generate('test')

    # Strip the endpoint path to recover the host each request was sent to.
    posted_hosts = [call.args[0].split('/api/generate')[0] for call in mock_post.call_args_list]
    assert resp.success is False
    assert mock_post.call_count == 3
    assert posted_hosts == [oss.OLLAMA_HOST_PRIMARY, oss.OLLAMA_HOST_SECONDARY, oss.OLLAMA_HOST_FALLBACK]
    assert 'all 3 hosts failed' in (resp.error or '')
|
||
|
||
|
||
def test_generate_uses_secondary_model_fallback_before_111():
    """When GCP-B lacks the coder model, use the secondary fallback model instead of going straight to 111."""
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    svc = OllamaService()
    fake_ok = MagicMock(status_code=200)
    fake_ok.json.return_value = {
        'response': 'OK from secondary fallback',
        'prompt_eval_count': 10,
        'eval_count': 5,
    }

    with patch('services.ollama_service.resolve_ollama_host', return_value=oss.OLLAMA_HOST_SECONDARY), \
         patch('services.ollama_service.requests.post', return_value=fake_ok) as mock_post:
        resp = svc.generate('test prompt', model='qwen2.5-coder:7b')

    assert resp.success is True
    assert resp.host == oss.OLLAMA_HOST_SECONDARY
    assert resp.model == 'gemma3:4b'
    # The substituted model must also be what was actually sent on the wire.
    payload = mock_post.call_args.kwargs['json']
    assert payload['model'] == 'gemma3:4b'
|
||
|
||
|
||
def test_generate_model_404_does_not_mark_host_unhealthy():
    """A missing model is a model-availability issue; it must not mark the whole GCP-B host unhealthy."""
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    svc = OllamaService()
    fake_404 = MagicMock(status_code=404, text='{"error":"model not found"}')
    fake_ok = MagicMock(status_code=200)
    fake_ok.json.return_value = {'response': 'OK', 'prompt_eval_count': 3, 'eval_count': 2}

    resolved_hosts = [
        oss.OLLAMA_HOST_SECONDARY,
        oss.OLLAMA_HOST_FALLBACK,
    ]
    with patch('services.ollama_service.resolve_ollama_host', side_effect=resolved_hosts), \
         patch('services.ollama_service.requests.post', side_effect=[fake_404, fake_ok]):
        resp = svc.generate('test prompt', model='tiny-missing-model')

    assert resp.success is True
    assert oss.OLLAMA_HOST_SECONDARY not in oss._unhealthy_marks
|
||
|
||
|
||
def test_generate_can_disable_111_fallback_for_batch_llm_work():
    """Batch LLM work runs on GCP-A/GCP-B only; a resolver answer of 111 is redirected, not skipped."""
    import requests
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    svc = OllamaService()
    hosts = [
        oss.OLLAMA_HOST_SECONDARY,
        oss.OLLAMA_HOST_FALLBACK,
    ]

    with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \
         patch('services.ollama_service.requests.post',
               side_effect=requests.Timeout('secondary timeout')) as mock_post:
        resp = svc.generate('test', allow_111_fallback=False)

    # Strip the endpoint path to recover the host each request was sent to.
    posted_hosts = [call.args[0].split('/api/generate')[0] for call in mock_post.call_args_list]
    assert resp.success is False
    assert posted_hosts == [oss.OLLAMA_HOST_SECONDARY, oss.OLLAMA_HOST_PRIMARY]
    assert oss.OLLAMA_HOST_FALLBACK not in posted_hosts
    assert 'timeout' in (resp.error or '')
|
||
|
||
|
||
def test_generate_skips_111_when_circuit_breaker_blocks_fallback():
    """When 111 usage is too high, generate must not send the third round to 111."""
    import requests
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    svc = OllamaService()
    hosts = [
        oss.OLLAMA_HOST_PRIMARY,
        oss.OLLAMA_HOST_SECONDARY,
        oss.OLLAMA_HOST_PRIMARY,
    ]

    def fake_111_circuit(host):
        # Report the breaker as open for the 111 fallback host only.
        if host == oss.OLLAMA_HOST_FALLBACK:
            return True, '111 circuit breaker active'
        return False, ''

    with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \
         patch('services.ollama_service._fallback_111_block_reason', side_effect=fake_111_circuit), \
         patch('services.ollama_service.requests.post',
               side_effect=requests.Timeout('gcp timeout')) as mock_post:
        resp = svc.generate('test')

    # Strip the endpoint path to recover the host each request was sent to.
    posted_hosts = [call.args[0].split('/api/generate')[0] for call in mock_post.call_args_list]
    assert resp.success is False
    assert posted_hosts == [oss.OLLAMA_HOST_PRIMARY, oss.OLLAMA_HOST_SECONDARY]
    assert oss.OLLAMA_HOST_FALLBACK not in posted_hosts
    assert '111 circuit breaker active' in (resp.error or '')
|
||
|
||
|
||
def test_111_circuit_breaker_blocks_when_recent_share_is_high(monkeypatch):
    """The circuit breaker reports blocked when ai_calls shows too large a share going to 111."""
    from services import ollama_service as oss

    class FakeResult:
        def fetchone(self):
            # Presumably (total calls, calls served by 111) — confirm against the real query.
            return (100, 12)

    class FakeSession:
        def execute(self, *args, **kwargs):
            return FakeResult()

        def close(self):
            pass

    monkeypatch.setenv('OLLAMA_111_CIRCUIT_CACHE_SEC', '60')
    monkeypatch.setenv('OLLAMA_111_CIRCUIT_PCT', '5')
    monkeypatch.setenv('OLLAMA_111_CIRCUIT_MIN_TOTAL', '20')
    monkeypatch.setenv('OLLAMA_111_CIRCUIT_MIN_111', '5')
    # The class itself is the zero-argument factory; no lambda wrapper needed.
    monkeypatch.setattr('database.manager.get_session', FakeSession)
    oss._fallback_111_circuit_cache.update({'blocked': False, 'reason': '', 'ts': 0})

    blocked, reason = oss._fallback_111_block_reason(oss.OLLAMA_HOST_FALLBACK)

    assert blocked is True
    assert '111 circuit breaker active' in reason
|
||
|
||
|
||
def test_111_circuit_breaker_fails_open_when_db_is_unavailable(monkeypatch):
    """A failed DB observation must not cut off the Ollama fallback entirely."""
    from services import ollama_service as oss

    def _db_down():
        # Stand-in for get_session() when the database cannot be reached.
        raise RuntimeError('db down')

    monkeypatch.setattr('database.manager.get_session', _db_down)
    oss._fallback_111_circuit_cache.update({'blocked': False, 'reason': '', 'ts': 0})

    blocked, reason = oss._fallback_111_block_reason(oss.OLLAMA_HOST_FALLBACK)

    assert blocked is False
    assert reason == ''
|
||
|
||
|
||
def test_generate_token_parsing_phase13():
    """Phase 13 hardening: OllamaResponse parses prompt_eval_count and eval_count."""
    from services.ollama_service import OllamaService

    svc = OllamaService()
    fake_resp = MagicMock(status_code=200)
    fake_resp.json.return_value = {
        'response': 'hello',
        'prompt_eval_count': 250,
        'eval_count': 80,
        'total_duration': 1500000000,  # 1.5s in nanoseconds
    }

    with patch('services.ollama_service.resolve_ollama_host', return_value='http://x:11434'), \
         patch('services.ollama_service.requests.post', return_value=fake_resp):
        resp = svc.generate('test')

    assert resp.success is True
    assert resp.input_tokens == 250
    assert resp.output_tokens == 80
    # Converted to seconds; approx() keeps the check independent of whether
    # the conversion divides by 1e9 or multiplies by 1e-9.
    assert resp.total_duration == pytest.approx(1.5)
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# T3: generate_embedding three-host retry (hotfix 6572d52)
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_embedding_retries_on_first_host_timeout():
    """Embedding: the first host times out, the second host succeeds."""
    import os

    import requests
    from services.ollama_service import OllamaService

    svc = OllamaService()
    fake_ok = MagicMock(status_code=200)
    fake_ok.json.return_value = {'embeddings': [[0.1, 0.2, 0.3]]}

    hosts = ['http://h1:11434', 'http://h2:11434']
    call_seq = [requests.Timeout('h1'), fake_ok]

    with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \
         patch.dict('os.environ', {}, clear=False), \
         patch('services.ollama_service.requests.post', side_effect=call_seq) as mock_post:
        # Make sure the EMBEDDING_HOST override is not in effect; patch.dict
        # restores the original environment when the block exits.
        os.environ.pop('EMBEDDING_HOST', None)
        vec = svc.generate_embedding('test text')

    assert vec == [0.1, 0.2, 0.3]
    assert mock_post.call_count == 2  # the second host succeeded
|
||
|
||
|
||
def test_embedding_explicit_host_no_retry():
    """An explicit host=... is never retried on other hosts; failure returns []."""
    import requests
    from services.ollama_service import OllamaService

    svc = OllamaService()

    with patch('services.ollama_service.requests.post',
               side_effect=requests.Timeout('explicit host fail')) as mock_post:
        vec = svc.generate_embedding('test', host='http://explicit:11434')

    assert vec == []
    # An explicit host is frozen: one attempt on the main path (/api/embed),
    # possibly one more on the legacy /api/embeddings path, but the retry
    # chain across different hosts never starts. The lower bound keeps the
    # check from passing vacuously when no request is issued at all.
    assert 1 <= mock_post.call_count <= 2
|
||
|
||
|
||
def test_embedding_all_three_hosts_fail_returns_empty():
    """Embedding: all three hosts fail, so the result is []."""
    import os

    import requests
    from services.ollama_service import OllamaService

    svc = OllamaService()
    hosts = ['http://h1:11434', 'http://h2:11434', 'http://h3:11434']

    with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \
         patch.dict('os.environ', {}, clear=False), \
         patch('services.ollama_service.requests.post',
               side_effect=requests.Timeout('all fail')) as mock_post:
        # Drop any EMBEDDING_HOST override; patch.dict restores it on exit.
        os.environ.pop('EMBEDDING_HOST', None)
        vec = svc.generate_embedding('test')

    assert vec == []
    # Three hosts x (main path + legacy) would be 6 calls, but per the
    # _embed_one logic a timeout marks the host unhealthy without trying the
    # legacy path, so it is 3 calls (one main-path timeout per host).
    assert mock_post.call_count == 3
|
||
|
||
|
||
def test_embedding_can_disable_111_fallback_for_background_rag_work():
    """Background embedding/RAG work runs on GCP-A/GCP-B only, keeping long bge-m3 jobs off 111."""
    import os

    import requests
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    svc = OllamaService()
    hosts = [
        oss.OLLAMA_HOST_SECONDARY,
        oss.OLLAMA_HOST_FALLBACK,
    ]

    with patch('services.ollama_service.resolve_ollama_host', side_effect=hosts), \
         patch.dict('os.environ', {}, clear=False), \
         patch('services.ollama_service.requests.post',
               side_effect=requests.Timeout('secondary timeout')) as mock_post:
        # Drop any EMBEDDING_HOST override; patch.dict restores it on exit.
        os.environ.pop('EMBEDDING_HOST', None)
        vec = svc.generate_embedding('test text', allow_111_fallback=False)

    # Strip the endpoint path to recover the host each request was sent to.
    posted_hosts = [call.args[0].split('/api/embed')[0] for call in mock_post.call_args_list]
    assert vec == []
    assert posted_hosts == [oss.OLLAMA_HOST_SECONDARY, oss.OLLAMA_HOST_PRIMARY]
    assert oss.OLLAMA_HOST_FALLBACK not in posted_hosts
|
||
|
||
|
||
def test_embedding_fallback_disabled_uses_gcp_chain_when_resolver_returns_111():
    """If the resolver answers 111 (unhealthy cache), background embedding still tries GCP-A/GCP-B."""
    import os

    import requests
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    svc = OllamaService()

    with patch('services.ollama_service.resolve_ollama_host',
               side_effect=[oss.OLLAMA_HOST_FALLBACK, oss.OLLAMA_HOST_FALLBACK]), \
         patch.dict('os.environ', {}, clear=False), \
         patch('services.ollama_service.requests.post',
               side_effect=requests.Timeout('gcp timeout')) as mock_post:
        # Drop any EMBEDDING_HOST override; patch.dict restores it on exit.
        os.environ.pop('EMBEDDING_HOST', None)
        vec = svc.generate_embedding('test text', allow_111_fallback=False)

    # Strip the endpoint path to recover the host each request was sent to.
    posted_hosts = [call.args[0].split('/api/embed')[0] for call in mock_post.call_args_list]
    assert vec == []
    assert posted_hosts == [oss.OLLAMA_HOST_PRIMARY, oss.OLLAMA_HOST_SECONDARY]
    assert oss.OLLAMA_HOST_FALLBACK not in posted_hosts
|
||
|
||
|
||
def test_embedding_fallback_disabled_opens_short_gcp_failure_circuit():
    """When GCP-A/GCP-B are both down, background embedding opens a short circuit.

    The second call must not immediately hammer the two hosts again.
    """
    import os

    import requests
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    svc = OllamaService()
    resolved_hosts = [
        oss.OLLAMA_HOST_PRIMARY,
        oss.OLLAMA_HOST_SECONDARY,
        oss.OLLAMA_HOST_PRIMARY,
    ]

    with patch('services.ollama_service.EMBED_GCP_FAILURE_COOLDOWN_SEC', 60), \
         patch('services.ollama_service.resolve_ollama_host', side_effect=resolved_hosts), \
         patch.dict('os.environ', {}, clear=False), \
         patch('services.ollama_service.requests.post',
               side_effect=requests.Timeout('gcp timeout')) as mock_post:
        # Drop any EMBEDDING_HOST override; patch.dict restores it on exit.
        os.environ.pop('EMBEDDING_HOST', None)
        first = svc.generate_embedding('test text', allow_111_fallback=False)
        second = svc.generate_embedding('another text', allow_111_fallback=False)

    # Strip the endpoint path to recover the host each request was sent to.
    posted_hosts = [call.args[0].split('/api/embed')[0] for call in mock_post.call_args_list]
    assert first == []
    assert second == []
    # Only the first call reached the network; the second was short-circuited.
    assert posted_hosts == [oss.OLLAMA_HOST_PRIMARY, oss.OLLAMA_HOST_SECONDARY]
    assert oss._embedding_gcp_failure_circuit['blocked_until'] > 0
    assert oss.is_embedding_gcp_circuit_open() is True
    assert oss.embedding_gcp_circuit_remaining_seconds() > 0
|
||
|
||
|
||
def test_embedding_health_label_maps_direct_and_proxy_gcp_hosts():
    """The host_health skip must use the same host_label values the scheduler writes."""
    from services import ollama_service as oss

    assert oss._host_label_for_embedding_health("http://34.87.90.216:11434") == "Primary (GCP)"
    assert oss._host_label_for_embedding_health("http://192.168.0.110:11435") == "Primary (GCP)"
    assert oss._host_label_for_embedding_health("http://34.21.145.224:11434") == "Secondary (GCP)"
    assert oss._host_label_for_embedding_health("http://192.168.0.110:11436") == "Secondary (GCP)"
    # The 111 host has no embedding-health label.
    assert oss._host_label_for_embedding_health("http://192.168.0.111:11434") == ""
|
||
|
||
|
||
def test_recent_embedding_host_unhealthy_reads_fresh_host_health_probe(monkeypatch):
    """A fresh host_health_probes row marking the GCP embedding runtime unhealthy lets background embedding skip it."""
    from datetime import datetime
    from services import ollama_service as oss

    seen_params = []

    class FakeResult:
        def fetchone(self):
            # Presumably (healthy flag, error text, probe timestamp) — confirm against the real query.
            return (False, "EmbedProbe ReadTimeout", datetime.now())

    class FakeSession:
        def execute(self, _statement, params):
            # Record the bind parameters so the lookup keys can be asserted below.
            seen_params.append(dict(params))
            return FakeResult()

        def close(self):
            pass

    monkeypatch.setenv("OLLAMA_EMBED_HOST_HEALTH_SKIP_ENABLED", "true")
    monkeypatch.setenv("OLLAMA_EMBED_HOST_HEALTH_SKIP_WINDOW_MINUTES", "20")
    # The class itself is the zero-argument factory; no lambda wrapper needed.
    monkeypatch.setattr("database.manager.get_session", FakeSession)

    assert oss._recent_embedding_host_unhealthy(oss.OLLAMA_HOST_SECONDARY) is True
    assert seen_params == [
        {"host_label": "Secondary (GCP)", "host_url": oss.OLLAMA_HOST_SECONDARY}
    ]
|
||
|
||
|
||
def test_recent_embedding_host_unhealthy_fails_open_when_db_is_unavailable(monkeypatch):
    """A failed host-health query must not block embedding; at worst the normal network retry applies."""
    from services import ollama_service as oss

    def _db_down():
        # Stand-in for get_session() when the database cannot be reached.
        raise RuntimeError("db down")

    monkeypatch.setattr("database.manager.get_session", _db_down)

    assert oss._recent_embedding_host_unhealthy(oss.OLLAMA_HOST_SECONDARY) is False
|
||
|
||
|
||
def test_embedding_fallback_disabled_skips_recent_unhealthy_gcp_hosts():
    """Background embedding skips GCP hosts that host_health recently marked unhealthy, without hitting 111."""
    import os

    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    svc = OllamaService()

    def fake_recent_unhealthy(host):
        # Both GCP hosts are reported as recently unhealthy.
        return host in {oss.OLLAMA_HOST_PRIMARY, oss.OLLAMA_HOST_SECONDARY}

    resolved_hosts = [
        oss.OLLAMA_HOST_PRIMARY,
        oss.OLLAMA_HOST_PRIMARY,
        oss.OLLAMA_HOST_FALLBACK,
    ]

    with patch('services.ollama_service.EMBED_GCP_FAILURE_COOLDOWN_SEC', 60), \
         patch('services.ollama_service.resolve_ollama_host', side_effect=resolved_hosts), \
         patch('services.ollama_service._recent_embedding_host_unhealthy', side_effect=fake_recent_unhealthy), \
         patch.dict('os.environ', {}, clear=False), \
         patch('services.ollama_service.requests.post') as mock_post:
        # Drop any EMBEDDING_HOST override; patch.dict restores it on exit.
        os.environ.pop('EMBEDDING_HOST', None)
        vec = svc.generate_embedding('test text', allow_111_fallback=False)

    assert vec == []
    mock_post.assert_not_called()
    assert oss._embedding_gcp_failure_circuit['blocked_until'] > 0
    assert oss._embedding_gcp_failure_circuit['tried'] == (
        oss.OLLAMA_HOST_PRIMARY,
        oss.OLLAMA_HOST_SECONDARY,
    )
|
||
|
||
|
||
def test_embedding_ignores_111_embedding_host_when_fallback_disabled():
    """If EMBEDDING_HOST is mistakenly set to 111, background embedding still uses the GCP resolver instead of giving up."""
    from services import ollama_service as oss
    from services.ollama_service import OllamaService

    svc = OllamaService()
    fake_ok = MagicMock(status_code=200)
    fake_ok.json.return_value = {'embeddings': [[0.7, 0.8]]}

    with patch('services.ollama_service.resolve_ollama_host', return_value=oss.OLLAMA_HOST_SECONDARY), \
         patch.dict('os.environ', {'EMBEDDING_HOST': oss.OLLAMA_HOST_FALLBACK}, clear=False), \
         patch('services.ollama_service.requests.post', return_value=fake_ok) as mock_post:
        vec = svc.generate_embedding('test text', allow_111_fallback=False)

    # Strip the endpoint path to recover the host each request was sent to.
    posted_hosts = [call.args[0].split('/api/embed')[0] for call in mock_post.call_args_list]
    assert vec == [0.7, 0.8]
    assert posted_hosts == [oss.OLLAMA_HOST_SECONDARY]
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════════
# T4: mark_unhealthy invalidates the cache (verifies self.host picks a new host)
# ═══════════════════════════════════════════════════════════════════════════
|
||
|
||
def test_mark_unhealthy_invalidates_cache_for_next_host():
    """mark_unhealthy clears the resolved-host cache so the next self.host re-resolves."""
    import time

    import services.ollama_service as oss
    from services.ollama_service import OllamaService

    svc = OllamaService()

    # Seed the cache with a freshly resolved host.
    oss._resolved_host_cache['host'] = 'http://primary:11434'
    oss._resolved_host_cache['ts'] = time.time()

    # mark_unhealthy is expected to drop the cached host.
    oss.mark_unhealthy('http://primary:11434')

    assert oss._resolved_host_cache['host'] is None

    # With the cache gone, the next .host access goes back to the resolver
    # and returns whatever host it picks now.
    with patch('services.ollama_service.resolve_ollama_host',
               return_value='http://secondary:11434') as mock_resolve:
        assert svc.host == 'http://secondary:11434'
    mock_resolve.assert_called()
|