ewoooc/tests/test_ollama_resolve.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
ADR-027 Phase 2 — Ollama host resolve / health probe / unhealthy mark 驗證

涵蓋：
  B3 — _is_reachable HTTP probe（取代純 TCP）
  B4 — mark_unhealthy / _is_unhealthy / cache 失效互動
  B1/B2 — config.get_ollama_host / get_embedding_host / get_hermes_url lazy 行為
"""

import os
import time
import importlib
from unittest.mock import patch, MagicMock

import pytest


# ─────────────────────────────────────────────────────────────────────────────
# 必要：每個 test 前清空 ollama_service 的 module-level cache，避免 cross-test 污染
# ─────────────────────────────────────────────────────────────────────────────
@pytest.fixture(autouse=True)
def _reset_ollama_caches():
    """Reset ollama_service module-level state around every test.

    The resolved-host cache entries and the unhealthy marks are wiped once
    before the test body runs and once more after it finishes, so nothing
    recorded by one test is visible to the next.
    """
    from services import ollama_service as oss

    def _wipe():
        # Same two cache slots and the same mark container, in the same order.
        for slot, blank in (('host', None), ('ts', 0)):
            oss._resolved_host_cache[slot] = blank
        oss._unhealthy_marks.clear()

    _wipe()
    yield
    _wipe()


# ═══════════════════════════════════════════════════════════════════════════
# B3 — HTTP probe 取代 TCP
# ═══════════════════════════════════════════════════════════════════════════

def test_resolve_picks_primary_when_http_200():
    """A 200 answer from the probe makes resolve return the primary host.

    Also checks that the probed URL is the primary's ``/api/version`` endpoint.
    """
    from services import ollama_service as oss

    primary_url = 'http://primary.example:11434'
    ok_reply = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_reply) as probe:
        resolved = oss.resolve_ollama_host(
            primary=primary_url,
            secondary='http://secondary.example:11434',
            fallback='http://fallback.example:11434',
        )

    assert resolved == primary_url
    # First positional argument of the most recent probe call is the URL hit.
    probed_url = probe.call_args.args[0]
    assert probed_url == primary_url + '/api/version'


def test_resolve_falls_back_when_http_500():
    """B3 key case: a host answering HTTP 500 is treated as unreachable.

    The original note describes this as "TCP connects but the process is
    stuck". Every probe gets a 500 here and the fallback host is expected.
    """
    from services import ollama_service as oss

    chain = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    broken_reply = MagicMock(status_code=500)
    with patch('services.ollama_service.requests.get', return_value=broken_reply):
        resolved = oss.resolve_ollama_host(**chain)

    assert resolved == chain['fallback']


def test_resolve_falls_back_on_request_exception():
    """When every probe raises, resolve is expected to return the fallback host."""
    from services import ollama_service as oss

    chain = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    probe_failure = Exception('connect timeout')
    with patch('services.ollama_service.requests.get', side_effect=probe_failure):
        resolved = oss.resolve_ollama_host(**chain)

    assert resolved == chain['fallback']


# ═══════════════════════════════════════════════════════════════════════════
# B4 — mark_unhealthy 行為
# ═══════════════════════════════════════════════════════════════════════════

def test_mark_unhealthy_triggers_immediate_fallback():
    """B4: with primary marked unhealthy, the three-host chain moves to secondary (not fallback)."""
    from services import ollama_service as oss

    chain = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    oss.mark_unhealthy(chain['primary'])

    # The probe would answer 200 for any host, yet a host that carries an
    # unhealthy mark must still be skipped.
    healthy_reply = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_reply):
        chosen = oss.resolve_ollama_host(**chain)

    # Three-host setup: primary unavailable -> secondary, not fallback.
    assert chosen == chain['secondary']


def test_mark_unhealthy_primary_and_secondary_falls_to_fallback():
    """B4 three-host cascade: fallback is chosen only when primary and secondary are both marked."""
    from services import ollama_service as oss

    chain = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    for role in ('primary', 'secondary'):
        oss.mark_unhealthy(chain[role])

    healthy_reply = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_reply):
        chosen = oss.resolve_ollama_host(**chain)

    assert chosen == chain['fallback']


def test_unhealthy_mark_expires_after_ttl():
    """An unhealthy mark that is old enough no longer keeps primary from being picked."""
    from services import ollama_service as oss

    primary_url = 'http://primary.example:11434'
    oss.mark_unhealthy(primary_url)
    # Backdate the mark to 31 seconds ago.
    # NOTE(review): presumably just past the mark TTL — confirm against ollama_service.
    oss._unhealthy_marks[primary_url] = time.time() - 31

    healthy_reply = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_reply):
        chosen = oss.resolve_ollama_host(
            primary=primary_url,
            secondary='http://secondary.example:11434',
            fallback='http://fallback.example:11434',
        )

    assert chosen == primary_url


def test_mark_unhealthy_invalidates_resolved_cache():
    """B4 focus: once primary is marked unhealthy, a cached result must not hand it back."""
    from services import ollama_service as oss

    chain = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }

    # Step 1: resolve once so that primary ends up cached.
    with patch('services.ollama_service.requests.get',
               return_value=MagicMock(status_code=200)):
        before_mark = oss.resolve_ollama_host(**chain)
    assert before_mark == chain['primary']

    # Step 2: simulate a failed generate by marking primary unhealthy.
    oss.mark_unhealthy(chain['primary'])

    # Step 3: even with the cache still inside its TTL (120s per the original
    # note), the next resolve has to skip primary; in the three-host setup that
    # means landing on secondary.
    with patch('services.ollama_service.requests.get',
               return_value=MagicMock(status_code=200)):
        after_mark = oss.resolve_ollama_host(**chain)
    assert after_mark == chain['secondary']


def test_mark_unhealthy_handles_trailing_slash():
    """A mark set with a trailing slash is seen for the URL both with and without that slash."""
    from services import ollama_service as oss

    oss.mark_unhealthy('http://primary.example:11434/')

    for spelling in ('http://primary.example:11434', 'http://primary.example:11434/'):
        assert oss._is_unhealthy(spelling) is True


def test_mark_unhealthy_ignores_empty():
    """Marking an empty string or None must leave the unhealthy marks empty."""
    from services import ollama_service as oss

    for blank_host in ('', None):
        oss.mark_unhealthy(blank_host)

    assert not len(oss._unhealthy_marks)


# ═══════════════════════════════════════════════════════════════════════════
# B1/B2 — config lazy getters
# ═══════════════════════════════════════════════════════════════════════════

def test_get_ollama_host_uses_approved_env_when_set(monkeypatch):
    """With OLLAMA_HOST set to this (approved, per the test name) URL, the getter returns it as-is."""
    approved_url = 'http://34.21.145.224:11434'
    monkeypatch.setenv('OLLAMA_HOST', approved_url)

    import config
    # Reload so the environment change is guaranteed to be picked up.
    reloaded = importlib.reload(config)

    assert reloaded.get_ollama_host() == approved_url


def test_get_ollama_host_rejects_unapproved_env(monkeypatch):
    """An OLLAMA_HOST value that is not approved is ignored in favour of the resolved host.

    Every probe is mocked to answer 200, so the resolved host is the primary.
    The assertion below expects the default primary URL, so any
    OLLAMA_HOST_PRIMARY override present in the developer/CI environment is
    removed first; otherwise the outcome would depend on the machine.
    """
    monkeypatch.setenv('OLLAMA_HOST', 'http://192.168.0.188:11434')
    # Keep the test hermetic: the expected value is the built-in default primary.
    monkeypatch.delenv('OLLAMA_HOST_PRIMARY', raising=False)
    fake_resp = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=fake_resp):
        import config
        importlib.reload(config)  # pick up the patched environment
        host = config.get_ollama_host()
    assert host == 'http://34.143.170.20:11434'


def test_get_ollama_host_falls_back_to_resolve_without_env(monkeypatch):
    """Without OLLAMA_HOST in the environment the getter still yields an http:// URL."""
    monkeypatch.delenv('OLLAMA_HOST', raising=False)

    healthy_reply = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_reply):
        import config
        # Reload so the environment change is honoured.
        reloaded = importlib.reload(config)
        resolved = reloaded.get_ollama_host()

    # The primary URL is controlled by env OLLAMA_HOST_PRIMARY (default:
    # GCP-SSD 34.143.170.20), so only the scheme is checked here.
    assert resolved.startswith('http://')


def test_get_embedding_host_prefers_env(monkeypatch):
    """EMBEDDING_HOST from the environment is what get_embedding_host returns."""
    embedding_url = 'http://192.168.0.111:11434'
    monkeypatch.setenv('EMBEDDING_HOST', embedding_url)

    import config
    reloaded = importlib.reload(config)

    assert reloaded.get_embedding_host() == embedding_url


def test_get_hermes_url_prefers_env(monkeypatch):
    """HERMES_URL from the environment is what get_hermes_url returns."""
    hermes_url = 'http://34.143.170.20:11434'
    monkeypatch.setenv('HERMES_URL', hermes_url)

    import config
    reloaded = importlib.reload(config)

    assert reloaded.get_hermes_url() == hermes_url


# ═══════════════════════════════════════════════════════════════════════════
# Cache TTL 行為
# ═══════════════════════════════════════════════════════════════════════════

def test_resolve_uses_cache_within_ttl():
    """A second resolve inside the same TTL window must not issue another HTTP call."""
    from services import ollama_service as oss

    chain = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    healthy_reply = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=healthy_reply) as probe:
        # Two back-to-back resolves with identical arguments.
        for _ in range(2):
            oss.resolve_ollama_host(**chain)

    # Cache hit: only the first resolve reached HTTP.
    assert probe.call_count == 1


def test_111_fallback_downgrades_heavy_model_and_shortens_keep_alive(monkeypatch):
    """111 is the final fallback; 14B+ models must not stay resident on that 16GB Mac.

    With the 111 overrides pinned below, a generate call for a model matching
    the heavy patterns is expected to be sent with the fallback model,
    keep_alive, num_ctx and timeout instead of the caller's values.
    """
    from services import ollama_service as oss

    pinned_settings = {
        "FALLBACK_111_MODEL": "qwen2.5:7b-instruct",
        "FALLBACK_111_KEEP_ALIVE": "5m",
        "FALLBACK_111_MAX_TIMEOUT": 45,
        "FALLBACK_111_NUM_CTX": 4096,
        "FALLBACK_111_MODEL_PATTERNS": ("qwen3:14b",),
    }
    for setting, value in pinned_settings.items():
        monkeypatch.setattr(oss, setting, value)

    canned_reply = MagicMock(status_code=200)
    canned_reply.json.return_value = {
        "response": "ok",
        "prompt_eval_count": 3,
        "eval_count": 2,
        "total_duration": 1_000_000_000,
    }
    service = oss.OllamaService(host="http://192.168.0.111:11434", model="qwen3:14b")

    with patch("services.ollama_service.requests.post", return_value=canned_reply) as post_call:
        result = service.generate("hi", timeout=120, keep_alive="24h")

    sent = post_call.call_args.kwargs
    body = sent["json"]
    assert body["model"] == "qwen2.5:7b-instruct"
    assert body["keep_alive"] == "5m"
    assert body["options"]["num_ctx"] == 4096
    assert sent["timeout"] == 45
    assert result.model == "qwen2.5:7b-instruct"


def test_111_fallback_keeps_light_model_but_caps_timeout(monkeypatch):
    """On the 111 host a light model is sent unchanged, but the timeout is capped.

    The request is made to time out so the error text exposes the effective
    timeout. The outgoing call is inspected as well, so the "keeps light model"
    half of the contract is actually verified rather than implied by the name.
    """
    from requests import Timeout
    from services import ollama_service as oss

    monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
    monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 45)
    monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
    # Pin the heavy-model patterns (as the sibling 111 tests do) so the outcome
    # does not depend on whatever the module defaults happen to be.
    monkeypatch.setattr(oss, "FALLBACK_111_MODEL_PATTERNS", ("qwen3:14b",))
    svc = oss.OllamaService(host="http://192.168.0.111:11434", model="llama3.2:latest")

    with patch("services.ollama_service.requests.post", side_effect=Timeout) as mock_post:
        resp = svc.generate("hi", timeout=120, keep_alive="24h")

    assert resp.success is False
    assert "timeout (45s)" in resp.error
    # The light model must not have been swapped out, and the caller's 120s
    # timeout must have been capped before the request was issued.
    sent = mock_post.call_args.kwargs
    assert sent["json"]["model"] == "llama3.2:latest"
    assert sent["timeout"] == 45


def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
    """A model matching the ``hermes3:*`` pattern is replaced by the 111 fallback model.

    The request body is also expected to carry the pinned keep_alive and
    num_ctx overrides.
    """
    from services import ollama_service as oss

    pinned_settings = {
        "FALLBACK_111_MODEL": "llama3.2:latest",
        "FALLBACK_111_KEEP_ALIVE": "5m",
        "FALLBACK_111_MAX_TIMEOUT": 45,
        "FALLBACK_111_NUM_CTX": 4096,
        "FALLBACK_111_MODEL_PATTERNS": ("hermes3:*",),
    }
    for setting, value in pinned_settings.items():
        monkeypatch.setattr(oss, setting, value)

    canned_reply = MagicMock(status_code=200)
    canned_reply.json.return_value = {
        "response": "ok",
        "prompt_eval_count": 3,
        "eval_count": 2,
        "total_duration": 1_000_000_000,
    }
    service = oss.OllamaService(host="http://192.168.0.111:11434", model="hermes3:latest")

    with patch("services.ollama_service.requests.post", return_value=canned_reply) as post_call:
        result = service.generate("hi", timeout=120, keep_alive="24h")

    body = post_call.call_args.kwargs["json"]
    assert body["model"] == "llama3.2:latest"
    assert body["keep_alive"] == "5m"
    assert body["options"]["num_ctx"] == 4096
    assert result.model == "llama3.2:latest"