ewoooc/tests/test_ollama_resolve.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
ADR-027 Phase 2 — Ollama host resolve / health probe / unhealthy mark 驗證

涵蓋：
  B3 — _is_reachable HTTP probe（取代純 TCP）
  B4 — mark_unhealthy / _is_unhealthy / cache 失效互動
  B1/B2 — config.get_ollama_host / get_embedding_host / get_hermes_url lazy 行為
"""

import os
import time
import importlib
from unittest.mock import patch, MagicMock

import pytest


# ─────────────────────────────────────────────────────────────────────────────
# 必要：每個 test 前清空 ollama_service 的 module-level cache，避免 cross-test 污染
# ─────────────────────────────────────────────────────────────────────────────
@pytest.fixture(autouse=True)
def _reset_ollama_caches():
    """Wipe ``ollama_service`` module-level state around every test.

    The resolved-host cache entry and the unhealthy marks are reset before
    the test body runs and once more after it finishes, so state written by
    one test is not visible to the next.
    """
    from services import ollama_service as oss

    def _wipe():
        # Identical reset on the way in and on the way out.
        oss._resolved_host_cache['host'] = None
        oss._resolved_host_cache['ts'] = 0
        oss._unhealthy_marks.clear()

    _wipe()
    yield
    _wipe()


# ═══════════════════════════════════════════════════════════════════════════
# B3 — HTTP probe 取代 TCP
# ═══════════════════════════════════════════════════════════════════════════

def test_resolve_picks_primary_when_http_200():
    """Primary is returned when the probe answers HTTP 200 on ``/api/version``."""
    from services import ollama_service as oss

    primary_url = 'http://primary.example:11434'
    ok_response = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=ok_response) as probe:
        chosen = oss.resolve_ollama_host(
            primary=primary_url,
            secondary='http://secondary.example:11434',
            fallback='http://fallback.example:11434',
        )

    assert chosen == primary_url
    # The most recent probe must have targeted the version endpoint.
    positional_args = probe.call_args[0]
    assert positional_args[0] == 'http://primary.example:11434/api/version'


def test_resolve_falls_back_when_http_500():
    """B3 key case: a probe answering HTTP 500 is treated as unreachable.

    Scenario from the original note: TCP still connects but the process is
    stuck, so only an HTTP-level probe can detect the failure.
    """
    from services import ollama_service as oss

    candidates = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    server_error = MagicMock(status_code=500)
    with patch('services.ollama_service.requests.get', return_value=server_error):
        chosen = oss.resolve_ollama_host(**candidates)

    # Every probe sees the 500, so the result is the final fallback.
    assert chosen == candidates['fallback']


def test_resolve_falls_back_on_request_exception():
    """A probe that raises makes the resolver return the fallback host."""
    from services import ollama_service as oss

    candidates = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    probe_failure = Exception('connect timeout')
    with patch('services.ollama_service.requests.get', side_effect=probe_failure):
        chosen = oss.resolve_ollama_host(**candidates)

    assert chosen == candidates['fallback']


def test_resolve_uses_primary_proxy_rescue_before_secondary():
    """When the direct GCP-A primary is unreachable, try the 110 primary proxy before GCP-B."""
    from services import ollama_service as oss

    direct_probe = f"{oss.OLLAMA_HOST_PRIMARY}/api/version"
    proxy_probe = f"{oss.OLLAMA_HOST_PRIMARY_PROXY}/api/version"
    proxy_ok = MagicMock(status_code=200)
    probed = []

    def fake_get(url, timeout=None):
        # Record every probe so the exact order can be asserted afterwards.
        probed.append(url)
        if url == direct_probe:
            raise Exception("primary direct timeout")
        if url != proxy_probe:
            # Any other host being probed means the resolver went too far.
            raise AssertionError(f"should not reach {url}")
        return proxy_ok

    with patch('services.ollama_service.requests.get', side_effect=fake_get):
        chosen = oss.resolve_ollama_host()

    assert chosen == oss.OLLAMA_HOST_PRIMARY_PROXY
    assert probed == [direct_probe, proxy_probe]


def test_resolve_skips_recent_unhealthy_direct_primary_and_uses_proxy(monkeypatch):
    """If host_health already flags the GCP-A direct host as unhealthy, skip its timeout wait."""
    from datetime import datetime
    from services import ollama_service as oss

    direct_probe = f"{oss.OLLAMA_HOST_PRIMARY}/api/version"
    proxy_probe = f"{oss.OLLAMA_HOST_PRIMARY_PROXY}/api/version"

    class _StubResult:
        def fetchone(self):
            # One row stamped "now": (False, "ConnectTimeout", timestamp).
            return (False, "ConnectTimeout", datetime.now())

    class _StubSession:
        def execute(self, *args, **kwargs):
            return _StubResult()

        def close(self):
            pass

    def _open_session():
        return _StubSession()

    proxy_ok = MagicMock(status_code=200)
    probed = []

    def fake_get(url, timeout=None):
        probed.append(url)
        if url == direct_probe:
            raise AssertionError("recent unhealthy direct host should be skipped")
        if url != proxy_probe:
            raise AssertionError(f"should not reach {url}")
        return proxy_ok

    # Turn the host_health skip on and stub the DB session it reads from.
    monkeypatch.setenv("OLLAMA_RESOLVE_HOST_HEALTH_SKIP_ENABLED", "true")
    monkeypatch.setenv("OLLAMA_RESOLVE_HOST_HEALTH_SKIP_WINDOW_MINUTES", "20")
    monkeypatch.setattr("database.manager.get_session", _open_session)

    with patch('services.ollama_service.requests.get', side_effect=fake_get):
        chosen = oss.resolve_ollama_host()

    assert chosen == oss.OLLAMA_HOST_PRIMARY_PROXY
    # Only the proxy was probed; the direct host was never contacted.
    assert probed == [proxy_probe]


def test_recent_direct_host_unhealthy_matches_actual_host_url(monkeypatch):
    """An unhealthy record for the old GCP-A must not wrongly block the new GCP-A.

    The stub session returns an unhealthy row only when the query parameters
    carry the current primary URL; the test also pins the exact parameters
    passed to ``execute``.
    """
    from datetime import datetime
    from services import ollama_service as oss

    captured_params = []

    class _StubResult:
        def __init__(self, row):
            self.row = row

        def fetchone(self):
            return self.row

    class _StubSession:
        def execute(self, _statement, params):
            # Snapshot the bound parameters for the assertion below.
            captured_params.append(dict(params))
            matches_primary = params.get("host_url") == oss.OLLAMA_HOST_PRIMARY
            row = (False, "ConnectTimeout", datetime.now()) if matches_primary else None
            return _StubResult(row)

        def close(self):
            pass

    def _open_session():
        return _StubSession()

    monkeypatch.setenv("OLLAMA_RESOLVE_HOST_HEALTH_SKIP_ENABLED", "true")
    monkeypatch.setattr("database.manager.get_session", _open_session)

    verdict = oss._recent_direct_host_unhealthy(oss.OLLAMA_HOST_PRIMARY)
    assert verdict is True

    expected_params = {"host_label": "Primary (GCP)", "host_url": oss.OLLAMA_HOST_PRIMARY}
    assert captured_params == [expected_params]


def test_retired_gcp_a_host_is_not_approved(monkeypatch):
    """The retired GCP-A address must no longer be accepted via the env allow-list."""
    from services import ollama_service as oss

    retired_url = "http://34.143.170.20:11434"
    monkeypatch.setenv("OLLAMA_HOST_PRIMARY", retired_url)

    approved = oss.is_approved_ollama_host(retired_url)
    assert approved is False

    # With the retired URL in the env, the supplied default must come back.
    default_primary = oss.OLLAMA_HOST_PRIMARY
    resolved = oss.approved_ollama_env("OLLAMA_HOST_PRIMARY", default_primary)
    assert resolved == default_primary


# ═══════════════════════════════════════════════════════════════════════════
# B4 — mark_unhealthy 行為
# ═══════════════════════════════════════════════════════════════════════════

def test_mark_unhealthy_triggers_immediate_fallback():
    """B4: once primary is marked unhealthy, the three-host setup moves to secondary (not fallback)."""
    from services import ollama_service as oss

    candidates = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    oss.mark_unhealthy(candidates['primary'])

    # The probe would answer 200 for any host, yet the marked one must be skipped.
    always_ok = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=always_ok):
        chosen = oss.resolve_ollama_host(**candidates)

    # Three-host cascade: primary unavailable -> secondary, not fallback.
    assert chosen == candidates['secondary']


def test_mark_unhealthy_primary_and_secondary_falls_to_fallback():
    """B4 three-host cascade: fallback is used only when primary and secondary are both marked."""
    from services import ollama_service as oss

    candidates = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    for tier in ('primary', 'secondary'):
        oss.mark_unhealthy(candidates[tier])

    always_ok = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=always_ok):
        chosen = oss.resolve_ollama_host(**candidates)

    assert chosen == candidates['fallback']


def test_unhealthy_mark_expires_after_ttl():
    """A sufficiently old unhealthy mark no longer keeps primary from being chosen."""
    from services import ollama_service as oss

    primary_url = 'http://primary.example:11434'
    oss.mark_unhealthy(primary_url)
    # Backdate the mark so it looks like it was set 31 seconds ago
    # (presumably just past the mark TTL — confirm against ollama_service).
    backdated = time.time() - 31
    oss._unhealthy_marks[primary_url] = backdated

    always_ok = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=always_ok):
        chosen = oss.resolve_ollama_host(
            primary=primary_url,
            secondary='http://secondary.example:11434',
            fallback='http://fallback.example:11434',
        )

    assert chosen == primary_url


def test_mark_unhealthy_invalidates_resolved_cache():
    """B4 focus: after a host is marked unhealthy, a stale cache must not hand primary back."""
    from services import ollama_service as oss

    candidates = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }

    def _resolve_with_all_hosts_ok():
        # Each call gets its own fresh 200 response mock and its own patch.
        ok_response = MagicMock(status_code=200)
        with patch('services.ollama_service.requests.get', return_value=ok_response):
            return oss.resolve_ollama_host(**candidates)

    # Step 1: let primary get resolved (and cached).
    before_mark = _resolve_with_all_hosts_ok()
    assert before_mark == candidates['primary']

    # Step 2: simulate a failed generate call by marking primary unhealthy.
    oss.mark_unhealthy(candidates['primary'])

    # Step 3: even while the cache is still inside its TTL (120s per the
    # original note), the next resolve must skip primary and, in the
    # three-host setup, land on secondary.
    after_mark = _resolve_with_all_hosts_ok()
    assert after_mark == candidates['secondary']


def test_mark_unhealthy_handles_trailing_slash():
    """A mark set with a trailing slash is seen for both spellings of the URL."""
    from services import ollama_service as oss

    bare_url = 'http://primary.example:11434'
    slashed_url = 'http://primary.example:11434/'
    oss.mark_unhealthy(slashed_url)

    for spelling in (bare_url, slashed_url):
        assert oss._is_unhealthy(spelling) is True


def test_mark_unhealthy_ignores_empty():
    """Marking an empty string or ``None`` records nothing."""
    from services import ollama_service as oss

    for blank in ('', None):
        oss.mark_unhealthy(blank)

    recorded = len(oss._unhealthy_marks)
    assert recorded == 0


# ═══════════════════════════════════════════════════════════════════════════
# B1/B2 — config lazy getters
# ═══════════════════════════════════════════════════════════════════════════

def test_get_ollama_host_uses_approved_env_when_set(monkeypatch):
    """``get_ollama_host`` returns the ``OLLAMA_HOST`` env value used here unchanged."""
    env_host = 'http://34.21.145.224:11434'
    monkeypatch.setenv('OLLAMA_HOST', env_host)

    import config
    # Reload so the env change takes effect in the module.
    importlib.reload(config)

    resolved = config.get_ollama_host()
    assert resolved == env_host


def test_get_ollama_host_rejects_unapproved_env(monkeypatch):
    """The ``OLLAMA_HOST`` value set here is not returned; ``34.87.90.216`` is."""
    monkeypatch.setenv('OLLAMA_HOST', 'http://192.168.0.188:11434')

    always_ok = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=always_ok):
        import config
        importlib.reload(config)
        resolved = config.get_ollama_host()

    expected_host = 'http://34.87.90.216:11434'
    assert resolved == expected_host


def test_get_ollama_host_falls_back_to_resolve_without_env(monkeypatch):
    """Without ``OLLAMA_HOST`` in the env, ``get_ollama_host`` still yields an http URL."""
    monkeypatch.delenv('OLLAMA_HOST', raising=False)

    always_ok = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=always_ok):
        import config
        # Reload so the module observes the env without OLLAMA_HOST.
        importlib.reload(config)
        resolved = config.get_ollama_host()

    # The primary URL is driven by env OLLAMA_HOST_PRIMARY (original note:
    # defaults to GCP-SSD 34.87.90.216), so only the scheme is pinned here.
    scheme_ok = resolved.startswith('http://')
    assert scheme_ok


def test_config_ollama_compat_constants_do_not_probe_network(monkeypatch):
    """Reloading ``config`` fills the compat constants without issuing any HTTP probe."""
    env_overrides = {
        'OLLAMA_HOST': 'http://192.168.0.188:11434',
        'HERMES_URL': 'http://192.168.0.188:11434',
        'EMBEDDING_HOST': 'http://192.168.0.188:11434',
        'OLLAMA_HOST_PRIMARY': 'http://34.87.90.216:11434',
    }
    for var_name, var_value in env_overrides.items():
        monkeypatch.setenv(var_name, var_value)

    with patch('services.ollama_service.requests.get') as probe:
        import config
        importlib.reload(config)

    probe.assert_not_called()
    # All three constants end up on the OLLAMA_HOST_PRIMARY value, not on the
    # 192.168.0.188 address supplied through their own env vars.
    expected_host = 'http://34.87.90.216:11434'
    assert config.OLLAMA_HOST == expected_host
    assert config.HERMES_URL == expected_host
    assert config.EMBEDDING_HOST == expected_host


def test_get_embedding_host_prefers_env(monkeypatch):
    """``get_embedding_host`` returns the ``EMBEDDING_HOST`` env value set here."""
    env_host = 'http://192.168.0.111:11434'
    monkeypatch.setenv('EMBEDDING_HOST', env_host)

    import config
    importlib.reload(config)

    resolved = config.get_embedding_host()
    assert resolved == env_host


def test_get_hermes_url_prefers_env(monkeypatch):
    """``get_hermes_url`` returns the ``HERMES_URL`` env value set here."""
    env_url = 'http://34.87.90.216:11434'
    monkeypatch.setenv('HERMES_URL', env_url)

    import config
    importlib.reload(config)

    resolved = config.get_hermes_url()
    assert resolved == env_url


# ═══════════════════════════════════════════════════════════════════════════
# Cache TTL 行為
# ═══════════════════════════════════════════════════════════════════════════

def test_resolve_uses_cache_within_ttl():
    """A second resolve inside the same TTL must not issue another HTTP probe.

    Besides counting probes, the returned hosts are checked: the first call
    must yield primary (every probe answers 200) and the cached second call
    must hand back that same host. Without these checks a cache hit that
    returned a wrong or empty value would go unnoticed.
    """
    from services import ollama_service as oss

    hosts = {
        'primary': 'http://primary.example:11434',
        'secondary': 'http://secondary.example:11434',
        'fallback': 'http://fallback.example:11434',
    }
    fake_resp = MagicMock(status_code=200)
    with patch('services.ollama_service.requests.get', return_value=fake_resp) as mock_get:
        first = oss.resolve_ollama_host(**hosts)
        second = oss.resolve_ollama_host(**hosts)

    # Cache hit -> the second call does not touch HTTP.
    assert mock_get.call_count == 1
    # The cached answer must be the host that was actually resolved.
    assert first == hosts['primary']
    assert second == first


def test_111_fallback_downgrades_heavy_model_and_shortens_keep_alive(monkeypatch):
    """111 is the final fallback; 14B+ models must not stay resident on this 16GB Mac."""
    from services import ollama_service as oss

    guard_settings = {
        "FALLBACK_111_MODEL": "qwen2.5:7b-instruct",
        "FALLBACK_111_KEEP_ALIVE": "5m",
        "FALLBACK_111_MAX_TIMEOUT": 20,
        "FALLBACK_111_NUM_CTX": 4096,
        "FALLBACK_111_NUM_PREDICT": 512,
        "FALLBACK_111_MODEL_PATTERNS": ("qwen3:14b",),
    }
    for attr_name, attr_value in guard_settings.items():
        monkeypatch.setattr(oss, attr_name, attr_value)

    api_reply = MagicMock(status_code=200)
    api_reply.json.return_value = {
        "response": "ok",
        "prompt_eval_count": 3,
        "eval_count": 2,
        "total_duration": 1_000_000_000,
    }
    service = oss.OllamaService(host="http://192.168.0.111:11434", model="qwen3:14b")

    # The caller asks for far more than the guard settings allow.
    with patch("services.ollama_service.requests.post", return_value=api_reply) as post_spy:
        result = service.generate(
            "hi",
            timeout=120,
            keep_alive="24h",
            options={"num_ctx": 131072, "num_predict": 4096},
        )

    sent = post_spy.call_args.kwargs
    body = sent["json"]
    assert body["model"] == "qwen2.5:7b-instruct"
    assert body["keep_alive"] == "5m"
    assert body["options"]["num_ctx"] == 4096
    assert body["options"]["num_predict"] == 512
    assert sent["timeout"] == 20
    assert result.model == "qwen2.5:7b-instruct"


def test_111_fallback_keeps_light_model_but_caps_timeout(monkeypatch):
    """On the 111 host a request timeout is reported with the capped 20s value.

    NOTE(review): despite the name, only the failure flag and the timeout
    text are asserted; the model sent in the request is not checked.
    """
    from requests import Timeout
    from services import ollama_service as oss

    guard_settings = {
        "FALLBACK_111_KEEP_ALIVE": "5m",
        "FALLBACK_111_MAX_TIMEOUT": 20,
        "FALLBACK_111_NUM_CTX": 4096,
        "FALLBACK_111_NUM_PREDICT": 512,
    }
    for attr_name, attr_value in guard_settings.items():
        monkeypatch.setattr(oss, attr_name, attr_value)

    service = oss.OllamaService(host="http://192.168.0.111:11434", model="llama3.2:latest")

    # Every POST raises Timeout; the caller asked for 120s.
    with patch("services.ollama_service.requests.post", side_effect=Timeout):
        result = service.generate("hi", timeout=120, keep_alive="24h")

    assert result.success is False
    assert "timeout (20s)" in result.error


def test_111_fallback_downgrades_hermes_context_heavy_model(monkeypatch):
    """On the 111 host a model matching ``hermes3:*`` is swapped for the fallback model.

    Also pins the capped keep_alive / num_ctx / num_predict values and the
    request timeout. ``FALLBACK_111_MAX_TIMEOUT`` is patched to 20 here, and
    the timeout assertion mirrors the one in the sibling qwen3 downgrade
    test so the cap is verified on this path too.
    """
    from services import ollama_service as oss

    monkeypatch.setattr(oss, "FALLBACK_111_MODEL", "llama3.2:latest")
    monkeypatch.setattr(oss, "FALLBACK_111_KEEP_ALIVE", "5m")
    monkeypatch.setattr(oss, "FALLBACK_111_MAX_TIMEOUT", 20)
    monkeypatch.setattr(oss, "FALLBACK_111_NUM_CTX", 4096)
    monkeypatch.setattr(oss, "FALLBACK_111_NUM_PREDICT", 512)
    monkeypatch.setattr(oss, "FALLBACK_111_MODEL_PATTERNS", ("hermes3:*",))

    fake_resp = MagicMock(status_code=200)
    fake_resp.json.return_value = {
        "response": "ok",
        "prompt_eval_count": 3,
        "eval_count": 2,
        "total_duration": 1_000_000_000,
    }
    svc = oss.OllamaService(host="http://192.168.0.111:11434", model="hermes3:latest")

    # The caller passes no explicit options, only a long timeout and keep_alive.
    with patch("services.ollama_service.requests.post", return_value=fake_resp) as mock_post:
        resp = svc.generate("hi", timeout=120, keep_alive="24h")

    payload = mock_post.call_args.kwargs["json"]
    assert payload["model"] == "llama3.2:latest"
    assert payload["keep_alive"] == "5m"
    assert payload["options"]["num_ctx"] == 4096
    assert payload["options"]["num_predict"] == 512
    # The caller's 120s must be capped to the patched maximum.
    assert mock_post.call_args.kwargs["timeout"] == 20
    assert resp.model == "llama3.2:latest"