# ewoooc/tests/test_nemotron_qwen3_compat.py

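"""
test_nemotron_qwen3_compat.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 3 / A9 — Nemotron qwen3 switch-over compatibility tests

Coverage:
  T1. qwen3 chat replies with an OpenAI tool_calls structure → _parse_tool_calls_struct handles it
      (also: when the first Ollama host fails, it is marked unhealthy and a second host is
      tried within the same request)
  T2. qwen3 returns no tool_calls → _parse_content_fallback handles it (same tolerance as NIM)
  T3. qwen3 returns tool_calls and content together → tool_calls take precedence
  T4. qwen3 connection failure → no exception reaches the caller; automatic fallback to the NIM path
  T5. qwen3 and NIM both fail → ADR-004 degradation to the Hermes rules engine
      (including the 「🟡 [規則引擎]」 marker)
  T6. NEMOTRON_OLLAMA_FIRST=false → emergency rollback to NIM-first; qwen3 is never called
  T7. Pure unit tests of the shared helpers _parse_tool_calls_struct / _parse_content_fallback

Discipline:
  - All HTTP interaction is mocked; GCP Ollama and NIM are never actually called
  - Coexists with test_nemotron_fallback and uses the same style of FakeThreat
  - The log_ai_call path can be bypassed via monkeypatch (the ai_calls table is not polluted)
"""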
from dataclasses import dataclass
from contextlib import contextmanager
import pytest


# ─────────────────────────────────────────────────────────────
# Fixtures
# ─────────────────────────────────────────────────────────────
@dataclass
class FakeThreat:
    """Stand-in threat record handed to ``NemotronDispatcher.dispatch`` in these tests.

    Every field has a default, so ``FakeThreat()`` is a complete record and a
    test only overrides what it needs (e.g. ``FakeThreat(confidence=0.45)``).
    """
    sku: str = "SKU-Q1"
    name: str = "qwen3 測試品"
    momo_price: float = 1200.0
    pchome_price: float = 980.0
    # 22.4 matches (momo_price - pchome_price) / pchome_price in percent for the
    # defaults above — NOTE(review): confirm this is the formula the service uses.
    gap_pct: float = 22.4
    # -35.0 matches the change from sales_7d_prev_amount to sales_7d_curr_amount
    # below, in percent.
    sales_7d_delta_pct: float = -35.0
    risk: str = "HIGH"
    recommended_action: str = "建議跟進降價"
    confidence: float = 0.85
    sales_7d_curr_amount: float = 78000.0
    sales_7d_prev_amount: float = 120000.0


class _FakeResp:
    def __init__(self, payload: dict, status: int = 200):
        self._payload = payload
        self.status_code = status

    def raise_for_status(self):
        if self.status_code >= 400:
            import requests
            raise requests.HTTPError(f"HTTP {self.status_code}", response=self)

    def json(self):
        return self._payload


@contextmanager
def _noop_log_ai_call(*args, **kwargs):
    """Mock log_ai_call context manager — 不寫 ai_calls 表，回傳具備所需 setter 的 stub"""
    class _Ctx:
        def set_tokens(self, **_kw): pass
        def set_provider(self, *_a, **_kw): pass
        def set_error(self, *_a, **_kw): pass
        def fallback_to_caller(self, *_a, **_kw): pass
        def set_cache_hit(self, *_a, **_kw): pass
        def add_meta(self, *_a, **_kw): pass
    yield _Ctx()


@pytest.fixture(autouse=True)
def _reset_global_state():
    """Guard against cross-test pollution: wipe ``_ALERT_CACHE`` and the Ollama
    unhealthy marks / resolved-host cache before and after every test.

    Root cause: ``dispatch()`` (line 1328) dedups via ``_is_duplicate_alert``,
    which uses the module-level ``_ALERT_CACHE``. The first test leaves
    "SKU-Q1" in the cache, later tests hit the dedup and fail with dispatched=0.
    """
    import services.nemoton_dispatcher_service as dispatcher_mod
    import services.ollama_service as ollama_mod

    def _wipe():
        dispatcher_mod._ALERT_CACHE.clear()
        ollama_mod._unhealthy_marks.clear()
        ollama_mod._resolved_host_cache['host'] = None
        ollama_mod._resolved_host_cache['ts'] = 0

    _wipe()   # clean slate going in
    yield
    _wipe()   # and leave nothing behind for the next test


def _patch_execution_methods(monkeypatch, dispatcher):
    """攔截實際 Telegram/DB 寫入，記錄被呼叫的 tool 名稱與 args（與 fallback test 共用 pattern）"""
    calls = []

    def record(kind):
        def _inner(*args, **kwargs):
            calls.append({"kind": kind, "args": args, "kwargs": kwargs})
        return _inner

    monkeypatch.setattr(dispatcher, "_exec_trigger_price_alert", record("price_alert"))
    monkeypatch.setattr(dispatcher, "_exec_add_to_recommendation", record("recommendation"))
    monkeypatch.setattr(dispatcher, "_exec_flag_for_human_review", record("human_review"))
    return calls


def _enable_qwen3_path(monkeypatch, module):
    """Turn on ``NEMOTRON_OLLAMA_FIRST`` and stub out the side-effecting
    collaborators (MCP context, ``log_ai_call``, Ollama host resolution and
    unhealthy marking)."""
    for attr, replacement in (
        ("NEMOTRON_OLLAMA_FIRST", True),
        ("log_ai_call", _noop_log_ai_call),
        ("build_mcp_context", lambda: "MCP-MOCK"),
    ):
        monkeypatch.setattr(module, attr, replacement)

    # Import here so the module path is resolvable even if it is never called.
    import services.ollama_service as ollama_svc

    def _fixed_host():
        return "http://34.87.90.216:11434"

    def _ignore_unhealthy(*a, **kw):
        return None

    monkeypatch.setattr(ollama_svc, "resolve_ollama_host", _fixed_host)
    monkeypatch.setattr(ollama_svc, "mark_unhealthy", _ignore_unhealthy)


# ─────────────────────────────────────────────────────────────
# T1. qwen3 OpenAI tool_calls 結構 → 正確解析
# ─────────────────────────────────────────────────────────────
def test_qwen3_tool_calls_struct_parsed_and_executed(monkeypatch):
    """qwen3 replies with a standard OpenAI ``tool_calls`` structure; the
    dispatcher should skip NIM and go straight to tool execution."""
    import services.nemoton_dispatcher_service as module

    _enable_qwen3_path(monkeypatch, module)

    # Canned GCP Ollama /api/chat reply in the OpenAI-compatible shape.
    alert_args = {
        "sku": "SKU-Q1",
        "name": "qwen3 測試品",
        "gap_pct": 22.4,
        "sales_delta": -35.0,
        "action": "跟進降價至 $980",
        "confidence": 0.85,
    }
    tool_call = {"function": {"name": "trigger_price_alert", "arguments": alert_args}}
    fake_body = {
        "message": {"role": "assistant", "content": "", "tool_calls": [tool_call]},
        "prompt_eval_count": 320,
        "eval_count": 64,
        "done": True,
    }

    def _fake_post(*a, **kw):
        return _FakeResp(fake_body)

    monkeypatch.setattr(module.requests, "post", _fake_post)

    dispatcher = module.NemotronDispatcher()
    calls = _patch_execution_methods(monkeypatch, dispatcher)

    # The NIM path must not fire at all (proves qwen3 really is the primary path).
    # Each invocation is recorded as well as raising, and asserted on below.
    nim_hits = []

    def _nim_should_not_be_called(*a, **kw):
        nim_hits.append(True)
        raise AssertionError("NIM 不應被呼叫，qwen3 已成功")

    monkeypatch.setattr(dispatcher, "_call_nim", _nim_should_not_be_called)

    result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})

    assert not nim_hits, "qwen3 成功時 NIM 不可被觸發"
    assert result["dispatched"] == 1
    nim_stats = result["nim_stats"]
    assert nim_stats.get("provider") == "gcp_ollama"
    assert nim_stats.get("model") == module.NEMOTRON_OLLAMA_MODEL
    assert calls and calls[0]["kind"] == "price_alert"


def test_qwen3_retries_secondary_when_primary_chat_fails(monkeypatch):
    """When the qwen3 chat call to the first host fails, that host should be
    marked unhealthy and the second host tried within the same request."""
    import requests
    import services.nemoton_dispatcher_service as module
    import services.ollama_service as ollama_module

    _enable_qwen3_path(monkeypatch, module)

    # Host resolution hands out the primary first, then the secondary.
    host_sequence = iter([
        "http://34.87.90.216:11434",
        "http://34.21.145.224:11434",
    ])
    marked = []

    def _next_host():
        return next(host_sequence)

    def _remember_unhealthy(host):
        marked.append(host)

    monkeypatch.setattr(ollama_module, "resolve_ollama_host", _next_host)
    monkeypatch.setattr(ollama_module, "mark_unhealthy", _remember_unhealthy)

    alert_args = {
        "sku": "SKU-Q1",
        "name": "qwen3 測試品",
        "gap_pct": 22.4,
        "sales_delta": -35.0,
        "action": "跟進降價",
        "confidence": 0.85,
    }
    fake_body = {
        "message": {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {"function": {"name": "trigger_price_alert", "arguments": alert_args}},
            ],
        },
        "prompt_eval_count": 200,
        "eval_count": 40,
    }

    # Scripted outcomes: first POST times out, second one succeeds.
    scripted = [requests.Timeout("primary down"), _FakeResp(fake_body)]

    def fake_post(*args, **kwargs):
        outcome = scripted.pop(0)
        if not isinstance(outcome, Exception):
            return outcome
        raise outcome

    monkeypatch.setattr(module.requests, "post", fake_post)

    dispatcher = module.NemotronDispatcher()
    calls = _patch_execution_methods(monkeypatch, dispatcher)
    result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})

    assert marked == ["http://34.87.90.216:11434"]
    assert result["nim_stats"].get("host") == "http://34.21.145.224:11434"
    assert result["dispatched"] == 1
    assert calls[0]["kind"] == "price_alert"


# ─────────────────────────────────────────────────────────────
# T2. qwen3 沒回 tool_calls 但 content 含 JSON list → fallback 解析
# ─────────────────────────────────────────────────────────────
def test_qwen3_content_only_fallback_parsing(monkeypatch):
    """qwen3 puts the tool call into ``content`` (a JSON list of dicts) and
    leaves ``tool_calls`` empty; ``_parse_content_fallback`` should pick it up.

    The test also requires that NIM is never invoked. The invocation is
    recorded in a flag and asserted on after ``dispatch`` returns, mirroring
    the tool_calls test, so the check still holds if ``dispatch`` swallows the
    ``AssertionError`` raised inside the ``_call_nim`` replacement.
    """
    import services.nemoton_dispatcher_service as module

    _enable_qwen3_path(monkeypatch, module)

    content_payload = (
        '[{"name": "flag_for_human_review", '
        '"parameters": {"sku": "SKU-Q1", "name": "qwen3 測試品", '
        '"concern": "信心不足", "confidence": 0.45}}]'
    )
    fake_body = {
        "message": {"role": "assistant", "content": content_payload, "tool_calls": []},
        "prompt_eval_count": 100,
        "eval_count": 30,
    }
    monkeypatch.setattr(module.requests, "post", lambda *a, **kw: _FakeResp(fake_body))

    dispatcher = module.NemotronDispatcher()
    calls = _patch_execution_methods(monkeypatch, dispatcher)

    # Record the call as well as raising: the raise fails fast if it propagates,
    # the flag catches the case where the dispatcher absorbs the exception.
    nim_called = {"v": False}

    def _nim_should_not_be_called(*a, **kw):
        nim_called["v"] = True
        raise AssertionError("NIM 不應被呼叫")

    monkeypatch.setattr(dispatcher, "_call_nim", _nim_should_not_be_called)

    result = dispatcher.dispatch([FakeThreat(confidence=0.45)], hermes_stats={"duration_sec": 1.0})

    assert nim_called["v"] is False, "NIM must not be invoked when the qwen3 content fallback succeeds"
    assert result["dispatched"] == 1
    assert calls and calls[0]["kind"] == "human_review"


# ─────────────────────────────────────────────────────────────
# T3. tool_calls + content 同時存在 → 優先 tool_calls
# ─────────────────────────────────────────────────────────────
def test_qwen3_tool_calls_takes_precedence_over_content(monkeypatch):
    """When a reply carries both ``tool_calls`` and tool-like ``content``, the
    structured ``tool_calls`` entry is the one that gets executed."""
    import services.nemoton_dispatcher_service as module

    _enable_qwen3_path(monkeypatch, module)

    # content names a different tool (human review) than tool_calls (price alert),
    # so the recorded kind shows which source was used.
    competing_content = '[{"name": "flag_for_human_review", "parameters": {"sku": "X"}}]'
    alert_args = {
        "sku": "SKU-Q1",
        "name": "qwen3 測試品",
        "gap_pct": 22.4,
        "sales_delta": -35.0,
        "action": "降價",
        "confidence": 0.85,
    }
    fake_body = {
        "message": {
            "role": "assistant",
            "content": competing_content,
            "tool_calls": [
                {"function": {"name": "trigger_price_alert", "arguments": alert_args}},
            ],
        },
        "prompt_eval_count": 200,
        "eval_count": 40,
    }

    def _fake_post(*a, **kw):
        return _FakeResp(fake_body)

    monkeypatch.setattr(module.requests, "post", _fake_post)

    dispatcher = module.NemotronDispatcher()
    calls = _patch_execution_methods(monkeypatch, dispatcher)
    result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})

    assert result["dispatched"] == 1
    assert calls[0]["kind"] == "price_alert", "tool_calls 結構必須優先於 content fallback"


# ─────────────────────────────────────────────────────────────
# T4. qwen3 連線失敗 → 不爆，自動 fallback 到 NIM
# ─────────────────────────────────────────────────────────────
def test_qwen3_connection_error_falls_back_to_nim(monkeypatch):
    """When GCP Ollama is unreachable, the dispatcher should quietly switch to
    NIM and still end up dispatching."""
    import requests
    import services.nemoton_dispatcher_service as module

    _enable_qwen3_path(monkeypatch, module)

    def _unreachable(*a, **kw):
        raise requests.ConnectionError("GCP unreachable")

    monkeypatch.setattr(module.requests, "post", _unreachable)

    # NIM path: provide a key and quota, and have _call_nim return one tool call.
    monkeypatch.setattr(module, "NIM_API_KEY", "fake-key")
    monkeypatch.setattr(module, "_check_nim_quota", lambda: True)

    dispatcher = module.NemotronDispatcher()
    calls = _patch_execution_methods(monkeypatch, dispatcher)
    nim_hits = []

    def _fake_nim(threats):
        nim_hits.append(True)
        tool_call = {
            "tool": "trigger_price_alert",
            "args": {
                "sku": "SKU-Q1", "name": "qwen3 測試品",
                "gap_pct": 22.4, "sales_delta": -35.0,
                "action": "降價", "confidence": 0.85,
            },
        }
        stats = {"total_tokens": 256, "quota_used": 5}
        return ([tool_call], stats)

    monkeypatch.setattr(dispatcher, "_call_nim", _fake_nim)

    result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})

    assert nim_hits, "qwen3 失敗後必須 fallback 至 NIM"
    assert result["dispatched"] == 1
    assert result["nim_stats"].get("total_tokens") == 256
    assert calls[0]["kind"] == "price_alert"


# ─────────────────────────────────────────────────────────────
# T5. qwen3 + NIM 全失敗 → ADR-004 Hermes 規則引擎兜底
# ─────────────────────────────────────────────────────────────
def test_qwen3_and_nim_both_fail_falls_back_to_hermes_rules(monkeypatch):
    """When both paths fail, the Hermes rules engine must take over and the
    🟡 [規則引擎] marker must be preserved."""
    import requests
    import services.nemoton_dispatcher_service as module

    _enable_qwen3_path(monkeypatch, module)

    def _qwen3_down(*a, **kw):
        raise requests.ConnectionError("qwen3 down")

    monkeypatch.setattr(module.requests, "post", _qwen3_down)

    monkeypatch.setattr(module, "NIM_API_KEY", "fake-key")
    monkeypatch.setattr(module, "_check_nim_quota", lambda: True)

    dispatcher = module.NemotronDispatcher()

    # _call_nim is intercepted too and raises a timeout.
    def _nim_timeout(threats):
        raise requests.Timeout("NIM timeout")

    monkeypatch.setattr(dispatcher, "_call_nim", _nim_timeout)

    # Intercept the _exec_* methods the rules engine calls and capture their
    # arguments so the 🟡 markers in the action / footprint text can be checked.
    # The rules engine passes some of these positionally (lines 787-795,
    # _exec_trigger_price_alert signature: sku, name, gap_pct, sales_delta,
    # action, confidence, ...), so positional and keyword arguments are merged
    # into one dict before .get('action') is used.
    captured = []

    def _merge_positional(name_order, args, kwargs):
        merged = dict(kwargs)
        # zip stops at the shorter sequence, so surplus positionals are ignored;
        # setdefault lets an explicit keyword win over a positional value.
        for param, val in zip(name_order, args):
            merged.setdefault(param, val)
        return merged

    def _positional_recorder(kind, name_order):
        def _record(*args, **kwargs):
            captured.append((kind, _merge_positional(name_order, args, kwargs)))
        return _record

    record_review = _positional_recorder(
        "human_review",
        ['sku', 'name', 'concern', 'confidence', 'footprint',
         'momo_price', 'comp_price', 'gap_pct', 'sales_delta',
         'revenue_loss_7d', 'recommended_price'],
    )
    record_alert = _positional_recorder(
        "price_alert",
        ['sku', 'name', 'gap_pct', 'sales_delta', 'action', 'confidence',
         'momo_price', 'comp_price', 'footprint',
         'revenue_loss_7d', 'recommended_price'],
    )

    def record_reco(*args, **kwargs):
        # Only keyword arguments are kept for recommendations.
        captured.append(("recommendation", kwargs))

    monkeypatch.setattr(dispatcher, "_exec_flag_for_human_review", record_review)
    monkeypatch.setattr(dispatcher, "_exec_trigger_price_alert", record_alert)
    monkeypatch.setattr(dispatcher, "_exec_add_to_recommendation", record_reco)

    # gap_pct=22.4 + risk=HIGH → rule 2: trigger_price_alert, whose action
    # should carry the 🟡 [規則引擎] prefix.
    result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})

    assert result["nim_stats"].get("degraded") is True, "ADR-004 降級旗標必須存在"
    assert captured, "規則引擎必須兜底執行至少一次"
    kind, merged_args = captured[0]
    assert kind == "price_alert"
    assert "🟡 [規則引擎]" in merged_args.get("action", ""), \
        "ADR-004 鐵律：Hermes 規則引擎兜底時必須帶『🟡 [規則引擎]』標記"
    # footprint should also carry the 🟡 [降級模式 ADR-004] marker
    # (shown in the Telegram alert header).
    assert "🟡 [降級模式 ADR-004]" in merged_args.get("footprint", "")


# ─────────────────────────────────────────────────────────────
# T6. feature flag 顯式 false → 緊急退路，qwen3 完全不被呼叫
# ─────────────────────────────────────────────────────────────
def test_flag_false_preserves_nim_first_emergency_path(monkeypatch):
    """With NEMOTRON_OLLAMA_FIRST=false, dispatch must not touch GCP Ollama
    and ``nim_stats`` must not carry provider='gcp_ollama'."""
    import services.nemoton_dispatcher_service as module

    # The flag is not switched on here; set it to False explicitly to be safe.
    monkeypatch.setattr(module, "NEMOTRON_OLLAMA_FIRST", False)

    # Any requests.post call counts as a failure (the NIM path itself is
    # served by the _call_nim replacement below).
    ollama_posts = []

    def _forbidden_post(*a, **kw):
        ollama_posts.append(True)
        raise AssertionError("flag=false 時不可呼叫 GCP Ollama HTTP")

    monkeypatch.setattr(module.requests, "post", _forbidden_post)

    monkeypatch.setattr(module, "NIM_API_KEY", "fake-key")
    monkeypatch.setattr(module, "_check_nim_quota", lambda: True)

    dispatcher = module.NemotronDispatcher()
    calls = _patch_execution_methods(monkeypatch, dispatcher)

    def _fake_nim(threats):
        tool_call = {
            "tool": "trigger_price_alert",
            "args": {
                "sku": "SKU-Q1", "name": "qwen3 測試品",
                "gap_pct": 22.4, "sales_delta": -35.0,
                "action": "降價", "confidence": 0.85,
            },
        }
        return ([tool_call], {"total_tokens": 100, "quota_used": 1})

    monkeypatch.setattr(dispatcher, "_call_nim", _fake_nim)

    result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})

    assert not ollama_posts
    assert result["dispatched"] == 1
    assert result["nim_stats"].get("provider") in (None, "nim"), \
        "flag=false 時 nim_stats 不應帶 provider='gcp_ollama'"


# ─────────────────────────────────────────────────────────────
# T7. 共用 helper 純單元測試（OpenAI tool_calls schema 邊界）
# ─────────────────────────────────────────────────────────────
def test_parse_tool_calls_struct_handles_string_arguments():
    """NIM sends ``arguments`` as a JSON string, qwen3 sends a dict — both
    must be accepted, and malformed entries must not raise."""
    from services.nemoton_dispatcher_service import _parse_tool_calls_struct

    cases = [
        # NIM style: arguments is a JSON string.
        (
            [{"function": {"name": "foo", "arguments": '{"a": 1, "b": "x"}'}}],
            [{"tool": "foo", "args": {"a": 1, "b": "x"}}],
        ),
        # qwen3/Ollama style: arguments is already a dict.
        (
            [{"function": {"name": "bar", "arguments": {"a": 2}}}],
            [{"tool": "bar", "args": {"a": 2}}],
        ),
        # Edge cases: empty list / None → empty result.
        ([], []),
        (None, []),
        # Broken JSON keeps the tool with empty args.
        (
            [{"function": {"name": "baz", "arguments": "{not json"}}],
            [{"tool": "baz", "args": {}}],
        ),
        # An entry without a name is dropped.
        ([{"function": {"arguments": "{}"}}], []),
    ]
    for raw, expected in cases:
        assert _parse_tool_calls_struct(raw) == expected


def test_parse_content_fallback_handles_various_shapes():
    """``_parse_content_fallback`` accepts the known list shapes and returns
    ``[]`` for anything that is not a JSON list."""
    from services.nemoton_dispatcher_service import _parse_content_fallback

    cases = [
        # Older OpenAI style: [{"name", "parameters"}].
        (
            '[{"name": "foo", "parameters": {"a": 1}}]',
            [{"tool": "foo", "args": {"a": 1}}],
        ),
        # Name nested under "function", arguments as a JSON string.
        (
            '[{"function": {"name": "bar"}, "arguments": "{\\"b\\": 2}"}]',
            [{"tool": "bar", "args": {"b": 2}}],
        ),
        # Empty string / non-JSON / JSON that is not a list → [].
        ("", []),
        ("not json", []),
        ('{"a":1}', []),
    ]
    for raw, expected in cases:
        assert _parse_content_fallback(raw) == expected