Files
ewoooc/tests/test_nemotron_qwen3_compat.py
OoO ba5fe06b13
Some checks failed
CD Pipeline / deploy (push) Has been cancelled
fix: update ollama primary host
2026-06-18 14:24:55 +08:00

514 lines
22 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
test_nemotron_qwen3_compat.py
─────────────────────────────────────────────────────────────────
Operation Ollama-First v5.0 / Phase 3 / A9 — Nemotron qwen3 切換相容性測試
驗證面:
T1. qwen3 chat 回應 OpenAI tool_calls 結構 → _parse_tool_calls_struct 正確
T2. qwen3 沒回 tool_calls → _parse_content_fallback 正確(與 NIM 同等容錯)
T3. qwen3 同時回 tool_calls + content → 優先採用 tool_calls
T4. qwen3 連線失敗 → 不丟例外給上游,自動 fallback NIM 路徑
T5. qwen3 + NIM 都失敗 → ADR-004 走 Hermes 規則引擎降級(含「🟡 [規則引擎]」標記)
T6. NEMOTRON_OLLAMA_FIRST=false → 緊急退回 NIM-first,不呼叫 qwen3
紀律:
- 所有 HTTP 互動 mock,不實際呼叫 GCP Ollama 或 NIM
- 與 test_nemotron_fallback 共存,使用同款 FakeThreat
- assert log_ai_call 路徑可被 monkeypatch 旁路(不污染 ai_calls 表)
"""
from dataclasses import dataclass
from contextlib import contextmanager
import pytest
# ─────────────────────────────────────────────────────────────
# Fixtures
# ─────────────────────────────────────────────────────────────
@dataclass
class FakeThreat:
    """Lightweight stand-in for a threat record passed to the dispatcher.

    Every field has a default so a test can build one with ``FakeThreat()``
    and override only what it cares about (e.g. ``FakeThreat(confidence=0.45)``).
    """

    # NOTE(review): field semantics are defined by the production threat
    # model, which is not visible from this file; the short notes below are
    # inferred from the field names only — confirm against the real model.
    sku: str = "SKU-Q1"
    name: str = "qwen3 測試品"
    momo_price: float = 1200.0
    pchome_price: float = 980.0
    gap_pct: float = 22.4  # presumably the price gap in percent
    sales_7d_delta_pct: float = -35.0  # presumably 7-day sales change in percent
    risk: str = "HIGH"
    recommended_action: str = "建議跟進降價"
    confidence: float = 0.85
    sales_7d_curr_amount: float = 78000.0
    sales_7d_prev_amount: float = 120000.0
class _FakeResp:
def __init__(self, payload: dict, status: int = 200):
self._payload = payload
self.status_code = status
def raise_for_status(self):
if self.status_code >= 400:
import requests
raise requests.HTTPError(f"HTTP {self.status_code}", response=self)
def json(self):
return self._payload
@contextmanager
def _noop_log_ai_call(*args, **kwargs):
"""Mock log_ai_call context manager — 不寫 ai_calls 表,回傳具備所需 setter 的 stub"""
class _Ctx:
def set_tokens(self, **_kw): pass
def set_provider(self, *_a, **_kw): pass
def set_error(self, *_a, **_kw): pass
def fallback_to_caller(self, *_a, **_kw): pass
def set_cache_hit(self, *_a, **_kw): pass
def add_meta(self, *_a, **_kw): pass
yield _Ctx()
@pytest.fixture(autouse=True)
def _reset_global_state():
    """Guard against cross-test pollution by resetting module-level state.

    Clears the dispatcher's ``_ALERT_CACHE`` and the Ollama service's
    unhealthy marks / resolved-host cache both before and after every test.

    Root cause noted by the original author: ``dispatch()`` de-duplicates
    alerts through the module-level ``_ALERT_CACHE``; the first test leaves
    "SKU-Q1" in it, so later tests hit the de-dup path and fail with
    ``dispatched == 0``.
    """
    import services.nemoton_dispatcher_service as _nem
    import services.ollama_service as _oss

    def _clear():
        # Same reset sequence is needed for setup and teardown.
        _nem._ALERT_CACHE.clear()
        _oss._unhealthy_marks.clear()
        _oss._resolved_host_cache['host'] = None
        _oss._resolved_host_cache['ts'] = 0

    _clear()
    yield
    _clear()
def _patch_execution_methods(monkeypatch, dispatcher):
"""攔截實際 Telegram/DB 寫入,記錄被呼叫的 tool 名稱與 args與 fallback test 共用 pattern"""
calls = []
def record(kind):
def _inner(*args, **kwargs):
calls.append({"kind": kind, "args": args, "kwargs": kwargs})
return _inner
monkeypatch.setattr(dispatcher, "_exec_trigger_price_alert", record("price_alert"))
monkeypatch.setattr(dispatcher, "_exec_add_to_recommendation", record("recommendation"))
monkeypatch.setattr(dispatcher, "_exec_flag_for_human_review", record("human_review"))
return calls
def _enable_qwen3_path(monkeypatch, module):
    """Turn on the Ollama-first path and stub out its side-effecting helpers.

    Sets ``NEMOTRON_OLLAMA_FIRST`` to True on ``module``, swaps
    ``log_ai_call`` for the no-op context manager, makes
    ``build_mcp_context`` return a fixed string, and patches host
    resolution / unhealthy marking on ``services.ollama_service``.
    """
    monkeypatch.setattr(module, "NEMOTRON_OLLAMA_FIRST", True)
    monkeypatch.setattr(module, "log_ai_call", _noop_log_ai_call)
    monkeypatch.setattr(module, "build_mcp_context", lambda: "MCP-MOCK")
    # Original note: make sure the import path resolves even if these
    # helpers end up never being called.
    import services.ollama_service as ollama_module
    monkeypatch.setattr(ollama_module, "resolve_ollama_host", lambda: "http://34.87.90.216:11434")
    monkeypatch.setattr(ollama_module, "mark_unhealthy", lambda *a, **kw: None)
# ─────────────────────────────────────────────────────────────
# T1. qwen3 OpenAI tool_calls 結構 → 正確解析
# ─────────────────────────────────────────────────────────────
def test_qwen3_tool_calls_struct_parsed_and_executed(monkeypatch):
    """T1: qwen3 returns a standard tool_calls structure.

    The dispatcher should execute the tool directly and never touch NIM.
    """
    import services.nemoton_dispatcher_service as module
    _enable_qwen3_path(monkeypatch, module)

    # Mocked chat response body carrying one structured tool call.
    fake_body = {
        "message": {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "function": {
                        "name": "trigger_price_alert",
                        "arguments": {
                            "sku": "SKU-Q1",
                            "name": "qwen3 測試品",
                            "gap_pct": 22.4,
                            "sales_delta": -35.0,
                            "action": "跟進降價至 $980",
                            "confidence": 0.85,
                        },
                    }
                }
            ],
        },
        "prompt_eval_count": 320,
        "eval_count": 64,
        "done": True,
    }
    monkeypatch.setattr(
        module.requests, "post", lambda *a, **kw: _FakeResp(fake_body)
    )

    dispatcher = module.NemotronDispatcher()
    calls = _patch_execution_methods(monkeypatch, dispatcher)

    # The NIM path must not run at all (proves qwen3 is the primary path).
    nim_called = {"v": False}

    def _nim_should_not_be_called(*a, **kw):
        nim_called["v"] = True
        raise AssertionError("NIM 不應被呼叫qwen3 已成功")

    monkeypatch.setattr(dispatcher, "_call_nim", _nim_should_not_be_called)

    threats = [FakeThreat()]
    result = dispatcher.dispatch(threats, hermes_stats={"duration_sec": 1.0})

    assert nim_called["v"] is False, "qwen3 成功時 NIM 不可被觸發"
    assert result["dispatched"] == 1
    assert result["nim_stats"].get("provider") == "gcp_ollama"
    assert result["nim_stats"].get("model") == module.NEMOTRON_OLLAMA_MODEL
    assert calls and calls[0]["kind"] == "price_alert"
def test_qwen3_retries_secondary_when_primary_chat_fails(monkeypatch):
    """When the first host's chat call fails, it is marked unhealthy and the
    second host is tried within the same request."""
    import requests
    import services.nemoton_dispatcher_service as module
    import services.ollama_service as ollama_module
    _enable_qwen3_path(monkeypatch, module)

    # Successive resolve_ollama_host() calls hand out primary, then secondary.
    hosts = iter([
        "http://34.87.90.216:11434",
        "http://34.21.145.224:11434",
    ])
    marked = []
    monkeypatch.setattr(ollama_module, "resolve_ollama_host", lambda: next(hosts))
    monkeypatch.setattr(ollama_module, "mark_unhealthy", lambda host: marked.append(host))

    fake_body = {
        "message": {
            "role": "assistant",
            "content": "",
            "tool_calls": [{
                "function": {
                    "name": "trigger_price_alert",
                    "arguments": {
                        "sku": "SKU-Q1",
                        "name": "qwen3 測試品",
                        "gap_pct": 22.4,
                        "sales_delta": -35.0,
                        "action": "跟進降價",
                        "confidence": 0.85,
                    },
                }
            }],
        },
        "prompt_eval_count": 200,
        "eval_count": 40,
    }

    # First POST times out, second POST succeeds.
    responses = [requests.Timeout("primary down"), _FakeResp(fake_body)]

    def fake_post(*args, **kwargs):
        result = responses.pop(0)
        if isinstance(result, Exception):
            raise result
        return result

    monkeypatch.setattr(module.requests, "post", fake_post)

    dispatcher = module.NemotronDispatcher()
    calls = _patch_execution_methods(monkeypatch, dispatcher)

    result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})

    assert marked == ["http://34.87.90.216:11434"]
    assert result["nim_stats"].get("host") == "http://34.21.145.224:11434"
    assert result["dispatched"] == 1
    # Guard against an empty list so a miss fails as an assertion, not IndexError.
    assert calls and calls[0]["kind"] == "price_alert"
# ─────────────────────────────────────────────────────────────
# T2. qwen3 沒回 tool_calls 但 content 含 JSON list → fallback 解析
# ─────────────────────────────────────────────────────────────
def test_qwen3_content_only_fallback_parsing(monkeypatch):
    """T2: qwen3 puts the tool call in ``content`` as a JSON list of dicts.

    With an empty ``tool_calls`` list, the content fallback parser should
    still pick the call up and NIM must not be invoked.
    """
    import services.nemoton_dispatcher_service as module
    _enable_qwen3_path(monkeypatch, module)

    content_payload = (
        '[{"name": "flag_for_human_review", '
        '"parameters": {"sku": "SKU-Q1", "name": "qwen3 測試品", '
        '"concern": "信心不足", "confidence": 0.45}}]'
    )
    fake_body = {
        "message": {"role": "assistant", "content": content_payload, "tool_calls": []},
        "prompt_eval_count": 100,
        "eval_count": 30,
    }
    monkeypatch.setattr(module.requests, "post", lambda *a, **kw: _FakeResp(fake_body))

    dispatcher = module.NemotronDispatcher()
    calls = _patch_execution_methods(monkeypatch, dispatcher)

    def _nim_must_not_run(threats):
        # Any call into NIM means the content fallback did not take effect.
        raise AssertionError("NIM 不應被呼叫")

    monkeypatch.setattr(dispatcher, "_call_nim", _nim_must_not_run)

    result = dispatcher.dispatch([FakeThreat(confidence=0.45)], hermes_stats={"duration_sec": 1.0})

    assert result["dispatched"] == 1
    assert calls and calls[0]["kind"] == "human_review"
# ─────────────────────────────────────────────────────────────
# T3. tool_calls + content 同時存在 → 優先 tool_calls
# ─────────────────────────────────────────────────────────────
def test_qwen3_tool_calls_takes_precedence_over_content(monkeypatch):
    """T3: when both ``tool_calls`` and a parseable ``content`` are present,
    the structured ``tool_calls`` entry is the one that gets executed."""
    import services.nemoton_dispatcher_service as module
    _enable_qwen3_path(monkeypatch, module)

    fake_body = {
        "message": {
            "role": "assistant",
            # Decoy: a different tool encoded in content; it must be ignored.
            "content": '[{"name": "flag_for_human_review", "parameters": {"sku": "X"}}]',
            "tool_calls": [
                {
                    "function": {
                        "name": "trigger_price_alert",
                        "arguments": {
                            "sku": "SKU-Q1",
                            "name": "qwen3 測試品",
                            "gap_pct": 22.4,
                            "sales_delta": -35.0,
                            "action": "降價",
                            "confidence": 0.85,
                        },
                    }
                }
            ],
        },
        "prompt_eval_count": 200,
        "eval_count": 40,
    }
    monkeypatch.setattr(module.requests, "post", lambda *a, **kw: _FakeResp(fake_body))

    dispatcher = module.NemotronDispatcher()
    calls = _patch_execution_methods(monkeypatch, dispatcher)

    result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})

    assert result["dispatched"] == 1
    # Guard against an empty list so a miss fails as an assertion, not IndexError.
    assert calls and calls[0]["kind"] == "price_alert", "tool_calls 結構必須優先於 content fallback"
# ─────────────────────────────────────────────────────────────
# T4. qwen3 連線失敗 → 不爆,自動 fallback 到 NIM
# ─────────────────────────────────────────────────────────────
def test_qwen3_connection_error_falls_back_to_nim(monkeypatch):
    """T4: when the Ollama HTTP call raises a connection error, the
    dispatcher should quietly switch to NIM and still dispatch."""
    import requests
    import services.nemoton_dispatcher_service as module
    _enable_qwen3_path(monkeypatch, module)

    def _boom(*a, **kw):
        raise requests.ConnectionError("GCP unreachable")

    monkeypatch.setattr(module.requests, "post", _boom)

    # NIM path: provide a key and quota, and stub _call_nim to return one tool call.
    monkeypatch.setattr(module, "NIM_API_KEY", "fake-key")
    monkeypatch.setattr(module, "_check_nim_quota", lambda: True)

    dispatcher = module.NemotronDispatcher()
    calls = _patch_execution_methods(monkeypatch, dispatcher)

    nim_invoked = {"v": False}

    def _fake_nim(threats):
        nim_invoked["v"] = True
        return (
            [{
                "tool": "trigger_price_alert",
                "args": {
                    "sku": "SKU-Q1", "name": "qwen3 測試品",
                    "gap_pct": 22.4, "sales_delta": -35.0,
                    "action": "降價", "confidence": 0.85,
                },
            }],
            {"total_tokens": 256, "quota_used": 5},
        )

    monkeypatch.setattr(dispatcher, "_call_nim", _fake_nim)

    result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})

    assert nim_invoked["v"] is True, "qwen3 失敗後必須 fallback 至 NIM"
    assert result["dispatched"] == 1
    assert result["nim_stats"].get("total_tokens") == 256
    # Guard against an empty list so a miss fails as an assertion, not IndexError.
    assert calls and calls[0]["kind"] == "price_alert"
# ─────────────────────────────────────────────────────────────
# T5. qwen3 + NIM 全失敗 → ADR-004 Hermes 規則引擎兜底
# ─────────────────────────────────────────────────────────────
def test_qwen3_and_nim_both_fail_falls_back_to_hermes_rules(monkeypatch):
    """T5: with both qwen3 and NIM failing, dispatch must degrade to the
    Hermes rule engine and keep the "🟡 [規則引擎]" marker in the action."""
    import requests
    import services.nemoton_dispatcher_service as module
    _enable_qwen3_path(monkeypatch, module)

    def _qwen3_down(*a, **kw):
        raise requests.ConnectionError("qwen3 down")

    monkeypatch.setattr(module.requests, "post", _qwen3_down)
    monkeypatch.setattr(module, "NIM_API_KEY", "fake-key")
    monkeypatch.setattr(module, "_check_nim_quota", lambda: True)

    dispatcher = module.NemotronDispatcher()

    # Make _call_nim fail as well, with a timeout.
    def _nim_timeout(threats):
        raise requests.Timeout("NIM timeout")

    monkeypatch.setattr(dispatcher, "_call_nim", _nim_timeout)

    # Intercept the _exec_* calls made by the rule engine and keep their
    # arguments so the marker text can be checked.
    # Original note: the rule engine calls some _exec_* methods positionally
    # (_exec_trigger_price_alert signature: sku, name, gap_pct, sales_delta,
    # action, confidence, ...), so positional and keyword arguments must be
    # merged before .get('action') can work.
    captured = []

    def _merge_positional(name_order, args, kwargs):
        # Explicit keyword arguments win; positionals only fill missing names.
        merged = dict(kwargs)
        for i, val in enumerate(args):
            if i < len(name_order):
                merged.setdefault(name_order[i], val)
        return merged

    def record_review(*args, **kwargs):
        merged = _merge_positional(
            ['sku', 'name', 'concern', 'confidence', 'footprint',
             'momo_price', 'comp_price', 'gap_pct', 'sales_delta',
             'revenue_loss_7d', 'recommended_price'],
            args, kwargs)
        captured.append(("human_review", merged))

    def record_alert(*args, **kwargs):
        merged = _merge_positional(
            ['sku', 'name', 'gap_pct', 'sales_delta', 'action', 'confidence',
             'momo_price', 'comp_price', 'footprint',
             'revenue_loss_7d', 'recommended_price'],
            args, kwargs)
        captured.append(("price_alert", merged))

    def record_reco(*args, **kwargs):
        # NOTE(review): positional args are not merged here, unlike the two
        # recorders above; only keyword arguments are kept.
        captured.append(("recommendation", kwargs))

    monkeypatch.setattr(dispatcher, "_exec_flag_for_human_review", record_review)
    monkeypatch.setattr(dispatcher, "_exec_trigger_price_alert", record_alert)
    monkeypatch.setattr(dispatcher, "_exec_add_to_recommendation", record_reco)

    # Original note: gap_pct=22.4 + risk=HIGH hits rule 2 (trigger_price_alert),
    # whose action should carry the "🟡 [規則引擎]" prefix.
    result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})

    assert result["nim_stats"].get("degraded") is True, "ADR-004 降級旗標必須存在"
    assert captured, "規則引擎必須兜底執行至少一次"
    kind, kwargs = captured[0]
    assert kind == "price_alert"
    assert "🟡 [規則引擎]" in kwargs.get("action", ""), (
        "ADR-004 鐵律Hermes 規則引擎兜底時必須帶『🟡 [規則引擎]』標記"
    )
    # The footprint should also carry the degraded-mode marker (per the
    # original note, it is shown in the Telegram alert header).
    assert "🟡 [降級模式 ADR-004]" in kwargs.get("footprint", "")
# ─────────────────────────────────────────────────────────────
# T6. feature flag 顯式 false → 緊急退路qwen3 完全不被呼叫
# ─────────────────────────────────────────────────────────────
def test_flag_false_preserves_nim_first_emergency_path(monkeypatch):
    """T6: with NEMOTRON_OLLAMA_FIRST=False, dispatch must not touch the
    Ollama HTTP endpoint and nim_stats must not report provider='gcp_ollama'."""
    import services.nemoton_dispatcher_service as module

    # Set the flag to False explicitly rather than relying on its default.
    monkeypatch.setattr(module, "NEMOTRON_OLLAMA_FIRST", False)

    # Any requests.post call counts as a failure here; the NIM path itself
    # is served by the _call_nim stub below.
    qwen3_post_called = {"v": False}

    def _maybe_post(*a, **kw):
        qwen3_post_called["v"] = True
        raise AssertionError("flag=false 時不可呼叫 GCP Ollama HTTP")

    monkeypatch.setattr(module.requests, "post", _maybe_post)
    monkeypatch.setattr(module, "NIM_API_KEY", "fake-key")
    monkeypatch.setattr(module, "_check_nim_quota", lambda: True)

    dispatcher = module.NemotronDispatcher()
    calls = _patch_execution_methods(monkeypatch, dispatcher)

    def _fake_nim(threats):
        # Canned NIM result: one tool call plus a stats dict.
        return (
            [{
                "tool": "trigger_price_alert",
                "args": {
                    "sku": "SKU-Q1", "name": "qwen3 測試品",
                    "gap_pct": 22.4, "sales_delta": -35.0,
                    "action": "降價", "confidence": 0.85,
                },
            }],
            {"total_tokens": 100, "quota_used": 1},
        )

    monkeypatch.setattr(dispatcher, "_call_nim", _fake_nim)

    result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})

    assert qwen3_post_called["v"] is False
    assert result["dispatched"] == 1
    assert result["nim_stats"].get("provider") in (None, "nim"), (
        "flag=false 時 nim_stats 不應帶 provider='gcp_ollama'"
    )
# ─────────────────────────────────────────────────────────────
# T7. 共用 helper 純單元測試(OpenAI tool_calls schema 邊界)
# ─────────────────────────────────────────────────────────────
def test_parse_tool_calls_struct_handles_string_arguments():
    """``arguments`` may arrive as a JSON string (NIM style) or as a dict
    (qwen3/Ollama style); both must be parsed, and bad input must not raise."""
    from services.nemoton_dispatcher_service import _parse_tool_calls_struct

    # NIM style: arguments is a JSON string.
    nim_style = [{"function": {"name": "foo", "arguments": '{"a": 1, "b": "x"}'}}]
    out_nim = _parse_tool_calls_struct(nim_style)
    assert out_nim == [{"tool": "foo", "args": {"a": 1, "b": "x"}}]

    # qwen3/Ollama style: arguments is already a dict.
    qwen_style = [{"function": {"name": "bar", "arguments": {"a": 2}}}]
    out_qwen = _parse_tool_calls_struct(qwen_style)
    assert out_qwen == [{"tool": "bar", "args": {"a": 2}}]

    # Edge cases: empty / None input, malformed JSON, missing name.
    assert _parse_tool_calls_struct([]) == []
    assert _parse_tool_calls_struct(None) == []

    bad = [{"function": {"name": "baz", "arguments": "{not json"}}]
    out_bad = _parse_tool_calls_struct(bad)
    assert out_bad == [{"tool": "baz", "args": {}}]

    no_name = [{"function": {"arguments": "{}"}}]
    assert _parse_tool_calls_struct(no_name) == []
def test_parse_content_fallback_handles_various_shapes():
    """The content fallback parser accepts the list shapes below and returns
    an empty list for anything that is not a JSON list."""
    from services.nemoton_dispatcher_service import _parse_content_fallback

    # Older OpenAI style: [{"name", "parameters"}].
    out1 = _parse_content_fallback('[{"name": "foo", "parameters": {"a": 1}}]')
    assert out1 == [{"tool": "foo", "args": {"a": 1}}]

    # Name nested under "function", arguments given as a JSON string.
    out2 = _parse_content_fallback('[{"function": {"name": "bar"}, "arguments": "{\\"b\\": 2}"}]')
    assert out2 == [{"tool": "bar", "args": {"b": 2}}]

    # Empty string / non-JSON / JSON that is not a list → [].
    assert _parse_content_fallback("") == []
    assert _parse_content_fallback("not json") == []
    assert _parse_content_fallback('{"a":1}') == []