fix(ollama): disable thinking for deepseek call sites
This commit is contained in:
@@ -276,6 +276,8 @@ async def process_nl_message(
|
||||
f"{endpoint.url}/api/chat",
|
||||
json={
|
||||
"model": model,
|
||||
# Keep Hermes responses in message.content across Ollama 0.24+.
|
||||
"think": False,
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": prompt_with_ctx},
|
||||
|
||||
@@ -720,7 +720,14 @@ async def _call_ollama(prompt: str, ollama_url: str, model: str) -> str | None:
|
||||
try:
|
||||
resp = await client.post(
|
||||
f"{endpoint.url}/api/generate",
|
||||
-json={"model": model, "prompt": prompt, "stream": False, "options": {"temperature": 0.1}},
|
||||
json={
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
# Preserve response-body compatibility with Ollama 0.24 thinking models.
|
||||
"think": False,
|
||||
"options": {"temperature": 0.1},
|
||||
},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.json().get("response", "")
|
||||
|
||||
@@ -167,6 +167,9 @@ class ChatManager:
|
||||
json={
|
||||
"model": MODEL,
|
||||
"stream": False,
|
||||
# Ollama 0.24 separates deepseek-r1 thinking from final text.
|
||||
# Chat callers expect message.content to contain the answer.
|
||||
"think": False,
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_message},
|
||||
|
||||
@@ -678,7 +678,13 @@ async def _nemoclaw_second_opinion(incident: "Incident", primary_result: dict) -
|
||||
try:
|
||||
resp = await client.post(
|
||||
f"{endpoint.url.rstrip('/')}/api/generate",
|
||||
-json={"model": model, "prompt": prompt, "stream": False},
|
||||
json={
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
# Ollama 0.24 puts deepseek-r1 output in `thinking` unless disabled.
|
||||
"think": False,
|
||||
},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
|
||||
@@ -220,6 +220,9 @@ class LogSummaryService:
|
||||
"model": SUMMARY_MODEL,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
# Ollama 0.24 returns deepseek-r1 text in `thinking` by default.
|
||||
# Existing callers read `response`, so force final-answer mode.
|
||||
"think": False,
|
||||
"options": {"temperature": 0.1, "num_predict": 200},
|
||||
},
|
||||
)
|
||||
|
||||
@@ -103,6 +103,7 @@ async def test_nemoclaw_chat_uses_resolved_interactive_lane(
|
||||
url, payload = _FakeAsyncClient.posted[0]
|
||||
assert url == "http://gcp-a:11435/api/chat"
|
||||
assert payload["model"] == "deepseek-r1:14b"
|
||||
assert payload["think"] is False
|
||||
|
||||
|
||||
def test_chat_manager_has_no_direct_gemini_generation_path() -> None:
|
||||
|
||||
@@ -22,6 +22,7 @@ class _FakeResponse:
|
||||
|
||||
class _FakeAsyncClient:
|
||||
posted_urls: list[str] = []
|
||||
posted_payloads: list[dict[str, Any]] = []
|
||||
fail_urls: set[str] = set()
|
||||
response: str = ""
|
||||
|
||||
@@ -37,6 +38,7 @@ class _FakeAsyncClient:
|
||||
|
||||
async def post(self, url: str, *, json: dict[str, Any]) -> _FakeResponse:
|
||||
self.posted_urls.append(url)
|
||||
self.posted_payloads.append(json)
|
||||
if url in self.fail_urls:
|
||||
raise RuntimeError("endpoint unavailable")
|
||||
return _FakeResponse(self.response)
|
||||
@@ -45,6 +47,7 @@ class _FakeAsyncClient:
|
||||
@pytest.fixture(autouse=True)
|
||||
def _reset_fake_client() -> None:
|
||||
_FakeAsyncClient.posted_urls = []
|
||||
_FakeAsyncClient.posted_payloads = []
|
||||
_FakeAsyncClient.fail_urls = set()
|
||||
_FakeAsyncClient.response = ""
|
||||
|
||||
@@ -115,6 +118,7 @@ async def test_nemoclaw_second_opinion_tries_gcp_b_after_gcp_a_failure(
|
||||
"http://gcp-a:11435/api/generate",
|
||||
"http://gcp-b:11436/api/generate",
|
||||
]
|
||||
assert all(payload["think"] is False for payload in _FakeAsyncClient.posted_payloads)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
Reference in New Issue
Block a user