fix(ollama): disable thinking for deepseek call sites
All checks were successful
CD Pipeline / tests (push) Successful in 1m31s
Code Review / ai-code-review (push) Successful in 26s
CD Pipeline / build-and-deploy (push) Successful in 5m27s
CD Pipeline / post-deploy-checks (push) Successful in 1m40s

This commit is contained in:
Your Name
2026-05-25 23:19:14 +08:00
parent 6112fd07ae
commit 3953ef6d57
7 changed files with 28 additions and 2 deletions

View File

@@ -276,6 +276,8 @@ async def process_nl_message(
f"{endpoint.url}/api/chat",
json={
"model": model,
# Keep Hermes responses in message.content across Ollama 0.24+.
"think": False,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt_with_ctx},

View File

@@ -720,7 +720,14 @@ async def _call_ollama(prompt: str, ollama_url: str, model: str) -> str | None:
try:
resp = await client.post(
f"{endpoint.url}/api/generate",
json={"model": model, "prompt": prompt, "stream": False, "options": {"temperature": 0.1}},
json={
"model": model,
"prompt": prompt,
"stream": False,
# Preserve response-body compatibility with Ollama 0.24 thinking models.
"think": False,
"options": {"temperature": 0.1},
},
)
resp.raise_for_status()
return resp.json().get("response", "")

View File

@@ -167,6 +167,9 @@ class ChatManager:
json={
"model": MODEL,
"stream": False,
# Ollama 0.24 separates deepseek-r1 thinking from final text.
# Chat callers expect message.content to contain the answer.
"think": False,
"messages": [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message},

View File

@@ -678,7 +678,13 @@ async def _nemoclaw_second_opinion(incident: "Incident", primary_result: dict) -
try:
resp = await client.post(
f"{endpoint.url.rstrip('/')}/api/generate",
json={"model": model, "prompt": prompt, "stream": False},
json={
"model": model,
"prompt": prompt,
"stream": False,
# Ollama 0.24 puts deepseek-r1 output in `thinking` unless disabled.
"think": False,
},
)
resp.raise_for_status()
data = resp.json()

View File

@@ -220,6 +220,9 @@ class LogSummaryService:
"model": SUMMARY_MODEL,
"prompt": prompt,
"stream": False,
# Ollama 0.24 returns deepseek-r1 text in `thinking` by default.
# Existing callers read `response`, so force final-answer mode.
"think": False,
"options": {"temperature": 0.1, "num_predict": 200},
},
)

View File

@@ -103,6 +103,7 @@ async def test_nemoclaw_chat_uses_resolved_interactive_lane(
url, payload = _FakeAsyncClient.posted[0]
assert url == "http://gcp-a:11435/api/chat"
assert payload["model"] == "deepseek-r1:14b"
assert payload["think"] is False
def test_chat_manager_has_no_direct_gemini_generation_path() -> None:

View File

@@ -22,6 +22,7 @@ class _FakeResponse:
class _FakeAsyncClient:
posted_urls: list[str] = []
posted_payloads: list[dict[str, Any]] = []
fail_urls: set[str] = set()
response: str = ""
@@ -37,6 +38,7 @@ class _FakeAsyncClient:
async def post(self, url: str, *, json: dict[str, Any]) -> _FakeResponse:
    """Record the outgoing request, then fail or return the canned reply.

    The URL and JSON payload are appended to the recording lists before
    anything else, so a request that ends in a simulated failure is still
    visible to later assertions.

    Raises:
        RuntimeError: if ``url`` is listed in ``fail_urls``.
    """
    self.posted_payloads.append(json)
    self.posted_urls.append(url)
    should_fail = url in self.fail_urls
    if not should_fail:
        return _FakeResponse(self.response)
    raise RuntimeError("endpoint unavailable")
@@ -45,6 +47,7 @@ class _FakeAsyncClient:
# Autouse fixture: pytest runs it for every test without the test requesting it.
# It rebinds the _FakeAsyncClient class attributes to fresh empty values, so
# URLs/payloads recorded, failures configured, or a canned response set by one
# test cannot leak into the next.
@pytest.fixture(autouse=True)
def _reset_fake_client() -> None:
_FakeAsyncClient.posted_urls = []
_FakeAsyncClient.posted_payloads = []
_FakeAsyncClient.fail_urls = set()
_FakeAsyncClient.response = ""
@@ -115,6 +118,7 @@ async def test_nemoclaw_second_opinion_tries_gcp_b_after_gcp_a_failure(
"http://gcp-a:11435/api/generate",
"http://gcp-b:11436/api/generate",
]
assert all(payload["think"] is False for payload in _FakeAsyncClient.posted_payloads)
@pytest.mark.asyncio