fix(drift): parse ollama json wrapped responses
All checks were successful
Code Review / ai-code-review (push) Successful in 13s
CD Pipeline / tests (push) Successful in 1m16s
CD Pipeline / build-and-deploy (push) Successful in 3m52s
CD Pipeline / post-deploy-checks (push) Successful in 1m30s

This commit is contained in:
Your Name
2026-05-06 19:39:01 +08:00
parent 8882301243
commit 2aaaa5654f
3 changed files with 92 additions and 15 deletions

View File

@@ -17,6 +17,7 @@ Drift Interpreter - Phase 25 P2 Config Drift Detection
from __future__ import annotations
import json
import re
from typing import TYPE_CHECKING
import structlog
@@ -52,6 +53,58 @@ _INTENT_PROMPT_TEMPLATE = """你是 AWOOOI GitOps 守門員,請分析以下 K8
"""
def _strip_think_blocks(text: str) -> str:
"""移除 qwen/deepseek 類模型常見的 <think> 推理段。"""
return re.sub(r"<think>[\s\S]*?</think>", "", text, flags=re.IGNORECASE).strip()
def _extract_first_json_object(text: str) -> dict | None:
    """Extract the first JSON object from an LLM response.

    Ollama qwen3/deepseek models often wrap the JSON payload in ``<think>``
    blocks, Markdown code fences or short sentences. That wrapper text should
    not cause the drift intent to be downgraded to UNKNOWN.

    Candidates are tried in this order, and the first one that decodes to a
    ``dict`` wins:

    1. the whole response (after ``<think>`` blocks are removed);
    2. the content of each Markdown code fence;
    3. a JSON object starting at each ``{`` in the response, left to right.

    Args:
        text: Raw model output.

    Returns:
        The decoded object, or ``None`` when ``text`` is not a string or
        contains no decodable JSON object.
    """
    # A provider may hand back None or another non-string on failure; treat
    # that as "nothing to parse" rather than raising inside re.sub.
    if not isinstance(text, str):
        return None

    cleaned = _strip_think_blocks(text)

    candidates = [cleaned]
    candidates.extend(
        match.group(1).strip()
        for match in re.finditer(r"```(?:json)?\s*([\s\S]+?)```", cleaned)
    )
    for candidate in candidates:
        try:
            data = json.loads(candidate)
        except (ValueError, RecursionError):
            # JSONDecodeError is a ValueError; RecursionError covers
            # pathologically nested input.
            continue
        if isinstance(data, dict):
            return data

    # Let the stdlib decoder find where an object ends instead of counting
    # braces by hand. Trying every "{" (not only the first) keeps a stray
    # brace in the surrounding prose from hiding the real payload.
    decoder = json.JSONDecoder()
    start = cleaned.find("{")
    while start >= 0:
        try:
            data, _end = decoder.raw_decode(cleaned, start)
        except (ValueError, RecursionError):
            data = None
        if isinstance(data, dict):
            return data
        start = cleaned.find("{", start + 1)

    return None
class NemotronDriftInterpreter:
"""
使用 Nemotron 分析漂移意圖
@@ -135,19 +188,9 @@ class NemotronDriftInterpreter:
def _parse_response(self, text: str) -> DriftInterpretation:
"""解析 Nemotron JSON 回應"""
try:
# 嘗試直接解析
data = json.loads(text)
except Exception:
try:
import re
match = re.search(r"```(?:json)?\s*([\s\S]+?)```", text)
if match:
data = json.loads(match.group(1))
else:
return self._unknown_result("無法解析 JSON")
except Exception:
return self._unknown_result("JSON 解析失敗")
data = _extract_first_json_object(text)
if data is None:
return self._unknown_result("無法解析 JSON")
try:
intent_str = data.get("intent", "unknown")

View File

@@ -40,3 +40,36 @@ async def test_drift_interpreter_declares_ollama_first_governance_lane(
assert fake_openclaw.alert_context["enforce_ollama_first"] is True
assert fake_openclaw.alert_context["task_type"] == "diagnose"
assert fake_openclaw.alert_context["allow_gcp_heavy_model"] is True
def test_drift_interpreter_strips_think_blocks_before_json_parse() -> None:
    """A leading ``<think>`` block must not stop the JSON payload from being parsed."""
    llm_reply = """
<think>先判斷 replicas 變化是否為 HPA。</think>
{
"intent": "automated_change",
"explanation": "replicas 變化通常是 HPA 自動調整",
"risk": "LOW",
"confidence": 0.72
}
"""
    interpreter = NemotronDriftInterpreter()

    interpretation = interpreter._parse_response(llm_reply)

    assert interpretation.intent == DriftIntent.AUTOMATED_CHANGE
    assert interpretation.risk == "LOW"
    assert interpretation.confidence == 0.72
def test_drift_interpreter_extracts_json_from_markdown() -> None:
    """JSON inside a Markdown code fence, preceded by a sentence, is still parsed."""
    llm_reply = """
以下為判斷:
```json
{"intent":"human_error","explanation":"欄位異常變更","risk":"MEDIUM","confidence":0.61}
```
"""
    interpreter = NemotronDriftInterpreter()

    interpretation = interpreter._parse_response(llm_reply)

    assert interpretation.intent == DriftIntent.HUMAN_ERROR
    assert interpretation.risk == "MEDIUM"
    assert interpretation.confidence == 0.61

View File

@@ -4047,7 +4047,8 @@ ruff check --select F401,F821,I001 apps/api/src/services/heartbeat_report_servic
| Production image | `awoooi-api:2ef54ccc9462c5fb1f74ca4f5997fe9564c9418f` |
| Live provider order | Drift Interpreter 單次 live 驗證顯示 `ollama_gcp_a → ollama_gcp_b → ollama_local → gemini` |
| Live 實際 provider | `ollama_gcp_a` 成功,模型 `qwen3:14b`、tokens `269`、latency 約 `56.5s`,未觸發 Gemini |
| 後續缺口 | 測試 prompt 回傳仍解析成 `unknown`,需另外強化 Drift Interpreter 的 JSON schema / correction retry;這不是費用路由問題 |
| Drift JSON parser | 補上 `<think>...</think>` 移除、Markdown code fence JSON、首個 JSON object 擷取,避免 qwen3/deepseek 回應外包文字時直接降級成 `unknown` |
| 後續缺口 | 若模型輸出欄位語義錯誤,仍需後續補 JSON schema / correction retry;這不是費用路由問題 |
### 驗證
@@ -4055,7 +4056,7 @@ ruff check --select F401,F821,I001 apps/api/src/services/heartbeat_report_servic
DATABASE_URL='postgresql+asyncpg://test:test@localhost:5432/test' pytest \
apps/api/tests/test_openclaw_alert_cloud_fallback_gate.py \
apps/api/tests/test_drift_interpreter_ollama_first.py
# 8 passed
# 10 passed
ruff check \
apps/api/src/services/openclaw.py \