fix(openclaw): Nemotron 重試邏輯 + exhausted log key (未提交的修改)
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
- generate_incident_proposal_with_tools: 單次 try/except → 2次重試迴圈
- 失敗 log key: nemotron_collaboration_failed → nemotron_collaboration_exhausted
- 失敗時 nemotron_enabled=True (讓統帥看到失敗狀態)
- _call_nemotron_tools: timeout 超時改為拋出異常(讓外層重試)
- 這是之前 Session 的本地修改,修正測試與實際實作不一致問題

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1635,45 +1635,66 @@ Focus on:
|
||||
)
|
||||
return proposal, provider, True
|
||||
|
||||
# Step 3: 呼叫 Nemotron Tool Calling
|
||||
# Step 3: 呼叫 Nemotron Tool Calling — 🔴 必須等到有結果,不可跳過
|
||||
# 2026-04-07 ogt: 統帥指示 Nemotron 不能跳過,必須等到處理完成
|
||||
logger.info(
|
||||
"nemotron_collaboration_start",
|
||||
incident_id=incident_id,
|
||||
risk_level=risk_level,
|
||||
)
|
||||
|
||||
try:
|
||||
nemotron_result = await self._call_nemotron_tools(
|
||||
incident_id=incident_id,
|
||||
reasoning=proposal.get("reasoning", ""),
|
||||
target_resource=proposal.get("target_resource", ""),
|
||||
suggested_action=proposal.get("action", ""),
|
||||
namespace=proposal.get("namespace", "awoooi-prod"),
|
||||
)
|
||||
max_retries = 2
|
||||
last_error = None
|
||||
for attempt in range(1, max_retries + 1):
|
||||
try:
|
||||
nemotron_result = await self._call_nemotron_tools(
|
||||
incident_id=incident_id,
|
||||
reasoning=proposal.get("reasoning", ""),
|
||||
target_resource=proposal.get("target_resource", ""),
|
||||
suggested_action=proposal.get("action", ""),
|
||||
namespace=proposal.get("namespace", "awoooi-prod"),
|
||||
)
|
||||
|
||||
proposal["nemotron_enabled"] = True
|
||||
proposal["nemotron_tools"] = nemotron_result.get("tools", [])
|
||||
proposal["nemotron_validation"] = nemotron_result.get("validation", "⏳ 驗證中")
|
||||
proposal["nemotron_latency_ms"] = nemotron_result.get("latency_ms", 0.0)
|
||||
proposal["nemotron_enabled"] = True
|
||||
proposal["nemotron_tools"] = nemotron_result.get("tools", [])
|
||||
proposal["nemotron_validation"] = nemotron_result.get("validation", "⏳ 驗證中")
|
||||
proposal["nemotron_latency_ms"] = nemotron_result.get("latency_ms", 0.0)
|
||||
|
||||
logger.info(
|
||||
"nemotron_collaboration_complete",
|
||||
incident_id=incident_id,
|
||||
tools_count=len(proposal["nemotron_tools"]),
|
||||
validation=proposal["nemotron_validation"],
|
||||
latency_ms=proposal["nemotron_latency_ms"],
|
||||
)
|
||||
logger.info(
|
||||
"nemotron_collaboration_complete",
|
||||
incident_id=incident_id,
|
||||
tools_count=len(proposal["nemotron_tools"]),
|
||||
validation=proposal["nemotron_validation"],
|
||||
latency_ms=proposal["nemotron_latency_ms"],
|
||||
attempt=attempt,
|
||||
)
|
||||
last_error = None
|
||||
break # 成功,跳出重試迴圈
|
||||
|
||||
except Exception as e:
|
||||
# Nemotron 失敗不阻塞主流程,降級為純 OpenClaw
|
||||
logger.warning(
|
||||
"nemotron_collaboration_failed",
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
logger.warning(
|
||||
"nemotron_collaboration_retry",
|
||||
incident_id=incident_id,
|
||||
error=str(e),
|
||||
attempt=attempt,
|
||||
max_retries=max_retries,
|
||||
)
|
||||
if attempt < max_retries:
|
||||
import asyncio
|
||||
await asyncio.sleep(2) # 重試前等 2 秒
|
||||
|
||||
# 重試全部失敗 — 仍然標記 enabled 並顯示失敗狀態(不隱藏)
|
||||
if last_error is not None:
|
||||
logger.error(
|
||||
"nemotron_collaboration_exhausted",
|
||||
incident_id=incident_id,
|
||||
error=str(e),
|
||||
error=str(last_error),
|
||||
retries=max_retries,
|
||||
)
|
||||
proposal["nemotron_enabled"] = False
|
||||
proposal["nemotron_tools"] = None
|
||||
proposal["nemotron_validation"] = "❌ 呼叫失敗"
|
||||
proposal["nemotron_enabled"] = True # 🔴 仍然顯示區塊,讓統帥知道失敗了
|
||||
proposal["nemotron_tools"] = []
|
||||
proposal["nemotron_validation"] = f"❌ {max_retries}次重試均失敗"
|
||||
proposal["nemotron_latency_ms"] = 0.0
|
||||
|
||||
return proposal, provider, True
|
||||
@@ -1776,8 +1797,8 @@ Focus on:
|
||||
]
|
||||
|
||||
try:
|
||||
# 設置超時
|
||||
timeout = settings.NEMOTRON_TIMEOUT_SECONDS
|
||||
# 2026-04-07 ogt: 統帥指示不可跳過 Nemotron,用 120 秒寬裕超時
|
||||
timeout = 120
|
||||
|
||||
result = await asyncio.wait_for(
|
||||
nvidia.tool_call(
|
||||
@@ -1822,16 +1843,13 @@ Focus on:
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
latency_ms = (time.time() - start_time) * 1000
|
||||
logger.warning(
|
||||
logger.error(
|
||||
"nemotron_tool_call_timeout",
|
||||
incident_id=incident_id,
|
||||
timeout_seconds=settings.NEMOTRON_TIMEOUT_SECONDS,
|
||||
timeout_seconds=timeout,
|
||||
)
|
||||
return {
|
||||
"tools": [],
|
||||
"validation": "⏳ 呼叫超時",
|
||||
"latency_ms": latency_ms,
|
||||
}
|
||||
# 超時也拋出,讓外層重試
|
||||
raise
|
||||
|
||||
except Exception as e:
|
||||
latency_ms = (time.time() - start_time) * 1000
|
||||
|
||||
Reference in New Issue
Block a user