diff --git a/apps/api/src/services/agent_orchestrator.py b/apps/api/src/services/agent_orchestrator.py index ac21a23f..589959da 100644 --- a/apps/api/src/services/agent_orchestrator.py +++ b/apps/api/src/services/agent_orchestrator.py @@ -64,7 +64,14 @@ if TYPE_CHECKING: logger = structlog.get_logger(__name__) # Global timeout (all agents combined) -GLOBAL_TIMEOUT_SEC = 30.0 +# 2026-04-16 Claude Sonnet 4.6: deepseek-r1:14b measured at 2.2-27.3s, avg 10.6s +# The old 30s left only 10s for each of 3 sequential agents → frequent timeouts → confidence=20% +# Adjustment: 25s per agent, 3 sequential + 1 parallel group = worst case 75s + buffer = 90s (NOTE(review): the visible hunks show 2 sequential agents plus 1 parallel pair, i.e. 3 stages x 25s = 75s; confirm the stage count) +GLOBAL_TIMEOUT_SEC = 90.0 + +# Per-agent timeout (the 5s default is a dev-machine test value; production must match LLM latency) +# deepseek-r1:14b avg 10.6s, 99th percentile ~30s (NOTE(review): 25s is below both the 27.3s measured max and the ~30s p99 quoted here; confirm this is intended) +_PER_AGENT_TIMEOUT_SEC = 25.0 # Redis Stream key STREAM_KEY = "aiops:p2:events" @@ -173,7 +180,7 @@ async def _debate( # ── Step 1: Diagnostician ────────────────────────────────────────────── diagnostician = get_diagnostician_agent() - diagnosis = await diagnostician.run(snapshot) + diagnosis = await diagnostician.run(snapshot, timeout_sec=_PER_AGENT_TIMEOUT_SEC) await _record_turn( session_id=session_id, incident_id=incident_id, @@ -187,7 +194,7 @@ async def _debate( # ── Step 2: Solver ───────────────────────────────────────────────────── solver = get_solver_agent() - plan = await solver.run(diagnosis) + plan = await solver.run(diagnosis, timeout_sec=_PER_AGENT_TIMEOUT_SEC) await _record_turn( session_id=session_id, incident_id=incident_id, @@ -204,8 +211,8 @@ async def _debate( critic = get_critic_agent() verdict, critic_report = await asyncio.gather( - reviewer.run(plan), - critic.run(diagnosis, plan), + reviewer.run(plan, timeout_sec=_PER_AGENT_TIMEOUT_SEC), + critic.run(diagnosis, plan, timeout_sec=_PER_AGENT_TIMEOUT_SEC), ) await asyncio.gather(