fix(mcp): balance host alert tool suggestions
All checks were successful
Code Review / ai-code-review (push) Successful in 11s
CD Pipeline / tests (push) Successful in 1m17s
CD Pipeline / build-and-deploy (push) Successful in 3m41s
CD Pipeline / post-deploy-checks (push) Successful in 1m43s

This commit is contained in:
Your Name
2026-05-18 12:14:21 +08:00
parent 989390f7ce
commit 64c7044282
2 changed files with 62 additions and 5 deletions

View File

@@ -171,9 +171,13 @@ class MCPToolRegistry:
"""
suggested: list[RegisteredTool] = []
labels = incident_labels or {}
# namespace alone is only routing context. Requiring a concrete
# workload/node locator prevents host alerts such as HostErrorLogFlood
# from being misrouted into Kubernetes tools just because an upstream
# bridge added namespace="infra".
has_k8s_locator = any(
labels.get(key)
for key in ("deployment", "pod", "node", "namespace", "container")
for key in ("deployment", "pod", "node", "container")
)
# 依優先度排序後篩選
@@ -195,8 +199,10 @@ class MCPToolRegistry:
# 但允許多個工具覆蓋同一維度（例如 D1 需要 kubectl_describe + kubectl_events）
suggested.append(reg)
# 取前 max_tools 個
result = suggested[:max_tools]
# 取前 max_tools 個;同一 provider 先給半數上限,避免單一 MCP
# provider 擠滿整個 8D 預算,讓 SignOz/Prometheus/Sentry 這類側證
# 有機會和主診斷工具一起進入調查。
result = _select_provider_balanced_tools(suggested, max_tools)
logger.debug(
"mcp_registry_suggest_tools",
@@ -219,6 +225,37 @@ class MCPToolRegistry:
return len(self._tools)
def _select_provider_balanced_tools(
    tools: list[RegisteredTool],
    max_tools: int,
) -> list[RegisteredTool]:
    """Pick up to ``max_tools`` tools without letting one provider crowd out the rest.

    The candidates are walked in the order given. During that first pass each
    provider (keyed by ``reg.provider.name``) may contribute at most
    ``max(2, max_tools // 2)`` tools; anything beyond that soft cap is set
    aside. If the first pass ends with free slots, the set-aside tools are
    appended in their original relative order until the budget is used up.

    Args:
        tools: Candidate tools, already ordered by the caller.
        max_tools: Upper bound on the number of tools returned.

    Returns:
        A new list with at most ``max_tools`` entries; empty when
        ``max_tools`` is zero or negative.
    """
    if max_tools <= 0:
        return []

    per_provider_limit = max(2, max_tools // 2)
    picked: list[RegisteredTool] = []
    overflow: list[RegisteredTool] = []
    usage: dict[str, int] = {}

    for candidate in tools:
        if len(picked) >= max_tools:
            break
        owner = candidate.provider.name
        used = usage.get(owner, 0)
        if used < per_provider_limit:
            picked.append(candidate)
            usage[owner] = used + 1
        else:
            # Over the soft cap: keep it as a backfill candidate only.
            overflow.append(candidate)

    # Backfill whatever budget is left with the over-cap tools, in order.
    free_slots = max_tools - len(picked)
    if free_slots > 0:
        picked.extend(overflow[:free_slots])
    return picked
# ─────────────────────────────────────────────────────────────────────────────
# 工具自動分類(根據 tool name 推斷感官維度)
# ─────────────────────────────────────────────────────────────────────────────

View File

@@ -282,6 +282,16 @@ class TestSuggestTools:
assert "kubectl_describe" in names
assert "prometheus_query" in names
def test_namespace_only_non_kube_alert_does_not_get_k8s_tool(self):
    """A host alert labelled only with ``namespace`` gets no kubectl tool."""
    suggestions = self._registry_with_tools().suggest_tools(
        alertname="HostErrorLogFlood",
        incident_labels={"namespace": "infra"},
    )
    suggested_names = [entry.tool.name for entry in suggestions]
    # The namespace label alone must not pull in the Kubernetes describe tool,
    # while the Prometheus query tool is still expected in the suggestions.
    assert "kubectl_describe" not in suggested_names
    assert "prometheus_query" in suggested_names
def test_empty_alertname_gets_generic_only(self):
registry = self._registry_with_tools()
tools = registry.suggest_tools(alertname="")
@@ -342,10 +352,19 @@ class TestSuggestTools:
registry = MCPToolRegistry()
ssh_provider = _StubProvider(
"ssh_host",
["ssh_diagnose", "ssh_get_top_processes", "ssh_get_container_logs"],
[
"ssh_diagnose",
"ssh_get_top_processes",
"ssh_get_container_logs",
"ssh_get_container_status",
"ssh_get_service_status",
"ssh_check_port",
],
)
k8s_provider = _StubProvider("kubernetes", ["kubectl_describe"])
signoz_provider = _StubProvider("signoz", ["query_logs"])
prometheus_provider = _StubProvider("prometheus", ["prometheus_query"])
await registry.register_provider(k8s_provider)
await registry.register_provider(ssh_provider)
await registry.register_provider(signoz_provider)
await registry.register_provider(prometheus_provider)
@@ -358,7 +377,7 @@ class TestSuggestTools:
"sensor_ip": "192.168.0.188",
"host": "192.168.0.188",
},
max_tools=10,
max_tools=8,
)
names = [reg.tool.name for reg in tools]
@@ -366,6 +385,7 @@ class TestSuggestTools:
assert "ssh_get_top_processes" in names
assert "query_logs" in names
assert "prometheus_query" in names
assert "kubectl_describe" not in names
def test_get_all_tools_returns_all(self):
registry = MCPToolRegistry()