feat(auto-repair): route ssh diagnostics through mcp gateway
All checks were successful
Code Review / ai-code-review (push) Successful in 10s
run-migration / migrate (push) Successful in 9s
CD Pipeline / tests (push) Successful in 1m11s
CD Pipeline / build-and-deploy (push) Successful in 3m17s
CD Pipeline / post-deploy-checks (push) Successful in 1m16s

This commit is contained in:
Your Name
2026-05-14 21:13:05 +08:00
parent 0567135647
commit 813d088339
8 changed files with 662 additions and 6 deletions

View File

@@ -0,0 +1,166 @@
-- T23: auto-repair executor read-only MCP Gateway seed
-- 目的:讓 YAML_RULE/PlayBook 的只讀 SSH 診斷步驟經過 AwoooP MCP Gateway。
-- 邊界:只授權 read scopewrite/admin SSH 工具仍必須走 approval_executor + Gate 5。
SELECT set_config('app.project_id', 'awoooi', FALSE);
WITH agent_body AS (
SELECT jsonb_build_object(
'schema_version', 'awooop_agent_contract_v1',
'agent_id', 'auto_repair_executor',
'display_name', 'Auto Repair Executor',
'project_id', 'awoooi',
'purpose', 'Read-only auto-repair diagnostics through AwoooP MCP Gateway',
'allowed_scopes', jsonb_build_array('read'),
'forbidden_scopes', jsonb_build_array('write', 'admin'),
'stage', 't23_auto_repair_diagnostic_gateway'
) AS body_json
),
inserted_revision AS (
INSERT INTO awooop_contract_revisions (
project_id,
contract_family,
contract_id,
version_major,
version_minor,
lifecycle_status,
body_json,
body_hash,
body_schema_version,
publisher_id,
published_at
)
SELECT
'awoooi',
'agent',
'auto_repair_executor',
1,
0,
'active',
body_json,
encode(digest(body_json::text, 'sha256'), 'hex'),
'v1.0',
'migration:t23_auto_repair_executor_read_gateway',
NOW()
FROM agent_body
ON CONFLICT (project_id, contract_family, contract_id, version_major, version_minor)
DO NOTHING
RETURNING revision_id, project_id, contract_family, contract_id
),
chosen_revision AS (
SELECT revision_id, project_id, contract_family, contract_id
FROM inserted_revision
UNION ALL
SELECT revision_id, project_id, contract_family, contract_id
FROM awooop_contract_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'auto_repair_executor'
AND version_major = 1
AND version_minor = 0
AND lifecycle_status = 'active'
),
upsert_pointer AS (
INSERT INTO awooop_active_revisions (
project_id,
contract_family,
contract_id,
active_revision_id,
updated_at
)
SELECT DISTINCT ON (project_id, contract_family, contract_id)
project_id,
contract_family,
contract_id,
revision_id,
NOW()
FROM chosen_revision
ORDER BY project_id, contract_family, contract_id, revision_id
ON CONFLICT (project_id, contract_family, contract_id)
DO UPDATE SET
active_revision_id = EXCLUDED.active_revision_id,
updated_at = NOW()
RETURNING contract_id
)
SELECT 'auto_repair_executor_active_contracts', count(*) FROM upsert_pointer;
WITH read_tools(tool_name, description) AS (
VALUES
('ssh_diagnose', 'SSH host/container diagnosis read'),
('ssh_get_top_processes', 'SSH top processes read'),
('ssh_get_disk_usage', 'SSH disk usage read'),
('ssh_get_memory_info', 'SSH memory info read'),
('ssh_get_container_logs', 'SSH container logs read'),
('ssh_get_container_status', 'SSH container status read'),
('ssh_get_service_status', 'SSH service status read'),
('ssh_check_port', 'SSH port check read'),
('ssh_get_nginx_error_log', 'SSH nginx error log read'),
('ssh_get_swap_info', 'SSH swap info read')
),
upsert_tools AS (
INSERT INTO awooop_mcp_tool_registry (
project_id,
tool_name,
tool_type,
description,
allowed_scopes,
environment_tags,
is_active,
updated_at
)
SELECT
'awoooi',
tool_name,
'mcp_server',
description,
'["read"]'::jsonb,
'{"env": "prod"}'::jsonb,
TRUE,
NOW()
FROM read_tools
ON CONFLICT (project_id, tool_name)
DO UPDATE SET
description = EXCLUDED.description,
allowed_scopes = EXCLUDED.allowed_scopes,
environment_tags = EXCLUDED.environment_tags,
is_active = TRUE,
updated_at = NOW()
RETURNING tool_id, tool_name, allowed_scopes
),
upsert_grants AS (
INSERT INTO awooop_mcp_grants (
project_id,
agent_id,
tool_id,
granted_by,
granted_scopes,
expires_at,
is_revoked,
revoked_at,
revoked_by
)
SELECT
'awoooi',
'auto_repair_executor',
tool_id,
'migration:t23_auto_repair_executor_read_gateway',
allowed_scopes,
NULL,
FALSE,
NULL,
NULL
FROM upsert_tools
ON CONFLICT (project_id, agent_id, tool_id)
DO UPDATE SET
granted_by = EXCLUDED.granted_by,
granted_scopes = EXCLUDED.granted_scopes,
expires_at = NULL,
is_revoked = FALSE,
revoked_at = NULL,
revoked_by = NULL
RETURNING grant_id
)
SELECT
'auto_repair_executor_read_gateway',
(SELECT count(*) FROM upsert_tools) AS tool_rows,
(SELECT count(*) FROM upsert_grants) AS grant_rows;

View File

@@ -0,0 +1,24 @@
-- Rollback T23 auto-repair executor read-only MCP Gateway grant.
SELECT set_config('app.project_id', 'awoooi', FALSE);
UPDATE awooop_mcp_grants
SET is_revoked = TRUE,
revoked_at = NOW(),
revoked_by = 'rollback:t23_auto_repair_executor_read_gateway'
WHERE project_id = 'awoooi'
AND agent_id = 'auto_repair_executor'
AND granted_by = 'migration:t23_auto_repair_executor_read_gateway';
DELETE FROM awooop_active_revisions
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'auto_repair_executor';
UPDATE awooop_contract_revisions
SET lifecycle_status = 'retired'
WHERE project_id = 'awoooi'
AND contract_family = 'agent'
AND contract_id = 'auto_repair_executor'
AND publisher_id = 'migration:t23_auto_repair_executor_read_gateway'
AND lifecycle_status = 'active';

View File

@@ -65,6 +65,12 @@ DEFAULT_HOST_USERS = {
# AI/Web host is operated by the ollama account in the current topology.
"192.168.0.188": "ollama",
}
SHORT_HOST_MAP = {
"110": "192.168.0.110",
"120": "192.168.0.120",
"121": "192.168.0.121",
"188": "192.168.0.188",
}
DIAG_TIMEOUT = 10 # 診斷類超時(秒)
OP_TIMEOUT = 60 # 操作類超時(秒)
@@ -127,7 +133,9 @@ def _normalize_ssh_host(value: str) -> str:
if host.count(":") == 1:
maybe_host, maybe_port = host.rsplit(":", 1)
if maybe_port.isdigit():
return maybe_host
host = maybe_host
if host in SHORT_HOST_MAP:
return SHORT_HOST_MAP[host]
return host
@@ -240,6 +248,10 @@ class SSHProvider(MCPToolProvider):
),
input_schema={"type": "object", "properties": {
"host": {"type": "string", "description": "Target host IP"},
"container_name": {
"type": "string",
"description": "Optional Docker container name for container-focused diagnostics",
},
}, "required": ["host"]},
server_name=self.name,
),
@@ -542,12 +554,23 @@ class SSHProvider(MCPToolProvider):
# 所有接受用戶字串的工具,必須先通過 _validate_param() 白名單驗證
if tool_name == "ssh_diagnose":
# 2026-04-27 Claude Sonnet 4.6: 主機告警自動診斷 — 只讀,不修改任何狀態
return (
command = (
"echo '=== CPU TOP ===' && ps aux --sort=-%cpu | head -15 && "
"echo '=== MEMORY ===' && free -h && "
"echo '=== DISK ===' && df -h && "
"echo '=== LOAD ===' && uptime"
)
container_name = params.get("container_name")
if container_name:
name = _validate_param("container_name", str(container_name))
command = (
f"{command} && "
f"echo '=== DOCKER STATS {name} ===' && "
f"docker stats --no-stream {name} 2>&1 && "
f"echo '=== DOCKER INSPECT {name} ===' && "
f"docker inspect {name} 2>&1 | head -80"
)
return command
if tool_name == "ssh_get_top_processes":
return "ps aux --sort=-%cpu | head -15"

View File

@@ -22,9 +22,10 @@ Phase 8: 自動化層實作
- P0/P1 嚴重度 Incident 需要人工確認
"""
from dataclasses import dataclass
from collections.abc import Callable
from typing import Protocol
from dataclasses import dataclass
import re
from typing import Any, Protocol
import structlog
@@ -81,6 +82,55 @@ class AutoRepairResult:
execution_time_ms: int = 0
@dataclass(frozen=True)
class _SshMcpRoute:
"""Route a legacy SSH playbook command to a governed MCP tool."""
tool_name: str
params: dict[str, Any]
_SHORT_HOST_MAP: dict[str, str] = {
"110": "192.168.0.110",
"120": "192.168.0.120",
"121": "192.168.0.121",
"188": "192.168.0.188",
}
_SSH_DIAGNOSTIC_KEYWORDS = (
"ps aux",
"docker stats",
"docker inspect",
"docker logs",
"docker ps",
"docker top",
"df -h",
"du -",
"free -h",
"journalctl",
"systemctl show",
"tail ",
"top ",
"uptime",
)
_SSH_WRITE_KEYWORDS = (
"docker restart",
"docker start",
"docker stop",
"docker rm",
"docker prune",
"systemctl restart",
"systemctl stop",
"systemctl start",
"truncate ",
" rm ",
"rm -",
"certbot renew",
"bash ",
)
# =============================================================================
# Auto Repair Service Interface
# =============================================================================
@@ -918,6 +968,152 @@ class AutoRepairService:
# 安全降級:檢查失敗 → 保守拒絕
return False
def _route_legacy_ssh_command_to_mcp(
self,
incident: Incident,
command: str,
) -> _SshMcpRoute | None:
"""Map read-only legacy ``ssh {host} '...'`` steps to MCP Gateway.
YAML_RULE playbooks predate the URI executor and can contain compound
shell diagnostics. Those commands should not bypass the newer
scheme-based HostRepairAgent or loosen its shell safety guard; read-only
diagnostics are instead routed to the governed SSH MCP provider.
"""
raw_command = (command or "").strip()
lowered = raw_command.lower()
if not lowered.startswith("ssh "):
return None
if any(token in lowered for token in _SSH_WRITE_KEYWORDS):
return None
if not any(token in lowered for token in _SSH_DIAGNOSTIC_KEYWORDS):
return None
host = self._resolve_ssh_host_for_incident(incident, raw_command)
if not host:
return None
params: dict[str, Any] = {"host": host}
container_name = self._resolve_container_name_for_incident(incident, raw_command)
if container_name:
params["container_name"] = container_name
return _SshMcpRoute(tool_name="ssh_diagnose", params=params)
def _resolve_ssh_host_for_incident(self, incident: Incident, command: str) -> str:
"""Resolve ``{host}``, short host labels, and exporter instance ports."""
labels = self._incident_labels(incident)
raw_host = ""
match = re.match(r"ssh\s+([^\s'\"]+)", command.strip(), flags=re.IGNORECASE)
if match:
raw_host = match.group(1)
if not raw_host or "{" in raw_host or "}" in raw_host:
raw_host = (
str(labels.get("host") or "")
or str(labels.get("instance") or "")
or str(labels.get("node") or "")
or str(labels.get("exported_instance") or "")
)
return self._normalize_ssh_host(raw_host)
@staticmethod
def _normalize_ssh_host(raw_host: str) -> str:
host = (raw_host or "").strip()
if host.startswith("ssh://"):
host = host.removeprefix("ssh://")
if "@" in host:
host = host.rsplit("@", 1)[1]
if host.startswith("[") and "]" in host:
host = host[1:host.index("]")]
if host.count(":") == 1:
maybe_host, maybe_port = host.rsplit(":", 1)
if maybe_port.isdigit():
host = maybe_host
if host in _SHORT_HOST_MAP:
return _SHORT_HOST_MAP[host]
match = re.fullmatch(r"(?:node-exporter-|host-)?(110|120|121|188)", host)
if match:
return _SHORT_HOST_MAP[match.group(1)]
return host
def _resolve_container_name_for_incident(
self,
incident: Incident,
command: str,
) -> str:
labels = self._incident_labels(incident)
for key in ("container_name", "container", "name"):
value = str(labels.get(key) or "").strip()
if value and "{" not in value and "}" not in value:
return value
match = re.search(
r"docker\s+(?:stats\s+--no-stream|inspect|logs|top|ps\s+-a\s+--filter\s+name=)\s+([a-zA-Z0-9._-]+)",
command,
)
return match.group(1) if match else ""
@staticmethod
def _incident_labels(incident: Incident) -> dict[str, Any]:
for signal in incident.signals or []:
labels = getattr(signal, "labels", None)
if labels:
return labels
return {}
async def _execute_ssh_mcp_route(
self,
incident: Incident,
route: _SshMcpRoute,
) -> str:
"""Execute a routed SSH diagnostic through AwoooP MCP Gateway."""
try:
from src.db.base import get_db_context
from src.plugins.mcp.gateway import GatewayContext, McpGateway, McpGatewayError
from src.services.mcp_audit_context import with_mcp_audit_context
incident_id = incident.incident_id
params = with_mcp_audit_context(
route.params,
session_id=f"incident:{incident_id}:auto_repair_execute",
incident_id=incident_id,
flywheel_node="execute",
agent_role="auto_repair_executor",
)
async with get_db_context("awoooi") as db:
ctx = GatewayContext(
project_id="awoooi",
agent_id="auto_repair_executor",
tool_name=route.tool_name,
trace_id=incident_id,
is_shadow=False,
environment={"env": "prod"},
required_scope="read",
)
result = await McpGateway(db).call(ctx, params)
except McpGatewayError as exc:
return f"FAILED: mcp:{route.tool_name} {exc.error_code}: {exc}"
except Exception as exc:
logger.warning(
"auto_repair_ssh_mcp_route_failed",
incident_id=incident.incident_id,
tool=route.tool_name,
error=str(exc),
)
return f"FAILED: mcp:{route.tool_name} {exc}"
if result.success:
preview = str(result.output or "")[:500]
return f"SUCCESS: mcp:{route.tool_name} {preview}".strip()
return f"FAILED: mcp:{route.tool_name} {result.error or 'execution failed'}"
async def _execute_step(self, incident: Incident, step) -> str:
"""
執行單一修復步驟
@@ -949,6 +1145,10 @@ class AutoRepairService:
# 2026-04-06 Claude Code: Sprint 3 — repair_by_uri (URI scheme 路由)
if step.action_type == ActionType.SSH_COMMAND:
route = self._route_legacy_ssh_command_to_mcp(incident, step.command)
if route is not None:
return await self._execute_ssh_mcp_route(incident, route)
from src.services.host_repair_agent import HostRepairAgent
agent = HostRepairAgent()
approved = not getattr(step, "requires_approval", False)

View File

@@ -34,6 +34,7 @@ MASTER §3.1 L6×D1
from __future__ import annotations
import asyncio
import re
from typing import TYPE_CHECKING, Any
import structlog
@@ -236,6 +237,7 @@ class PostExecutionVerifier:
"pod_name": labels.get("pod", labels.get("name", "")),
"deployment": labels.get("deployment", ""),
"host": labels.get("instance", "").split(":")[0] or labels.get("host", ""),
"query": _build_prometheus_query(alertname, labels),
}
async def _call_one(reg) -> tuple[str, Any]:
@@ -501,16 +503,95 @@ def _get_incident_id(incident: "Incident") -> str:
def _get_alertname(incident: "Incident") -> str:
if incident.signals:
return incident.signals[0].labels.get("alertname", "")
signal = incident.signals[0]
labels = signal.labels or {}
return labels.get("alertname", "") or getattr(signal, "alert_name", "")
return ""
def _get_labels(incident: "Incident") -> dict[str, Any]:
if incident.signals:
return incident.signals[0].labels
return incident.signals[0].labels or {}
return {}
def _build_prometheus_query(alertname: str, labels: dict[str, Any]) -> str:
"""Build a non-empty PromQL probe for post-execution metric sensors."""
alert = (alertname or "").lower()
namespace = _safe_label_value(str(labels.get("namespace") or "awoooi-prod"))
pod_name = _safe_label_value(str(labels.get("pod") or labels.get("name") or ""))
host = _safe_label_value(str(labels.get("host") or _short_instance(labels.get("instance")) or ""))
container = _safe_label_value(str(
labels.get("container_name")
or labels.get("container")
or labels.get("name")
or ""
))
if alert.startswith("dockercontainer"):
selector = _selector({
"host": host,
"container_name": container,
})
if "memory" in alert:
return (
f"docker_container_memory_usage_bytes{selector} / "
f"docker_container_memory_limit_bytes{selector}"
)
if "restart" in alert:
return (
f"increase(docker_container_inspect_restart_count{selector}[15m]) "
f"or increase(docker_container_restart_count{selector}[15m])"
)
if "cpu" in alert:
return f"docker_container_cpu_cores{selector}"
return f"docker_container_info{selector}"
if any(key in alert for key in ("host", "node")):
instance_selector = _selector({"instance": str(labels.get("instance") or "")})
if "memory" in alert or "oom" in alert:
return f"node_memory_MemAvailable_bytes{instance_selector}"
if "disk" in alert or "storage" in alert:
return f"node_filesystem_avail_bytes{instance_selector}"
if "load" in alert or "cpu" in alert:
return f"node_load5{instance_selector}"
return f"up{instance_selector}"
pod_filter = f',pod=~"{pod_name}.*"' if pod_name else ""
if any(key in alert for key in ("memory", "mem", "oom")):
return (
f'avg(container_memory_working_set_bytes{{namespace="{namespace}"{pod_filter}}}) '
"/ 1048576"
)
if any(key in alert for key in ("cpu", "load", "throttl")):
return f'avg(rate(container_cpu_usage_seconds_total{{namespace="{namespace}"{pod_filter}}}[5m]))'
if any(key in alert for key in ("crash", "restart", "backoff")):
return f'sum(increase(kube_pod_container_status_restarts_total{{namespace="{namespace}"}}[15m]))'
if any(key in alert for key in ("http", "error", "5xx", "probe", "down", "unhealthy")):
return "1 - avg(probe_success)"
return f'up{{namespace="{namespace}"}}'
def _selector(labels: dict[str, str]) -> str:
parts = [f'{key}="{value}"' for key, value in labels.items() if value]
return "{" + ",".join(parts) + "}" if parts else ""
def _short_instance(instance: Any) -> str:
raw = str(instance or "")
if raw.count(":") == 1:
return raw.rsplit(":", 1)[0]
return raw
def _safe_label_value(value: str) -> str:
cleaned = (value or "").strip()
if re.fullmatch(r"[a-zA-Z0-9_.:-]{1,128}", cleaned):
return cleaned
return ""
async def _update_snapshot(
snapshot: EvidenceSnapshot,
post_state: dict[str, Any],

View File

@@ -19,6 +19,7 @@ from src.models.playbook import (
RiskLevel,
SymptomPattern,
)
from src.plugins.mcp.interfaces import MCPToolResult
from src.services.auto_repair_service import AutoRepairService
from src.utils.timezone import now_taipei
@@ -120,6 +121,14 @@ async def _no_cooldown(*args, **kwargs) -> tuple[bool, str]:
return True, "允許自動修復 (test bypass)"
class _DbContext:
async def __aenter__(self) -> object:
return object()
async def __aexit__(self, *_args: object) -> None:
return None
class TestAutoRepairService:
"""Auto Repair Service unit tests"""
@@ -415,6 +424,90 @@ class TestAutoRepairService:
assert service._should_escalate_failed_verification(incident, playbook) is False
def test_legacy_ssh_diagnostic_routes_to_mcp_gateway(self, service):
"""Legacy YAML_RULE SSH diagnostics become governed read-only MCP calls."""
incident = create_test_incident(
severity=Severity.P2,
alert_category="infrastructure",
alert_name="DockerContainerMemoryLimitPressure",
)
incident.signals[0].labels.update({
"host": "110",
"container_name": "sentry-self-hosted-clickhouse-1",
})
route = service._route_legacy_ssh_command_to_mcp(
incident,
'ssh {host} \'echo "=== LOAD ==="; uptime; docker stats --no-stream\'',
)
assert route is not None
assert route.tool_name == "ssh_diagnose"
assert route.params == {
"host": "192.168.0.110",
"container_name": "sentry-self-hosted-clickhouse-1",
}
def test_legacy_ssh_write_action_does_not_route_to_read_mcp(self, service):
"""Write operations must stay out of the read-only diagnostic grant."""
incident = create_test_incident(severity=Severity.P2)
route = service._route_legacy_ssh_command_to_mcp(
incident,
"ssh {host} 'docker restart minio'",
)
assert route is None
@pytest.mark.asyncio
async def test_execute_legacy_ssh_diagnostic_uses_mcp_gateway(
self,
service,
monkeypatch,
):
incident = create_test_incident(
severity=Severity.P2,
alert_category="infrastructure",
alert_name="DockerContainerMemoryLimitPressure",
)
incident.signals[0].labels.update({
"host": "110",
"container_name": "momo-scheduler",
})
step = RepairStep(
step_number=1,
action_type=ActionType.SSH_COMMAND,
command='ssh {host} \'echo "=== LOAD ==="; uptime; docker stats --no-stream\'',
risk_level=RiskLevel.LOW,
)
calls = []
class FakeGateway:
def __init__(self, db: object) -> None:
self.db = db
async def call(self, ctx, params):
calls.append({"ctx": ctx, "params": params, "db": self.db})
return MCPToolResult(
success=True,
execution_id="gw-ok",
output={"stdout": "=== CPU TOP ==="},
)
monkeypatch.setattr("src.db.base.get_db_context", lambda _project_id: _DbContext())
monkeypatch.setattr("src.plugins.mcp.gateway.McpGateway", FakeGateway)
result = await service._execute_step(incident, step)
assert result.startswith("SUCCESS: mcp:ssh_diagnose")
assert calls
assert calls[0]["ctx"].agent_id == "auto_repair_executor"
assert calls[0]["ctx"].required_scope == "read"
assert calls[0]["ctx"].is_shadow is False
assert calls[0]["params"]["host"] == "192.168.0.110"
assert calls[0]["params"]["container_name"] == "momo-scheduler"
assert calls[0]["params"]["_mcp_audit"]["flywheel_node"] == "execute"
@pytest.mark.asyncio
async def test_evaluate_low_risk_allowed(self, service, mock_playbook_service):
"""Test that LOW risk actions are allowed"""

View File

@@ -384,6 +384,25 @@ class _FakeRegistry:
]
class _PrometheusRegistry:
def __init__(self, provider: _CaptureProvider) -> None:
self.provider = provider
def suggest_tools(self, alertname: str = "", incident_labels: dict | None = None) -> list[RegisteredTool]:
return [
RegisteredTool(
tool=MCPTool(
name="prometheus_query",
description="",
input_schema={},
server_name="capture",
),
provider=self.provider,
dimensions=[SensorDimension.D3_METRICS],
)
]
class _DbContext:
async def __aenter__(self) -> object:
return object()
@@ -448,3 +467,39 @@ class TestCollectPostStateAuditContext:
assert ctx.required_scope == "read"
assert calls[0]["parameters"]["_mcp_audit"]["incident_id"] == "INC-TEST"
assert calls[0]["parameters"]["_mcp_audit"]["flywheel_node"] == "verify"
@pytest.mark.asyncio
async def test_collect_post_state_sends_prometheus_query_parameter(self):
provider = _CaptureProvider()
verifier = PostExecutionVerifier()
verifier._registry = _PrometheusRegistry(provider)
incident = _stub_incident(alertname="DockerContainerMemoryLimitPressure")
incident.signals[0].labels.update({
"host": "110",
"container_name": "momo-scheduler",
})
await verifier._collect_post_state(incident)
assert provider.seen_parameters is not None
query = provider.seen_parameters["query"]
assert "docker_container_memory_usage_bytes" in query
assert 'host="110"' in query
assert 'container_name="momo-scheduler"' in query
class TestPrometheusQueryBuilder:
def test_docker_memory_alert_query_is_not_empty(self):
query = pev_module._build_prometheus_query(
"DockerContainerMemoryLimitPressure",
{"host": "110", "container_name": "momo-scheduler"},
)
assert "docker_container_memory_usage_bytes" in query
assert "docker_container_memory_limit_bytes" in query
def test_canary_alert_uses_generic_non_empty_query(self):
query = pev_module._build_prometheus_query("AwoooPAutoRepairCanaryT16", {})
assert query
assert "up" in query

View File

@@ -21,6 +21,18 @@ async def test_ssh_diagnose_is_registered_read_only_tool():
assert "df -h" in command
def test_ssh_diagnose_can_include_container_read_only_context():
provider = SSHProvider()
command = provider._build_command(
"ssh_diagnose",
{"container_name": "sentry-self-hosted-clickhouse-1"},
)
assert "docker stats --no-stream sentry-self-hosted-clickhouse-1" in command
assert "docker inspect sentry-self-hosted-clickhouse-1" in command
def test_ssh_provider_uses_ollama_user_for_188():
provider = SSHProvider()
@@ -32,6 +44,8 @@ def test_ssh_provider_uses_ollama_user_for_188():
"raw,expected",
[
("192.168.0.110:9100", "192.168.0.110"),
("110:9100", "192.168.0.110"),
("188", "192.168.0.188"),
("wooo@192.168.0.110", "192.168.0.110"),
("ssh://wooo@192.168.0.110:22", "192.168.0.110"),
("192.168.0.188", "192.168.0.188"),