From e7d8da85f6ce6fe9d149855d8bf935a261bfeead Mon Sep 17 00:00:00 2001 From: OG T Date: Sun, 5 Apr 2026 11:22:00 +0800 Subject: [PATCH] =?UTF-8?q?feat(api):=20HostRepairAgent=20=E2=80=94=20SSH?= =?UTF-8?q?=20=E4=B8=BB=E6=A9=9F=E5=B1=A4=E4=BF=AE=E5=BE=A9=20(Task=2011)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - host_repair_agent.py: layer路由、command injection防護、asyncio SSH執行 - 測試: 12 cases 全通過 (routing/sanitize/success/fail/timeout/denied) - SSH key: /etc/repair-ssh/id_ed25519 (K8s secret mount) Co-Authored-By: Claude Sonnet 4.6 --- apps/api/src/services/host_repair_agent.py | 134 +++++++++++++++++++++ apps/api/tests/test_host_repair_agent.py | 130 ++++++++++++++++++++ 2 files changed, 264 insertions(+) create mode 100644 apps/api/src/services/host_repair_agent.py create mode 100644 apps/api/tests/test_host_repair_agent.py diff --git a/apps/api/src/services/host_repair_agent.py b/apps/api/src/services/host_repair_agent.py new file mode 100644 index 00000000..66108909 --- /dev/null +++ b/apps/api/src/services/host_repair_agent.py @@ -0,0 +1,134 @@ +""" +src/services/host_repair_agent.py +Host Repair Agent — 透過 SSH 執行主機層修復 +2026-04-05 Claude Code: Sprint 3 Host Auto-Repair +""" +import asyncio +import re +import logging +from dataclasses import dataclass, field + +logger = logging.getLogger(__name__) + +# SSH 連線設定 — layer → host config +LAYER_SSH_CONFIG: dict[str, dict] = { + "docker-110": { + "host": "192.168.0.110", + "user": "wooo", + "key_path": "/etc/repair-ssh/id_ed25519", + }, + "docker-188": { + "host": "192.168.0.188", + "user": "ollama", + "key_path": "/etc/repair-ssh/id_ed25519", + }, + "systemd-188": { + "host": "192.168.0.188", + "user": "ollama", + "key_path": "/etc/repair-ssh/id_ed25519", + }, +} + +# Component 名稱規則: 小寫英數 + 連字符,1-31 字元 +_COMPONENT_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,30}$") + +SSH_TIMEOUT = 60 # seconds + + +@dataclass +class HostRepairResult: + success: bool + layer: str + component: str + output: str = "" + error: str = "" + + +def get_ssh_config_for_layer(layer: str) -> dict: + """取得指定 layer 的 SSH 連線設定。k8s layer 不走 SSH。""" + if layer == "k8s" or layer.startswith("k8s"): + raise ValueError(f"Layer '{layer}' uses kubectl, not SSH") + config = LAYER_SSH_CONFIG.get(layer) + if config is None: + raise ValueError(f"Unknown layer: '{layer}'") + return config + + +def build_repair_command(component: str) -> str: + """組裝 repair 命令,防止 command injection。""" + if not _COMPONENT_RE.match(component): + raise ValueError(f"Invalid component name: '{component}'") + return f"repair:{component}" + + +class HostRepairAgent: + """透過 SSH 執行主機層修復命令。""" + + async def repair(self, layer: str, component: str) -> HostRepairResult: + """執行修復並回傳結果。""" + try: + config = get_ssh_config_for_layer(layer) + command = build_repair_command(component) + except ValueError as e: + return HostRepairResult( + success=False, + layer=layer, + component=component, + error=str(e), + ) + + try: + output = await self._ssh_execute( + host=config["host"], + user=config["user"], + command=command, + ) + except asyncio.TimeoutError: + return HostRepairResult( + success=False, + layer=layer, + component=component, + error=f"SSH timeout after {SSH_TIMEOUT}s", + ) + except Exception as e: + return HostRepairResult( + success=False, + layer=layer, + component=component, + error=str(e), + ) + + success = output.startswith("REPAIR_OK:") + return HostRepairResult( + success=success, + layer=layer, + component=component, + output=output, + error="" if success else output, + ) + + async def _ssh_execute(self, host: str, user: str, command: str) -> str: + """執行 SSH 命令,回傳 stdout。""" + key_path = LAYER_SSH_CONFIG.get( + next((k for k, v in LAYER_SSH_CONFIG.items() if v["host"] == host and v["user"] == user), None), + {} + ).get("key_path", "/etc/repair-ssh/id_ed25519") + + proc = await asyncio.wait_for( + asyncio.create_subprocess_exec( + "ssh", + "-i", key_path, + "-o", "StrictHostKeyChecking=no", + "-o", "BatchMode=yes", + "-o", f"ConnectTimeout={SSH_TIMEOUT}", + f"{user}@{host}", + command, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ), + timeout=SSH_TIMEOUT, + ) + stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=SSH_TIMEOUT) + output = stdout.decode().strip() + logger.info("SSH repair %s@%s %s → %s", user, host, command, output) + return output diff --git a/apps/api/tests/test_host_repair_agent.py b/apps/api/tests/test_host_repair_agent.py new file mode 100644 index 00000000..3b4feb99 --- /dev/null +++ b/apps/api/tests/test_host_repair_agent.py @@ -0,0 +1,130 @@ +""" +tests/test_host_repair_agent.py +Host Repair Agent 單元測試 +不需要實際 SSH 連線 — 測試路由邏輯和命令組裝 +""" +import asyncio +import pytest +from unittest.mock import AsyncMock, patch + + +# ============================================================================= +# 測試 HostRepairConfig 路由 +# ============================================================================= + +class TestHostRepairConfig: + def test_layer_docker_110_routes_to_110(self): + from src.services.host_repair_agent import get_ssh_config_for_layer + config = get_ssh_config_for_layer("docker-110") + assert config["user"] == "wooo" + assert config["host"] == "192.168.0.110" + + def test_layer_docker_188_routes_to_188(self): + from src.services.host_repair_agent import get_ssh_config_for_layer + config = get_ssh_config_for_layer("docker-188") + assert config["user"] == "ollama" + assert config["host"] == "192.168.0.188" + + def test_layer_systemd_188_routes_to_188(self): + from src.services.host_repair_agent import get_ssh_config_for_layer + config = get_ssh_config_for_layer("systemd-188") + assert config["user"] == "ollama" + assert config["host"] == "192.168.0.188" + + def test_unknown_layer_raises(self): + from src.services.host_repair_agent import get_ssh_config_for_layer + with pytest.raises(ValueError, match="Unknown layer"): + get_ssh_config_for_layer("unknown-layer") + + def test_k8s_layer_raises(self): + """k8s layer 不走 SSH,應 raise""" + from src.services.host_repair_agent import get_ssh_config_for_layer + with pytest.raises(ValueError, match="kubectl"): + get_ssh_config_for_layer("k8s") + + +# ============================================================================= +# 測試 SSH 命令組裝 +# ============================================================================= + +class TestSSHCommandBuilding: + def test_repair_command_format(self): + from src.services.host_repair_agent import build_repair_command + cmd = build_repair_command("sentry") + assert cmd == "repair:sentry" + + def test_repair_command_component_sanitized(self): + """防止 command injection""" + from src.services.host_repair_agent import build_repair_command + with pytest.raises(ValueError, match="Invalid component"): + build_repair_command("sentry; rm -rf /") + + def test_repair_command_valid_components(self): + from src.services.host_repair_agent import build_repair_command + valid = ["sentry", "harbor", "gitea", "openclaw", "gitea-runner", "alertmanager", "redis", "nginx"] + for component in valid: + cmd = build_repair_command(component) + assert cmd == f"repair:{component}" + + +# ============================================================================= +# 測試 HostRepairAgent.repair() 路由 +# ============================================================================= + +class TestHostRepairAgent: + @pytest.mark.asyncio + async def test_repair_success_returns_ok(self): + from src.services.host_repair_agent import HostRepairAgent + + agent = HostRepairAgent() + with patch.object(agent, "_ssh_execute", new_callable=AsyncMock) as mock_ssh: + mock_ssh.return_value = "REPAIR_OK:sentry" + + result = await agent.repair(layer="docker-110", component="sentry") + + assert result.success is True + assert result.component == "sentry" + assert result.layer == "docker-110" + mock_ssh.assert_called_once_with( + host="192.168.0.110", + user="wooo", + command="repair:sentry" + ) + + @pytest.mark.asyncio + async def test_repair_fail_returns_failure(self): + from src.services.host_repair_agent import HostRepairAgent + + agent = HostRepairAgent() + with patch.object(agent, "_ssh_execute", new_callable=AsyncMock) as mock_ssh: + mock_ssh.return_value = "REPAIR_FAIL:harbor:exit_1" + + result = await agent.repair(layer="docker-110", component="harbor") + + assert result.success is False + assert "REPAIR_FAIL" in result.error + + @pytest.mark.asyncio + async def test_repair_ssh_timeout_returns_failure(self): + from src.services.host_repair_agent import HostRepairAgent + + agent = HostRepairAgent() + with patch.object(agent, "_ssh_execute", new_callable=AsyncMock) as mock_ssh: + mock_ssh.side_effect = asyncio.TimeoutError() + + result = await agent.repair(layer="docker-110", component="sentry") + + assert result.success is False + assert "timeout" in result.error.lower() + + @pytest.mark.asyncio + async def test_repair_denied_returns_failure(self): + from src.services.host_repair_agent import HostRepairAgent + + agent = HostRepairAgent() + with patch.object(agent, "_ssh_execute", new_callable=AsyncMock) as mock_ssh: + mock_ssh.return_value = "REPAIR_DENIED:unknown_component:badcomponent" + + result = await agent.repair(layer="docker-110", component="badcomponent") + + assert result.success is False