diff --git a/services/auto_heal_service.py b/services/auto_heal_service.py
index afbe41b..9137748 100644
--- a/services/auto_heal_service.py
+++ b/services/auto_heal_service.py
@@ -27,7 +27,6 @@ from sqlalchemy import text
 from services.logger_manager import SystemLogger
 from services.ai_automation_metrics import record_autoheal_action
 from database.manager import get_session
-from utils.ssh_helper import run_ssh_command
 
 logger = SystemLogger("AutoHealService").get_logger()
 
@@ -99,6 +98,23 @@ def _load_escalation(trigger_type: str) -> Optional[int]:
         return row[0] if row else None
 
 
+# ---- SSH helper ----
+def _ensure_key_permissions(key_path: str) -> None:
+    """Best-effort: restrict the SSH private key to owner read/write (0o600).
+
+    A missing key or a failed chmod is logged as a warning and never raised.
+    """
+    # chmod directly instead of testing os.path.exists first, so the key
+    # cannot disappear between the check and the chmod.
+    try:
+        os.chmod(key_path, 0o600)
+    except FileNotFoundError:
+        logger.warning("SSH key not found: %s", key_path)
+    except (OSError, ValueError) as e:
+        # ValueError: paths os.chmod rejects outright, e.g. an embedded NUL.
+        logger.warning("Failed to secure SSH key: %s", e)
+
+
 def _ssh_exec(
     jump_host: str,
     jump_user: str,
@@ -111,29 +127,53 @@ def _ssh_exec(
     Execute command on target_host via SSH jump host.
     command must be a list (argv) to avoid shell injection.
     """
+    import subprocess
+
     safe_key = key_path or SSH_KEY_PATH
-    result = run_ssh_command(
-        host=target_host,
-        user=target_user,
-        command=command,
-        port=SSH_PORT,
-        key_path=safe_key,
-        connect_timeout=SSH_CONNECT_TIMEOUT,
-        command_timeout=SSH_COMMAND_TIMEOUT,
-        jump_host=jump_host,
-        jump_user=jump_user,
-        batch_mode=True,
-        server_alive_interval=15,
-        server_alive_count_max=3,
-        logger=logger,
-    )
-    return {
-        "success": result.success,
-        "exit_code": result.returncode,
-        "stdout": result.stdout,
-        "stderr": "SSH timeout" if result.stderr.startswith("SSH timeout after ") else result.stderr,
-        "command": command,
-    }
+    _ensure_key_permissions(safe_key)
+
+    # NOTE(review): StrictHostKeyChecking=no lets connections to unknown or
+    # changed host keys proceed; confirm that is intended for these hosts.
+    # NOTE(review): per ssh(1), -i/-p/-o given here apply to the target only,
+    # not to the -J jump host; confirm the jump host is set up via ssh_config.
+    full_cmd = [
+        "ssh",
+        "-p", str(SSH_PORT),
+        "-i", safe_key,
+        "-o", "StrictHostKeyChecking=no",
+        "-o", "BatchMode=yes",
+        "-o", f"ConnectTimeout={SSH_CONNECT_TIMEOUT}",
+        "-o", "ServerAliveInterval=15",
+        "-o", "ServerAliveCountMax=3",
+        "-J", f"{jump_user}@{jump_host}",
+        f"{target_user}@{target_host}",
+        # End of ssh options: argv items starting with "-" stay part of the command.
+        "--",
+        # NOTE(review): ssh hands these to the remote shell as one space-joined
+        # string, so items containing spaces/metacharacters are re-parsed there.
+        *command,
+    ]
+    try:
+        result = subprocess.run(
+            full_cmd,
+            shell=False,
+            capture_output=True,
+            text=True,
+            timeout=SSH_COMMAND_TIMEOUT,
+        )
+        return {
+            "success": result.returncode == 0,
+            "exit_code": result.returncode,
+            "stdout": result.stdout.strip(),
+            "stderr": result.stderr.strip(),
+            "command": command,
+        }
+    except subprocess.TimeoutExpired:
+        return {"success": False, "exit_code": -1, "stdout": "", "stderr": "SSH timeout", "command": command}
+    except Exception as e:
+        # Deliberate catch-all: callers get a failure dict instead of an exception.
+        logger.warning("SSH exec error: %s", e)
+        return {"success": False, "exit_code": -1, "stdout": "", "stderr": str(e), "command": command}
 
 
 # ---- PlayBook ----