#!/usr/bin/env python3
"""
AWOOOI Docker container auto-discovery
======================================

ADR-037 Wave C.3: scan the Docker hosts and automatically update
service-registry.yaml.

What it does:
1. Connect to the Docker hosts over SSH (188, 110)
2. List the running containers
3. Compare them against service-registry.yaml
4. Print a diff report (new / removed / unmonitored)

Usage:
    python ops/monitoring/discover_docker.py
    python ops/monitoring/discover_docker.py --update  # auto-update the registry
    python ops/monitoring/discover_docker.py --json    # JSON output
    python ops/monitoring/discover_docker.py --host 192.168.0.188  # one host only

Version: v1.0
Created: 2026-03-29 (Taipei time zone)
Author: Claude Code (Phase 21 ADR-037)
"""

import argparse
import json
import subprocess
import sys
from pathlib import Path
from typing import Optional

try:
    import yaml
except ImportError:
    # Reported by _require_yaml() at the point YAML I/O is actually needed, so
    # the pure comparison helpers remain importable without PyYAML.
    yaml = None

# Configuration
SCRIPT_DIR = Path(__file__).parent
REGISTRY_FILE = SCRIPT_DIR / "service-registry.yaml"

# Docker hosts to scan
DOCKER_HOSTS = [
    {
        "host": "192.168.0.188",
        "name": "pg",
        "role": "ai-web",
        "ssh_user": "ogt",
    },
    {
        "host": "192.168.0.110",
        "name": "harbor",
        "role": "devops",
        "ssh_user": "wooo",
    },
]

# Name prefixes of containers to ignore (system containers)
IGNORE_CONTAINERS = {
    "k3s-agent",
    "k3s",
    "rancher-",
    "portainer",
}

# Port written to the registry when a container publishes no host port
_DEFAULT_SERVICE_PORT = 8080


def _require_yaml():
    """Return the PyYAML module, raising ImportError if it is not installed."""
    if yaml is None:
        raise ImportError(
            "PyYAML is required to read/write service-registry.yaml "
            "(pip install pyyaml)"
        )
    return yaml


def run_ssh_command(host: str, user: str, command: str) -> tuple[bool, str]:
    """Run *command* on ``user@host`` over SSH.

    Args:
        host: Host name or IP address to connect to.
        user: SSH login user.
        command: Command line executed by the remote shell.

    Returns:
        ``(True, stdout)`` when the command exits with status 0, otherwise
        ``(False, message)`` where *message* is the remote stderr (falling
        back to stdout), ``"SSH timeout"``, or the text of the local error.
    """
    try:
        result = subprocess.run(
            [
                "ssh",
                "-o", "BatchMode=yes",
                "-o", "ConnectTimeout=5",
                "-o", "StrictHostKeyChecking=accept-new",
                f"{user}@{host}",
                command,
            ],
            capture_output=True,
            text=True,
            timeout=30,
            check=False,
        )
    except subprocess.TimeoutExpired:
        return False, "SSH timeout"
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # OSError: ssh binary missing; ValueError: undecodable output.
        return False, str(e)

    if result.returncode != 0:
        # ssh and the remote shell report failures on stderr; stdout is
        # normally empty in that case, so prefer stderr for the message.
        return False, result.stderr.strip() or result.stdout.strip()
    return True, result.stdout


def _parse_exposed_port(ports: str) -> Optional[int]:
    """Return the first published host port in a ``docker ps`` Ports string.

    The expected format is a comma-separated list such as
    ``"0.0.0.0:8080->8080/tcp, 9000/tcp"``. Entries without ``->`` are not
    published to the host and are skipped, as are entries whose host port
    is not a plain integer.
    """
    for port_part in ports.split(","):
        if "->" not in port_part:
            continue
        host_port = port_part.split("->")[0].split(":")[-1]
        try:
            return int(host_port)
        except ValueError:
            continue
    return None


def discover_containers(host_info: dict) -> list[dict]:
    """List the running Docker containers on one host.

    Args:
        host_info: An entry of ``DOCKER_HOSTS``; the keys ``host``,
            ``ssh_user``, ``name`` and ``role`` are read.

    Returns:
        One dict per container with the keys ``name``, ``image``, ``status``,
        ``port`` (first published host port or ``None``), ``host``,
        ``host_name`` and ``role``. Containers whose name starts with a
        prefix in ``IGNORE_CONTAINERS`` are left out. An empty list is
        returned when the host cannot be reached (the error is printed to
        stderr).
    """
    host = host_info["host"]
    user = host_info["ssh_user"]

    # One JSON object per line
    cmd = 'docker ps --format "{{json .}}"'
    success, output = run_ssh_command(host, user, cmd)

    if not success:
        print(f" ❌ 無法連線 {host}: {output}", file=sys.stderr)
        return []

    ignored_prefixes = tuple(IGNORE_CONTAINERS)
    containers = []
    for line in output.splitlines():
        line = line.strip()
        if not line:
            continue

        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(data, dict):
            # Valid JSON but not a container record (e.g. a stray number).
            continue

        name = data.get("Names", "")

        # Filter out system containers
        if name.startswith(ignored_prefixes):
            continue

        containers.append({
            "name": name,
            "image": data.get("Image", ""),
            "status": data.get("Status", ""),
            "port": _parse_exposed_port(data.get("Ports", "") or ""),
            "host": host,
            "host_name": host_info["name"],
            "role": host_info["role"],
        })

    return containers


def load_registry() -> dict:
    """Load the service registry from ``REGISTRY_FILE``.

    Returns:
        The parsed YAML document, or ``{"services": []}`` when the file does
        not exist or is empty.

    Raises:
        ImportError: If the file exists but PyYAML is not installed.
    """
    if not REGISTRY_FILE.exists():
        return {"services": []}

    with open(REGISTRY_FILE, encoding="utf-8") as f:
        return _require_yaml().safe_load(f) or {"services": []}


def compare_with_registry(
    containers: list[dict],
    registry: dict,
    scanned_hosts: Optional[set] = None,
) -> dict:
    """Compare discovered containers with the ``docker`` registry entries.

    Containers and registry entries are matched on ``host`` + ``name``.

    Args:
        containers: Containers as returned by ``discover_containers``.
        registry: Parsed registry; only entries with ``type == "docker"``
            under ``services`` are considered.
        scanned_hosts: Hosts that were actually scanned. When given, registry
            entries on other hosts are not reported as missing (they were
            simply not looked at). ``None`` keeps the historical behaviour
            of treating every host as scanned.

    Returns:
        A dict with four lists:
        ``monitored`` (in registry with Prometheus enabled),
        ``new`` (running but not in the registry),
        ``missing`` (in the registry but not running) and
        ``no_prometheus`` (in the registry with Prometheus disabled).
    """
    # Index registry services by "host:name"
    registry_services = {}
    for svc in registry.get("services") or []:
        if svc.get("type") == "docker":
            key = f"{svc.get('host', '')}:{svc.get('name', '')}"
            registry_services[key] = svc

    result = {
        "monitored": [],
        "new": [],
        "missing": [],
        "no_prometheus": [],
    }

    discovered_keys = set()
    for container in containers:
        key = f"{container['host']}:{container['name']}"
        discovered_keys.add(key)

        svc = registry_services.get(key)
        if svc is None:
            result["new"].append(container)
            continue

        # "monitoring:" with no value parses to None, hence the "or {}".
        monitoring = svc.get("monitoring") or {}
        bucket = "monitored" if monitoring.get("prometheus") else "no_prometheus"
        result[bucket].append({
            **container,
            "registry_name": svc.get("name", ""),
        })

    # Registry entries with no matching running container
    for key, svc in registry_services.items():
        if key in discovered_keys:
            continue
        if scanned_hosts is not None and svc.get("host", "") not in scanned_hosts:
            continue
        result["missing"].append({
            "name": svc.get("name", ""),
            "host": svc.get("host", ""),
            "port": svc.get("port"),
        })

    return result


def generate_service_entry(container: dict) -> dict:
    """Build a service-registry.yaml entry for a newly discovered container.

    Args:
        container: A container dict as returned by ``discover_containers``.

    Returns:
        A registry entry of type ``docker`` with Prometheus monitoring
        enabled, criticality ``P2`` and owner ``"<role>-team"``. The port
        falls back to 8080 when the container publishes none.
    """
    return {
        "name": container["name"],
        "type": "docker",
        "host": container["host"],
        # The "port" key is always present but may be None, so a .get()
        # default would never apply.
        "port": container.get("port") or _DEFAULT_SERVICE_PORT,
        "health_endpoint": "/health",
        "monitoring": {
            "prometheus": True,
            "sentry": False,
            "otel": False,
        },
        "alerts": ["service_down"],
        "auto_repair": {
            "enabled": False,
        },
        "owner": f"{container['role']}-team",
        "criticality": "P2",
    }


def print_report(comparison: dict) -> bool:
    """Print the human-readable discovery report to stdout.

    Args:
        comparison: Result of ``compare_with_registry``.

    Returns:
        ``True`` when no new (unregistered) containers were found.
    """
    print("\n" + "=" * 60)
    print(" AWOOOI Docker Container Discovery Report")
    print("=" * 60)

    print(f"\n ✅ 已監控容器 ({len(comparison['monitored'])})")
    for c in comparison["monitored"]:
        port = c.get("port") or "N/A"
        print(f" - {c['name']} @ {c['host']}:{port}")

    if comparison["new"]:
        print(f"\n 🆕 新發現容器 ({len(comparison['new'])})")
        for c in comparison["new"]:
            port = c.get("port") or "N/A"
            print(f" - {c['name']} @ {c['host']}:{port}")
            print(f" Image: {c['image']}")

    if comparison["no_prometheus"]:
        print(f"\n ⚠️ 未啟用 Prometheus ({len(comparison['no_prometheus'])})")
        for c in comparison["no_prometheus"]:
            print(f" - {c['name']} @ {c['host']}")

    if comparison["missing"]:
        print(f"\n ❌ Registry 有但容器不存在 ({len(comparison['missing'])})")
        for c in comparison["missing"]:
            print(f" - {c['name']} @ {c['host']}")

    print("\n" + "=" * 60)

    # Summary: coverage is the share of running containers that are monitored
    monitored = len(comparison["monitored"])
    total = (
        monitored
        + len(comparison["new"])
        + len(comparison["no_prometheus"])
    )
    coverage = round(100 * monitored / total, 1) if total > 0 else 0

    print(f"\n 總容器數: {total}")
    print(f" 已監控: {monitored}")
    print(f" 覆蓋率: {coverage}%")

    return len(comparison["new"]) == 0  # no new containers = pass


def main():
    """Command-line entry point.

    Scans the configured hosts, compares the result with the registry and
    prints either a JSON document (``--json``) or a report. With ``--update``
    newly discovered containers are appended to service-registry.yaml.
    Exits with status 1 when new containers were found and not added.
    """
    parser = argparse.ArgumentParser(description="AWOOOI Docker Container Discovery")
    parser.add_argument("--update", action="store_true",
                        help="Auto-update service-registry.yaml")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--host", type=str, help="Only scan specific host")
    args = parser.parse_args()

    # In JSON mode stdout must carry nothing but the JSON document.
    progress = sys.stderr if args.json else sys.stdout
    print("Discovering Docker containers...", file=progress)

    all_containers = []
    scanned_hosts = set()
    for host_info in DOCKER_HOSTS:
        if args.host and host_info["host"] != args.host:
            continue

        print(f" Scanning {host_info['name']} ({host_info['host']})...", file=progress)
        containers = discover_containers(host_info)
        print(f" Found {len(containers)} containers", file=progress)
        scanned_hosts.add(host_info["host"])
        all_containers.extend(containers)

    if args.host and not scanned_hosts:
        print(f"No configured Docker host matches --host {args.host}", file=sys.stderr)

    # Load the registry and compare; hosts skipped by --host are not
    # reported as having missing containers.
    registry = load_registry()
    comparison = compare_with_registry(all_containers, registry, scanned_hosts)

    if args.json:
        print(json.dumps(comparison, indent=2, default=str))
        return

    all_good = print_report(comparison)

    updated = False
    if args.update and comparison["new"]:
        print("\nUpdating service-registry.yaml...")
        services = registry.get("services")
        if services is None:
            services = registry["services"] = []
        for container in comparison["new"]:
            entry = generate_service_entry(container)
            services.append(entry)
            print(f" Added: {entry['name']}")

        # NOTE: rewriting through PyYAML drops any comments in the file.
        # sort_keys=False keeps the existing key order instead of
        # alphabetising every mapping.
        with open(REGISTRY_FILE, "w", encoding="utf-8") as f:
            _require_yaml().dump(
                registry,
                f,
                default_flow_style=False,
                allow_unicode=True,
                sort_keys=False,
            )
        print(f"Updated: {REGISTRY_FILE}")
        updated = True

    # Only fail when new containers were found and NOT added to the registry.
    if not all_good and not updated:
        print("\n⚠️ 有新發現的容器未加入監控,請檢查上方報告")
        sys.exit(1)


if __name__ == "__main__":
    main()