315 lines
9.1 KiB
Python
Executable File
315 lines
9.1 KiB
Python
Executable File
#!/usr/bin/env python3
"""
AWOOOI Docker container auto-discovery
======================================

ADR-037 Wave C.3: scan the Docker hosts and automatically update
service-registry.yaml.

Features:
1. Connect to the Docker hosts over SSH (188, 110)
2. List the running containers
3. Compare them against service-registry.yaml
4. Print a diff report (new / removed / unmonitored)

Usage:
    python ops/monitoring/discover_docker.py
    python ops/monitoring/discover_docker.py --update  # update the registry automatically
    python ops/monitoring/discover_docker.py --json    # JSON output

Version: v1.0
Created: 2026-03-29 (Taipei time zone)
Author: Claude Code (Phase 21 ADR-037)
"""
|
|
|
|
import argparse
|
|
import json
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import yaml
|
|
|
|
# Configuration: the registry file is expected next to this script.
SCRIPT_DIR = Path(__file__).parent
REGISTRY_FILE = SCRIPT_DIR / "service-registry.yaml"

# Docker hosts to scan. "ssh_user" is the SSH login; "name" and "role" are
# copied onto every container discovered on that host.
DOCKER_HOSTS = [
    {
        "host": "192.168.0.188",
        "name": "pg",
        "role": "ai-web",
        "ssh_user": "ogt",
    },
    {
        "host": "192.168.0.110",
        "name": "harbor",
        "role": "devops",
        "ssh_user": "wooo",
    },
]

# Containers to ignore (system containers). Entries are matched as prefixes
# of the container name.
IGNORE_CONTAINERS = {
    "k3s-agent",
    "k3s",
    "rancher-",
    "portainer",
}
|
|
|
|
|
|
def run_ssh_command(host: str, user: str, command: str) -> tuple[bool, str]:
    """Run ``command`` on ``host`` over SSH as ``user``.

    The connection is non-interactive (``BatchMode=yes``), gives up connecting
    after 5 seconds and accepts previously unknown host keys. The whole call
    is limited to 30 seconds.

    Args:
        host: Host name or IP address to connect to.
        user: Remote login name.
        command: Command line handed to the remote side.

    Returns:
        ``(True, stdout)`` when ssh exits with status 0. Otherwise
        ``(False, message)``, where ``message`` is the captured stderr
        (falling back to stdout, then to the exit status) or a description
        of the local failure. This function does not raise for ssh failures.
    """
    ssh_argv = [
        "ssh",
        "-o",
        "BatchMode=yes",
        "-o",
        "ConnectTimeout=5",
        "-o",
        "StrictHostKeyChecking=accept-new",
        f"{user}@{host}",
        command,
    ]
    try:
        result = subprocess.run(
            ssh_argv,
            capture_output=True,
            text=True,
            # Decode explicitly so the result does not depend on the locale;
            # undecodable bytes are replaced instead of aborting the scan.
            encoding="utf-8",
            errors="replace",
            timeout=30,
        )
    except subprocess.TimeoutExpired:
        return False, "SSH timeout"
    except (OSError, ValueError) as exc:
        # OSError: the ssh binary is missing or cannot be executed.
        # ValueError: an argument cannot be passed to exec (e.g. embedded NUL).
        return False, str(exc)

    if result.returncode == 0:
        return True, result.stdout

    # ssh reports connection/authentication problems on stderr; returning
    # only stdout would leave the caller with an empty error message.
    detail = (
        result.stderr.strip()
        or result.stdout.strip()
        or f"exit status {result.returncode}"
    )
    return False, detail
|
|
|
|
|
|
def _parse_host_port(ports: str) -> "int | None":
    """Return the first published host port found in a ``docker ps`` Ports string."""
    for mapping in ports.split(","):
        if "->" not in mapping:
            # Exposed but not published (e.g. "5432/tcp"): no host port.
            continue
        # "0.0.0.0:8080->8080/tcp" / ":::8080->8080/tcp": the host port is
        # whatever follows the last colon on the left-hand side.
        published = mapping.split("->")[0].split(":")[-1]
        # Consecutive ports are shown as a range ("8000-8001"); use the first.
        first = published.split("-")[0].strip()
        try:
            return int(first)
        except ValueError:
            continue
    return None


def discover_containers(host_info: dict) -> list[dict]:
    """Discover the running Docker containers on one host.

    Runs ``docker ps --format "{{json .}}"`` over SSH and parses one JSON
    object per output line. Containers whose name starts with an entry of
    ``IGNORE_CONTAINERS`` are skipped, as are lines that are not JSON objects.

    Args:
        host_info: Host description with the keys ``host``, ``ssh_user``,
            ``name`` and ``role``.

    Returns:
        One dict per container with the keys ``name``, ``image``, ``status``,
        ``port`` (first published host port, or ``None``), ``host``,
        ``host_name`` and ``role``. An empty list when the SSH command fails
        (the failure is printed).
    """
    host = host_info["host"]
    user = host_info["ssh_user"]

    # One JSON document per container, one per line.
    cmd = 'docker ps --format "{{json .}}"'
    success, output = run_ssh_command(host, user, cmd)
    if not success:
        print(f" ❌ 無法連線 {host}: {output}")
        return []

    # str.startswith accepts a tuple, so build it once outside the loop.
    ignored_prefixes = tuple(IGNORE_CONTAINERS)

    containers = []
    for raw_line in output.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            # Not JSON (e.g. a warning line in the output): ignore it.
            continue
        if not isinstance(data, dict):
            continue

        name = data.get("Names") or ""
        if name.startswith(ignored_prefixes):
            continue

        containers.append({
            "name": name,
            "image": data.get("Image", ""),
            "status": data.get("Status", ""),
            "port": _parse_host_port(data.get("Ports") or ""),
            "host": host,
            "host_name": host_info["name"],
            "role": host_info["role"],
        })

    return containers
|
|
|
|
|
|
def load_registry() -> dict:
    """Load the service registry from ``REGISTRY_FILE``.

    Returns:
        The parsed YAML mapping. The ``"services"`` key always holds a list:
        an empty one is supplied when the file is missing or empty, or when
        the key is absent or null.

    Raises:
        ValueError: If the file's top-level YAML value is not a mapping.
    """
    try:
        # The registry may contain non-ASCII text; do not rely on the locale.
        with open(REGISTRY_FILE, encoding="utf-8") as f:
            registry = yaml.safe_load(f) or {}
    except FileNotFoundError:
        return {"services": []}

    if not isinstance(registry, dict):
        raise ValueError(
            f"{REGISTRY_FILE}: expected a mapping at the top level, "
            f"got {type(registry).__name__}"
        )

    # Callers append to registry["services"], so make sure it is a list.
    if registry.get("services") is None:
        registry["services"] = []
    return registry
|
|
|
|
|
|
def compare_with_registry(containers: list[dict], registry: dict) -> dict:
    """Classify discovered containers against the registry's docker services.

    Only registry entries with ``type == "docker"`` take part. Containers and
    entries are matched on the key ``"<host>:<name>"``.

    Args:
        containers: Discovered containers; each needs ``host`` and ``name``.
        registry: Registry mapping with an optional ``services`` list.

    Returns:
        A dict with four lists:

        * ``monitored`` - container is registered with Prometheus enabled
          (container dict plus ``registry_name``).
        * ``new`` - container has no registry entry.
        * ``missing`` - registry entry without a discovered container
          (``name``, ``host``, ``port``).
        * ``no_prometheus`` - container is registered but Prometheus is not
          enabled (container dict plus ``registry_name``).
    """
    # Index the docker services by host + name. "services: null" in the YAML
    # yields None, hence the "or []".
    registry_services = {}
    for svc in registry.get("services") or []:
        if svc.get("type") == "docker":
            key = f"{svc.get('host', '')}:{svc.get('name', '')}"
            registry_services[key] = svc

    result = {
        "monitored": [],
        "new": [],
        "missing": [],
        "no_prometheus": [],
    }

    discovered_keys = set()
    for container in containers:
        key = f"{container['host']}:{container['name']}"
        discovered_keys.add(key)

        svc = registry_services.get(key)
        if svc is None:
            result["new"].append(container)
            continue

        # An empty "monitoring:" entry loads as None; treat it as "nothing enabled".
        monitoring = svc.get("monitoring") or {}
        bucket = "monitored" if monitoring.get("prometheus") else "no_prometheus"
        result[bucket].append({
            **container,
            "registry_name": svc.get("name", ""),
        })

    # Registry entries for which no running container was found.
    for key, svc in registry_services.items():
        if key not in discovered_keys:
            result["missing"].append({
                "name": svc.get("name", ""),
                "host": svc.get("host", ""),
                "port": svc.get("port"),
            })

    return result
|
|
|
|
|
|
def generate_service_entry(container: dict) -> dict:
    """Build a service-registry.yaml entry for a discovered container.

    Args:
        container: Discovered container with the keys ``name``, ``host`` and
            ``role``, and optionally ``port``.

    Returns:
        A registry entry of type ``docker`` with Prometheus monitoring
        enabled, the ``service_down`` alert, auto-repair disabled, owner
        ``"<role>-team"`` and criticality ``P2``. The port falls back to
        8080 when the container has no published port.
    """
    # Discovered containers always carry a "port" key, which is None when no
    # port is published, so a plain .get("port", 8080) would never fall back.
    port = container.get("port")
    if port is None:
        port = 8080

    return {
        "name": container["name"],
        "type": "docker",
        "host": container["host"],
        "port": port,
        "health_endpoint": "/health",
        "monitoring": {
            "prometheus": True,
            "sentry": False,
            "otel": False,
        },
        "alerts": ["service_down"],
        "auto_repair": {
            "enabled": False,
        },
        "owner": f"{container['role']}-team",
        "criticality": "P2",
    }
|
|
|
|
|
|
def print_report(comparison: dict) -> bool:
    """Print the discovery report to stdout.

    Args:
        comparison: Result of ``compare_with_registry`` with the lists
            ``monitored``, ``new``, ``no_prometheus`` and ``missing``.

    Returns:
        ``True`` when there are no new (unregistered) containers, else ``False``.
    """

    def port_label(container: dict):
        # The "port" key exists but is None for containers without a
        # published port, so test the value rather than the key's presence.
        port = container.get("port")
        return "N/A" if port is None else port

    monitored_list = comparison["monitored"]
    new_list = comparison["new"]
    no_prometheus_list = comparison["no_prometheus"]
    missing_list = comparison["missing"]

    print("\n" + "=" * 60)
    print(" AWOOOI Docker Container Discovery Report")
    print("=" * 60)

    print(f"\n ✅ 已監控容器 ({len(monitored_list)})")
    for c in monitored_list:
        print(f" - {c['name']} @ {c['host']}:{port_label(c)}")

    if new_list:
        print(f"\n 🆕 新發現容器 ({len(new_list)})")
        for c in new_list:
            print(f" - {c['name']} @ {c['host']}:{port_label(c)}")
            print(f" Image: {c['image']}")

    if no_prometheus_list:
        print(f"\n ⚠️ 未啟用 Prometheus ({len(no_prometheus_list)})")
        for c in no_prometheus_list:
            print(f" - {c['name']} @ {c['host']}")

    if missing_list:
        print(f"\n ❌ Registry 有但容器不存在 ({len(missing_list)})")
        for c in missing_list:
            print(f" - {c['name']} @ {c['host']}")

    print("\n" + "=" * 60)

    # Statistics: "missing" entries are not running, so they do not count
    # towards the container total.
    total = len(monitored_list) + len(new_list) + len(no_prometheus_list)
    monitored = len(monitored_list)
    coverage = round(100 * monitored / total, 1) if total > 0 else 0

    print(f"\n 總容器數: {total}")
    print(f" 已監控: {monitored}")
    print(f" 覆蓋率: {coverage}%")

    # No new containers means the check passes.
    return not new_list
|
|
|
|
|
|
def main():
    """Command-line entry point: scan the hosts, compare, report, optionally update.

    Options:
        --update  Append an entry for every new container to the registry file.
        --json    Print the comparison as JSON on stdout and stop.
        --host    Scan only the configured host with this address.

    Exit status: 1 when new containers were found and not added to the
    registry; 2 (from argparse) when ``--host`` names no configured host.
    """
    import contextlib  # only needed here, for routing progress output

    parser = argparse.ArgumentParser(description="AWOOOI Docker Container Discovery")
    parser.add_argument("--update", action="store_true", help="Auto-update service-registry.yaml")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--host", type=str, help="Only scan specific host")
    args = parser.parse_args()

    hosts = [h for h in DOCKER_HOSTS if not args.host or h["host"] == args.host]
    if args.host and not hosts:
        # Scanning nothing would report every registered service as missing.
        parser.error(f"unknown host: {args.host}")

    # With --json, stdout must carry nothing but the JSON document, so the
    # progress lines (and anything printed while scanning) go to stderr.
    if args.json:
        progress = contextlib.redirect_stdout(sys.stderr)
    else:
        progress = contextlib.nullcontext()

    all_containers = []
    with progress:
        print("Discovering Docker containers...")
        for host_info in hosts:
            print(f" Scanning {host_info['name']} ({host_info['host']})...")
            containers = discover_containers(host_info)
            print(f" Found {len(containers)} containers")
            all_containers.extend(containers)

    # Load the registry and compare.
    registry = load_registry()
    scope = registry
    if args.host:
        # Only the scanned host can be judged; services registered on other
        # hosts must not show up as "missing".
        scope = {
            **registry,
            "services": [
                svc
                for svc in registry.get("services") or []
                if svc.get("host") == args.host
            ],
        }
    comparison = compare_with_registry(all_containers, scope)

    if args.json:
        print(json.dumps(comparison, indent=2, default=str))
        return

    all_good = print_report(comparison)

    # Automatic update: append the new containers to the full registry.
    updated = False
    if args.update and comparison["new"]:
        print("\nUpdating service-registry.yaml...")
        services = registry.get("services")
        if services is None:
            services = registry["services"] = []
        for container in comparison["new"]:
            entry = generate_service_entry(container)
            services.append(entry)
            print(f" Added: {entry['name']}")

        # allow_unicode writes non-ASCII text verbatim, so fix the encoding.
        with open(REGISTRY_FILE, "w", encoding="utf-8") as f:
            yaml.dump(registry, f, default_flow_style=False, allow_unicode=True)
        print(f"Updated: {REGISTRY_FILE}")
        updated = True

    # Fail only when new containers remain unregistered; after a successful
    # --update they have just been added.
    if not all_good and not updated:
        print("\n⚠️ 有新發現的容器未加入監控,請檢查上方報告")
        sys.exit(1)
|
|
|
|
|
|
# Run the discovery CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|