diff --git a/k8s/monitoring/docker-compose-110.yml b/k8s/monitoring/docker-compose-110.yml
new file mode 100644
index 00000000..3116b227
--- /dev/null
+++ b/k8s/monitoring/docker-compose-110.yml
@@ -0,0 +1,154 @@
+# Monitoring stack: cAdvisor, Prometheus, Grafana, blackbox-exporter,
+# Alertmanager and the GitHub exporter, all attached to the "monitoring"
+# bridge network.
+#
+# Variables interpolated by Compose (shell environment or a .env file):
+#   GRAFANA_ADMIN_PASSWORD  Grafana admin password; required, never commit it.
+#   GITHUB_TOKEN            API token handed to github-exporter.
+#
+# NOTE(review): every image tracks ":latest"; consider pinning versions so
+# redeployments are reproducible.
+version: '3.8'
+
+services:
+  cadvisor:
+    image: gcr.io/cadvisor/cadvisor:latest
+    container_name: cadvisor
+    restart: unless-stopped
+    command:
+      - --logtostderr
+      - --disable_metrics=disk,diskIO,tcp,udp,percpu,sched,process
+      - --housekeeping_interval=30s
+      - --max_housekeeping_interval=60s
+      - --docker_only=true
+    ports:
+      - "9180:8080"
+    volumes:
+      - /:/rootfs:ro
+      - /var/run:/var/run:ro
+      - /sys:/sys:ro
+      - /var/lib/docker/:/var/lib/docker:ro
+      - /dev/disk/:/dev/disk:ro
+      - /etc/localtime:/etc/localtime:ro
+    environment:
+      - TZ=Asia/Taipei
+    privileged: true
+    devices:
+      - /dev/kmsg
+    networks:
+      - monitoring
+
+  prometheus:
+    image: prom/prometheus:latest
+    container_name: prometheus
+    restart: unless-stopped
+    ports:
+      - "9090:9090"
+    volumes:
+      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
+      - ./alerts.yml:/etc/prometheus/alerts.yml:ro
+      - prometheus_data:/prometheus
+      - /etc/localtime:/etc/localtime:ro
+    environment:
+      - TZ=Asia/Taipei
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--storage.tsdb.retention.time=30d'
+      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
+      - '--web.console.templates=/usr/share/prometheus/consoles'
+      # NOTE(review): enables reload/shutdown over HTTP on the published
+      # port 9090; confirm that port is not reachable by untrusted hosts.
+      - '--web.enable-lifecycle'
+    extra_hosts:
+      # Maps host.docker.internal to the Docker host's gateway address.
+      - "host.docker.internal:host-gateway"
+    networks:
+      - monitoring
+
+  grafana:
+    image: grafana/grafana:latest
+    container_name: grafana
+    restart: unless-stopped
+    ports:
+      - "3002:3000"
+    environment:
+      - GF_SECURITY_ADMIN_USER=admin
+      # Supplied at deploy time so the credential never lands in version
+      # control; Compose aborts with this message when it is unset or empty.
+      - 'GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:?set GRAFANA_ADMIN_PASSWORD}'
+      - GF_SERVER_ROOT_URL=http://192.168.0.110:3002
+      - TZ=Asia/Taipei
+    volumes:
+      - grafana_data:/var/lib/grafana
+      - /etc/localtime:/etc/localtime:ro
+    networks:
+      - monitoring
+    depends_on:
+      - prometheus
+
+  blackbox-exporter:
+    image: prom/blackbox-exporter:latest
+    container_name: blackbox-exporter
+    restart: unless-stopped
+    ports:
+      - "9115:9115"
+    volumes:
+      - ./blackbox.yml:/etc/blackbox_exporter/config.yml:ro
+      - /etc/localtime:/etc/localtime:ro
+    environment:
+      - TZ=Asia/Taipei
+    command:
+      - '--config.file=/etc/blackbox_exporter/config.yml'
+    networks:
+      - monitoring
+
+  alertmanager:
+    image: prom/alertmanager:latest
+    container_name: alertmanager
+    restart: unless-stopped
+    ports:
+      - "9093:9093"
+    volumes:
+      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
+      - alertmanager_data:/alertmanager
+      - /etc/localtime:/etc/localtime:ro
+    environment:
+      - TZ=Asia/Taipei
+    command:
+      - '--config.file=/etc/alertmanager/alertmanager.yml'
+      - '--storage.path=/alertmanager'
+    networks:
+      - monitoring
+
+  # === Phase 5: GitHub Exporter (OPS.176) ===
+  github-exporter:
+    image: promhippie/github-exporter:latest
+    container_name: github-exporter
+    restart: unless-stopped
+    ports:
+      - "9504:9504"
+    environment:
+      - GITHUB_EXPORTER_TOKEN=${GITHUB_TOKEN}
+      - GITHUB_EXPORTER_REPOS=owenhytsai/wooo-aiops,owenhytsai/clawbot-v5
+      - GITHUB_EXPORTER_LOG_LEVEL=info
+    networks:
+      - monitoring
+    labels:
+      - 'com.wooo.service=github-exporter'
+      - 'com.wooo.phase=phase-5'
+    # Rotate container logs: keep at most 3 json-file logs of 10 MB each.
+    logging:
+      driver: json-file
+      options:
+        max-size: '10m'
+        max-file: '3'
+
+networks:
+  monitoring:
+    driver: bridge
+
+volumes:
+  prometheus_data:
+  grafana_data:
+  alertmanager_data:
diff --git a/k8s/monitoring/prometheus.yml b/k8s/monitoring/prometheus.yml
index cffef73a..5f716740 100644
--- a/k8s/monitoring/prometheus.yml
+++ b/k8s/monitoring/prometheus.yml
@@ -22,6 +22,8 @@ scrape_configs:
 
   # === Node Exporters (5 台主機) ===
   - job_name: "node-exporter-110"
+    scrape_interval: 30s
+    scrape_timeout: 25s
     static_configs:
       - targets: ["192.168.0.110:9100"]
         labels: