From ab3e266a238b17f28a4d5af6e73a30c68ea078ba Mon Sep 17 00:00:00 2001 From: OG T Date: Fri, 10 Apr 2026 10:44:36 +0800 Subject: [PATCH] =?UTF-8?q?fix(monitoring):=20Phase=20O-6.2=20service-regi?= =?UTF-8?q?stry=20=E8=A3=9C=E9=BD=8A=209=20=E5=80=8B=E7=BC=BA=E5=A4=B1=20K?= =?UTF-8?q?8s=20=E9=83=A8=E7=BD=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新增: - argocd 5個元件 (applicationset/dex/notifications/redis/repo-server) - awoooi-dev/awoooi-api - kube-state-metrics - observability/event-exporter - velero/velero 結果: prometheus 覆蓋率 94%→96%, errors 9→0 Co-Authored-By: Claude Sonnet 4.6 --- ops/monitoring/service-registry.yaml | 114 +++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/ops/monitoring/service-registry.yaml b/ops/monitoring/service-registry.yaml index aac5ea77..e566ab8c 100644 --- a/ops/monitoring/service-registry.yaml +++ b/ops/monitoring/service-registry.yaml @@ -126,6 +126,120 @@ services: owner: devops-team criticality: P0 + # --- ArgoCD 完整元件 (Phase O-6 2026-04-10) --- + - name: argocd-applicationset-controller + type: k8s-deployment + namespace: argocd + monitoring: + prometheus: true + sentry: false + otel: false + alerts: + - service_down + owner: devops-team + criticality: P2 + + - name: argocd-dex-server + type: k8s-deployment + namespace: argocd + monitoring: + prometheus: true + sentry: false + otel: false + alerts: + - service_down + owner: devops-team + criticality: P2 + + - name: argocd-notifications-controller + type: k8s-deployment + namespace: argocd + monitoring: + prometheus: true + sentry: false + otel: false + alerts: + - service_down + owner: devops-team + criticality: P2 + + - name: argocd-redis + type: k8s-deployment + namespace: argocd + monitoring: + prometheus: true + sentry: false + otel: false + alerts: + - service_down + owner: devops-team + criticality: P2 + + - name: argocd-repo-server + type: k8s-deployment + namespace: argocd + monitoring: + prometheus: true + sentry: false + otel: false + alerts: + - service_down + owner: devops-team + criticality: P2 + + # --- AWOOOI Dev 環境 --- + - name: awoooi-api + type: k8s-deployment + namespace: awoooi-dev + monitoring: + prometheus: true + sentry: false + otel: false + alerts: + - pod_crash + owner: backend-team + criticality: P3 + + # --- kube-state-metrics --- + - name: kube-state-metrics + type: k8s-deployment + namespace: kube-state-metrics + monitoring: + prometheus: true + sentry: false + otel: false + alerts: + - service_down + owner: devops-team + criticality: P1 + + # --- OTEL Event Exporter --- + - name: event-exporter + type: k8s-deployment + namespace: observability + monitoring: + prometheus: true + sentry: false + otel: false + alerts: + - service_down + owner: devops-team + criticality: P1 + + # --- Velero 備份 --- + - name: velero + type: k8s-deployment + namespace: velero + monitoring: + prometheus: true + sentry: false + otel: false + alerts: + - service_down + - backup_failed + owner: devops-team + criticality: P1 + # ============================================================================= # Docker 容器 (192.168.0.188 - AI/Web 中心) # =============================================================================