feat(k8s): Sprint 5.1 Guardrail — service-registry ConfigMap 掛載到 API 容器
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 16m36s
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 16m36s
問題: Docker 容器無 ops/ 目錄,service_registry.py 找不到 YAML → 全部降級 AUTO
解法: ConfigMap 掛載 service-registry.yaml 到 /app/ops/config/
變更:
- k8s/awoooi-prod/15-service-registry-configmap.yaml (新增 ConfigMap)
- k8s/awoooi-prod/06-deployment-api.yaml (volumeMount + volume)
- .gitea/workflows/cd.yaml (Step 1c apply ConfigMap)
效果: _find_registry_path() 可找到 YAML → BLOCK/CRITICAL_HITL/STANDARD_HITL 生效
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -299,6 +299,13 @@ jobs:
|
||||
"export KUBECONFIG=/etc/rancher/k3s/k3s.yaml && sudo kubectl apply -f -"
|
||||
echo "✅ ConfigMap 已更新"
|
||||
|
||||
# Step 1c: Apply Service Registry ConfigMap (Sprint 5.1 Guardrail)
# NOTE(review): this runs ahead of the "Step 1b" Deployment apply that follows — presumably so the
# ConfigMap already exists when the Deployment that mounts it is applied; confirm the ordering is intended.
|
||||
# 2026-04-08 Claude Sonnet 4.6: mounts service-registry.yaml into the container at /app/ops/config/
# The manifest is streamed over SSH and applied on the remote host from stdin (`kubectl apply -f -`).
|
||||
cat k8s/awoooi-prod/15-service-registry-configmap.yaml | \
|
||||
ssh -i ~/.ssh/deploy_key wooo@192.168.0.121 \
|
||||
"export KUBECONFIG=/etc/rancher/k3s/k3s.yaml && sudo kubectl apply -f -"
|
||||
echo "✅ Service Registry ConfigMap 已更新"
|
||||
|
||||
# Step 1b: Apply Deployment yamls (套用 volumes/resources/probe 等非 image 設定)
|
||||
# 2026-04-05 Claude Code: 確保 deployment 結構變更(如 SSH key mount)持久化到 K8s
|
||||
# C3 修正 2026-04-05: 先 sed 替換 IMAGE_TAG_PLACEHOLDER 為正確 sha,
|
||||
|
||||
@@ -60,6 +60,12 @@ spec:
|
||||
- name: repair-known-hosts
|
||||
mountPath: /etc/repair-known-hosts
|
||||
readOnly: true
|
||||
# 2026-04-08 Claude Sonnet 4.6: Sprint 5.1 Guardrail — service registry YAML
|
||||
# Mounted under /app/ops/config/ so that _find_registry_path() can locate it.
# NOTE(review): per the Kubernetes ConfigMap documentation, a subPath mount does not receive
# ConfigMap updates; the pod presumably has to be restarted to pick up a changed registry —
# confirm the CD flow restarts it.
|
||||
- name: service-registry
|
||||
mountPath: /app/ops/config/service-registry.yaml
|
||||
subPath: service-registry.yaml
|
||||
readOnly: true
|
||||
resources:
|
||||
requests:
|
||||
cpu: "200m"
|
||||
@@ -114,6 +120,10 @@ spec:
|
||||
secret:
|
||||
secretName: awoooi-repair-known-hosts
|
||||
optional: true
|
||||
# 2026-04-08 Claude Sonnet 4.6: Sprint 5.1 Guardrail — service registry
# Volume backing the "service-registry" volumeMount, sourced from the ConfigMap of the same name.
# NOTE(review): unlike the preceding secret volume, this one has no `optional: true`, so the pod
# presumably will not start while the ConfigMap is missing — confirm this fail-closed behavior is intended.
|
||||
- name: service-registry
|
||||
configMap:
|
||||
name: service-registry
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
|
||||
217
k8s/awoooi-prod/15-service-registry-configmap.yaml
Normal file
217
k8s/awoooi-prod/15-service-registry-configmap.yaml
Normal file
@@ -0,0 +1,217 @@
|
||||
# k8s/awoooi-prod/15-service-registry-configmap.yaml
|
||||
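# Service Registry ConfigMap — mounts a copy of ops/config/service-registry.yaml into the K8s container.
# The `data` payload below is an inline copy of that file; keep the two in sync whenever either changes.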
|
||||
# 建立: 2026-04-08 Claude Sonnet 4.6
|
||||
# 目的: 解決 Docker 容器無法找到 service-registry.yaml 導致 Guardrail 降級 AUTO 問題
|
||||
# 掛載路徑: /app/ops/config/service-registry.yaml
|
||||
# 參考: ADR-062, ADR-063
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: service-registry
|
||||
namespace: awoooi-prod
|
||||
labels:
|
||||
app: awoooi
|
||||
component: service-registry
|
||||
data:
|
||||
service-registry.yaml: |
|
||||
# ops/config/service-registry.yaml
|
||||
# Service Registry — 服務 Stateful 分級清單
|
||||
# 版本: 1.0.0
|
||||
# 建立: Claude Sonnet 4.6 / 2026-04-08 Asia/Taipei
|
||||
# 維護: 修改需 PR + 統帥審核,禁止直接 push
|
||||
# 說明:
|
||||
# BLOCK = 系統禁止自動修復,僅告警(資料風險最高)
|
||||
# CRITICAL_HITL = 允許 Playbook,但需 MultiSig 2票
|
||||
# STANDARD_HITL = 允許 Playbook,需 1票審核
|
||||
# AUTO = 允許自動執行(無狀態服務)
|
||||
# 參考: ADR-062, ADR-063
|
||||
|
||||
services:
|
||||
# ─── BLOCK:系統禁止(連 Playbook 都不提供)────────────────────────────
|
||||
- name: postgres
|
||||
display_name: "PostgreSQL 主庫 (awoooi_prod)"
|
||||
host: "192.168.0.188"
|
||||
stateful_level: BLOCK
|
||||
reason: "主要業務資料庫,重啟可能導致 WAL 截斷、事務回滾"
|
||||
alert_only: true
|
||||
containers: ["postgres"]
|
||||
|
||||
- name: momo-db
|
||||
display_name: "PostgreSQL (momo_db)"
|
||||
host: "192.168.0.188"
|
||||
stateful_level: BLOCK
|
||||
reason: "momo 產品資料庫,禁止自動操作"
|
||||
alert_only: true
|
||||
containers: ["momo-db"]
|
||||
|
||||
- name: langfuse-db
|
||||
display_name: "PostgreSQL (Langfuse)"
|
||||
host: "192.168.0.110"
|
||||
stateful_level: BLOCK
|
||||
reason: "LLM trace 資料庫,重啟導致追蹤資料遺失"
|
||||
alert_only: true
|
||||
containers: ["langfuse-db"]
|
||||
|
||||
- name: harbor-db
|
||||
display_name: "PostgreSQL (Harbor Registry)"
|
||||
host: "192.168.0.110"
|
||||
stateful_level: BLOCK
|
||||
reason: "Harbor Registry 資料庫,重啟可能損壞 image layer 索引"
|
||||
alert_only: true
|
||||
containers: ["harbor-db"]
|
||||
|
||||
- name: sentry-postgres
|
||||
display_name: "PostgreSQL (Sentry)"
|
||||
host: "192.168.0.110"
|
||||
stateful_level: BLOCK
|
||||
reason: "Sentry 錯誤追蹤資料庫"
|
||||
alert_only: true
|
||||
containers: ["sentry-postgres"]
|
||||
|
||||
- name: signoz-clickhouse
|
||||
display_name: "ClickHouse (SignOz)"
|
||||
host: "192.168.0.188"
|
||||
stateful_level: BLOCK
|
||||
reason: "列欄式 OLAP 資料庫,寫入中重啟可能損壞列欄檔案"
|
||||
alert_only: true
|
||||
containers: ["signoz-clickhouse"]
|
||||
|
||||
# ─── CRITICAL_HITL:高風險,需 MultiSig 2票 ──────────────────────────
|
||||
- name: redis
|
||||
display_name: "Redis (AWOOOI)"
|
||||
host: "192.168.0.188"
|
||||
stateful_level: CRITICAL_HITL
|
||||
reason: "AWOOOI 依賴 Redis 做冪等鎖與快取,重啟丟失鎖狀態"
|
||||
requires_pre_backup: false
|
||||
containers: ["redis"]
|
||||
|
||||
- name: harbor-redis
|
||||
display_name: "Redis (Harbor)"
|
||||
host: "192.168.0.110"
|
||||
stateful_level: CRITICAL_HITL
|
||||
reason: "Harbor session 快取"
|
||||
containers: ["harbor-redis"]
|
||||
|
||||
- name: sentry-redis
|
||||
display_name: "Redis (Sentry)"
|
||||
host: "192.168.0.110"
|
||||
stateful_level: CRITICAL_HITL
|
||||
reason: "Sentry 任務佇列"
|
||||
containers: ["sentry-redis"]
|
||||
|
||||
- name: gitea
|
||||
display_name: "Gitea (程式碼倉庫)"
|
||||
host: "192.168.0.110"
|
||||
stateful_level: CRITICAL_HITL
|
||||
reason: "restart 會殺掉活躍 SSH session,Git push 中斷可能損壞 working copy"
|
||||
requires_pre_backup: false
|
||||
containers: ["gitea"]
|
||||
|
||||
- name: harbor
|
||||
display_name: "Harbor (Container Registry)"
|
||||
host: "192.168.0.110"
|
||||
stateful_level: CRITICAL_HITL
|
||||
reason: "重啟中斷 pull/push;GC 進行中重啟可能損壞 layer"
|
||||
requires_pre_backup: false
|
||||
containers: ["harbor-core", "harbor-jobservice", "harbor-portal"]
|
||||
|
||||
- name: minio
|
||||
display_name: "MinIO (物件存儲)"
|
||||
host: "192.168.0.188"
|
||||
stateful_level: CRITICAL_HITL
|
||||
reason: "寫入中重啟可能導致 multipart upload 中斷"
|
||||
requires_pre_backup: false
|
||||
containers: ["minio"]
|
||||
|
||||
# ─── STANDARD_HITL:中風險,需 1票審核 ──────────────────────────────
|
||||
- name: prometheus
|
||||
display_name: "Prometheus"
|
||||
host: "192.168.0.110"
|
||||
stateful_level: STANDARD_HITL
|
||||
reason: "有 TSDB WAL,exited 狀態用 docker start(非 restart)"
|
||||
restart_command: "docker start"
|
||||
containers: ["prometheus"]
|
||||
|
||||
- name: grafana
|
||||
display_name: "Grafana"
|
||||
host: "192.168.0.110"
|
||||
stateful_level: STANDARD_HITL
|
||||
reason: "有 SQLite 設定儲存,exited 用 docker start"
|
||||
restart_command: "docker start"
|
||||
containers: ["grafana"]
|
||||
|
||||
- name: alertmanager
|
||||
display_name: "Alertmanager"
|
||||
host: "192.168.0.110"
|
||||
stateful_level: STANDARD_HITL
|
||||
reason: "有 silence 狀態,exited 用 docker start"
|
||||
restart_command: "docker start"
|
||||
containers: ["alertmanager"]
|
||||
|
||||
# ─── AUTO:無狀態,允許自動修復 ──────────────────────────────────────
|
||||
- name: nginx
|
||||
display_name: "Nginx (反向代理)"
|
||||
host: "192.168.0.110"
|
||||
stateful_level: AUTO
|
||||
containers: ["nginx", "nginx-188"]
|
||||
|
||||
- name: awoooi-api
|
||||
display_name: "AWOOOI API (K3s)"
|
||||
host: "k3s"
|
||||
stateful_level: AUTO
|
||||
containers: []
|
||||
|
||||
- name: awoooi-web
|
||||
display_name: "AWOOOI Web (K3s)"
|
||||
host: "k3s"
|
||||
stateful_level: AUTO
|
||||
containers: []
|
||||
|
||||
- name: blackbox-exporter
|
||||
display_name: "Blackbox Exporter"
|
||||
host: "192.168.0.110"
|
||||
stateful_level: AUTO
|
||||
containers: ["blackbox-exporter"]
|
||||
|
||||
- name: langfuse
|
||||
display_name: "Langfuse (LLMOps)"
|
||||
host: "192.168.0.110"
|
||||
stateful_level: AUTO
|
||||
containers: ["langfuse-web", "langfuse-worker"]
|
||||
|
||||
- name: ollama
|
||||
display_name: "Ollama (Local LLM)"
|
||||
host: "192.168.0.188"
|
||||
stateful_level: AUTO
|
||||
containers: ["ollama"]
|
||||
|
||||
- name: momo-app
|
||||
display_name: "momo Web App"
|
||||
host: "192.168.0.188"
|
||||
stateful_level: AUTO
|
||||
containers: ["momo-app"]
|
||||
|
||||
- name: tsenyang-website
|
||||
display_name: "Tsenyang Website"
|
||||
host: "192.168.0.188"
|
||||
stateful_level: AUTO
|
||||
containers: ["tsenyang-website"]
|
||||
|
||||
- name: stock-platform
|
||||
display_name: "Stock Platform"
|
||||
host: "192.168.0.110"
|
||||
stateful_level: AUTO
|
||||
containers: ["stock-platform"]
|
||||
|
||||
# ─── 備份策略參考 ────────────────────────────────────────────────────────
|
||||
backup_policies:
|
||||
velero_max_age_hours: 4
|
||||
emergency_backup_timeout: 600
|
||||
block_backup_on_high_io: true
|
||||
io_threshold_percent: 80
|
||||
|
||||
# ─── MultiSig 設定 ───────────────────────────────────────────────────────
|
||||
multisig:
|
||||
critical_required_votes: 2
|
||||
standard_required_votes: 1
|
||||
vote_expiry_minutes: 30
|
||||
Reference in New Issue
Block a user