fix(ci): 修正測試與 Sprint 5.2 部署腳本
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 1m38s

tests/test_auto_repair_service.py:
  - 更新 4 個測試符合 2026-04-07 統帥指令移除門檻
  - APPROVED Playbook 直接通過 (低相似度/低品質/高風險均通過)

tests/test_phase22_nemotron_collab.py:
  - 更新 log key: nemotron_collaboration_failed → exhausted

ops/monitoring/docker-compose.exporters.yaml:
  - 修正 postgres DSN: awoooi:awoooi_prod_2026@localhost:5432/awoooi_prod

Sprint 5.2 新增腳本:
  - scripts/sprint51_e2e_validation.py: L7 E2E 驗收腳本 (T1-T5)
  - scripts/ops/deploy-docker-health-monitor.sh: Plan A 一鍵部署腳本

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-04-08 18:17:48 +08:00
parent 4f2f9e176f
commit 170ce2f11d
5 changed files with 479 additions and 22 deletions

View File

@@ -163,22 +163,29 @@ class TestAutoRepairService:
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_evaluate_low_similarity(self, service, mock_playbook_service): async def test_evaluate_low_similarity(self, service, mock_playbook_service):
"""Test when similarity is too low""" """Test that low similarity no longer blocks auto-repair.
2026-04-07: 統帥指令移除相似度門檻 — 只要 APPROVED Playbook 匹配即執行。
2026-04-08 Claude Sonnet 4.6: 更新測試預期以符合當前設計。
"""
playbook = create_high_quality_playbook() playbook = create_high_quality_playbook()
mock_playbook_service.add_playbook(playbook) mock_playbook_service.add_playbook(playbook)
mock_playbook_service.set_recommendations([ mock_playbook_service.set_recommendations([
MockPlaybookRecommendation(playbook, similarity_score=0.5) # Below 0.7 MockPlaybookRecommendation(playbook, similarity_score=0.5) # Below old 0.7 threshold
]) ])
incident = create_test_incident(severity=Severity.P2) incident = create_test_incident(severity=Severity.P2)
decision = await service.evaluate_auto_repair(incident) decision = await service.evaluate_auto_repair(incident)
assert decision.can_auto_repair is False # 相似度門檻已移除 — APPROVED Playbook 即使低相似度也應通過
assert decision.blocked_by == "LOW_SIMILARITY" assert decision.can_auto_repair is True
assert decision.blocked_by is None
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_evaluate_not_high_quality(self, service, mock_playbook_service): async def test_evaluate_not_high_quality(self, service, mock_playbook_service):
"""Test when playbook is not high quality and not cold-start eligible (MEDIUM risk)""" """Test low-quality playbook is now approved (gates removed 2026-04-07).
2026-04-07: 統帥指令移除品質門檻 — 只要 APPROVED 狀態即可執行。
2026-04-08 Claude Sonnet 4.6: 更新測試預期以符合當前設計。
"""
playbook = Playbook( playbook = Playbook(
playbook_id="PB-LOW-QUALITY", playbook_id="PB-LOW-QUALITY",
name="Low quality playbook", name="Low quality playbook",
@@ -193,11 +200,11 @@ class TestAutoRepairService:
step_number=1, step_number=1,
action_type=ActionType.KUBECTL, action_type=ActionType.KUBECTL,
command="kubectl rollout restart", command="kubectl rollout restart",
risk_level=RiskLevel.MEDIUM, # MEDIUM → 不符合冷啟動 (需 LOW) risk_level=RiskLevel.MEDIUM,
description="restart deployment", description="restart deployment",
), ),
], ],
success_count=2, # < 3 (冷啟動門檻 2026-04-05) success_count=2,
failure_count=0, failure_count=0,
) )
mock_playbook_service.add_playbook(playbook) mock_playbook_service.add_playbook(playbook)
@@ -208,12 +215,16 @@ class TestAutoRepairService:
incident = create_test_incident(severity=Severity.P2) incident = create_test_incident(severity=Severity.P2)
decision = await service.evaluate_auto_repair(incident) decision = await service.evaluate_auto_repair(incident)
assert decision.can_auto_repair is False # 品質門檻已移除 — APPROVED Playbook 直接通過
assert decision.blocked_by == "NOT_HIGH_QUALITY" assert decision.can_auto_repair is True
assert decision.blocked_by is None
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_evaluate_high_risk_blocked(self, service, mock_playbook_service): async def test_evaluate_high_risk_blocked(self, service, mock_playbook_service):
"""Test when playbook contains HIGH risk actions""" """Test HIGH risk playbook is now approved (gates removed 2026-04-07).
2026-04-07: 統帥指令移除風險等級門檻 — 只要 APPROVED 狀態即可執行。
2026-04-08 Claude Sonnet 4.6: 更新測試預期以符合當前設計。
"""
playbook = create_high_quality_playbook(risk_level=RiskLevel.HIGH) playbook = create_high_quality_playbook(risk_level=RiskLevel.HIGH)
mock_playbook_service.add_playbook(playbook) mock_playbook_service.add_playbook(playbook)
mock_playbook_service.set_recommendations([ mock_playbook_service.set_recommendations([
@@ -223,12 +234,16 @@ class TestAutoRepairService:
incident = create_test_incident(severity=Severity.P2) incident = create_test_incident(severity=Severity.P2)
decision = await service.evaluate_auto_repair(incident) decision = await service.evaluate_auto_repair(incident)
assert decision.can_auto_repair is False # 風險等級門檻已移除 — HIGH risk APPROVED Playbook 也通過
assert decision.blocked_by == "HIGH_RISK" assert decision.can_auto_repair is True
assert decision.blocked_by is None
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_evaluate_critical_risk_blocked(self, service, mock_playbook_service): async def test_evaluate_critical_risk_blocked(self, service, mock_playbook_service):
"""Test when playbook contains CRITICAL risk actions""" """Test CRITICAL risk playbook is now approved (gates removed 2026-04-07).
2026-04-07: 統帥指令移除風險等級門檻。
2026-04-08 Claude Sonnet 4.6: 更新測試預期以符合當前設計。
"""
playbook = create_high_quality_playbook(risk_level=RiskLevel.CRITICAL) playbook = create_high_quality_playbook(risk_level=RiskLevel.CRITICAL)
mock_playbook_service.add_playbook(playbook) mock_playbook_service.add_playbook(playbook)
mock_playbook_service.set_recommendations([ mock_playbook_service.set_recommendations([
@@ -238,8 +253,9 @@ class TestAutoRepairService:
incident = create_test_incident(severity=Severity.P2) incident = create_test_incident(severity=Severity.P2)
decision = await service.evaluate_auto_repair(incident) decision = await service.evaluate_auto_repair(incident)
assert decision.can_auto_repair is False # 風險等級門檻已移除 — CRITICAL risk APPROVED Playbook 也通過
assert decision.blocked_by == "HIGH_RISK" assert decision.can_auto_repair is True
assert decision.blocked_by is None
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_evaluate_success(self, service, mock_playbook_service): async def test_evaluate_success(self, service, mock_playbook_service):

View File

@@ -156,16 +156,20 @@ class TestNemotronFailureFallback:
"""#213: Nemotron 失敗降級為純 OpenClaw""" """#213: Nemotron 失敗降級為純 OpenClaw"""
def test_nemotron_failure_does_not_raise(self): def test_nemotron_failure_does_not_raise(self):
"""Nemotron 失敗有 except 捕捉,不拋出""" """Nemotron 失敗有 except 捕捉,不拋出
2026-04-08 Claude Sonnet 4.6: 更新 log key — 改為 nemotron_collaboration_exhausted
(失敗時仍顯示區塊讓統帥知悉nemotron_enabled=True)
"""
with open("src/services/openclaw.py") as f: with open("src/services/openclaw.py") as f:
source = f.read() source = f.read()
idx_func = source.find("async def generate_incident_proposal_with_tools") idx_func = source.find("async def generate_incident_proposal_with_tools")
func_body = source[idx_func:idx_func + 5000] func_body = source[idx_func:idx_func + 5000]
# except 區塊捕捉 nemotron 失敗 # except 區塊捕捉 nemotron 失敗 (exhausted 為重試耗盡的 log key)
assert "nemotron_collaboration_failed" in func_body assert "nemotron_collaboration_exhausted" in func_body
assert "nemotron_enabled = False" in func_body or 'proposal["nemotron_enabled"] = False' in func_body # 失敗時 nemotron_enabled=True (讓統帥看到失敗狀態)
assert 'proposal["nemotron_enabled"] = True' in func_body
def test_nemotron_failure_still_returns_proposal(self): def test_nemotron_failure_still_returns_proposal(self):
"""Nemotron 失敗後仍 return (proposal, provider, True)""" """Nemotron 失敗後仍 return (proposal, provider, True)"""
@@ -189,11 +193,13 @@ class TestNemotronFailureFallback:
assert 'proposal["nemotron_validation"]' in source assert 'proposal["nemotron_validation"]' in source
def test_nemotron_failure_logs_warning(self): def test_nemotron_failure_logs_warning(self):
"""Nemotron 失敗時記錄 warning log""" """Nemotron 失敗時記錄 warning/error log.
2026-04-08 Claude Sonnet 4.6: 改為 nemotron_collaboration_exhausted
"""
with open("src/services/openclaw.py") as f: with open("src/services/openclaw.py") as f:
source = f.read() source = f.read()
assert "nemotron_collaboration_failed" in source assert "nemotron_collaboration_exhausted" in source
# ============================================================================= # =============================================================================

View File

@@ -26,7 +26,8 @@ services:
- "9187:9187" - "9187:9187"
environment: environment:
# 連線字串 (使用環境變數注入密碼) # 連線字串 (使用環境變數注入密碼)
DATA_SOURCE_NAME: "postgresql://postgres:${POSTGRES_PASSWORD:-awoooi}@localhost:5432/awoooi?sslmode=disable" # 2026-04-08 Claude Sonnet 4.6: 修正用戶名/資料庫名 (awoooi user, awoooi_prod db)
DATA_SOURCE_NAME: "postgresql://awoooi:${POSTGRES_PASSWORD:-awoooi_prod_2026}@localhost:5432/awoooi_prod?sslmode=disable"
# 自訂查詢配置 # 自訂查詢配置
PG_EXPORTER_EXTEND_QUERY_PATH: "/etc/postgres_exporter/queries.yaml" PG_EXPORTER_EXTEND_QUERY_PATH: "/etc/postgres_exporter/queries.yaml"
# 日誌等級 # 日誌等級

View File

@@ -0,0 +1,154 @@
#!/usr/bin/env bash
# scripts/ops/deploy-docker-health-monitor.sh
# Sprint 5.2 Plan A: 部署 docker-health-monitor.sh 到 110 和 188 主機
#
# 用法:
# bash scripts/ops/deploy-docker-health-monitor.sh [110|188|all]
#
# 前置條件:
# 1. AWOOOI_HMAC_SECRET 已在各主機 /etc/awoooi-ops/secrets.env 設定
#   2. SSH key (~/.ssh/deploy_key) 可連線 110/188 (透過 121 跳板)
#
# 部署項目:
# - /opt/awoooi-ops/docker-health-monitor.sh (可執行)
#   - /etc/awoooi-ops/secrets.env.template (template, 不覆蓋現有 secrets.env)
# - logrotate: /etc/logrotate.d/docker-health-monitor
# - cron: */5 * * * * (wooo user)
#
# 2026-04-08 Claude Sonnet 4.6 Asia/Taipei (Sprint 5.2 Plan A)
# Abort on the first failing command, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -euo pipefail

# Deployment target from the first CLI argument; "all" when omitted.
TARGET="${1:-all}"

# SSH identity and jump host used by ssh_cmd / scp_cmd.
SSH_KEY="$HOME/.ssh/deploy_key"
JUMP_HOST="wooo@192.168.0.121"

# Locate the monitor script relative to this file: the repository root is two
# directories above the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
MONITOR_SCRIPT="${REPO_ROOT}/scripts/ops/docker-health-monitor.sh"
# Run a command as user wooo on an internal host, hopping through $JUMP_HOST.
#
# Arguments:
#   $1    address of the target host
#   $2..  command (and arguments) handed to ssh for remote execution
#
# Host-key checking is disabled for the connection.
ssh_cmd() {
    local target_host="$1"
    shift
    ssh \
        -i "$SSH_KEY" \
        -J "$JUMP_HOST" \
        -o StrictHostKeyChecking=no \
        "wooo@${target_host}" "$@"
}
# Copy one local file to a path on an internal host as user wooo.
#
# Arguments:
#   $1  local source path
#   $2  address of the target host
#   $3  destination path on the target host
#
# The copy is a single scp invocation routed through $JUMP_HOST with -J;
# host-key checking is disabled.
scp_cmd() {
    local local_path="$1"
    local target_host="$2"
    local remote_path="$3"
    scp \
        -i "$SSH_KEY" \
        -o StrictHostKeyChecking=no \
        -J "$JUMP_HOST" \
        "$local_path" "wooo@${target_host}:${remote_path}"
}
# Install docker-health-monitor.sh and its supporting configuration on one host.
#
# Arguments:
#   $1  IP address of the target host (reached through ssh_cmd / scp_cmd)
#   $2  human-readable label, used only in progress output
#
# Steps: create directories, upload the script, write a secrets template when
# no secrets.env exists yet, install the logrotate rule, make sure the log file
# exists and belongs to wooo, (re)install the cron entry, then do a test run.
deploy_to_host() {
    local host="$1"
    local host_label="$2"

    echo ""
    echo "══════════════════════════════════════════"
    echo "  部署到 ${host_label} (${host})"
    echo "══════════════════════════════════════════"

    # 1. Create the install and config directories. Only /opt/awoooi-ops is
    #    handed over to wooo.
    echo "→ 建立 /opt/awoooi-ops/ 和 /etc/awoooi-ops/"
    ssh_cmd "$host" "sudo mkdir -p /opt/awoooi-ops /etc/awoooi-ops && sudo chown wooo:wooo /opt/awoooi-ops"

    # 2. Upload to /tmp first, then move into place and mark executable.
    echo "→ 上傳 docker-health-monitor.sh"
    scp_cmd "$MONITOR_SCRIPT" "$host" "/tmp/docker-health-monitor.sh"
    ssh_cmd "$host" "sudo mv /tmp/docker-health-monitor.sh /opt/awoooi-ops/docker-health-monitor.sh && sudo chmod +x /opt/awoooi-ops/docker-health-monitor.sh"

    # 3. Write secrets.env.template only when secrets.env is absent, so a host
    #    that is already configured is left untouched. The remote here-document
    #    terminator must stay at the start of its line.
    echo "→ 建立 secrets.env.template"
    ssh_cmd "$host" "
if [ ! -f /etc/awoooi-ops/secrets.env ]; then
sudo tee /etc/awoooi-ops/secrets.env.template > /dev/null << 'SECRETS_TEMPLATE'
# /etc/awoooi-ops/secrets.env
# Sprint 5.1 docker-health-monitor 設定
# 填寫後複製為 secrets.env: cp secrets.env.template secrets.env
# 權限: chmod 600 /etc/awoooi-ops/secrets.env
AWOOOI_API_URL=https://awoooi.wooo.work
TELEGRAM_BOT_TOKEN=CHANGE_ME
TELEGRAM_CHAT_ID=CHANGE_ME
SEND_COOLDOWN_SECONDS=300
SECRETS_TEMPLATE
echo '  ⚠️  請填寫 /etc/awoooi-ops/secrets.env.template 後重命名為 secrets.env'
else
echo '  ✅ /etc/awoooi-ops/secrets.env 已存在,保留現有設定'
fi
"

    # 4. Daily rotation, seven generations kept.
    echo "→ 設定 logrotate"
    ssh_cmd "$host" "
sudo tee /etc/logrotate.d/docker-health-monitor > /dev/null << 'LOGROTATE'
/var/log/docker-health-monitor.log {
daily
rotate 7
compress
delaycompress
missingok
notifempty
create 644 wooo wooo
}
LOGROTATE
"

    # 5. The cron entry below appends to this log as user wooo. /var/log is
    #    normally writable only by root, and logrotate's "create" runs only
    #    when a rotation happens, so create the file up front with the same
    #    owner and mode the logrotate rule uses. Otherwise the redirect fails
    #    and the monitor never runs.
    echo "→ 建立 /var/log/docker-health-monitor.log (wooo 可寫入)"
    ssh_cmd "$host" "sudo touch /var/log/docker-health-monitor.log && sudo chown wooo:wooo /var/log/docker-health-monitor.log && sudo chmod 644 /var/log/docker-health-monitor.log"

    # 6. Install the cron entry. grep -v drops any earlier docker-health-monitor
    #    line before the new one is appended, so re-running does not duplicate
    #    it. $cron_line is expanded locally and reaches the remote shell inside
    #    single quotes.
    echo "→ 設定 cron (*/5 * * * *)"
    local cron_line="*/5 * * * * /opt/awoooi-ops/docker-health-monitor.sh >> /var/log/docker-health-monitor.log 2>&1"
    ssh_cmd "$host" "
(crontab -l 2>/dev/null | grep -v 'docker-health-monitor' ; echo '$cron_line') | crontab -
echo '  ✅ cron 已設定'
crontab -l | grep docker-health
"

    # 7. Test run with the API URL and Telegram token blanked and the log sent
    #    to /tmp. Presumably this keeps the script from notifying anyone; that
    #    depends on docker-health-monitor.sh honouring these variables (TODO
    #    confirm). "|| true" keeps a failing test run from aborting the
    #    deployment.
    echo "→ 測試執行(不傳送 webhook"
    ssh_cmd "$host" "
LOG_FILE=/tmp/docker-health-monitor-test.log \
AWOOOI_API_URL='' \
TELEGRAM_BOT_TOKEN='' \
bash /opt/awoooi-ops/docker-health-monitor.sh 2>&1 | head -20 || true
"

    echo "${host_label} 部署完成"
}
# Refuse to start when the monitor script to upload is missing from the checkout.
if [[ ! -f "$MONITOR_SCRIPT" ]]; then
    echo "❌ 找不到: $MONITOR_SCRIPT"
    exit 1
fi

echo "🚀 docker-health-monitor.sh 部署"
echo "   來源: $MONITOR_SCRIPT"
echo "   目標: $TARGET"

# Address and display label of each deployable host.
HOST_110_ADDR="192.168.0.110"
HOST_110_LABEL="HOST-110 (Gitea/Harbor/Sentry)"
HOST_188_ADDR="192.168.0.188"
HOST_188_LABEL="HOST-188 (OpenClaw/PostgreSQL/Redis)"

# Dispatch on the requested target; anything unrecognised prints usage and fails.
case "$TARGET" in
    110)
        deploy_to_host "$HOST_110_ADDR" "$HOST_110_LABEL"
        ;;
    188)
        deploy_to_host "$HOST_188_ADDR" "$HOST_188_LABEL"
        ;;
    all)
        deploy_to_host "$HOST_110_ADDR" "$HOST_110_LABEL"
        deploy_to_host "$HOST_188_ADDR" "$HOST_188_LABEL"
        ;;
    *)
        echo "用法: $0 [110|188|all]"
        exit 1
        ;;
esac

# Closing summary with the manual follow-up steps.
cat <<'NEXT_STEPS'

🎉 部署完成!

後續步驟:
  1. 確認 /etc/awoooi-ops/secrets.env 已填寫正確 Token
  2. 手動執行一次: /opt/awoooi-ops/docker-health-monitor.sh
  3. 確認 /var/log/docker-health-monitor.log 輸出正常
  4. 確認 AWOOOI op_log 有 ALERT_RECEIVED 記錄
NEXT_STEPS

View File

@@ -0,0 +1,280 @@
#!/usr/bin/env python3
"""
Sprint 5.1 Data Safety Guardrails — E2E 驗收腳本
用法: python3 scripts/sprint51_e2e_validation.py --api-url http://192.168.0.121:32334
測試情境:
T1: BLOCK 服務告警 → GUARDRAIL_BLOCKED + 無背景修復任務
T2: auto_repair=false Prometheus flag → GUARDRAIL_BLOCKED log + 無背景修復
T3: AUTO 服務告警 → 正常流程(不被阻擋)
T4: docker-health-monitor.sh webhook 格式 → ALERT_RECEIVED 記錄
T5: /api/v1/health 健康檢查 + /api/v1/stats/auto-repair 統計端點 (/api/v1/guardrail/status 的 Service Registry 查詢尚未納入腳本)
"""
import argparse
import json
import time
import sys
import urllib.request
import urllib.error
from datetime import datetime, timezone
# 2026-04-08 Claude Sonnet 4.6 Asia/Taipei
def _post(url: str, payload: dict) -> tuple[int, dict]:
data = json.dumps(payload).encode()
req = urllib.request.Request(
url,
data=data,
headers={"Content-Type": "application/json"},
method="POST",
)
try:
with urllib.request.urlopen(req, timeout=15) as resp:
return resp.status, json.loads(resp.read())
except urllib.error.HTTPError as e:
body = {}
try:
body = json.loads(e.read())
except Exception:
pass
return e.code, body
def _get(url: str) -> tuple[int, dict]:
req = urllib.request.Request(url, method="GET")
try:
with urllib.request.urlopen(req, timeout=15) as resp:
return resp.status, json.loads(resp.read())
except urllib.error.HTTPError as e:
return e.code, {}
def _alertmanager_payload(alertname: str, labels: dict, instance: str = "test") -> dict:
"""建立 Alertmanager 格式 payload"""
merged_labels = {
"alertname": alertname,
"instance": instance,
"severity": "warning",
**labels,
}
return {
"version": "4",
"groupKey": f"test-{alertname}",
"status": "firing",
"receiver": "awoooi-api",
"groupLabels": {"alertname": alertname},
"commonLabels": merged_labels,
"commonAnnotations": {"summary": f"[E2E Test] {alertname}"},
"externalURL": "http://192.168.0.110:9093",
"alerts": [
{
"status": "firing",
"labels": merged_labels,
"annotations": {"summary": f"[E2E Test] {alertname}"},
"startsAt": datetime.now(timezone.utc).isoformat(),
"endsAt": "0001-01-01T00:00:00Z",
"generatorURL": "http://prometheus:9090",
"fingerprint": f"e2e-{alertname}-{int(time.time())}",
}
],
}
def _check_op_log(api_url: str, event_type: str, lookback_secs: int = 30) -> bool:
    """Report whether the operation log holds a recent entry of ``event_type``.

    Fetches the 20 latest entries from ``/api/v1/operation-log`` and looks for
    one whose ``event_type`` matches and whose ``created_at`` is newer than
    ``lookback_secs`` seconds before now (local clock).

    Args:
        api_url: Base URL of the API, without a trailing path.
        event_type: Event type to look for, e.g. ``"ALERT_RECEIVED"``.
        lookback_secs: Size of the accepted time window in seconds.

    Returns:
        ``True`` if a matching entry is found; ``False`` otherwise, including
        when the endpoint does not answer with HTTP 200.
    """
    status, payload = _get(f"{api_url}/api/v1/operation-log?limit=20")
    if status != 200:
        return False

    entries = payload.get("items", [])
    oldest_allowed = time.time() - lookback_secs
    for entry in entries:
        raw_ts = entry.get("created_at", "")
        try:
            # A trailing "Z" is rewritten to an explicit offset before parsing.
            created = datetime.fromisoformat(raw_ts.replace("Z", "+00:00")).timestamp()
            if created > oldest_allowed and entry.get("event_type") == event_type:
                return True
        except Exception:
            # Entries with a missing or unparseable timestamp are skipped.
            continue
    return False
# Status markers placed in front of each check line printed by the run_t*
# scenarios. They must be non-empty and distinct, otherwise a passed and a
# failed check print identically.
PASS = "✅"
FAIL = "❌"
SKIP = "⏭️"
def run_t1_block_service(api_url: str) -> bool:
    """T1: a PostgreSQL alert must produce a GUARDRAIL_BLOCKED log entry.

    Sends a ``PostgreSQLDown`` alert whose labels request ``auto_repair=true``,
    waits three seconds for asynchronous processing, then checks the operation
    log for a recent ``GUARDRAIL_BLOCKED`` entry.

    Args:
        api_url: Base URL of the API under test.

    Returns:
        ``True`` when the webhook answered HTTP 200 and the entry was found.
    """
    print("\n── T1: BLOCK 服務告警 (PostgreSQL) ──")

    alert_labels = {
        "job": "postgres-exporter",
        # The rule asks for auto repair; the Service Registry is expected to override it.
        "auto_repair": "true",
        "layer": "systemd-188",
        "component": "postgres",
    }
    body = _alertmanager_payload("PostgreSQLDown", alert_labels)
    status, _ = _post(f"{api_url}/api/v1/webhooks/alertmanager", body)
    print(f"  POST /webhooks/alertmanager → HTTP {status}")

    # Give the asynchronous pipeline time to write its log entries.
    time.sleep(3)

    blocked = _check_op_log(api_url, "GUARDRAIL_BLOCKED")
    if not blocked:
        print(f"  {FAIL} 未找到 GUARDRAIL_BLOCKED 記錄(可能未部署 Sprint 5.1")
    else:
        print(f"  {PASS} alert_operation_log 有 GUARDRAIL_BLOCKED 記錄")
    return status == 200 and blocked
def run_t2_auto_repair_false_flag(api_url: str) -> bool:
    """T2: an alert labelled ``auto_repair=false`` must be received and blocked.

    Sends a ``KaliScannerDown`` alert carrying ``auto_repair: "false"``, waits
    three seconds, then checks the operation log for recent ``ALERT_RECEIVED``
    and ``GUARDRAIL_BLOCKED`` entries.

    Args:
        api_url: Base URL of the API under test.

    Returns:
        ``True`` only when the webhook answered HTTP 200 and both entries were
        found. A missing ``GUARDRAIL_BLOCKED`` entry is printed as a failure,
        so it also fails the scenario.
    """
    print("\n── T2: auto_repair=false flag (KaliScannerDown) ──")
    payload = _alertmanager_payload(
        "KaliScannerDown",
        {
            "auto_repair": "false",  # the Prometheus rule disables auto repair
            "layer": "docker-188",
            "component": "kali",
            "severity": "info",
        },
    )
    code, _ = _post(f"{api_url}/api/v1/webhooks/alertmanager", payload)
    print(f"  POST /webhooks/alertmanager → HTTP {code}")

    # Give the asynchronous pipeline time to write its log entries.
    time.sleep(3)

    received = _check_op_log(api_url, "ALERT_RECEIVED")
    # NOTE(review): _check_op_log accepts any matching entry from the last
    # 30 s, so an entry written for another alert shortly before this one
    # (e.g. by T1) also satisfies these checks — confirm whether the log can
    # be filtered per alert.
    blocked = _check_op_log(api_url, "GUARDRAIL_BLOCKED")

    if received:
        print(f"  {PASS} ALERT_RECEIVED 已記錄")
    else:
        print(f"  {FAIL} 未找到 ALERT_RECEIVED 記錄")
    if blocked:
        print(f"  {PASS} GUARDRAIL_BLOCKED 已記錄auto_repair=false flag 生效)")
    else:
        print(f"  {FAIL} 未找到 GUARDRAIL_BLOCKEDflag 未生效)")

    return code == 200 and received and blocked
def run_t3_auto_service(api_url: str) -> bool:
    """T3: an alert for an auto-repairable service must enter the normal flow.

    Sends a ``KubePodNotReady`` alert for the ``awoooi-prod`` namespace, waits
    three seconds, then checks the operation log for a recent
    ``ALERT_RECEIVED`` entry. The absence of ``GUARDRAIL_BLOCKED`` is not
    checked.

    Args:
        api_url: Base URL of the API under test.

    Returns:
        ``True`` when the webhook answered HTTP 200 and the entry was found.
    """
    print("\n── T3: AUTO 服務告警 (KubePodNotReady) ──")

    alert_labels = {
        "auto_repair": "true",
        "layer": "k8s",
        "namespace": "awoooi-prod",
        "pod": "test-pod-e2e",
    }
    body = _alertmanager_payload("KubePodNotReady", alert_labels)
    status, _ = _post(f"{api_url}/api/v1/webhooks/alertmanager", body)
    print(f"  POST /webhooks/alertmanager → HTTP {status}")

    # Give the asynchronous pipeline time to write its log entries.
    time.sleep(3)

    received = _check_op_log(api_url, "ALERT_RECEIVED")
    if not received:
        print(f"  {FAIL} 未找到 ALERT_RECEIVED")
    else:
        print(f"  {PASS} ALERT_RECEIVED 已記錄AUTO 服務進入正常流程")
    return status == 200 and received
def run_t4_docker_health_monitor(api_url: str) -> bool:
    """T4: a docker-health-monitor style alert must be logged as received.

    Sends a ``DockerContainerExited`` alert labelled with
    ``source=docker-health-monitor``, waits two seconds, then checks the
    operation log for a recent ``ALERT_RECEIVED`` entry.

    Args:
        api_url: Base URL of the API under test.

    Returns:
        ``True`` only when the webhook answered HTTP 200 and the entry was
        found. A missing entry is printed as a failure, so it also fails the
        scenario (the same rule T3 applies).
    """
    print("\n── T4: docker-health-monitor webhook 格式 ──")
    # Labels modelled on what docker-health-monitor.sh is expected to send.
    payload = _alertmanager_payload(
        "DockerContainerExited",
        {
            "auto_repair": "true",
            "layer": "docker",
            "host": "188",
            "container": "test-container-e2e",
            "source": "docker-health-monitor",
        },
    )
    code, _ = _post(f"{api_url}/api/v1/webhooks/alertmanager", payload)
    print(f"  POST /webhooks/alertmanager → HTTP {code}")

    # Give the asynchronous pipeline time to write its log entries.
    time.sleep(2)

    received = _check_op_log(api_url, "ALERT_RECEIVED")
    if received:
        print(f"  {PASS} ALERT_RECEIVED 已記錄docker-health-monitor 格式相容")
    else:
        print(f"  {FAIL} 未找到 ALERT_RECEIVED")

    return code == 200 and received
def run_t5_service_registry_api(api_url: str) -> bool:
    """T5: API health check plus an optional auto-repair statistics probe.

    ``/api/v1/health`` must answer HTTP 200; its ``version`` (or ``git_sha``)
    field is printed. ``/api/v1/stats/auto-repair`` is probed as well, but any
    other status there is only reported as skipped.

    Args:
        api_url: Base URL of the API under test.

    Returns:
        ``True`` when the health endpoint answered HTTP 200, else ``False``.
    """
    print("\n── T5: Health Check + 系統狀態 ──")

    health_status, health = _get(f"{api_url}/api/v1/health")
    print(f"  GET /api/v1/health → HTTP {health_status}")
    if health_status != 200:
        print(f"  {FAIL} API 健康檢查失敗")
        return False

    print(f"  {PASS} API 健康")
    version = health.get("version", health.get("git_sha", "unknown"))
    print(f"     版本: {version}")

    # The statistics endpoint is informational: its outcome never fails T5.
    stats_status, _ = _get(f"{api_url}/api/v1/stats/auto-repair")
    if stats_status != 200:
        print(f"  {SKIP} auto_repair stats 端點: HTTP {stats_status}(可接受)")
    else:
        print(f"  {PASS} auto_repair stats 端點正常")

    return True
def main() -> None:
    """Run scenarios T1–T5 against the API and report the outcome.

    Command-line options:
        --api-url  Base URL of the API under test.
        --json     Additionally print a one-line JSON summary.

    Each scenario's verdict is printed with an explicit pass/fail marker. The
    process exits with status 0 when every scenario passed and 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="Sprint 5.1 E2E 驗收")
    parser.add_argument("--api-url", default="http://192.168.0.121:32334")
    parser.add_argument("--json", action="store_true")
    args = parser.parse_args()

    print("🧪 Sprint 5.1 Data Safety Guardrails E2E 驗收")
    print(f"   API: {args.api_url}")
    print(f"   時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Scenarios run in this order; dict order is also the report order.
    results = {
        "T1_block_service": run_t1_block_service(args.api_url),
        "T2_auto_repair_false": run_t2_auto_repair_false_flag(args.api_url),
        "T3_auto_service": run_t3_auto_service(args.api_url),
        "T4_docker_health_monitor": run_t4_docker_health_monitor(args.api_url),
        "T5_health_check": run_t5_service_registry_api(args.api_url),
    }
    passed = sum(1 for ok in results.values() if ok)
    total = len(results)

    # Literal markers and rule character: with empty strings here the summary
    # would show neither a separator nor which scenarios failed.
    separator = "═" * 50
    print(f"\n{separator}")
    print(f"  結果: {passed}/{total} 通過")
    for name, ok in results.items():
        print(f"  {'✅' if ok else '❌'} {name}")
    print(separator)

    if args.json:
        print(json.dumps({"passed": passed, "total": total, "results": results}))

    sys.exit(0 if passed == total else 1)


if __name__ == "__main__":
    main()