Files
awoooi/apps/api/tests/test_global_repair_cooldown.py
OG T 27509db212 feat(api): Wave 1 安全網 - Circuit Breaker + Global Repair Cooldown
ADR-038: OpenClaw 雙層保護
- Layer 1: Circuit Breaker (5 failures → 60s cooldown)
- Layer 2: Concurrency Semaphore (max 3 concurrent)
- 新增 src/core/circuit_breaker.py

ADR-039: 全域修復熔斷
- Global Cooldown: 5 repairs/15min → freeze
- StatefulSet Blacklist: postgres/redis/clickhouse 禁止自動重啟
- 新增 src/services/global_repair_cooldown.py
- 整合到 auto_repair_service.py

測試:
- test_circuit_breaker.py (狀態轉換 + Semaphore)
- test_global_repair_cooldown.py (黑名單 + 計數閾值)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-29 15:48:03 +08:00

167 lines
5.2 KiB
Python

"""
Global Repair Cooldown 測試
===========================
ADR-039: 全域修復熔斷機制
測試項目:
- 有狀態服務黑名單檢查
- 全域計數閾值
- Redis 故障降級
注意:需要 Redis 環境,測試會使用獨立的 key 前綴
"""
import pytest
from src.services.global_repair_cooldown import (
GLOBAL_COOLDOWN_KEY,
GLOBAL_COOLDOWN_THRESHOLD,
STATEFUL_SERVICE_BLACKLIST,
check_global_repair_cooldown,
get_global_repair_status,
record_global_repair_action,
)
class TestStatefulServiceBlacklist:
    """Checks how the stateful-service blacklist gates automatic repair.

    Every async case calls ``check_global_repair_cooldown`` with an incident
    id and a service list, then inspects the returned two-element result
    (a truthy/falsy "may repair" flag and a human-readable reason string).
    """

    @pytest.mark.asyncio
    async def test_postgres_blocked(self):
        """A PostgreSQL service name must be refused with both reason fragments."""
        verdict = await check_global_repair_cooldown(
            incident_id="test-001",
            affected_services=["awoooi-postgres"],
        )
        allowed, message = verdict
        assert not allowed
        assert "有狀態服務" in message
        assert "禁止自動重啟" in message

    @pytest.mark.asyncio
    async def test_redis_blocked(self):
        """A Redis service name must be refused."""
        verdict = await check_global_repair_cooldown(
            incident_id="test-002",
            affected_services=["redis-stack"],
        )
        allowed, message = verdict
        assert not allowed
        assert "有狀態服務" in message

    @pytest.mark.asyncio
    async def test_clickhouse_blocked(self):
        """A ClickHouse service name must be refused."""
        verdict = await check_global_repair_cooldown(
            incident_id="test-003",
            affected_services=["signoz-clickhouse-0"],
        )
        allowed, message = verdict
        assert not allowed
        assert "有狀態服務" in message

    @pytest.mark.asyncio
    async def test_stateless_service_allowed(self):
        """A service name outside the blacklist is expected to be permitted."""
        verdict = await check_global_repair_cooldown(
            incident_id="test-004",
            affected_services=["awoooi-api-deployment"],
        )
        allowed, message = verdict
        assert allowed
        assert "允許" in message

    @pytest.mark.asyncio
    async def test_empty_services_allowed(self):
        """An empty service list is expected to be permitted."""
        # Only the flag matters here; the reason text is not inspected.
        allowed, _ = await check_global_repair_cooldown(
            incident_id="test-005",
            affected_services=[],
        )
        assert allowed

    @pytest.mark.asyncio
    async def test_none_services_allowed(self):
        """Passing ``None`` as the service list is expected to be permitted."""
        # Only the flag matters here; the reason text is not inspected.
        allowed, _ = await check_global_repair_cooldown(
            incident_id="test-006",
            affected_services=None,
        )
        assert allowed

    def test_blacklist_contains_common_stateful_services(self):
        """The blacklist must contain each of the common stateful services."""
        expected_entries = (
            "postgres",
            "redis",
            "clickhouse",
            "elasticsearch",
            "etcd",
            "minio",
        )
        for entry in expected_entries:
            assert entry in STATEFUL_SERVICE_BLACKLIST
class TestGlobalCooldown:
    """Global cooldown counter tests; these talk to Redis via ``get_redis``."""

    @pytest.fixture
    async def clean_redis(self):
        """Delete the cooldown key before the test body and again afterwards."""
        from src.core.redis_client import get_redis

        client = get_redis()
        await client.delete(GLOBAL_COOLDOWN_KEY)
        yield
        await client.delete(GLOBAL_COOLDOWN_KEY)

    @pytest.mark.asyncio
    async def test_record_increments_counter(self, clean_redis):
        """One recorded repair leaves the stored counter at 1."""
        from src.core.redis_client import get_redis

        client = get_redis()

        # Record a single repair action, then read the raw stored value.
        await record_global_repair_action()
        stored = await client.get(GLOBAL_COOLDOWN_KEY)

        assert int(stored) == 1

    @pytest.mark.asyncio
    async def test_record_sets_ttl(self, clean_redis):
        """The first recorded repair puts an expiry on the counter key."""
        from src.core.redis_client import get_redis

        client = get_redis()
        await record_global_repair_action()

        remaining = await client.ttl(GLOBAL_COOLDOWN_KEY)
        # Upper bound is 900 seconds, i.e. 15 minutes.
        assert 0 < remaining <= 900

    @pytest.mark.asyncio
    async def test_cooldown_triggers_after_threshold(self, clean_redis):
        """Recording the threshold number of repairs makes the check refuse."""
        # Record exactly GLOBAL_COOLDOWN_THRESHOLD repair actions.
        for _attempt in range(GLOBAL_COOLDOWN_THRESHOLD):
            await record_global_repair_action()

        verdict = await check_global_repair_cooldown(
            incident_id="test-threshold",
            affected_services=["awoooi-api"],
        )
        allowed, message = verdict

        assert not allowed
        assert "超出安全閾值" in message
        assert str(GLOBAL_COOLDOWN_THRESHOLD) in message

    @pytest.mark.asyncio
    async def test_get_status_returns_correct_info(self, clean_redis):
        """The status call reports count, threshold, frozen flag and TTL."""
        # Two recorded repairs are expected to stay below the freeze point.
        await record_global_repair_action()
        await record_global_repair_action()

        snapshot = await get_global_repair_status()

        assert snapshot["current_count"] == 2
        assert snapshot["threshold"] == GLOBAL_COOLDOWN_THRESHOLD
        assert not snapshot["is_frozen"]
        assert snapshot["ttl_remaining"] is not None
        assert snapshot["ttl_remaining"] > 0