"""
Global Repair Cooldown tests
============================
ADR-039: global repair circuit-breaker mechanism.

Areas listed by the original header:
- stateful-service blacklist check
- global repair-count threshold
- Redis failure degradation (no such test is visible in this section)

Note: a Redis environment is required.
NOTE(review): the original header states that the tests use a separate key
prefix, but the tests below delete and read ``GLOBAL_COOLDOWN_KEY`` exactly as
imported -- confirm that key is safe to clear in the target environment.
"""

import pytest

from src.services.global_repair_cooldown import (
    GLOBAL_COOLDOWN_KEY,
    GLOBAL_COOLDOWN_THRESHOLD,
    STATEFUL_SERVICE_BLACKLIST,
    check_global_repair_cooldown,
    get_global_repair_status,
    record_global_repair_action,
)


class TestStatefulServiceBlacklist:
    """Tests for the stateful-service blacklist check."""

    @pytest.mark.asyncio
    async def test_postgres_blocked(self):
        """A PostgreSQL service must be refused."""
        outcome = await check_global_repair_cooldown(
            incident_id="test-001",
            affected_services=["awoooi-postgres"],
        )
        allowed, message = outcome

        assert not allowed
        assert "有狀態服務" in message
        assert "禁止自動重啟" in message

    @pytest.mark.asyncio
    async def test_redis_blocked(self):
        """A Redis service must be refused."""
        outcome = await check_global_repair_cooldown(
            incident_id="test-002",
            affected_services=["redis-stack"],
        )
        allowed, message = outcome

        assert not allowed
        assert "有狀態服務" in message

    @pytest.mark.asyncio
    async def test_clickhouse_blocked(self):
        """A ClickHouse service must be refused."""
        outcome = await check_global_repair_cooldown(
            incident_id="test-003",
            affected_services=["signoz-clickhouse-0"],
        )
        allowed, message = outcome

        assert not allowed
        assert "有狀態服務" in message

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_stateless_service_allowed(self):
        """A stateless service must be allowed.

        Requires Redis: per the original note, the call has to pass the
        cooldown count check as well.
        """
        outcome = await check_global_repair_cooldown(
            incident_id="test-004",
            affected_services=["awoooi-api-deployment"],
        )
        allowed, message = outcome

        assert allowed
        assert "允許" in message

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_empty_services_allowed(self):
        """An empty service list must be allowed (requires Redis)."""
        allowed, _ = await check_global_repair_cooldown(
            incident_id="test-005",
            affected_services=[],
        )

        assert allowed

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_none_services_allowed(self):
        """``None`` as the service list must be allowed (requires Redis)."""
        allowed, _ = await check_global_repair_cooldown(
            incident_id="test-006",
            affected_services=None,
        )

        assert allowed

    def test_blacklist_contains_common_stateful_services(self):
        """The blacklist must contain the common stateful services."""
        expected_entries = (
            "postgres",
            "redis",
            "clickhouse",
            "elasticsearch",
            "etcd",
            "minio",
        )
        # Checked one by one, in order, so the first missing entry fails.
        for entry in expected_entries:
            assert entry in STATEFUL_SERVICE_BLACKLIST


@pytest.mark.integration
class TestGlobalCooldown:
    """Tests for the global cooldown window -- requires Redis."""

    # NOTE(review): this is an async generator fixture declared with plain
    # ``pytest.fixture``; whether pytest-asyncio drives it depends on the
    # project's asyncio mode -- confirm against the pytest configuration.
    @pytest.fixture
    async def clean_redis(self):
        """Delete the cooldown key before and after the test."""
        from src.core.redis_client import get_redis

        client = get_redis()

        await client.delete(GLOBAL_COOLDOWN_KEY)
        yield
        await client.delete(GLOBAL_COOLDOWN_KEY)

    @pytest.mark.asyncio
    async def test_record_increments_counter(self, clean_redis):
        """Recording a repair must increase the counter."""
        from src.core.redis_client import get_redis

        client = get_redis()

        # A single recorded action on a freshly cleared key.
        await record_global_repair_action()

        stored = await client.get(GLOBAL_COOLDOWN_KEY)
        assert int(stored) == 1

    @pytest.mark.asyncio
    async def test_record_sets_ttl(self, clean_redis):
        """The first recorded repair must set a TTL on the key."""
        from src.core.redis_client import get_redis

        client = get_redis()

        await record_global_repair_action()

        ttl = await client.ttl(GLOBAL_COOLDOWN_KEY)
        assert ttl > 0
        assert ttl <= 900  # 15 minutes

    @pytest.mark.asyncio
    async def test_cooldown_triggers_after_threshold(self, clean_redis):
        """Reaching the threshold must freeze further repairs."""
        # Record as many repairs as the threshold allows (reaches the limit).
        for _ in range(GLOBAL_COOLDOWN_THRESHOLD):
            await record_global_repair_action()

        outcome = await check_global_repair_cooldown(
            incident_id="test-threshold",
            affected_services=["awoooi-api"],
        )
        allowed, message = outcome

        assert not allowed
        assert "超出安全閾值" in message
        assert str(GLOBAL_COOLDOWN_THRESHOLD) in message

    @pytest.mark.asyncio
    async def test_get_status_returns_correct_info(self, clean_redis):
        """The status call must report the recorded state."""
        # Two recorded repairs.
        for _ in range(2):
            await record_global_repair_action()

        status = await get_global_repair_status()

        assert status["current_count"] == 2
        assert status["threshold"] == GLOBAL_COOLDOWN_THRESHOLD
        assert not status["is_frozen"]

        remaining = status["ttl_remaining"]
        assert remaining is not None
        assert remaining > 0