Files
awoooi/apps/api/tests/test_global_repair_cooldown.py
OG T 59902f270d fix(tests): 首席架構師審查修復 - 測試套件 + DI 強化 (96/100 OUTSTANDING)
P1 測試修復:
- test_smart_router.py: 更新至當前 API (IntentResult + DIAGNOSE/CONFIG 規範化)
- test_auto_repair_service.py: 注入 _no_cooldown fixture 隔離 Redis 依賴
- test_global_repair_cooldown.py: 加 @pytest.mark.integration 標記

P2 架構改進:
- AutoRepairService: 新增 cooldown_checker DI 參數 (Callable | None)
- global_repair_cooldown: get_redis() 移入 try-except 防止未捕獲 RuntimeError

P3 配置:
- pyproject.toml: 登記 integration pytest marker

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 11:11:50 +08:00

171 lines
5.4 KiB
Python

"""
Global Repair Cooldown 測試
===========================
ADR-039: 全域修復熔斷機制
測試項目:
- 有狀態服務黑名單檢查
- 全域計數閾值
- Redis 故障降級
注意:需要 Redis 環境,測試會使用獨立的 key 前綴
"""
import pytest
from src.services.global_repair_cooldown import (
GLOBAL_COOLDOWN_KEY,
GLOBAL_COOLDOWN_THRESHOLD,
STATEFUL_SERVICE_BLACKLIST,
check_global_repair_cooldown,
get_global_repair_status,
record_global_repair_action,
)
class TestStatefulServiceBlacklist:
    """Tests for the stateful-service blacklist check."""

    @pytest.mark.asyncio
    async def test_postgres_blocked(self):
        """A PostgreSQL service must be refused."""
        services = ["awoooi-postgres"]
        allowed, message = await check_global_repair_cooldown(
            incident_id="test-001", affected_services=services
        )

        assert not allowed
        # Both fragments of the refusal message have to be present.
        for fragment in ("有狀態服務", "禁止自動重啟"):
            assert fragment in message

    @pytest.mark.asyncio
    async def test_redis_blocked(self):
        """A Redis service must be refused."""
        services = ["redis-stack"]
        allowed, message = await check_global_repair_cooldown(
            incident_id="test-002", affected_services=services
        )

        assert not allowed
        assert "有狀態服務" in message

    @pytest.mark.asyncio
    async def test_clickhouse_blocked(self):
        """A ClickHouse service must be refused."""
        services = ["signoz-clickhouse-0"]
        allowed, message = await check_global_repair_cooldown(
            incident_id="test-003", affected_services=services
        )

        assert not allowed
        assert "有狀態服務" in message

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_stateless_service_allowed(self):
        """A stateless service must be allowed.

        Requires Redis: the call has to pass the cooldown counter check.
        """
        services = ["awoooi-api-deployment"]
        allowed, message = await check_global_repair_cooldown(
            incident_id="test-004", affected_services=services
        )

        assert allowed
        assert "允許" in message

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_empty_services_allowed(self):
        """An empty service list must be allowed (requires Redis)."""
        allowed, _ = await check_global_repair_cooldown(
            incident_id="test-005", affected_services=[]
        )

        assert allowed

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_none_services_allowed(self):
        """A ``None`` service list must be allowed (requires Redis)."""
        allowed, _ = await check_global_repair_cooldown(
            incident_id="test-006", affected_services=None
        )

        assert allowed

    def test_blacklist_contains_common_stateful_services(self):
        """The blacklist must include the common stateful services."""
        expected_entries = (
            "postgres",
            "redis",
            "clickhouse",
            "elasticsearch",
            "etcd",
            "minio",
        )
        for entry in expected_entries:
            assert entry in STATEFUL_SERVICE_BLACKLIST
@pytest.mark.integration
class TestGlobalCooldown:
    """Tests for the global cooldown counter - require Redis."""

    @pytest.fixture
    async def clean_redis(self):
        """Delete the cooldown key before and after the test."""
        # NOTE(review): this is an async generator behind a plain
        # ``@pytest.fixture``; pytest-asyncio only drives such fixtures in
        # auto mode - confirm ``asyncio_mode = "auto"`` is configured.
        from src.core.redis_client import get_redis

        client = get_redis()
        await client.delete(GLOBAL_COOLDOWN_KEY)
        yield
        await client.delete(GLOBAL_COOLDOWN_KEY)

    @pytest.mark.asyncio
    async def test_record_increments_counter(self, clean_redis):
        """Recording an action must increase the counter."""
        from src.core.redis_client import get_redis

        client = get_redis()

        # Record a single action.
        await record_global_repair_action()

        stored = await client.get(GLOBAL_COOLDOWN_KEY)
        assert int(stored) == 1

    @pytest.mark.asyncio
    async def test_record_sets_ttl(self, clean_redis):
        """The first recorded action must set a TTL on the key."""
        from src.core.redis_client import get_redis

        client = get_redis()
        await record_global_repair_action()

        remaining = await client.ttl(GLOBAL_COOLDOWN_KEY)
        # Upper bound is 900 seconds, i.e. 15 minutes.
        assert 0 < remaining <= 900

    @pytest.mark.asyncio
    async def test_cooldown_triggers_after_threshold(self, clean_redis):
        """Reaching the threshold must freeze further repairs."""
        # Record exactly GLOBAL_COOLDOWN_THRESHOLD actions.
        for _attempt in range(GLOBAL_COOLDOWN_THRESHOLD):
            await record_global_repair_action()

        allowed, message = await check_global_repair_cooldown(
            incident_id="test-threshold", affected_services=["awoooi-api"]
        )

        assert not allowed
        assert "超出安全閾值" in message
        assert str(GLOBAL_COOLDOWN_THRESHOLD) in message

    @pytest.mark.asyncio
    async def test_get_status_returns_correct_info(self, clean_redis):
        """The status call must report count, threshold, freeze flag and TTL."""
        # Record two actions.
        for _attempt in range(2):
            await record_global_repair_action()

        snapshot = await get_global_repair_status()

        assert snapshot["current_count"] == 2
        assert snapshot["threshold"] == GLOBAL_COOLDOWN_THRESHOLD
        assert not snapshot["is_frozen"]

        ttl_left = snapshot["ttl_remaining"]
        assert ttl_left is not None
        assert ttl_left > 0