P1 測試修復: - test_smart_router.py: 更新至當前 API (IntentResult + DIAGNOSE/CONFIG 規範化) - test_auto_repair_service.py: 注入 _no_cooldown fixture 隔離 Redis 依賴 - test_global_repair_cooldown.py: 加 @pytest.mark.integration 標記 P2 架構改進: - AutoRepairService: 新增 cooldown_checker DI 參數 (Callable | None) - global_repair_cooldown: get_redis() 移入 try-except 防止未捕獲 RuntimeError P3 配置: - pyproject.toml: 登記 integration pytest marker Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
171 lines
5.4 KiB
Python
171 lines
5.4 KiB
Python
"""
|
|
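Global Repair Cooldown tests
============================
ADR-039: global repair circuit-breaker mechanism

Covered scenarios:
- Stateful-service blacklist check
- Global count threshold
- Redis failure degradation

Note: a Redis environment is required. The tests are meant to use a
separate key prefix (NOTE(review): the cleanup fixture deletes
GLOBAL_COOLDOWN_KEY as imported from the service module; confirm that
key is isolated from real data).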
|
|
"""
|
|
|
|
import pytest
|
|
|
|
from src.services.global_repair_cooldown import (
|
|
GLOBAL_COOLDOWN_KEY,
|
|
GLOBAL_COOLDOWN_THRESHOLD,
|
|
STATEFUL_SERVICE_BLACKLIST,
|
|
check_global_repair_cooldown,
|
|
get_global_repair_status,
|
|
record_global_repair_action,
|
|
)
|
|
|
|
|
|
class TestStatefulServiceBlacklist:
    """Tests for the stateful-service blacklist in check_global_repair_cooldown.

    Each async test passes an incident id plus a list of affected service
    names and inspects the returned ``(can_repair, reason)`` pair.
    """

    @pytest.mark.asyncio
    async def test_postgres_blocked(self):
        """A PostgreSQL service must be refused."""
        allowed, message = await check_global_repair_cooldown(
            incident_id="test-001",
            affected_services=["awoooi-postgres"],
        )
        assert not allowed
        # The refusal text must name the category and state the restart ban.
        for fragment in ("有狀態服務", "禁止自動重啟"):
            assert fragment in message

    @pytest.mark.asyncio
    async def test_redis_blocked(self):
        """A Redis service must be refused."""
        allowed, message = await check_global_repair_cooldown(
            incident_id="test-002",
            affected_services=["redis-stack"],
        )
        assert not allowed
        assert "有狀態服務" in message

    @pytest.mark.asyncio
    async def test_clickhouse_blocked(self):
        """A ClickHouse service must be refused."""
        allowed, message = await check_global_repair_cooldown(
            incident_id="test-003",
            affected_services=["signoz-clickhouse-0"],
        )
        assert not allowed
        assert "有狀態服務" in message

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_stateless_service_allowed(self):
        """A stateless service is allowed (needs Redis: the cooldown count check must pass)."""
        allowed, message = await check_global_repair_cooldown(
            incident_id="test-004",
            affected_services=["awoooi-api-deployment"],
        )
        assert allowed
        assert "允許" in message

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_empty_services_allowed(self):
        """An empty service list is allowed (needs Redis)."""
        allowed, _message = await check_global_repair_cooldown(
            incident_id="test-005",
            affected_services=[],
        )
        assert allowed

    @pytest.mark.asyncio
    @pytest.mark.integration
    async def test_none_services_allowed(self):
        """A ``None`` service list is allowed (needs Redis)."""
        allowed, _message = await check_global_repair_cooldown(
            incident_id="test-006",
            affected_services=None,
        )
        assert allowed

    def test_blacklist_contains_common_stateful_services(self):
        """The blacklist must contain the common stateful service names."""
        required_entries = (
            "postgres",
            "redis",
            "clickhouse",
            "elasticsearch",
            "etcd",
            "minio",
        )
        for entry in required_entries:
            assert entry in STATEFUL_SERVICE_BLACKLIST
|
|
|
|
|
|
@pytest.mark.integration
class TestGlobalCooldown:
    """Global cooldown window tests; a Redis instance is required."""

    @pytest.fixture
    async def clean_redis(self):
        """Delete the global cooldown key before the test and again afterwards.

        NOTE(review): this is an async generator registered through plain
        ``pytest.fixture``; pytest-asyncio only drives such fixtures in auto
        mode -- confirm the project's ``asyncio_mode`` setting.
        """
        from src.core.redis_client import get_redis

        client = get_redis()
        await client.delete(GLOBAL_COOLDOWN_KEY)
        yield
        await client.delete(GLOBAL_COOLDOWN_KEY)

    @pytest.mark.asyncio
    async def test_record_increments_counter(self, clean_redis):
        """Recording one repair action leaves the counter at 1."""
        from src.core.redis_client import get_redis

        client = get_redis()

        # Record a single action on the freshly cleared key.
        await record_global_repair_action()

        stored = await client.get(GLOBAL_COOLDOWN_KEY)
        assert int(stored) == 1

    @pytest.mark.asyncio
    async def test_record_sets_ttl(self, clean_redis):
        """The first recorded action puts a TTL on the counter key."""
        from src.core.redis_client import get_redis

        client = get_redis()

        await record_global_repair_action()

        remaining = await client.ttl(GLOBAL_COOLDOWN_KEY)
        # Upper bound of 900 corresponds to the 15-minute window.
        assert 0 < remaining <= 900

    @pytest.mark.asyncio
    async def test_cooldown_triggers_after_threshold(self, clean_redis):
        """Once the threshold is reached, further repairs are frozen."""
        # Record GLOBAL_COOLDOWN_THRESHOLD actions so the limit is reached.
        for _attempt in range(GLOBAL_COOLDOWN_THRESHOLD):
            await record_global_repair_action()

        allowed, message = await check_global_repair_cooldown(
            incident_id="test-threshold",
            affected_services=["awoooi-api"],
        )

        assert not allowed
        # The refusal must mention the breach and quote the threshold value.
        for fragment in ("超出安全閾值", str(GLOBAL_COOLDOWN_THRESHOLD)):
            assert fragment in message

    @pytest.mark.asyncio
    async def test_get_status_returns_correct_info(self, clean_redis):
        """The status API reports count, threshold, frozen flag and TTL."""
        # Record two actions.
        for _attempt in range(2):
            await record_global_repair_action()

        snapshot = await get_global_repair_status()

        assert snapshot["current_count"] == 2
        assert snapshot["threshold"] == GLOBAL_COOLDOWN_THRESHOLD
        assert not snapshot["is_frozen"]

        ttl_left = snapshot["ttl_remaining"]
        assert ttl_left is not None
        assert ttl_left > 0
|