ADR-038: OpenClaw 雙層保護 - Layer 1: Circuit Breaker (5 failures → 60s cooldown) - Layer 2: Concurrency Semaphore (max 3 concurrent) - 新增 src/core/circuit_breaker.py ADR-039: 全域修復熔斷 - Global Cooldown: 5 repairs/15min → freeze - StatefulSet Blacklist: postgres/redis/clickhouse 禁止自動重啟 - 新增 src/services/global_repair_cooldown.py - 整合到 auto_repair_service.py 測試: - test_circuit_breaker.py (狀態轉換 + Semaphore) - test_global_repair_cooldown.py (黑名單 + 計數閾值) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
165 lines
4.7 KiB
Python
165 lines
4.7 KiB
Python
"""
Circuit Breaker tests
=====================

ADR-038: OpenClaw two-layer protection mechanism

Covered here:
- Circuit Breaker state transitions
- Semaphore concurrency control
- Graceful Degradation
"""
|
|
|
|
import asyncio
|
|
|
|
import pytest
|
|
|
|
from src.core.circuit_breaker import (
|
|
CircuitBreakerConfig,
|
|
CircuitState,
|
|
OpenClawGuard,
|
|
get_openclaw_guard,
|
|
reset_openclaw_guard,
|
|
)
|
|
|
|
|
|
class TestCircuitBreaker:
    """Core circuit-breaker behaviour of ``OpenClawGuard``."""

    def setup_method(self):
        """Reset the global guard before every test."""
        reset_openclaw_guard()

    def test_initial_state_is_closed(self):
        """A newly built guard starts out CLOSED with a zero failure count."""
        breaker = OpenClawGuard()

        assert breaker.state == CircuitState.CLOSED
        assert breaker.failure_count == 0
        assert not breaker.is_circuit_open()

    def test_record_success_resets_failure_count(self):
        """Recording a success sets the failure counter back to zero."""
        breaker = OpenClawGuard()
        breaker.failure_count = 3  # pretend three failures already happened

        breaker.record_success()

        assert breaker.failure_count == 0

    def test_record_failure_increments_count(self):
        """Each recorded failure bumps the counter by one."""
        breaker = OpenClawGuard()

        for expected_count in (1, 2):
            breaker.record_failure()
            assert breaker.failure_count == expected_count

    def test_circuit_opens_after_threshold(self):
        """The circuit trips to OPEN once failures reach the threshold."""
        threshold = 3
        breaker = OpenClawGuard(CircuitBreakerConfig(failure_threshold=threshold))

        # One failure short of the threshold: still CLOSED.
        for _ in range(threshold - 1):
            breaker.record_failure()
        assert breaker.state == CircuitState.CLOSED

        # The failure that reaches the threshold opens the circuit.
        breaker.record_failure()
        assert breaker.state == CircuitState.OPEN
        assert breaker.is_circuit_open()

    def test_circuit_half_open_after_timeout(self):
        """After the cooldown elapses the circuit moves to HALF_OPEN."""
        import time

        breaker = OpenClawGuard(
            CircuitBreakerConfig(failure_threshold=2, timeout_s=0.1)
        )

        # Trip the breaker.
        for _ in range(2):
            breaker.record_failure()
        assert breaker.state == CircuitState.OPEN
        assert breaker.is_circuit_open()

        # Wait slightly longer than the configured 0.1 s cooldown.
        time.sleep(0.15)

        # NOTE(review): ``state`` is read only after ``is_circuit_open()`` has
        # been called; the transition may be driven by that call - confirm
        # against the implementation before reordering these assertions.
        assert not breaker.is_circuit_open()
        assert breaker.state == CircuitState.HALF_OPEN

    def test_circuit_closes_after_success_in_half_open(self):
        """A success while HALF_OPEN restores the CLOSED state."""
        breaker = OpenClawGuard()
        breaker.state = CircuitState.HALF_OPEN

        breaker.record_success()

        assert breaker.state == CircuitState.CLOSED
        assert breaker.failure_count == 0

    def test_get_metrics(self):
        """``get_metrics()`` reports state, failure count and concurrency cap."""
        breaker = OpenClawGuard()
        for _ in range(2):
            breaker.record_failure()

        snapshot = breaker.get_metrics()

        expected_entries = (
            ("state", "closed"),
            ("failure_count", 2),
            ("max_concurrent", 3),
        )
        for key, expected_value in expected_entries:
            assert snapshot[key] == expected_value

    def test_singleton_pattern(self):
        """``get_openclaw_guard()`` hands back the same object on repeat calls."""
        first = get_openclaw_guard()
        second = get_openclaw_guard()

        assert first is second

    def test_reset_clears_singleton(self):
        """``reset_openclaw_guard()`` makes the next lookup build a new guard."""
        before_reset = get_openclaw_guard()
        reset_openclaw_guard()
        after_reset = get_openclaw_guard()

        assert before_reset is not after_reset
|
|
|
|
|
|
class TestSemaphore:
    """Concurrency-control tests for the guard's semaphore."""

    def setup_method(self):
        """Reset the global guard before every test."""
        reset_openclaw_guard()

    @pytest.mark.asyncio
    async def test_semaphore_limits_concurrency(self):
        """The semaphore admits exactly ``max_concurrent`` workers at a time.

        Five workers contend for a guard configured with ``max_concurrent=2``.
        The peak number of workers inside the guarded section must equal the
        limit: an upper-bound-only check would also pass if the workers were
        fully serialised or the counter were never updated.
        """
        limit = 2
        worker_total = 5
        guard = OpenClawGuard(CircuitBreakerConfig(max_concurrent=limit))

        concurrent_count = 0
        max_concurrent_seen = 0
        results = []

        async def worker(worker_id: int):
            nonlocal concurrent_count, max_concurrent_seen

            async with guard.semaphore:
                concurrent_count += 1
                max_concurrent_seen = max(max_concurrent_seen, concurrent_count)
                # Hold the slot long enough for the other workers to queue up.
                await asyncio.sleep(0.05)
                results.append(worker_id)
                concurrent_count -= 1

        # Launch all workers at once so they compete for the semaphore.
        tasks = [asyncio.create_task(worker(i)) for i in range(worker_total)]
        await asyncio.gather(*tasks)

        # Peak concurrency hit the limit and never exceeded it.
        # NOTE(review): assumes ``guard.semaphore`` behaves like
        # ``asyncio.Semaphore(max_concurrent)`` - confirm against the guard.
        assert max_concurrent_seen == limit
        # Every worker left the guarded section ...
        assert concurrent_count == 0
        # ... and each one completed exactly once.
        assert sorted(results) == list(range(worker_total))

    @pytest.mark.asyncio
    async def test_semaphore_created_lazily(self):
        """The semaphore is not built until the ``semaphore`` property is read."""
        guard = OpenClawGuard()
        assert guard._semaphore is None

        # Reading the property triggers creation and caches the instance.
        sem = guard.semaphore
        assert guard._semaphore is not None
        assert sem is guard._semaphore
|