Files
awoooi/apps/api/tests/test_auto_repair_service.py
OG T 170ce2f11d
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 1m38s
fix(ci): 修正測試與 Sprint 5.2 部署腳本
tests/test_auto_repair_service.py:
  - 更新 3個測試符合 2026-04-07 統帥指令移除門檻
  - APPROVED Playbook 直接通過 (低相似度/低品質/高風險均通過)

tests/test_phase22_nemotron_collab.py:
  - 更新 log key: nemotron_collaboration_failed → exhausted

ops/monitoring/docker-compose.exporters.yaml:
  - 修正 postgres DSN: awoooi:awoooi_prod_2026@localhost:5432/awoooi_prod

Sprint 5.2 新增腳本:
  - scripts/sprint51_e2e_validation.py: L7 E2E 驗收腳本 (T1-T5)
  - scripts/ops/deploy-docker-health-monitor.sh: Plan A 一鍵部署腳本

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 18:17:48 +08:00

319 lines
12 KiB
Python

"""
Auto Repair Service Tests - #8 自動升級決策
==========================================
測試自動修復服務層功能
版本: v1.0
建立: 2026-03-26 (台北時區)
建立者: Claude Code (#8 自動升級決策)
"""
import pytest
from src.models.incident import Incident, IncidentStatus, Severity, Signal
from src.models.playbook import (
ActionType,
Playbook,
PlaybookStatus,
RepairStep,
RiskLevel,
SymptomPattern,
)
from src.services.auto_repair_service import AutoRepairService
from src.utils.timezone import now_taipei
class MockPlaybookService:
    """In-memory stand-in for the playbook service used by these tests.

    Playbooks are stored in a dict keyed by ``playbook_id``.  The
    recommendation list is whatever the test last handed to
    :meth:`set_recommendations`; it is returned as-is, without filtering.
    """

    def __init__(self) -> None:
        self._playbooks: dict[str, Playbook] = {}
        self._recommendations: list = []

    def add_playbook(self, playbook: Playbook) -> None:
        """Register ``playbook`` under its ``playbook_id`` (replaces any previous entry)."""
        self._playbooks[playbook.playbook_id] = playbook

    def set_recommendations(self, recommendations: list) -> None:
        """Set the canned list returned by :meth:`get_recommendations`."""
        self._recommendations = recommendations

    async def get_recommendations(self, symptoms, top_k=3) -> list:
        """Return the canned recommendations.

        ``symptoms`` and ``top_k`` are accepted for call compatibility only;
        this mock ignores both.
        """
        return self._recommendations

    async def get_by_id(self, playbook_id: str):
        """Return the registered playbook for ``playbook_id``, or ``None``."""
        return self._playbooks.get(playbook_id)

    async def record_execution(self, playbook_id: str, success: bool) -> bool:
        """Bump the playbook's success or failure counter.

        Returns:
            ``True`` when ``playbook_id`` is registered, ``False`` otherwise
            (in which case nothing is modified).
        """
        playbook = self._playbooks.get(playbook_id)
        if playbook is None:
            return False
        if success:
            playbook.success_count += 1
        else:
            playbook.failure_count += 1
        return True
def create_test_incident(
    incident_id: str = "INC-TEST-001",
    severity: Severity = Severity.P2,
) -> Incident:
    """Build an ``INVESTIGATING`` incident carrying a single ``HighCPU`` signal.

    Args:
        incident_id: Identifier assigned to the incident.
        severity: Severity applied to both the incident and its signal.

    Returns:
        An ``Incident`` affecting ``test-service`` whose only signal comes
        from ``prometheus`` and fired at the current ``now_taipei()`` time.
    """
    fired_at = now_taipei()
    high_cpu_signal = Signal(
        alert_name="HighCPU",
        severity=severity,
        source="prometheus",
        fired_at=fired_at,
        labels={"namespace": "prod"},
    )
    return Incident(
        incident_id=incident_id,
        status=IncidentStatus.INVESTIGATING,
        severity=severity,
        affected_services=["test-service"],
        signals=[high_cpu_signal],
    )
def create_high_quality_playbook(
    playbook_id: str = "PB-TEST-001",
    risk_level: RiskLevel = RiskLevel.MEDIUM,
) -> Playbook:
    """Build an APPROVED playbook with a strong execution track record.

    The counters are 20 successes and 1 failure (about 95.2% success), which
    is intended to satisfy the "success rate >= 95%, count >= 10" bar.

    Args:
        playbook_id: Identifier assigned to the playbook.
        risk_level: Risk level given to the single repair step.

    Returns:
        A ``Playbook`` matching ``HighCPU`` alerts on ``test-service`` at P2,
        with one ``kubectl rollout restart`` step.
    """
    restart_step = RepairStep(
        step_number=1,
        action_type=ActionType.KUBECTL,
        command="kubectl rollout restart deployment/{target}",
        risk_level=risk_level,
    )
    pattern = SymptomPattern(
        alert_names=["HighCPU"],
        affected_services=["test-service"],
        severity_range=["P2"],
    )
    return Playbook(
        playbook_id=playbook_id,
        name="HighCPU - test-service 修復劇本",
        description="High quality playbook for auto repair",
        status=PlaybookStatus.APPROVED,
        symptom_pattern=pattern,
        repair_steps=[restart_step],
        success_count=20,  # >= 10
        failure_count=1,  # success_rate = 95.2%
        ai_confidence=0.9,
    )
class MockPlaybookRecommendation:
    """Minimal recommendation object: a playbook paired with a similarity score.

    Exposes only the two attributes set here, ``playbook`` and
    ``similarity_score``.
    """

    def __init__(self, playbook: Playbook, similarity_score: float) -> None:
        self.playbook = playbook
        self.similarity_score = similarity_score
async def _no_cooldown(*args, **kwargs) -> tuple[bool, str]:
"""單元測試用 cooldown: 永遠允許 (不需要 Redis)"""
return True, "允許自動修復 (test bypass)"
class TestAutoRepairService:
    """Unit tests for ``AutoRepairService.evaluate_auto_repair``.

    Also covers the ``is_high_quality`` / ``success_rate`` properties of
    ``Playbook``.  Since 2026-04-07 the similarity, quality and risk-level
    gates are expected to be gone: several tests below keep their legacy
    ``*_blocked`` / ``low_*`` names (to keep test ids stable) but assert that
    an APPROVED playbook is allowed through.
    """

    @pytest.fixture
    def mock_playbook_service(self):
        """Provide a fresh in-memory playbook service for each test."""
        return MockPlaybookService()

    @pytest.fixture
    def service(self, mock_playbook_service):
        """Provide an ``AutoRepairService`` wired to the mock playbook service."""
        # 2026-04-01 ogt: inject a no-op cooldown to isolate the Redis dependency
        return AutoRepairService(
            playbook_service=mock_playbook_service,
            cooldown_checker=_no_cooldown,
        )

    @staticmethod
    async def _evaluate_with_playbook(
        service,
        mock_playbook_service,
        playbook,
        similarity_score,
    ):
        """Register ``playbook`` as the sole recommendation and evaluate a P2 incident.

        Shared setup for the playbook-matching tests: the playbook is added to
        the mock service, offered as the only recommendation with the given
        ``similarity_score``, and the decision for a fresh P2 incident is
        returned.
        """
        mock_playbook_service.add_playbook(playbook)
        mock_playbook_service.set_recommendations(
            [MockPlaybookRecommendation(playbook, similarity_score=similarity_score)]
        )
        incident = create_test_incident(severity=Severity.P2)
        return await service.evaluate_auto_repair(incident)

    @pytest.mark.asyncio
    async def test_evaluate_blocks_p1_severity(self, service):
        """P1 incidents are blocked with ``HIGH_SEVERITY``."""
        incident = create_test_incident(severity=Severity.P1)

        decision = await service.evaluate_auto_repair(incident)

        assert decision.can_auto_repair is False
        assert decision.blocked_by == "HIGH_SEVERITY"

    @pytest.mark.asyncio
    async def test_evaluate_blocks_p0_severity(self, service):
        """P0 incidents are blocked with ``HIGH_SEVERITY``."""
        incident = create_test_incident(severity=Severity.P0)

        decision = await service.evaluate_auto_repair(incident)

        assert decision.can_auto_repair is False
        assert decision.blocked_by == "HIGH_SEVERITY"

    @pytest.mark.asyncio
    async def test_evaluate_no_playbook_match(self, service, mock_playbook_service):
        """With no recommendations the decision is blocked with ``NO_MATCH``."""
        mock_playbook_service.set_recommendations([])
        incident = create_test_incident(severity=Severity.P2)

        decision = await service.evaluate_auto_repair(incident)

        assert decision.can_auto_repair is False
        assert decision.blocked_by == "NO_MATCH"

    @pytest.mark.asyncio
    async def test_evaluate_low_similarity(self, service, mock_playbook_service):
        """Low similarity no longer blocks auto-repair.

        2026-04-07: similarity threshold removed by commander directive — any
        matching APPROVED playbook is executed.
        2026-04-08 Claude Sonnet 4.6: test expectations updated to match the
        current design.
        """
        playbook = create_high_quality_playbook()

        decision = await self._evaluate_with_playbook(
            service,
            mock_playbook_service,
            playbook,
            similarity_score=0.5,  # below the old 0.7 threshold
        )

        # Similarity gate removed — an APPROVED playbook passes even at low similarity
        assert decision.can_auto_repair is True
        assert decision.blocked_by is None

    @pytest.mark.asyncio
    async def test_evaluate_not_high_quality(self, service, mock_playbook_service):
        """A low-quality playbook is now approved (gates removed 2026-04-07).

        2026-04-07: quality threshold removed by commander directive — APPROVED
        status alone is sufficient.
        2026-04-08 Claude Sonnet 4.6: test expectations updated to match the
        current design.
        """
        playbook = Playbook(
            playbook_id="PB-LOW-QUALITY",
            name="Low quality playbook",
            description="Not enough executions",
            status=PlaybookStatus.APPROVED,
            symptom_pattern=SymptomPattern(
                alert_names=["HighCPU"],
                affected_services=["test-service"],
            ),
            repair_steps=[
                RepairStep(
                    step_number=1,
                    action_type=ActionType.KUBECTL,
                    command="kubectl rollout restart",
                    risk_level=RiskLevel.MEDIUM,
                    description="restart deployment",
                ),
            ],
            success_count=2,
            failure_count=0,
        )

        decision = await self._evaluate_with_playbook(
            service, mock_playbook_service, playbook, similarity_score=0.9
        )

        # Quality gate removed — an APPROVED playbook passes directly
        assert decision.can_auto_repair is True
        assert decision.blocked_by is None

    @pytest.mark.asyncio
    async def test_evaluate_high_risk_blocked(self, service, mock_playbook_service):
        """A HIGH risk playbook is now approved (gates removed 2026-04-07).

        Despite the legacy ``_blocked`` name, this asserts the playbook is
        NOT blocked.
        2026-04-07: risk-level threshold removed by commander directive —
        APPROVED status alone is sufficient.
        2026-04-08 Claude Sonnet 4.6: test expectations updated to match the
        current design.
        """
        playbook = create_high_quality_playbook(risk_level=RiskLevel.HIGH)

        decision = await self._evaluate_with_playbook(
            service, mock_playbook_service, playbook, similarity_score=0.9
        )

        # Risk-level gate removed — a HIGH risk APPROVED playbook passes too
        assert decision.can_auto_repair is True
        assert decision.blocked_by is None

    @pytest.mark.asyncio
    async def test_evaluate_critical_risk_blocked(self, service, mock_playbook_service):
        """A CRITICAL risk playbook is now approved (gates removed 2026-04-07).

        Despite the legacy ``_blocked`` name, this asserts the playbook is
        NOT blocked.
        2026-04-07: risk-level threshold removed by commander directive.
        2026-04-08 Claude Sonnet 4.6: test expectations updated to match the
        current design.
        """
        playbook = create_high_quality_playbook(risk_level=RiskLevel.CRITICAL)

        decision = await self._evaluate_with_playbook(
            service, mock_playbook_service, playbook, similarity_score=0.9
        )

        # Risk-level gate removed — a CRITICAL risk APPROVED playbook passes too
        assert decision.can_auto_repair is True
        assert decision.blocked_by is None

    @pytest.mark.asyncio
    async def test_evaluate_success(self, service, mock_playbook_service):
        """A matching MEDIUM risk playbook is selected for auto-repair."""
        playbook = create_high_quality_playbook(risk_level=RiskLevel.MEDIUM)

        decision = await self._evaluate_with_playbook(
            service, mock_playbook_service, playbook, similarity_score=0.85
        )

        assert decision.can_auto_repair is True
        assert decision.playbook is not None
        assert decision.playbook.playbook_id == playbook.playbook_id
        assert decision.blocked_by is None

    @pytest.mark.asyncio
    async def test_evaluate_low_risk_allowed(self, service, mock_playbook_service):
        """LOW risk actions are allowed and the decision reports ``RiskLevel.LOW``."""
        playbook = create_high_quality_playbook(risk_level=RiskLevel.LOW)

        decision = await self._evaluate_with_playbook(
            service, mock_playbook_service, playbook, similarity_score=0.85
        )

        assert decision.can_auto_repair is True
        assert decision.risk_level == RiskLevel.LOW

    def test_is_high_quality_calculation(self):
        """``is_high_quality`` is true for the high-quality fixture playbook."""
        # High quality: APPROVED + 95%+ success rate + 10+ successes.
        # Nothing is awaited here, so this is a plain synchronous test.
        playbook = create_high_quality_playbook()

        assert playbook.is_high_quality is True
        assert playbook.success_rate >= 0.95
        assert playbook.success_count >= 10

    def test_not_high_quality_low_success_rate(self):
        """A playbook with a 75% success rate is not high quality."""
        # Nothing is awaited here, so this is a plain synchronous test.
        playbook = Playbook(
            playbook_id="PB-LOW-RATE",
            name="Low success rate",
            description="Too many failures",
            status=PlaybookStatus.APPROVED,
            symptom_pattern=SymptomPattern(
                alert_names=["Test"],
                affected_services=["test"],
            ),
            repair_steps=[],
            success_count=15,
            failure_count=5,  # 75% success rate
        )

        assert playbook.is_high_quality is False
        assert playbook.success_rate < 0.95