diff --git a/apps/api/src/services/auto_repair_service.py b/apps/api/src/services/auto_repair_service.py
index 119a9815..ad01297e 100644
--- a/apps/api/src/services/auto_repair_service.py
+++ b/apps/api/src/services/auto_repair_service.py
@@ -32,6 +32,7 @@ from src.models.incident import Incident, Severity
 from src.models.playbook import (
     ActionType,
     Playbook,
+    PlaybookStatus,
     RiskLevel,
     SymptomPattern,
 )
@@ -135,6 +136,12 @@ class AutoRepairService:
     MAX_AUTO_REPAIR_SEVERITY = Severity.P2  # 最高允許自動修復的嚴重度
     MIN_SIMILARITY_SCORE = 0.7  # 最低相似度門檻
 
+    # 2026-04-07 Claude Code: first-trust mechanism — breaks the cold-start chicken-and-egg problem
+    # Conditions: APPROVED + every step risk=LOW + execution count < 3
+    # At most 5 first-trust auto-repairs per day, to keep it from running away
+    COLD_START_TRUST_MAX_EXECUTIONS = 3  # after this many executions the normal threshold applies again
+    COLD_START_TRUST_DAILY_LIMIT = 5  # daily cap on first-trust auto-repairs
+
     def __init__(
         self,
         playbook_service: IPlaybookService | None = None,
@@ -255,17 +262,47 @@ class AutoRepairService:
                 blocked_by="LOW_SIMILARITY",
             )
 
-        # 高品質檢查
+        # High-quality check + first-trust mechanism
+        # 2026-04-07 Claude Code: option C — break the cold-start chicken-and-egg problem
+        max_risk = self._get_max_risk_level(best_match.playbook)
+
         if not best_match.playbook.is_high_quality:
-            return AutoRepairDecision(
-                can_auto_repair=False,
-                playbook=best_match.playbook,
-                reason=f"Playbook 尚未達到高品質標準 (成功率: {best_match.playbook.success_rate:.0%}, 執行次數: {best_match.playbook.total_executions})",
-                blocked_by="NOT_HIGH_QUALITY",
+            # First trust: APPROVED + all steps LOW risk + execution count < N
+            cold_start_eligible = (
+                best_match.playbook.status == PlaybookStatus.APPROVED
+                and max_risk == RiskLevel.LOW
+                and best_match.playbook.total_executions < self.COLD_START_TRUST_MAX_EXECUTIONS
             )
 
+            if cold_start_eligible:
+                # Check the daily first-trust cap
+                daily_ok = await self._check_cold_start_daily_limit()
+                if daily_ok:
+                    logger.info(
+                        "auto_repair_cold_start_trust",
+                        incident_id=incident.incident_id,
+                        playbook_id=best_match.playbook.playbook_id,
+                        playbook_name=best_match.playbook.name,
+                        total_executions=best_match.playbook.total_executions,
+                        max_risk=max_risk.value,
+                    )
+                    # Skip the is_high_quality gate and go straight to the risk check
+                else:
+                    return AutoRepairDecision(
+                        can_auto_repair=False,
+                        playbook=best_match.playbook,
+                        reason=f"首次信任每日上限已達 {self.COLD_START_TRUST_DAILY_LIMIT} 次",
+                        blocked_by="COLD_START_DAILY_LIMIT",
+                    )
+            else:
+                return AutoRepairDecision(
+                    can_auto_repair=False,
+                    playbook=best_match.playbook,
+                    reason=f"Playbook 尚未達到高品質標準 (成功率: {best_match.playbook.success_rate:.0%}, 執行次數: {best_match.playbook.total_executions})",
+                    blocked_by="NOT_HIGH_QUALITY",
+                )
+
         # 5. 檢查動作風險等級
-        max_risk = self._get_max_risk_level(best_match.playbook)
 
         if self._risk_exceeds_threshold(max_risk):
             return AutoRepairDecision(
@@ -468,6 +505,53 @@ class AutoRepairService:
         high_risks = {RiskLevel.HIGH, RiskLevel.CRITICAL}
         return risk in high_risks
 
+    async def _check_cold_start_daily_limit(self) -> bool:
+        """
+        Consume one slot of today's first-trust (cold-start) quota.
+
+        Increments a per-day Redis counter whose key embeds the date from
+        now_taipei(). The slot is consumed on every call, including calls
+        that end up rejected because the limit was already exceeded.
+        2026-04-07 Claude Code: option C — daily cap guarding cold-start trust
+
+        Returns:
+            True if the incremented count is within COLD_START_TRUST_DAILY_LIMIT.
+            False if the limit is exceeded, Redis is unavailable, or the check
+            raises (fail closed).
+        """
+        try:
+            from src.core.redis_client import get_redis
+            redis = await get_redis()
+            if redis is None:
+                # Redis unavailable -> refuse conservatively
+                return False
+
+            from src.utils.timezone import now_taipei
+            today_key = f"cold_start_trust:{now_taipei().strftime('%Y-%m-%d')}"
+
+            # Create the key and its TTL in one atomic command before counting.
+            # INCR followed by EXPIRE only when count == 1 leaves the key with no
+            # TTL forever if EXPIRE fails or the process dies between the calls.
+            # 90000 s = 25 hours, so the key outlives its calendar day; INCR
+            # keeps an existing TTL.
+            await redis.set(today_key, 0, ex=90000, nx=True)
+            count = await redis.incr(today_key)
+
+            if count > self.COLD_START_TRUST_DAILY_LIMIT:
+                logger.warning(
+                    "cold_start_daily_limit_reached",
+                    today_key=today_key,
+                    count=count,
+                    limit=self.COLD_START_TRUST_DAILY_LIMIT,
+                )
+                return False
+
+            return True
+        except Exception as e:
+            logger.warning("cold_start_daily_limit_check_failed", error=str(e))
+            # Safe degradation: a failed check -> refuse conservatively
+            return False
+
     async def _execute_step(self, incident: Incident, step) -> str:
         """
         執行單一修復步驟