feat(api): 首次信任機制 — 打破自動修復冷啟動雞生蛋問題
問題: Playbook 需要 success_count >= 3 才算 is_high_quality, 但沒有自動修復就不會有成功紀錄 → 永遠達不到門檻。

方案 C: 首次信任 (Cold Start Trust)
- APPROVED 狀態 + 全步驟 risk=LOW + 執行次數 < 3 → 自動放行
- Redis counter 限制每日最多 5 次首次信任自動修復
- 累積 3 次成功後自動回歸正常 is_high_quality 門檻

安全邊界:
- 只有 LOW risk 步驟才能首次信任 (重啟容器等)
- HIGH/CRITICAL 仍需人工審核
- P0/P1 嚴重度仍需人工審核
- 每日上限防止失控

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -32,6 +32,7 @@ from src.models.incident import Incident, Severity
|
||||
from src.models.playbook import (
|
||||
ActionType,
|
||||
Playbook,
|
||||
PlaybookStatus,
|
||||
RiskLevel,
|
||||
SymptomPattern,
|
||||
)
|
||||
@@ -135,6 +136,12 @@ class AutoRepairService:
|
||||
MAX_AUTO_REPAIR_SEVERITY = Severity.P2 # 最高允許自動修復的嚴重度
|
||||
MIN_SIMILARITY_SCORE = 0.7 # 最低相似度門檻
|
||||
|
||||
# 2026-04-07 Claude Code: 首次信任機制 — 打破冷啟動雞生蛋問題
|
||||
# 條件: APPROVED + 全部步驟 risk=LOW + 執行次數 < 3
|
||||
# 每日最多 5 次首次信任自動修復,防止失控
|
||||
COLD_START_TRUST_MAX_EXECUTIONS = 3 # 累積幾次後回歸正常門檻
|
||||
COLD_START_TRUST_DAILY_LIMIT = 5 # 每日首次信任上限
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
playbook_service: IPlaybookService | None = None,
|
||||
@@ -255,17 +262,47 @@ class AutoRepairService:
|
||||
blocked_by="LOW_SIMILARITY",
|
||||
)
|
||||
|
||||
# 高品質檢查
|
||||
# 高品質檢查 + 首次信任機制
|
||||
# 2026-04-07 Claude Code: 方案 C — 打破冷啟動雞生蛋問題
|
||||
max_risk = self._get_max_risk_level(best_match.playbook)
|
||||
|
||||
if not best_match.playbook.is_high_quality:
|
||||
return AutoRepairDecision(
|
||||
can_auto_repair=False,
|
||||
playbook=best_match.playbook,
|
||||
reason=f"Playbook 尚未達到高品質標準 (成功率: {best_match.playbook.success_rate:.0%}, 執行次數: {best_match.playbook.total_executions})",
|
||||
blocked_by="NOT_HIGH_QUALITY",
|
||||
# 首次信任: APPROVED + 全步驟 LOW risk + 執行次數 < N
|
||||
cold_start_eligible = (
|
||||
best_match.playbook.status == PlaybookStatus.APPROVED
|
||||
and max_risk == RiskLevel.LOW
|
||||
and best_match.playbook.total_executions < self.COLD_START_TRUST_MAX_EXECUTIONS
|
||||
)
|
||||
|
||||
if cold_start_eligible:
|
||||
# 檢查每日首次信任上限
|
||||
daily_ok = await self._check_cold_start_daily_limit()
|
||||
if daily_ok:
|
||||
logger.info(
|
||||
"auto_repair_cold_start_trust",
|
||||
incident_id=incident.incident_id,
|
||||
playbook_id=best_match.playbook.playbook_id,
|
||||
playbook_name=best_match.playbook.name,
|
||||
total_executions=best_match.playbook.total_executions,
|
||||
max_risk=max_risk.value,
|
||||
)
|
||||
# 跳過 is_high_quality 門檻,直接進入風險檢查
|
||||
else:
|
||||
return AutoRepairDecision(
|
||||
can_auto_repair=False,
|
||||
playbook=best_match.playbook,
|
||||
reason=f"首次信任每日上限已達 {self.COLD_START_TRUST_DAILY_LIMIT} 次",
|
||||
blocked_by="COLD_START_DAILY_LIMIT",
|
||||
)
|
||||
else:
|
||||
return AutoRepairDecision(
|
||||
can_auto_repair=False,
|
||||
playbook=best_match.playbook,
|
||||
reason=f"Playbook 尚未達到高品質標準 (成功率: {best_match.playbook.success_rate:.0%}, 執行次數: {best_match.playbook.total_executions})",
|
||||
blocked_by="NOT_HIGH_QUALITY",
|
||||
)
|
||||
|
||||
# 5. 檢查動作風險等級
|
||||
max_risk = self._get_max_risk_level(best_match.playbook)
|
||||
|
||||
if self._risk_exceeds_threshold(max_risk):
|
||||
return AutoRepairDecision(
|
||||
@@ -468,6 +505,42 @@ class AutoRepairService:
|
||||
high_risks = {RiskLevel.HIGH, RiskLevel.CRITICAL}
|
||||
return risk in high_risks
|
||||
|
||||
async def _check_cold_start_daily_limit(self) -> bool:
    """
    Consume one slot of today's cold-start-trust quota and report whether
    the call is still within the daily limit.

    The quota is tracked in a Redis counter whose key embeds the current
    date as returned by ``now_taipei()``. Note that this is not a pure
    check: every call increments the counter, including calls that end up
    being rejected because the limit was already exceeded.

    Returns:
        True when the incremented count is at most
        ``COLD_START_TRUST_DAILY_LIMIT``. False when the limit is
        exceeded, when ``get_redis()`` yields ``None``, or when any
        exception is raised while talking to Redis (fail closed).
    """
    # 25 hours, so a day-scoped key always outlives the day it counts.
    counter_ttl_seconds = 90000

    try:
        from src.core.redis_client import get_redis

        redis = await get_redis()
        if redis is None:
            # Redis unavailable -> conservatively refuse cold-start trust.
            return False

        from src.utils.timezone import now_taipei

        today_key = f"cold_start_trust:{now_taipei().strftime('%Y-%m-%d')}"
        count = await redis.incr(today_key)

        # INCR and EXPIRE are two separate commands. Setting the TTL only
        # when count == 1 leaves the key without any expiry forever if that
        # single EXPIRE call fails or is interrupted. Refreshing the TTL on
        # every increment self-heals that case; because the key is scoped
        # to one date, it still expires within 25 hours of its last write.
        await redis.expire(today_key, counter_ttl_seconds)

        if count > self.COLD_START_TRUST_DAILY_LIMIT:
            logger.warning(
                "cold_start_daily_limit_reached",
                today_key=today_key,
                count=count,
                limit=self.COLD_START_TRUST_DAILY_LIMIT,
            )
            return False

        return True

    except Exception as e:
        # Deliberate fail-closed boundary: any failure of the quota check
        # must deny cold-start trust rather than allow unmetered repairs.
        logger.warning("cold_start_daily_limit_check_failed", error=str(e))
        return False
|
||||
|
||||
async def _execute_step(self, incident: Incident, step) -> str:
|
||||
"""
|
||||
執行單一修復步驟
|
||||
|
||||
Reference in New Issue
Block a user