1234 lines
44 KiB
Python
1234 lines
44 KiB
Python
"""
|
||
Database-based Approval Service
|
||
================================
|
||
Phase 5: 永久記憶植入
|
||
|
||
將 TrustEngine 的 in-memory 邏輯轉換為資料庫 CRUD 操作。
|
||
重啟後資料完好無缺。
|
||
|
||
Features:
|
||
- SQLAlchemy async CRUD
|
||
- ApprovalRecord 持久化
|
||
- TimelineEvent 持久化
|
||
- 與原有 API 契約相容
|
||
"""
|
||
|
||
from datetime import UTC, datetime, timedelta
|
||
from typing import Any
|
||
from uuid import UUID
|
||
|
||
import structlog
|
||
from sqlalchemy import and_, or_, select, update
|
||
|
||
from src.core.redis_client import get_redis
|
||
from src.core.trust_engine import classify_risk, get_required_signatures
|
||
from src.db.base import get_db_context
|
||
from src.db.models import ApprovalRecord, TimelineEvent
|
||
from src.models.approval import (
|
||
ApprovalRequest,
|
||
ApprovalRequestCreate,
|
||
ApprovalStatus,
|
||
BlastRadius,
|
||
DataImpact,
|
||
DryRunCheck,
|
||
RiskLevel,
|
||
Signature,
|
||
)
|
||
from src.services.approval_action_classifier import (
|
||
is_executable_repair_approval_action,
|
||
is_no_action_approval_action,
|
||
)
|
||
|
||
logger = structlog.get_logger(__name__)
|
||
|
||
|
||
# =============================================================================
|
||
# Conversion Helpers
|
||
# =============================================================================
|
||
|
||
def approval_record_to_request(record: ApprovalRecord) -> ApprovalRequest:
    """
    Build the Pydantic ``ApprovalRequest`` from a DB ``ApprovalRecord``.

    The JSON columns (``blast_radius``, ``dry_run_checks``, ``signatures``)
    are unpacked into their typed models; the remaining columns are copied
    across unchanged so the API contract stays compatible.

    Args:
        record: The ORM row to convert.

    Returns:
        The equivalent ``ApprovalRequest``.
    """

    def _plain(enum_or_raw):
        # status / risk_level may surface as an Enum member or a raw value.
        return enum_or_raw.value if hasattr(enum_or_raw, 'value') else enum_or_raw

    # blast_radius JSON -> BlastRadius (stays None when the column is falsy)
    radius_json = record.blast_radius
    if radius_json:
        impact_raw = radius_json.get("data_impact")
        blast_radius = BlastRadius(
            affected_pods=radius_json.get("affected_pods", 0),
            estimated_downtime=radius_json.get("estimated_downtime", "0"),
            related_services=radius_json.get("related_services", []),
            data_impact=DataImpact(impact_raw.lower()) if impact_raw else DataImpact.NONE,
        )
    else:
        blast_radius = None

    # dry_run_checks JSON -> list[DryRunCheck]
    dry_run_checks = [
        DryRunCheck(
            name=item.get("name", ""),
            passed=item.get("passed", True),
            message=item.get("message"),
        )
        for item in (record.dry_run_checks or [])
    ]

    # signatures JSON -> list[Signature]; a missing timestamp falls back to "now"
    signatures = [
        Signature(
            signer_id=entry.get("signer_id", ""),
            signer_name=entry.get("signer_name", ""),
            timestamp=(
                datetime.fromisoformat(entry["timestamp"])
                if entry.get("timestamp")
                else datetime.now(UTC)
            ),
            comment=entry.get("comment"),
        )
        for entry in (record.signatures or [])
    ]

    return ApprovalRequest(
        id=UUID(record.id),
        action=record.action,
        description=record.description,
        status=ApprovalStatus(_plain(record.status)),
        risk_level=RiskLevel(_plain(record.risk_level)),
        blast_radius=blast_radius,
        dry_run_checks=dry_run_checks,
        required_signatures=record.required_signatures,
        current_signatures=record.current_signatures,
        signatures=signatures,
        requested_by=record.requested_by,
        created_at=record.created_at,
        expires_at=record.expires_at,
        resolved_at=record.resolved_at,
        rejection_reason=record.rejection_reason,
        metadata=record.extra_metadata,
        # Strategy B: alert-storm convergence fields
        fingerprint=record.fingerprint,
        hit_count=record.hit_count,
        last_seen_at=record.last_seen_at,
        # B3 fix 2026-04-24: DB columns needed when the manual-review path
        # reads the record back; getattr tolerates rows lacking the attribute.
        incident_id=getattr(record, "incident_id", None),
        matched_playbook_id=getattr(record, "matched_playbook_id", None),
        telegram_message_id=getattr(record, "telegram_message_id", None),
        telegram_chat_id=getattr(record, "telegram_chat_id", None),
    )
|
||
|
||
|
||
def approval_request_to_record_data(
    request: ApprovalRequestCreate,
    risk_level: RiskLevel,
    required_sigs: int,
    fingerprint: str | None = None,  # Strategy B: alert fingerprint
) -> dict[str, Any]:
    """
    Turn an ``ApprovalRequestCreate`` into keyword data for ``ApprovalRecord``.

    Args:
        request: The incoming creation request.
        risk_level: Already-classified risk level; LOW yields an APPROVED
            record with ``resolved_at`` set, anything else yields PENDING.
        required_sigs: Number of signatures the record will require.
        fingerprint: Optional alert fingerprint stored on the record.

    Returns:
        A dict of column values. It contains ``"id"`` only when the request
        metadata carried a non-empty ``preallocated_approval_id``.

    Raises:
        ValueError: If ``preallocated_approval_id`` is not a valid UUID string.
    """
    extra = dict(request.metadata or {})
    # The preallocated id is removed from the metadata and stored as the row id.
    reserved_id = str(extra.pop("preallocated_approval_id", "") or "").strip()
    if reserved_id:
        UUID(reserved_id)  # validation only; the original string is stored

    radius = request.blast_radius
    radius_payload: dict[str, Any] = {}
    if radius:
        impact = radius.data_impact
        radius_payload = {
            "affected_pods": radius.affected_pods,
            "estimated_downtime": radius.estimated_downtime,
            "related_services": radius.related_services,
            "data_impact": impact.value.lower() if impact else "none",
        }

    checks_payload = [
        {"name": check.name, "passed": check.passed, "message": check.message}
        for check in (request.dry_run_checks or [])
    ]

    auto_approved = risk_level == RiskLevel.LOW
    created_at = datetime.now(UTC)

    record_data = {
        "action": request.action,
        "description": request.description,
        "status": ApprovalStatus.APPROVED if auto_approved else ApprovalStatus.PENDING,
        "risk_level": risk_level,
        "required_signatures": required_sigs,
        "current_signatures": 0,
        "signatures": [],
        "blast_radius": radius_payload,
        "dry_run_checks": checks_payload,
        "requested_by": request.requested_by,
        "expires_at": request.expires_at,
        "extra_metadata": extra or None,
        "resolved_at": created_at if auto_approved else None,
        # Strategy B: alert-storm convergence
        "fingerprint": fingerprint,
        "hit_count": 1,
        "last_seen_at": created_at,
        # 2026-04-14: incident_id used to be missing from this dict, leaving
        # the DB column NULL and the Telegram card without an INC number.
        "incident_id": request.incident_id,
        # B5 fix 2026-04-24: matched_playbook_id was missing, so the playbook
        # trust score never updated after a manual review.
        "matched_playbook_id": getattr(request, "matched_playbook_id", None),
    }
    if reserved_id:
        record_data["id"] = reserved_id
    return record_data
|
||
|
||
|
||
def _record_value(value: Any) -> str:
|
||
if hasattr(value, "value"):
|
||
value = value.value
|
||
text = str(value or "").strip()
|
||
if "." in text:
|
||
text = text.rsplit(".", 1)[-1]
|
||
return text.lower()
|
||
|
||
|
||
def _record_int(value: Any) -> int:
|
||
try:
|
||
return int(value or 0)
|
||
except (TypeError, ValueError):
|
||
return 0
|
||
|
||
|
||
def _approval_gate_stage(record: ApprovalRecord) -> str:
    """
    Map a record's status (and signature counters) to a gate-stage label.

    An approved record with 0 current and 0 required signatures is reported
    as auto-approved; unknown statuses map to ``"approval_status_recorded"``.
    """
    state = _record_value(record.status)
    signed = _record_int(getattr(record, "current_signatures", 0))
    needed = _record_int(getattr(record, "required_signatures", 0))

    if state == "approved":
        if signed == 0 and needed == 0:
            return "approval_auto_approved"
        return "approval_approved"

    stage_by_status = {
        "pending": "approval_required",
        "execution_success": "execution_verified",
        "execution_failed": "execution_failed",
        "rejected": "approval_rejected",
        "expired": "approval_expired",
    }
    return stage_by_status.get(state, "approval_status_recorded")
|
||
|
||
|
||
def _approval_gate_status(record: ApprovalRecord) -> str:
    """
    Translate the record status into a timeline severity.

    Returns ``"warning"`` for pending, ``"success"`` for approved or
    execution_success, ``"error"`` for rejected, expired or execution_failed,
    and ``"info"`` for anything else.
    """
    severity_by_status = {
        "pending": "warning",
        "approved": "success",
        "execution_success": "success",
        "rejected": "error",
        "expired": "error",
        "execution_failed": "error",
    }
    return severity_by_status.get(_record_value(record.status), "info")
|
||
|
||
|
||
def _approval_needs_human(record: ApprovalRecord) -> bool:
    """Return True when the record is pending and still short of required signatures."""
    state = _record_value(record.status)
    signed = _record_int(getattr(record, "current_signatures", 0))
    needed = _record_int(getattr(record, "required_signatures", 0))
    is_pending = state == "pending"
    return is_pending and signed < needed
|
||
|
||
|
||
def _approval_next_action(record: ApprovalRecord) -> str:
    """
    Suggest the next step for the record's current status.

    A pending record yields ``"operator_approve_or_reject"`` while signatures
    are still missing and ``"execute_or_verify"`` otherwise. Unknown statuses
    yield ``"review_status_chain"``.
    """
    state = _record_value(record.status)
    if state == "pending":
        if _approval_needs_human(record):
            return "operator_approve_or_reject"
        return "execute_or_verify"

    action_by_status = {
        "approved": "execute_or_verify",
        "execution_success": "verify_or_close",
        "execution_failed": "manual_fix_or_rollback",
        "rejected": "review_or_close",
        "expired": "review_or_close",
    }
    return action_by_status.get(state, "review_status_chain")
|
||
|
||
|
||
def _approval_blocked_reason(record: ApprovalRecord) -> str:
    """
    Explain why the gate is blocked, or return ``"none"`` when it is not.

    A pending record is blocked only while it still needs signatures.
    """
    state = _record_value(record.status)
    if state == "pending":
        if _approval_needs_human(record):
            return "waiting_for_required_signatures"
        return "none"

    reason_by_status = {
        "execution_failed": "execution_failed",
        "rejected": "operator_rejected",
        "expired": "approval_expired",
    }
    return reason_by_status.get(state, "none")
|
||
|
||
|
||
def _approval_decision_mode(record: ApprovalRecord) -> str:
    """
    Classify the decision as ``"auto"`` or ``"manual"``.

    ``"auto"`` requires all of: no outstanding human signature, no signature
    collected so far, risk level ``low``, and zero required signatures.
    Every other combination is ``"manual"``.
    """
    signed = _record_int(getattr(record, "current_signatures", 0))
    needed = _record_int(getattr(record, "required_signatures", 0))
    risk = _record_value(record.risk_level)

    human_involved = _approval_needs_human(record) or signed > 0
    auto_eligible = risk == "low" and needed == 0
    return "auto" if auto_eligible and not human_involved else "manual"
|
||
|
||
|
||
def build_approval_created_timeline_event(record: ApprovalRecord) -> TimelineEvent:
    """
    Build the audit-rail ``TimelineEvent`` mirroring a newly created approval gate.

    The description is a ``"; "``-joined list of ``key=value`` facts (stage,
    next action, blocked reason, decision mode, human need, risk level,
    signature progress, and the action truncated to 240 characters).
    The event is only constructed here, not added to any session.
    """
    signed = _record_int(getattr(record, "current_signatures", 0))
    needed = _record_int(getattr(record, "required_signatures", 0))
    risk_level = _record_value(record.risk_level)
    needs_human = _approval_needs_human(record)

    # Insertion order defines the order of the facts in the description.
    facts = {
        "stage": _approval_gate_stage(record),
        "next_action": _approval_next_action(record),
        "blocked_reason": _approval_blocked_reason(record),
        "auto_or_manual": _approval_decision_mode(record),
        "needs_human": "yes" if needs_human else "no",
        "risk_level": risk_level,
        "signatures": f"{signed}/{needed}",
        "action": str(getattr(record, 'action', '') or '')[:240],
    }
    description = "; ".join(f"{key}={val}" for key, val in facts.items())

    if needs_human:
        title = "Approval gate waiting for human decision"
    else:
        title = "Approval gate passed"

    return TimelineEvent(
        event_type="human",
        status=_approval_gate_status(record),
        title=title,
        description=description,
        actor=getattr(record, "requested_by", None),
        actor_role="approval_gate",
        risk_level=risk_level,
        approval_id=str(record.id),
        incident_id=getattr(record, "incident_id", None),
    )
|
||
|
||
|
||
def add_approval_created_timeline_event(db: Any, record: ApprovalRecord) -> TimelineEvent:
    """Build the creation timeline event for *record*, add it to *db*, and return it."""
    timeline_event = build_approval_created_timeline_event(record)
    db.add(timeline_event)
    return timeline_event
|
||
|
||
|
||
# =============================================================================
|
||
# Database Approval Service
|
||
# =============================================================================
|
||
|
||
class ApprovalDBService:
|
||
"""
|
||
資料庫授權服務 - 替代 in-memory TrustEngine
|
||
|
||
所有操作皆為資料庫 CRUD,重啟後資料保持
|
||
|
||
Phase 16 R3.4: 支援 Repository 注入
|
||
- 有注入 repository: 使用 Repository 層
|
||
- 無注入: 使用內嵌 DB 操作 (向下相容)
|
||
"""
|
||
|
||
def __init__(self, repository=None):
|
||
"""
|
||
初始化 ApprovalDBService
|
||
|
||
Args:
|
||
repository: IApprovalRepository 實例 (可選,Phase 16 DI)
|
||
"""
|
||
self._repository = repository
|
||
|
||
async def create_approval(
|
||
self,
|
||
request: ApprovalRequestCreate,
|
||
) -> ApprovalRequest:
|
||
"""
|
||
建立新授權請求 (寫入資料庫)
|
||
"""
|
||
# 分類風險
|
||
risk_level = classify_risk(
|
||
action=request.action,
|
||
blast_radius=request.blast_radius,
|
||
explicit_level=request.risk_level,
|
||
)
|
||
|
||
# 取得所需簽核數
|
||
required_sigs = get_required_signatures(risk_level)
|
||
|
||
# 準備資料
|
||
data = approval_request_to_record_data(request, risk_level, required_sigs)
|
||
|
||
async with get_db_context() as db:
|
||
record = ApprovalRecord(**data)
|
||
db.add(record)
|
||
await db.flush()
|
||
await db.refresh(record)
|
||
add_approval_created_timeline_event(db, record)
|
||
|
||
logger.info(
|
||
"approval_created_db",
|
||
id=record.id,
|
||
risk_level=risk_level.value,
|
||
status=record.status.value if hasattr(record.status, 'value') else record.status,
|
||
)
|
||
|
||
return approval_record_to_request(record)
|
||
|
||
# =========================================================================
|
||
# 戰略 B: 告警風暴收斂
|
||
# =========================================================================
|
||
|
||
async def create_approval_with_fingerprint(
|
||
self,
|
||
request: ApprovalRequestCreate,
|
||
fingerprint: str,
|
||
) -> ApprovalRequest:
|
||
"""
|
||
建立帶指紋的授權請求 (戰略 B)
|
||
|
||
用於告警收斂:相同指紋的告警會被聚合。
|
||
|
||
ADR-073 補丁 2026-04-15 ogt + Claude Sonnet 4.6:
|
||
所有 webhook 路徑都未傳 expires_at,導致 DB 欄位為 NULL,
|
||
get_pending_approvals() 的自動過期邏輯 (WHERE expires_at < now)
|
||
永遠不觸發,殭屍 PENDING 記錄無限堆積。
|
||
修正:凡未傳 expires_at,自動注入 48h 預設值。
|
||
"""
|
||
DEFAULT_APPROVAL_TTL_HOURS = 48 # 給人類 48h 決定視窗
|
||
|
||
if not request.expires_at:
|
||
now = datetime.now(UTC)
|
||
request = request.model_copy(
|
||
update={"expires_at": now + timedelta(hours=DEFAULT_APPROVAL_TTL_HOURS)}
|
||
)
|
||
|
||
risk_level = classify_risk(
|
||
action=request.action,
|
||
blast_radius=request.blast_radius,
|
||
explicit_level=request.risk_level,
|
||
)
|
||
required_sigs = get_required_signatures(risk_level)
|
||
data = approval_request_to_record_data(request, risk_level, required_sigs, fingerprint=fingerprint)
|
||
|
||
async with get_db_context() as db:
|
||
record = ApprovalRecord(**data)
|
||
db.add(record)
|
||
await db.flush()
|
||
await db.refresh(record)
|
||
add_approval_created_timeline_event(db, record)
|
||
|
||
logger.info(
|
||
"approval_created_with_fingerprint",
|
||
id=record.id,
|
||
fingerprint=fingerprint,
|
||
risk_level=risk_level.value,
|
||
)
|
||
|
||
return approval_record_to_request(record)
|
||
|
||
async def find_by_fingerprint(
|
||
self,
|
||
fingerprint: str,
|
||
debounce_minutes: int = 5,
|
||
) -> ApprovalRequest | None:
|
||
"""
|
||
根據指紋查詢現有的告警記錄 (戰略 B)
|
||
|
||
查詢條件:
|
||
1. 相同指紋
|
||
2. 狀態為 PENDING 且在 24 小時內建立(超過 24h 的 PENDING 視為過期,不再收斂)
|
||
3. 或在 debounce_minutes 分鐘內建立(不論狀態)
|
||
|
||
ADR-073 補丁 2026-04-15 ogt + Claude Sonnet 4.6:
|
||
原邏輯 PENDING 無 TTL → 3 天前 PENDING 記錄永久封鎖同指紋告警。
|
||
修正:PENDING 收斂窗口上限 PENDING_TTL_HOURS(24h)。
|
||
|
||
Returns:
|
||
ApprovalRequest if found, None otherwise
|
||
"""
|
||
PENDING_TTL_HOURS = 24 # PENDING 記錄最長收斂時效(超過則視為已過期)
|
||
|
||
now = datetime.now(UTC)
|
||
cutoff_time = now - timedelta(minutes=debounce_minutes)
|
||
pending_cutoff = now - timedelta(hours=PENDING_TTL_HOURS)
|
||
|
||
async with get_db_context() as db:
|
||
result = await db.execute(
|
||
select(ApprovalRecord)
|
||
.where(ApprovalRecord.fingerprint == fingerprint)
|
||
.where(
|
||
or_(
|
||
# PENDING 狀態但必須在 24h 內,防止老 PENDING 永久封鎖
|
||
and_(
|
||
ApprovalRecord.status == ApprovalStatus.PENDING,
|
||
ApprovalRecord.created_at >= pending_cutoff,
|
||
),
|
||
# 最近 debounce_minutes 分鐘內建立的任何記錄
|
||
ApprovalRecord.created_at >= cutoff_time,
|
||
)
|
||
)
|
||
.order_by(ApprovalRecord.created_at.desc())
|
||
.limit(1)
|
||
)
|
||
record = result.scalar_one_or_none()
|
||
|
||
if record:
|
||
logger.info(
|
||
"fingerprint_match_found",
|
||
fingerprint=fingerprint,
|
||
approval_id=record.id,
|
||
hit_count=record.hit_count,
|
||
status=record.status.value if hasattr(record.status, 'value') else record.status,
|
||
)
|
||
|
||
# 2026-04-20 ogt + Claude Opus 4.7: ADR-092 tg_sent Redis 驗證
|
||
# PENDING 記錄不代表 Telegram 已發送(可能因網路/Token錯誤而靜默失敗)
|
||
# 僅在 debounce 窗口外的 PENDING 收斂時,必須確認 Redis 有 tg_sent 標記
|
||
within_debounce = record.created_at >= cutoff_time
|
||
if not within_debounce:
|
||
try:
|
||
r = get_redis()
|
||
tg_confirmed = await r.exists(f"tg_sent:{fingerprint}")
|
||
except Exception as _re:
|
||
tg_confirmed = False
|
||
logger.warning("tg_sent_redis_check_failed", fingerprint=fingerprint, error=str(_re))
|
||
|
||
if not tg_confirmed:
|
||
logger.warning(
|
||
"fingerprint_pending_no_tg_confirmation",
|
||
fingerprint=fingerprint,
|
||
approval_id=str(record.id),
|
||
created_at=record.created_at.isoformat(),
|
||
)
|
||
return None # 視為新告警,重新發送 Telegram
|
||
|
||
return approval_record_to_request(record)
|
||
|
||
return None
|
||
|
||
async def mark_telegram_confirmed(self, fingerprint: str, ttl: int = 108000) -> None:
|
||
"""
|
||
2026-04-20 ogt + Claude Opus 4.7: ADR-092
|
||
記錄 Telegram 已成功發送,防止 PENDING 誤收斂造成永久靜默。
|
||
ADR-092 B3 (2026-04-24 ogt + Claude Sonnet 4.6 Asia/Taipei):
|
||
TTL 從 86400s(24h)延長至 108000s(30h = 24h + 6h buffer),
|
||
避免 PENDING 存活期與 tg_sent key 同時到期時的邊緣誤判。
|
||
W-2 現已改用 telegram_message_id IS NULL 判斷(不再依賴此 key),
|
||
此 TTL 保留供收斂去重邏輯(approval_db.py 第 342 行)使用。
|
||
"""
|
||
try:
|
||
r = get_redis()
|
||
await r.setex(f"tg_sent:{fingerprint}", ttl, "1")
|
||
logger.debug("tg_sent_marked", fingerprint=fingerprint, ttl=ttl)
|
||
except Exception as e:
|
||
logger.warning("tg_sent_mark_failed", fingerprint=fingerprint, error=str(e))
|
||
|
||
async def increment_hit_count(
|
||
self,
|
||
approval_id: UUID,
|
||
) -> ApprovalRequest | None:
|
||
"""
|
||
增加告警聚合次數 (戰略 B)
|
||
|
||
當相同指紋的告警再次觸發時:
|
||
1. hit_count += 1
|
||
2. last_seen_at = now
|
||
|
||
這樣可以跳過 LLM 分析,節省 API 成本!
|
||
"""
|
||
now = datetime.now(UTC)
|
||
|
||
async with get_db_context() as db:
|
||
# 更新 hit_count 和 last_seen_at
|
||
result = await db.execute(
|
||
update(ApprovalRecord)
|
||
.where(ApprovalRecord.id == str(approval_id))
|
||
.values(
|
||
hit_count=ApprovalRecord.hit_count + 1,
|
||
last_seen_at=now,
|
||
)
|
||
.returning(ApprovalRecord.hit_count)
|
||
)
|
||
new_count = result.scalar_one_or_none()
|
||
|
||
if new_count is None:
|
||
return None
|
||
|
||
# 重新讀取完整記錄
|
||
result = await db.execute(
|
||
select(ApprovalRecord).where(ApprovalRecord.id == str(approval_id))
|
||
)
|
||
record = result.scalar_one_or_none()
|
||
|
||
if record:
|
||
logger.info(
|
||
"hit_count_incremented",
|
||
approval_id=str(approval_id),
|
||
new_hit_count=new_count,
|
||
last_seen_at=now.isoformat(),
|
||
)
|
||
return approval_record_to_request(record)
|
||
|
||
return None
|
||
|
||
async def get_approval(self, approval_id: UUID) -> ApprovalRequest | None:
|
||
"""
|
||
取得單一授權請求
|
||
|
||
Phase 16 R3.4: 支援 Repository 注入
|
||
"""
|
||
# Phase 16: 使用 Repository (如果有注入)
|
||
if self._repository:
|
||
return await self._repository.get_by_id(approval_id)
|
||
|
||
# Legacy: 內嵌 DB 操作
|
||
async with get_db_context() as db:
|
||
result = await db.execute(
|
||
select(ApprovalRecord).where(ApprovalRecord.id == str(approval_id))
|
||
)
|
||
record = result.scalar_one_or_none()
|
||
|
||
if record is None:
|
||
return None
|
||
|
||
return approval_record_to_request(record)
|
||
|
||
async def get_pending_approvals(self) -> list[ApprovalRequest]:
|
||
"""
|
||
取得所有待簽核請求
|
||
|
||
Phase 16 R3.4: 支援 Repository 注入
|
||
"""
|
||
# Phase 16: 使用 Repository (如果有注入)
|
||
if self._repository:
|
||
return await self._repository.get_pending()
|
||
|
||
# Legacy: 內嵌 DB 操作
|
||
now = datetime.now(UTC)
|
||
|
||
async with get_db_context() as db:
|
||
# 先更新過期的請求
|
||
await db.execute(
|
||
update(ApprovalRecord)
|
||
.where(ApprovalRecord.status == ApprovalStatus.PENDING)
|
||
.where(ApprovalRecord.expires_at < now)
|
||
.values(status=ApprovalStatus.EXPIRED, resolved_at=now)
|
||
)
|
||
|
||
# 取得所有 PENDING
|
||
result = await db.execute(
|
||
select(ApprovalRecord)
|
||
.where(ApprovalRecord.status == ApprovalStatus.PENDING)
|
||
.order_by(ApprovalRecord.created_at.desc())
|
||
)
|
||
records = result.scalars().all()
|
||
|
||
return [approval_record_to_request(r) for r in records]
|
||
|
||
async def sign_approval(
|
||
self,
|
||
approval_id: UUID,
|
||
signer_id: str,
|
||
signer_name: str,
|
||
comment: str | None = None,
|
||
) -> tuple[ApprovalRequest | None, str, bool]:
|
||
"""
|
||
簽核授權請求
|
||
|
||
Phase 5: 使用 FOR UPDATE 行鎖防止 Race Condition
|
||
當多人同時簽核時,確保只有一人能成功取得鎖並更新
|
||
|
||
Returns:
|
||
(approval, message, execution_triggered)
|
||
"""
|
||
async with get_db_context() as db:
|
||
# Phase 5: FOR UPDATE 行級鎖 - 防止併發簽核競爭
|
||
# SQLite 不支援 FOR UPDATE,但 PostgreSQL 完整支援
|
||
result = await db.execute(
|
||
select(ApprovalRecord)
|
||
.where(ApprovalRecord.id == str(approval_id))
|
||
.with_for_update() # Row-Level Lock
|
||
)
|
||
record = result.scalar_one_or_none()
|
||
|
||
logger.info(
|
||
"sign_approval_lock_acquired",
|
||
approval_id=str(approval_id),
|
||
signer_id=signer_id,
|
||
)
|
||
|
||
if record is None:
|
||
return None, "Approval not found", False
|
||
|
||
# 檢查狀態
|
||
status_value = record.status.value if hasattr(record.status, 'value') else record.status
|
||
if status_value != "pending":
|
||
return (
|
||
approval_record_to_request(record),
|
||
f"Cannot sign: status is {status_value}",
|
||
False,
|
||
)
|
||
|
||
# 檢查是否已簽核
|
||
signatures = record.signatures or []
|
||
for sig in signatures:
|
||
if sig.get("signer_id") == signer_id:
|
||
return (
|
||
approval_record_to_request(record),
|
||
f"User {signer_name} has already signed this approval",
|
||
False,
|
||
)
|
||
|
||
# Phase 5: 樂觀鎖 - 記錄更新前的簽名數
|
||
old_sig_count = record.current_signatures
|
||
|
||
# 新增簽章
|
||
new_signature = {
|
||
"signer_id": signer_id,
|
||
"signer_name": signer_name,
|
||
"timestamp": datetime.now(UTC).isoformat(),
|
||
"comment": comment,
|
||
}
|
||
signatures.append(new_signature)
|
||
new_sig_count = len(signatures)
|
||
|
||
# 計算新狀態
|
||
execution_triggered = False
|
||
new_status = record.status
|
||
resolved_at = None
|
||
if new_sig_count >= record.required_signatures:
|
||
new_status = ApprovalStatus.APPROVED
|
||
resolved_at = datetime.now(UTC)
|
||
execution_triggered = is_executable_repair_approval_action(
|
||
record.action
|
||
)
|
||
|
||
# Phase 5: 樂觀鎖更新 - 使用 WHERE current_signatures = old_value
|
||
# 如果其他人已更新,這個 UPDATE 會更新 0 行
|
||
metadata = dict(record.extra_metadata or {})
|
||
if is_no_action_approval_action(record.action):
|
||
metadata["execution_kind"] = metadata.get("execution_kind") or "no_action"
|
||
metadata["repair_executed"] = False
|
||
metadata["repair_attempted"] = False
|
||
metadata["execution_suppressed_reason"] = (
|
||
"approval_action_has_no_executable_repair"
|
||
)
|
||
|
||
result = await db.execute(
|
||
update(ApprovalRecord)
|
||
.where(and_(
|
||
ApprovalRecord.id == str(approval_id),
|
||
ApprovalRecord.current_signatures == old_sig_count, # 樂觀鎖條件
|
||
))
|
||
.values(
|
||
signatures=signatures,
|
||
current_signatures=new_sig_count,
|
||
status=new_status,
|
||
resolved_at=resolved_at,
|
||
extra_metadata=metadata,
|
||
)
|
||
)
|
||
|
||
# 檢查是否更新成功
|
||
if result.rowcount == 0:
|
||
logger.warning(
|
||
"sign_approval_optimistic_lock_conflict",
|
||
approval_id=str(approval_id),
|
||
signer_id=signer_id,
|
||
old_sig_count=old_sig_count,
|
||
)
|
||
return (
|
||
approval_record_to_request(record),
|
||
"Concurrent modification detected. Please retry.",
|
||
False,
|
||
)
|
||
|
||
# 重新讀取更新後的記錄
|
||
result = await db.execute(
|
||
select(ApprovalRecord).where(ApprovalRecord.id == str(approval_id))
|
||
)
|
||
record = result.scalar_one()
|
||
|
||
if execution_triggered:
|
||
message = f"Approval complete! ({new_sig_count}/{record.required_signatures} signatures)"
|
||
else:
|
||
message = f"Signature added ({new_sig_count}/{record.required_signatures})"
|
||
|
||
logger.info(
|
||
"approval_signed_db",
|
||
id=record.id,
|
||
signer=signer_name,
|
||
current=record.current_signatures,
|
||
required=record.required_signatures,
|
||
execution_triggered=execution_triggered,
|
||
)
|
||
|
||
return approval_record_to_request(record), message, execution_triggered
|
||
|
||
async def reject_approval(
|
||
self,
|
||
approval_id: UUID,
|
||
rejector_id: str,
|
||
rejector_name: str,
|
||
reason: str,
|
||
) -> tuple[ApprovalRequest | None, str]:
|
||
"""
|
||
拒絕授權請求
|
||
"""
|
||
async with get_db_context() as db:
|
||
result = await db.execute(
|
||
select(ApprovalRecord).where(ApprovalRecord.id == str(approval_id))
|
||
)
|
||
record = result.scalar_one_or_none()
|
||
|
||
if record is None:
|
||
return None, "Approval not found"
|
||
|
||
status_value = record.status.value if hasattr(record.status, 'value') else record.status
|
||
if status_value != "pending":
|
||
return (
|
||
approval_record_to_request(record),
|
||
f"Cannot reject: status is {status_value}",
|
||
)
|
||
|
||
record.status = ApprovalStatus.REJECTED
|
||
record.rejection_reason = f"{rejector_name}: {reason}"
|
||
record.resolved_at = datetime.now(UTC)
|
||
|
||
await db.flush()
|
||
await db.refresh(record)
|
||
|
||
logger.info(
|
||
"approval_rejected_db",
|
||
id=record.id,
|
||
rejector=rejector_name,
|
||
reason=reason,
|
||
)
|
||
|
||
return approval_record_to_request(record), "Approval rejected"
|
||
|
||
async def update_execution_status(
|
||
self,
|
||
approval_id: UUID,
|
||
success: bool,
|
||
error_message: str | None = None,
|
||
execution_kind: str | None = None,
|
||
repair_executed: bool | None = None,
|
||
repair_attempted: bool | None = None,
|
||
) -> None:
|
||
"""
|
||
更新執行狀態
|
||
|
||
2026-04-18 ogt + Claude Opus 4.7: ADR-090 L5 斷鏈修復 — P0.2
|
||
失敗時必寫 rejection_reason,讓診斷不再黑盒
|
||
(之前 EXECUTION_FAILED 216 筆 reason 全空)
|
||
"""
|
||
async with get_db_context() as db:
|
||
status = ApprovalStatus.EXECUTION_SUCCESS if success else ApprovalStatus.EXECUTION_FAILED
|
||
result = await db.execute(
|
||
select(ApprovalRecord).where(ApprovalRecord.id == str(approval_id))
|
||
)
|
||
record = result.scalar_one_or_none()
|
||
if record is None:
|
||
logger.warning(
|
||
"approval_execution_status_update_missing",
|
||
id=str(approval_id),
|
||
success=success,
|
||
)
|
||
return
|
||
|
||
record.status = status
|
||
if not success and error_message:
|
||
# 截斷至合理長度,避免爆欄位
|
||
record.rejection_reason = str(error_message)[:2000]
|
||
if execution_kind:
|
||
# 2026-05-31 ogt + Codex: OBSERVE/NO_ACTION 仍需 terminal 狀態,
|
||
# 但前台/報表必須能分辨「未執行修復」而非真正 execution success。
|
||
metadata = dict(record.extra_metadata or {})
|
||
metadata["execution_kind"] = execution_kind
|
||
metadata["repair_executed"] = (
|
||
repair_executed
|
||
if repair_executed is not None
|
||
else execution_kind not in {
|
||
"no_action",
|
||
"diagnostic",
|
||
"parse_failed",
|
||
"unsupported_action",
|
||
}
|
||
)
|
||
if repair_attempted is not None:
|
||
metadata["repair_attempted"] = repair_attempted
|
||
record.extra_metadata = metadata
|
||
|
||
logger.info(
|
||
"approval_execution_status_updated",
|
||
id=str(approval_id),
|
||
success=success,
|
||
has_error=bool(error_message),
|
||
execution_kind=execution_kind,
|
||
)
|
||
|
||
async def update_incident_id(self, approval_id: UUID, incident_id: str) -> None:
|
||
"""
|
||
2026-04-06 ogt: Phase 26 — 回寫 incident_id 到 approval_records
|
||
讓 Playbook 萃取和 KM 寫入能找到對應的 Incident
|
||
|
||
2026-04-14 Claude Sonnet 4.6 診斷: Live-fire #7 發現 approval.incident_id 仍 NULL
|
||
加 rowcount 與 pre/post 值檢查,若 0 rows affected 則 log warning
|
||
"""
|
||
async with get_db_context() as db:
|
||
result = await db.execute(
|
||
update(ApprovalRecord)
|
||
.where(ApprovalRecord.id == str(approval_id))
|
||
.values(incident_id=incident_id)
|
||
)
|
||
rowcount = result.rowcount if hasattr(result, "rowcount") else -1
|
||
if rowcount == 0:
|
||
# 找不到對應 approval — 可能 id 型別或 session 不同步
|
||
logger.warning(
|
||
"update_incident_id_zero_rows",
|
||
approval_id=str(approval_id),
|
||
approval_id_type=type(approval_id).__name__,
|
||
incident_id=incident_id,
|
||
reason="UPDATE 0 rows affected — approval 不存在或 id mismatch",
|
||
)
|
||
else:
|
||
logger.info(
|
||
"update_incident_id_success",
|
||
approval_id=str(approval_id),
|
||
incident_id=incident_id,
|
||
rowcount=rowcount,
|
||
)
|
||
|
||
async def update_telegram_message(
|
||
self, incident_id: str, telegram_message_id: int, telegram_chat_id: int | None = None
|
||
) -> None:
|
||
"""
|
||
2026-04-09 Claude Sonnet 4.6: 持久化 Telegram message_id 到 DB
|
||
讓告警訊息 ID 不再只存 Redis(24h TTL),支援長期狀態追蹤和訊息更新。
|
||
以 incident_id 查找最新 PENDING approval record 並回填。
|
||
"""
|
||
async with get_db_context() as db:
|
||
from sqlalchemy import text as _text
|
||
params: dict = {
|
||
"incident_id": incident_id,
|
||
"telegram_message_id": telegram_message_id,
|
||
"status": "PENDING",
|
||
}
|
||
chat_clause = ""
|
||
if telegram_chat_id is not None:
|
||
params["telegram_chat_id"] = telegram_chat_id
|
||
chat_clause = ", telegram_chat_id = :telegram_chat_id"
|
||
await db.execute(
|
||
_text(f"""
|
||
UPDATE approval_records
|
||
SET telegram_message_id = :telegram_message_id{chat_clause}
|
||
WHERE incident_id = :incident_id
|
||
AND status = :status
|
||
"""),
|
||
params,
|
||
)
|
||
|
||
async def update_action_by_incident_id(self, incident_id: str, new_action: str) -> int:
|
||
"""
|
||
Agent Orchestrator 分析完成後覆寫 ApprovalRecord.action。
|
||
|
||
設計動機 (2026-04-16 ogt + Claude Sonnet 4.6):
|
||
- Webhook inline LLM 寫入垃圾 action(如 kubectl rollout restart for postgres disk)
|
||
- Agent 分析正確但只發新 Telegram 卡,未覆寫 ApprovalRecord
|
||
- 用戶批准 Agent 卡 → 系統查 incident_id → 執行舊 webhook 垃圾 action
|
||
- 修復:Agent 完成後呼叫此方法,讓用戶批准時執行正確 action
|
||
|
||
Args:
|
||
incident_id: INC-xxx 格式 Incident ID
|
||
new_action: Agent 決定的 action(空字串 → 不覆寫)
|
||
|
||
Returns:
|
||
int: rowcount(0 表示找不到對應 PENDING approval)
|
||
"""
|
||
if not new_action:
|
||
return 0
|
||
async with get_db_context() as db:
|
||
from sqlalchemy import text as _text
|
||
result = await db.execute(
|
||
_text("""
|
||
UPDATE approval_records
|
||
SET action = :new_action
|
||
WHERE incident_id = :incident_id
|
||
AND status = 'PENDING'
|
||
"""),
|
||
{"incident_id": incident_id, "new_action": new_action},
|
||
)
|
||
rowcount = result.rowcount if hasattr(result, "rowcount") else -1
|
||
logger.info(
|
||
"approval_action_updated_by_agent",
|
||
incident_id=incident_id,
|
||
new_action=new_action[:80],
|
||
rowcount=rowcount,
|
||
)
|
||
return rowcount
|
||
|
||
async def update_decision_fusion(
|
||
self,
|
||
incident_id: str,
|
||
composite_score: float,
|
||
complexity_tier: str,
|
||
fusion_details: dict,
|
||
) -> int:
|
||
"""
|
||
P2.1 DecisionFusionEngine 結果回寫到 approval_records。
|
||
|
||
2026-04-26 P2-DB-Fix by Claude — db-expert P0 三修(P0.3):
|
||
ADR-085 鐵律:fusion 分數必須落地 PG,不能只存 Redis token
|
||
|
||
Args:
|
||
incident_id: INC-xxx 格式 Incident ID
|
||
composite_score: FusionScore.composite(0.0-1.0)
|
||
complexity_tier: ComplexityTier.value(low/medium/high/critical)
|
||
fusion_details: FusionScore.to_dict() 完整 dict
|
||
|
||
Returns:
|
||
int: rowcount(0 表示找不到對應 PENDING approval)
|
||
"""
|
||
async with get_db_context() as db:
|
||
result = await db.execute(
|
||
update(ApprovalRecord)
|
||
.where(
|
||
and_(
|
||
ApprovalRecord.incident_id == incident_id,
|
||
ApprovalRecord.status == ApprovalStatus.PENDING,
|
||
)
|
||
)
|
||
.values(
|
||
composite_score=composite_score,
|
||
complexity_tier=complexity_tier,
|
||
decision_fusion_details=fusion_details,
|
||
)
|
||
)
|
||
rowcount = result.rowcount if hasattr(result, "rowcount") else -1
|
||
|
||
logger.info(
|
||
"approval_decision_fusion_updated",
|
||
incident_id=incident_id,
|
||
composite_score=composite_score,
|
||
complexity_tier=complexity_tier,
|
||
rowcount=rowcount,
|
||
)
|
||
return rowcount
|
||
|
||
# =========================================================================
|
||
# Phase 6.4h: Proposals API 支援方法
|
||
# =========================================================================
|
||
|
||
async def get_approval_by_id(self, approval_id: UUID) -> ApprovalRequest | None:
    """
    Fetch a single approval request by its ID (Phase 6.4h).

    Args:
        approval_id: UUID of the approval request; it is compared against
            the stored ID in its string form.

    Returns:
        The matching ApprovalRequest, or None when no record has that ID.
    """
    async with get_db_context() as db:
        lookup = select(ApprovalRecord).where(ApprovalRecord.id == str(approval_id))
        row = (await db.execute(lookup)).scalar_one_or_none()

        # Convert while the session is still open so the record's
        # attributes are readable.
        return None if row is None else approval_record_to_request(row)
|
||
|
||
async def get_all_approvals(
    self,
    status: ApprovalStatus | None = None,
    incident_id: str | None = None,
    limit: int = 50,
    offset: int = 0,
) -> list[ApprovalRequest]:
    """
    List approval requests, newest first (Phase 6.4h).

    Args:
        status: Optional status filter; skipped when None.
        incident_id: Optional Incident ID filter; skipped when empty or None.
        limit: Page size.
        offset: Number of rows to skip.

    Returns:
        List of ApprovalRequest objects ordered by creation time, descending.
    """
    criteria = []

    # Status filter
    if status is not None:
        criteria.append(ApprovalRecord.status == status)

    # 2026-04-09 Claude Sonnet 4.6: fixed the incident_id filter — use the DB
    # column directly. The old version looked up a.metadata.get("incident_id")
    # in the application layer, but ApprovalRecord.incident_id is a real
    # column, not part of the extra_metadata JSON, which caused
    # telegram_approval_not_found_by_incident.
    if incident_id:
        criteria.append(ApprovalRecord.incident_id == incident_id)

    async with get_db_context() as db:
        statement = select(ApprovalRecord)
        for criterion in criteria:
            statement = statement.where(criterion)
        statement = (
            statement.order_by(ApprovalRecord.created_at.desc())
            .offset(offset)
            .limit(limit)
        )

        rows = (await db.execute(statement)).scalars().all()
        return [approval_record_to_request(row) for row in rows]
|
||
|
||
|
||
# =============================================================================
|
||
# Timeline Event Service
|
||
# =============================================================================
|
||
|
||
class TimelineDBService:
    """
    Timeline event service — persistence for the Phase 4 Action Timeline.
    """

    async def add_event(
        self,
        event_type: str,
        status: str,
        title: str,
        description: str | None = None,
        actor: str | None = None,
        actor_role: str | None = None,
        risk_level: str | None = None,
        approval_id: str | None = None,
        incident_id: str | None = None,
    ) -> dict[str, Any]:
        """
        Insert a new timeline event and return a short summary of it.

        Args:
            event_type: Event type, stored as ``TimelineEvent.event_type``.
            status: Event status.
            title: Event title.
            description: Optional longer description.
            actor: Optional actor name.
            actor_role: Optional role of the actor.
            risk_level: Optional risk level label.
            approval_id: Optional ID of the related approval.
            incident_id: Optional ID of the related incident.

        Returns:
            Dict with the keys ``id``, ``type``, ``status``, ``title``,
            ``incident_id`` and ``created_at`` (ISO-8601 string).
        """
        async with get_db_context() as db:
            event = TimelineEvent(
                event_type=event_type,
                status=status,
                title=title,
                description=description,
                actor=actor,
                actor_role=actor_role,
                risk_level=risk_level,
                approval_id=approval_id,
                incident_id=incident_id,
            )
            db.add(event)
            # Flush and refresh so that fields not set above (id, created_at)
            # are loaded before they are read below.
            await db.flush()
            await db.refresh(event)

            logger.info(
                "timeline_event_added",
                id=event.id,
                type=event_type,
                title=title,
                incident_id=incident_id,
            )

            return {
                "id": event.id,
                "type": event.event_type,
                "status": event.status,
                "title": event.title,
                "incident_id": event.incident_id,
                "created_at": event.created_at.isoformat(),
            }

    async def get_events(
        self,
        limit: int = 50,
        incident_id: str | None = None,
        approval_ids: list[str] | None = None,
    ) -> list[dict[str, Any]]:
        """
        Return the most recent timeline events, newest first.

        Args:
            limit: Maximum number of events to return.
            incident_id: When given, only events of this incident are
                returned (plus those matched through ``approval_ids``).
            approval_ids: Extra approval IDs whose events are included
                alongside the incident's own events. This is only honoured
                when ``incident_id`` is given; without an incident filter
                it is ignored and all recent events are returned.

        Returns:
            List of event dicts ordered by ``created_at`` descending.
        """
        async with get_db_context() as db:
            query = select(TimelineEvent)
            if incident_id:
                # Match on the incident itself OR any of its approvals.
                # `or_` comes from the module-level sqlalchemy import.
                filters = [TimelineEvent.incident_id == incident_id]
                if approval_ids:
                    filters.append(TimelineEvent.approval_id.in_(approval_ids))
                query = query.where(or_(*filters))

            result = await db.execute(
                query.order_by(TimelineEvent.created_at.desc()).limit(limit)
            )
            events = result.scalars().all()

            return [
                {
                    "id": e.id,
                    "type": e.event_type,
                    "status": e.status,
                    "title": e.title,
                    "description": e.description,
                    "actor": e.actor,
                    "actor_role": e.actor_role,
                    "risk_level": e.risk_level,
                    "approval_id": e.approval_id,
                    "incident_id": e.incident_id,
                    "created_at": e.created_at.isoformat(),
                }
                for e in events
            ]
|
||
|
||
|
||
# =============================================================================
|
||
# Singleton Instances
|
||
# =============================================================================
|
||
|
||
# Module-level cache for the shared ApprovalDBService; starts as None and is
# filled in on the first call to get_approval_service().
_approval_service: ApprovalDBService | None = None
|
||
# Module-level cache for the shared TimelineDBService; starts as None and is
# filled in on the first call to get_timeline_service().
_timeline_service: TimelineDBService | None = None
|
||
|
||
|
||
def get_approval_service(use_repository: bool = False) -> ApprovalDBService:
    """
    Return the shared approval service instance, creating it on first use.

    Args:
        use_repository: Whether to back the service with the Repository
            layer (Phase 16 R3.4). Only consulted when the instance is
            first created; later calls return the cached instance
            regardless of this flag.

    Phase 16: strangler pattern
    - use_repository=False: use the embedded DB operations (default,
      backward compatible)
    - use_repository=True: use ApprovalDBRepository
    """
    global _approval_service

    # Already built: hand back the cached instance.
    if _approval_service is not None:
        return _approval_service

    if not use_repository:
        _approval_service = ApprovalDBService()
        return _approval_service

    # Imported lazily, only when the repository-backed variant is requested.
    from src.repositories import get_approval_repository

    _approval_service = ApprovalDBService(repository=get_approval_repository())
    logger.info("approval_service_with_repository")
    return _approval_service
|
||
|
||
|
||
def get_timeline_service() -> TimelineDBService:
    """Return the shared timeline service instance, creating it on first use."""
    global _timeline_service

    if _timeline_service is not None:
        return _timeline_service

    _timeline_service = TimelineDBService()
    return _timeline_service
|