feat(trust): ADR-088 Trust Score 持久化 — L4 自動放行核心
All checks were successful
CD Pipeline / build-and-deploy (push) Successful in 10m40s

TrustScoreManager 從記憶體升級為 PostgreSQL 持久化,
Pod 重啟後信任分數不再歸零,AI 能真正累積到 L4 自動放行門檻。

變更:
- migrations/adr088_trust_score_persistence.sql: trust_records 表
- db/models.py: TrustRecordDB ORM model
- repositories/interfaces.py: ITrustRepository Protocol
- repositories/trust_repository.py: PG upsert ON CONFLICT DO UPDATE
- services/trust_engine.py: bulk_load() 啟動 warm-up
- services/learning_service.py: _persist_trust() + 2 call sites
- main.py: 啟動時 load_all() → bulk_load()

流程: 批准 5 次 → score=5 寫入 DB → Pod 重啟 → warm-up 讀回
      → evaluate_adjusted_risk MEDIUM→LOW → 自動執行

2026-04-17 ogt + Claude Sonnet 4.6(亞太): ADR-088

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-04-17 16:14:33 +08:00
parent 148d59a0e4
commit 9d6aa7ea45
7 changed files with 353 additions and 1 deletions

View File

@@ -0,0 +1,24 @@
-- ADR-088: Trust Score persistence
-- Phase 4+: TrustScoreManager moves from in-memory state to PostgreSQL.
-- Problem solved: after a Pod restart the AI trust scores reset to zero, so
-- they could never accumulate to the L4 auto-approval threshold.
-- 2026-04-17 ogt + Claude Sonnet 4.6 (Asia-Pacific)

-- One row per action pattern. The pattern is the primary key, which is the
-- conflict target used by the repository's ON CONFLICT (action_pattern) upsert.
CREATE TABLE IF NOT EXISTS trust_records (
    action_pattern    VARCHAR(255) PRIMARY KEY,
    score             INTEGER      NOT NULL DEFAULT 0,
    total_approvals   INTEGER      NOT NULL DEFAULT 0,
    total_rejections  INTEGER      NOT NULL DEFAULT 0,
    last_approval_by  VARCHAR(100),
    last_approval_at  TIMESTAMPTZ,
    last_rejection_by VARCHAR(100),
    last_rejection_at TIMESTAMPTZ,
    created_at        TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    updated_at        TIMESTAMPTZ  NOT NULL DEFAULT NOW()
);

-- The two literals below are separated only by a newline, so PostgreSQL
-- concatenates them into a single comment string.
-- Fix: the second literal previously ran "LOW" and "score >= 10" together;
-- the separator between the two threshold rules is restored.
COMMENT ON TABLE trust_records IS
    'ADR-088: TrustScoreManager 持久化層。記錄每個 action_pattern 的累積信任分數,'
    '跨 Pod 重啟存活。score >= 5 → MEDIUM 自動降 LOW;score >= 10 → HIGH 降 MEDIUM。';

-- Secondary indexes on score and on last-update time, both descending.
CREATE INDEX IF NOT EXISTS ix_trust_records_score ON trust_records (score DESC);
CREATE INDEX IF NOT EXISTS ix_trust_records_updated ON trust_records (updated_at DESC);

View File

@@ -1175,3 +1175,56 @@ class AiGovernanceEvent(Base):
Index("ix_ai_governance_triggered_at", "triggered_at"),
Index("ix_ai_governance_resolved", "resolved"),
)
# =============================================================================
# TrustRecordDB - ADR-088 TrustScore 持久化
# =============================================================================
class TrustRecordDB(Base):
    """
    Persistent trust-score record (table ``trust_records``).

    ADR-088: TrustScoreManager moves from in-memory state to PostgreSQL so
    that scores survive Pod restarts and the AI can actually accumulate
    enough trust to reach L4 auto-approval.

    Thresholds as stated by ADR-088:
        score >= 5:  MEDIUM -> LOW (executed automatically)
        score >= 10: HIGH -> MEDIUM (lowered one level)

    2026-04-17 ogt + Claude Sonnet 4.6 (Asia-Pacific): Phase 4 trust persistence
    """

    __tablename__ = "trust_records"

    # Primary key: the action pattern this score applies to.
    action_pattern: Mapped[str] = mapped_column(
        String(255),
        primary_key=True,
        comment="操作模式,例如 delete:nginx-frontend-*",
    )

    # Accumulated trust score (+1 per approve, reset to zero on reject, per
    # the column comment). Fix: the comment text previously ran "approve" and
    # "reject" together with no separator.
    score: Mapped[int] = mapped_column(
        Integer,
        nullable=False,
        default=0,
        comment="累積信任分數。+1/approve,reject 歸零",
    )

    # Lifetime counters.
    total_approvals: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
    total_rejections: Mapped[int] = mapped_column(Integer, nullable=False, default=0)

    # Who last approved / rejected this pattern, and when (all optional).
    last_approval_by: Mapped[str | None] = mapped_column(String(100), nullable=True)
    last_approval_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    last_rejection_by: Mapped[str | None] = mapped_column(String(100), nullable=True)
    last_rejection_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )

    # Row bookkeeping; both default to taipei_now, and updated_at is also
    # refreshed by taipei_now on ORM-level updates.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=taipei_now,
        onupdate=taipei_now,
    )

    # NOTE(review): the ADR-088 SQL migration creates both of these indexes
    # with DESC ordering, while the declarations here use the default
    # ordering. Confirm whether the ORM metadata should mirror the migration.
    __table_args__ = (
        Index("ix_trust_records_score", "score"),
        Index("ix_trust_records_updated", "updated_at"),
    )

View File

@@ -302,6 +302,19 @@ async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
except Exception as e:
logger.warning("working_memory_warmup_failed", error=str(e))
# ADR-088: Trust Score 冷啟動 — 從 PostgreSQL 恢復信任分數
# 解決: Pod 重啟後 TrustScoreManager 記憶歸零,AI 永遠無法累積到 L4 自動放行
# 2026-04-17 ogt + Claude Sonnet 4.6(亞太): Phase 4 信任持久化
try:
from src.repositories.trust_repository import get_trust_repository
from src.services.trust_engine import get_trust_manager
trust_records = await get_trust_repository().load_all()
loaded = get_trust_manager().bulk_load(trust_records)
logger.info("trust_scores_warmed_up", loaded=loaded)
except Exception as e:
logger.warning("trust_scores_warmup_failed", error=str(e))
# Phase 4 飛輪修復: Playbook Embedding 冷啟動索引
# 2026-04-10 Claude Sonnet 4.6 Asia/Taipei
# 目的: 確保 playbook_embeddings 表有最新向量,供語義相似度查詢

View File

@@ -13,6 +13,7 @@ Phase 16 R3: Repository 層 Protocol 介面
建立者: Claude Code (Phase 16 架構重構)
"""
from datetime import datetime
from typing import Protocol, runtime_checkable
from uuid import UUID
@@ -639,3 +640,40 @@ class IFailureWatcher(Protocol):
(success, result_message)
"""
...
@runtime_checkable
class ITrustRepository(Protocol):
"""
Trust Repository Protocol
職責: TrustRecord 持久化 (PostgreSQL)
實作: TrustRepository
ADR-088: TrustScoreManager 持久化層
2026-04-17 ogt + Claude Sonnet 4.6(亞太): Phase 4 信任持久化
"""
async def upsert(
self,
action_pattern: str,
score: int,
total_approvals: int,
total_rejections: int,
last_approval_by: str | None = None,
last_approval_at: datetime | None = None,
last_rejection_by: str | None = None,
last_rejection_at: datetime | None = None,
) -> bool:
"""INSERT or UPDATE trust record (upsert by action_pattern)"""
...
async def load_all(self) -> list[dict]:
"""
載入所有 trust records 供啟動 warm-up
Returns:
list[{action_pattern, score, total_approvals, total_rejections,
last_approval_by, last_approval_at, last_rejection_by, last_rejection_at}]
"""
...

View File

@@ -0,0 +1,145 @@
"""
Trust Repository - PostgreSQL 持久化層
======================================
ADR-088: TrustScoreManager 持久化升級
職責: trust_records 表的 upsert / load_all
設計: 實作 ITrustRepository Protocol
Pod 重啟後信任分數不歸零,AI 能真正累積到 L4 自動放行門檻。
2026-04-17 ogt + Claude Sonnet 4.6(亞太): Phase 4 信任持久化
"""
from datetime import datetime
import structlog
from sqlalchemy import select
from sqlalchemy.dialects.postgresql import insert as pg_insert
from src.db.base import get_db_context
from src.db.models import TrustRecordDB
from src.repositories.interfaces import ITrustRepository
logger = structlog.get_logger(__name__)
class TrustRepository:
    """
    PostgreSQL-backed trust repository (methods match ITrustRepository).

    Writes use ``INSERT ... ON CONFLICT (action_pattern) DO UPDATE``, so each
    write is a single statement keyed by the primary key.

    NOTE(review): every write stores the absolute values handed in by the
    caller. If several processes persist the same pattern, the last writer
    wins; the upsert itself does not merge concurrent increments.
    """

    async def upsert(
        self,
        action_pattern: str,
        score: int,
        total_approvals: int,
        total_rejections: int,
        last_approval_by: str | None = None,
        last_approval_at: datetime | None = None,
        last_rejection_by: str | None = None,
        last_rejection_at: datetime | None = None,
    ) -> bool:
        """
        Insert the trust record for ``action_pattern`` or update it in place.

        ``action_pattern`` is the primary key. On conflict every column except
        ``created_at`` is overwritten with the values of the attempted insert.

        Returns:
            True when the statement executed; False when any exception was
            raised (the error is logged and not propagated).
        """
        # Function-scope import, kept exactly where the original had it.
        from src.utils.timezone import now_taipei

        try:
            # One timestamp per statement, so a freshly inserted row has
            # created_at == updated_at instead of two slightly different values.
            now = now_taipei()

            async with get_db_context() as db:
                insert_stmt = pg_insert(TrustRecordDB).values(
                    action_pattern=action_pattern,
                    score=score,
                    total_approvals=total_approvals,
                    total_rejections=total_rejections,
                    last_approval_by=last_approval_by,
                    last_approval_at=last_approval_at,
                    last_rejection_by=last_rejection_by,
                    last_rejection_at=last_rejection_at,
                    created_at=now,
                    updated_at=now,
                )
                # EXCLUDED refers to the row proposed for insertion, so the
                # update branch reuses those values instead of repeating them.
                # created_at is left out on purpose: it keeps its first value.
                proposed = insert_stmt.excluded
                stmt = insert_stmt.on_conflict_do_update(
                    index_elements=["action_pattern"],
                    set_={
                        "score": proposed.score,
                        "total_approvals": proposed.total_approvals,
                        "total_rejections": proposed.total_rejections,
                        "last_approval_by": proposed.last_approval_by,
                        "last_approval_at": proposed.last_approval_at,
                        "last_rejection_by": proposed.last_rejection_by,
                        "last_rejection_at": proposed.last_rejection_at,
                        "updated_at": proposed.updated_at,
                    },
                )
                # NOTE(review): no explicit commit here; presumably
                # get_db_context() commits on exit. Confirm.
                await db.execute(stmt)

            logger.debug(
                "trust_record_upserted",
                action_pattern=action_pattern,
                score=score,
            )
            return True
        except Exception as e:
            # Best-effort persistence: report the failure through the return value.
            logger.error(
                "trust_record_upsert_failed",
                action_pattern=action_pattern,
                error=str(e),
            )
            return False

    async def load_all(self) -> list[dict]:
        """
        Load every trust record, for use by the start-up warm-up.

        Returns:
            A list of dicts, each with the keys action_pattern, score,
            total_approvals, total_rejections, last_approval_by,
            last_approval_at, last_rejection_by and last_rejection_at.
            An empty list is returned when the query fails (the error is
            logged), so callers cannot tell "no rows" from "load failed".
        """
        try:
            async with get_db_context() as db:
                result = await db.execute(select(TrustRecordDB))
                rows = result.scalars().all()
                # Copy the ORM rows into plain dicts while the session is open.
                records = [
                    {
                        "action_pattern": r.action_pattern,
                        "score": r.score,
                        "total_approvals": r.total_approvals,
                        "total_rejections": r.total_rejections,
                        "last_approval_by": r.last_approval_by,
                        "last_approval_at": r.last_approval_at,
                        "last_rejection_by": r.last_rejection_by,
                        "last_rejection_at": r.last_rejection_at,
                    }
                    for r in rows
                ]
            logger.info("trust_records_loaded", count=len(records))
            return records
        except Exception as e:
            logger.error("trust_records_load_failed", error=str(e))
            return []
# =============================================================================
# Singleton
# =============================================================================
# Module-level cache for the single TrustRepository instance (created lazily).
_repository: TrustRepository | None = None


def get_trust_repository() -> ITrustRepository:
    """Return the shared TrustRepository, creating it on the first call."""
    global _repository
    if _repository is not None:
        return _repository
    _repository = TrustRepository()
    return _repository

View File

@@ -32,8 +32,9 @@ import structlog
from src.models.approval import ApprovalRequest
from src.models.incident import IncidentStatus
from src.repositories.interfaces import ILearningRepository
from src.repositories.interfaces import ILearningRepository, ITrustRepository
from src.repositories.learning_repository import get_learning_repository
from src.repositories.trust_repository import get_trust_repository
from src.services.trust_engine import get_trust_manager
logger = structlog.get_logger(__name__)
@@ -156,9 +157,11 @@ class LearningService:
def __init__(
self,
repository: ILearningRepository | None = None,
trust_repository: ITrustRepository | None = None,
):
self._trust_manager = get_trust_manager()
self._repository = repository or get_learning_repository()
self._trust_repo = trust_repository or get_trust_repository()
async def process_execution_result(
self,
@@ -200,6 +203,9 @@ class LearningService:
)
feedback_type = FeedbackType.EXECUTION_FAILURE
# ADR-088: 持久化信任分數到 PostgreSQL (Pod 重啟後不歸零)
await self._persist_trust(action_pattern)
# 取得更新後的信任分數
trust_record = self._trust_manager.get_trust_record(action_pattern)
trust_after = trust_record.score if trust_record else 0
@@ -325,6 +331,9 @@ class LearningService:
)
playbook_updated = await self._demote_playbook(feedback.incident_id)
# ADR-088: 持久化信任分數到 PostgreSQL (Pod 重啟後不歸零)
await self._persist_trust(action_pattern)
trust_record = self._trust_manager.get_trust_record(action_pattern)
trust_after = trust_record.score if trust_record else 0
@@ -955,6 +964,36 @@ class LearningService:
"learning_status": learning_status,
}
async def _persist_trust(self, action_pattern: str) -> None:
    """
    Write the in-memory TrustRecord for ``action_pattern`` to the trust repository.

    ADR-088: called after approve/reject handling so the trust score is
    stored in PostgreSQL and does not reset to zero after a Pod restart.

    Best-effort: a failed write is logged as ``trust_persist_failed`` and is
    never raised to the caller. A failure is either an exception from
    ``upsert()`` or an explicit ``False`` return value.

    Args:
        action_pattern: Pattern whose current in-memory record is persisted.
            If the trust manager has no record for it, nothing is written.

    2026-04-17 ogt + Claude Sonnet 4.6 (Asia-Pacific): Phase 4 trust persistence
    """
    record = self._trust_manager.get_trust_record(action_pattern)
    if not record:
        # Nothing tracked in memory for this pattern, so nothing to persist.
        return

    try:
        persisted = await self._trust_repo.upsert(
            action_pattern=action_pattern,
            score=record.score,
            total_approvals=record.total_approvals,
            total_rejections=record.total_rejections,
            last_approval_by=record.last_approval_by,
            last_approval_at=record.last_approval_at,
            last_rejection_by=record.last_rejection_by,
            last_rejection_at=record.last_rejection_at,
        )
    except Exception as e:
        logger.warning(
            "trust_persist_failed",
            action_pattern=action_pattern,
            error=str(e),
        )
        return

    # upsert() is declared to return bool; previously that result was
    # discarded, so a repository that signals failure by returning False
    # (rather than raising) went unnoticed at this call site.
    if persisted is False:
        logger.warning(
            "trust_persist_failed",
            action_pattern=action_pattern,
            error="upsert returned False",
        )
def _get_action_tier(self, action: str) -> int:
"""取得動作的 Tier"""
tier_actions = {

View File

@@ -362,6 +362,46 @@ class TrustScoreManager:
record.score = 0
logger.warning("[TrustEngine] All trust scores reset!")
def bulk_load(self, records: list[dict]) -> int:
    """
    Populate the in-memory trust table from persisted rows (start-up warm-up).

    ADR-088: after a Pod restart the trust scores are restored from
    PostgreSQL so the AI does not start again from zero.

    Args:
        records: One dict per stored record, with the keys action_pattern,
            score, total_approvals, total_rejections, last_approval_by,
            last_approval_at, last_rejection_by and last_rejection_at.
            Entries whose ``action_pattern`` is missing or falsy are skipped.
            Missing counters default to 0; missing audit fields to None.

    Returns:
        int: Number of records loaded. An existing in-memory entry with the
        same pattern is replaced.

    2026-04-17 ogt + Claude Sonnet 4.6 (Asia-Pacific): ADR-088
    """
    counter_fields = ("score", "total_approvals", "total_rejections")
    audit_fields = (
        "last_approval_by",
        "last_approval_at",
        "last_rejection_by",
        "last_rejection_at",
    )

    restored = 0
    for row in records:
        key = row.get("action_pattern")
        if key:
            counters = {name: row.get(name, 0) for name in counter_fields}
            audit = {name: row.get(name) for name in audit_fields}
            self._records[key] = TrustRecord(action_pattern=key, **counters, **audit)
            restored += 1

    # Stay silent when nothing was restored.
    if restored > 0:
        logger.info(f"[TrustEngine] Warm-up: loaded {restored} trust records from DB")
    return restored
# ==================== Pattern Matching Utilities ====================