diff --git a/.agents/skills/06-awoooi-monorepo-master.md b/.agents/skills/06-awoooi-monorepo-master.md index 41dce24a..e36eb87a 100644 --- a/.agents/skills/06-awoooi-monorepo-master.md +++ b/.agents/skills/06-awoooi-monorepo-master.md @@ -6,6 +6,28 @@ --- +## 文件資訊 + +| 欄位 | 值 | +|------|-----| +| **版本** | v1.4 | +| **建立日期** | 2026-03-20 (台北) | +| **建立者** | Claude Code | +| **最後修改** | 2026-03-26 15:40 (台北) | +| **修改者** | Claude Code | + +### 變更紀錄 + +| 版本 | 日期 | 執行者 | 變更內容 | +|------|------|--------|----------| +| v1.0 | 2026-03-20 | Claude Code | 初始建立 | +| v1.1 | 2026-03-25 | Claude Code | 加入文件資訊區塊 | +| v1.2 | 2026-03-26 | Claude Code | 新增紅區治理 + Git Hooks 章節 | +| v1.3 | 2026-03-26 | Claude Code | 首席架構師審查流程 + 審查週期調整 (每週) | +| v1.4 | 2026-03-26 | Claude Code | 🔴 新增「封存而非刪除」策略 (統帥裁示) | + +--- + ## Monorepo 結構 ``` @@ -174,9 +196,138 @@ pnpm add --filter @awoooi/lewooogo-core --- +## 🔴 封存而非刪除 (Archive, Not Delete) + +> **統帥裁示 2026-03-26**: 專業團隊不輕易刪除代碼,改採「封存」策略 + +### 核心原則 + +| 原則 | 說明 | +|------|------| +| **不輕易刪除** | 代碼重構時,舊代碼先封存,不直接刪除 | +| **保留回滾能力** | 封存代碼可隨時恢復,降低風險 | +| **記錄封存操作** | 所有封存必須記錄到 ARCHIVE_LOG.md | +| **設定保留期限** | 封存後 90 天無問題才真正刪除 | + +### 封存目錄結構 + +``` +apps/api/src/services/ +├── _archived/ # 封存目錄 (底線開頭) +│ ├── ARCHIVE_LOG.md # 封存紀錄 (必須) +│ ├── incident_memory_v1.py # 封存副本 +│ └── incident_engine_v1.py # 封存副本 +├── incident_memory.py # 新版本 (active) +└── incident_engine.py # 新版本 (active) +``` + +### ARCHIVE_LOG.md 格式 + +```markdown +# Archive Log + +## incident_memory_v1.py + +| 欄位 | 值 | +|------|-----| +| 封存日期 | 2026-XX-XX | +| 封存原因 | Phase 16 絞殺者模式,改用 lewooogo-brain | +| 原始位置 | apps/api/src/services/incident_memory.py | +| 替代方案 | lewooogo_brain.adapters.incident_memory | +| 回復指令 | `git checkout a202a26 -- apps/api/src/services/incident_memory.py` | +| 48hr 驗證通過 | 2026-XX-XX | +| 最終刪除日期 | 2026-XX-XX (封存後 90 天) | +``` + +### 封存流程 + +``` +1. 新代碼穩定運行 48 小時 ✓ +2. 建立 _archived/ 目錄 (如不存在) +3. 複製舊檔案到 _archived/ 並加上 _v1 後綴 +4. 更新 ARCHIVE_LOG.md +5. 
標記原檔案為 @deprecated (保留 90 天) +6. 90 天後無問題 → 真正刪除封存檔案 +``` + +### 禁止事項 + +| 禁止 | 原因 | +|------|------| +| ❌ 直接 `rm` 舊代碼 | 無法追溯、無法快速回滾 | +| ❌ 跳過 48hr 驗證期 | 新代碼可能有隱藏 bug | +| ❌ 不記錄封存操作 | 團隊無法理解變更歷史 | +| ❌ 封存後立即刪除 | 未經充分驗證 | + +--- + +## 🔴 紅區治理 (Codebase Zoning) + +> **統帥 2026-03-26 批准**: 代碼防區化治理機制 + +### Git Pre-commit Hook + +```bash +# 安裝 Hook +bash scripts/setup-hooks.sh + +# Hook 位置 +scripts/hooks/pre-commit # 原始檔 (tracked) +.git/hooks/pre-commit # 已安裝 (not tracked) +``` + +### Tier 3 紅區 (核心大腦) + +| 檔案 | 功能 | 破壞後果 | +|------|------|----------| +| `services/decision_manager.py` | 決策狀態機 | AI 無法做決策 | +| `services/trust_engine.py` | 信任評分 | Multi-Sig 失效 | +| `services/consensus_engine.py` | 共識引擎 | 審批繞過 | +| `services/incident_engine.py` | 事件處理 | 告警無法處理 | +| `services/multi_sig_redis.py` | 分散式鎖 | 併發競爭 | +| `services/security_interceptor.py` | 安全攔截 | 權限繞過 | +| `core/config.py` | 環境配置 | 連線全斷 | +| `core/telemetry.py` | OTEL 監控 | 可觀測失明 | + +### Tier 2 橙區 (基礎設施) + +| 路徑 | 功能 | +|------|------| +| `k8s/awoooi-prod/*.yaml` | K8s 正式環境 | +| `apps/api/src/db/models.py` | 資料庫 Schema | +| `.github/workflows/*.yml` | CI/CD Pipeline | +| `core/redis_client.py` | Redis 連線池 | +| `services/telegram_gateway.py` | Telegram 閘道 | + +### 🏛️ 首席架構師審查流程 + +當 Claude Code 看到紅區警告時: + +``` +1. 立即停止 commit +2. 呼叫首席架構師介入 +3. 首席架構師進行架構與代碼 Review: + - 變更必要性 + - 影響範圍評估 + - 替代方案評估 + - 回滾計畫 +4. 
首席架構師簽核後,方可繼續 commit +``` + +### 定期審查 + +| 項目 | 週期 | 下次審查 | +|------|------|----------| +| **紅區清單** | **每週一** | 2026-03-30 | +| **Hook 腳本** | **每月第一個週一** | 2026-04-06 | + +--- + ## 參考文檔 - `turbo.json`: Turborepo 配置 - `pnpm-workspace.yaml`: Workspace 定義 - `DEPENDENCIES.md`: 依賴清單 - `docs/LOGBOOK.md`: 進度追蹤 +- `docs/RED_ZONES.md`: 紅區治理手冊 +- `scripts/hooks/pre-commit`: 紅區 Hook 腳本 diff --git a/apps/api/src/services/incident_memory.py b/apps/api/src/services/incident_memory.py index 077f736f..fbd7f95b 100644 --- a/apps/api/src/services/incident_memory.py +++ b/apps/api/src/services/incident_memory.py @@ -475,12 +475,136 @@ class DualIncidentMemory: ) +# ============================================================================= +# Phase 16: IncidentDbAdapter (DI 注入實現) +# ============================================================================= + +class IncidentDbAdapter: + """ + Incident DB Adapter - 實現 lewooogo-brain 的 IIncidentDbAdapter + + Phase 16: 將 apps/api 的 SQLAlchemy Model 操作封裝為 adapter + 注入到 lewooogo-brain 的 DualIncidentMemory + """ + + async def load(self, incident_id: str) -> Incident | None: + """從 PostgreSQL 載入 Incident""" + try: + async with get_db_context() as db: + from sqlalchemy import select + stmt = select(IncidentRecord).where( + IncidentRecord.incident_id == incident_id + ) + result = await db.execute(stmt) + record = result.scalar_one_or_none() + + if record: + return self._record_to_incident(record) + return None + + except Exception as e: + logger.error("db_adapter_load_failed", incident_id=incident_id, error=str(e)) + return None + + async def save(self, incident: Incident) -> bool: + """儲存 Incident 到 PostgreSQL (upsert)""" + try: + async with get_db_context() as db: + from sqlalchemy import select + + # 檢查是否已存在 + stmt = select(IncidentRecord).where( + IncidentRecord.incident_id == incident.incident_id + ) + result = await db.execute(stmt) + existing = result.scalar_one_or_none() + + if existing: + # 更新現有記錄 + existing.status = 
incident.status.value + existing.severity = incident.severity.value + existing.signals = [ + s.model_dump(mode="json") for s in incident.signals + ] + existing.affected_services = incident.affected_services + existing.updated_at = incident.updated_at + if incident.resolved_at: + existing.resolved_at = incident.resolved_at + if incident.closed_at: + existing.closed_at = incident.closed_at + else: + # 建立新記錄 + record = IncidentRecord( + incident_id=incident.incident_id, + status=incident.status.value, + severity=incident.severity.value, + signals=[ + s.model_dump(mode="json") for s in incident.signals + ], + affected_services=incident.affected_services, + decision_chain=( + incident.decision_chain.model_dump(mode="json") + if hasattr(incident, 'decision_chain') and incident.decision_chain + else None + ), + proposal_ids=[str(pid) for pid in incident.proposal_ids], + outcome=( + incident.outcome.model_dump(mode="json") + if hasattr(incident, 'outcome') and incident.outcome + else None + ), + created_at=incident.created_at, + updated_at=incident.updated_at, + resolved_at=incident.resolved_at, + closed_at=incident.closed_at, + ttl_days=getattr(incident, 'ttl_days', 30), + vectorized=getattr(incident, 'vectorized', False), + ) + db.add(record) + + logger.debug("db_adapter_save_success", incident_id=incident.incident_id) + return True + + except Exception as e: + logger.error("db_adapter_save_failed", incident_id=incident.incident_id, error=str(e)) + return False + + def _record_to_incident(self, record: IncidentRecord) -> Incident: + """將 DB Record 轉換為 Incident (lewooogo-brain 版本)""" + # 延遲導入 lewooogo-brain 的 Incident + from lewooogo_brain.interfaces.incident_processor import ( + Incident as BrainIncident, + IncidentStatus as BrainIncidentStatus, + Severity as BrainSeverity, + Signal as BrainSignal, + ) + + # 重建 Signals + signals = [] + for s in record.signals or []: + signals.append(BrainSignal.model_validate(s)) + + return BrainIncident( + 
incident_id=record.incident_id, + status=BrainIncidentStatus(record.status), + severity=BrainSeverity(record.severity), + signals=signals, + affected_services=record.affected_services or [], + proposal_ids=record.proposal_ids or [], + created_at=record.created_at, + updated_at=record.updated_at, + resolved_at=record.resolved_at, + closed_at=record.closed_at, + ) + + # ============================================================================= # Singleton + 絞殺者模式 (Phase 16 R1.2) # ============================================================================= _dual_memory: DualIncidentMemory | None = None _new_engine_memory: Any | None = None # lewooogo-brain 版本 +_db_adapter: IncidentDbAdapter | None = None # DB Adapter singleton def get_incident_memory() -> DualIncidentMemory: @@ -516,12 +640,12 @@ def _get_new_engine_memory() -> Any: 注意事項: - 需要 lewooogo-brain 已安裝 (Dockerfile 已配置) - - PostgreSQL 整合尚未完成 (TODO in lewooogo-brain) + - PostgreSQL 透過 IncidentDbAdapter 注入 (Phase 16 DI 模式) - 初次啟用建議 48 小時監控 回滾: 設定 USE_NEW_ENGINE=false 即可瞬間切回 """ - global _new_engine_memory + global _new_engine_memory, _db_adapter if _new_engine_memory is None: try: @@ -530,20 +654,24 @@ def _get_new_engine_memory() -> Any: DualIncidentMemory as NewDualIncidentMemory, ) from src.core.redis_client import get_redis - from src.db.base import get_db_context redis_client = get_redis() - # 初始化 lewooogo-brain 版本 + # 初始化 DB Adapter (Phase 16 DI 模式) + if _db_adapter is None: + _db_adapter = IncidentDbAdapter() + + # 初始化 lewooogo-brain 版本 (含 DB Adapter) _new_engine_memory = NewDualIncidentMemory( redis_client=redis_client, - pg_session_factory=get_db_context, + db_adapter=_db_adapter, key_prefix="awoooi:incidents", ) logger.info( "incident_memory_initialized", engine="lewooogo_brain_package", + db_adapter="IncidentDbAdapter", redis_connected=True, ) diff --git a/packages/lewooogo-brain/src/lewooogo_brain/adapters/incident_memory.py 
b/packages/lewooogo-brain/src/lewooogo_brain/adapters/incident_memory.py index 7c65a2f2..9eb58a88 100644 --- a/packages/lewooogo-brain/src/lewooogo_brain/adapters/incident_memory.py +++ b/packages/lewooogo-brain/src/lewooogo_brain/adapters/incident_memory.py @@ -2,11 +2,13 @@ DualIncidentMemory - Incident 專用雙層記憶體適配器 ================================================= Phase 6.4e: 連接 IncidentEngine 與 DualMemoryProvider +Phase 16 R1.2: 完善 PostgreSQL 整合 (2026-03-26) 設計: - 實作 IIncidentMemory 協定 - 內部使用 DualMemoryProvider - 提供 Incident 專用的索引功能 +- 透過 DI 注入 DB Adapter (解耦 SQLAlchemy Model) 統帥鐵律: - Working Memory (Redis): 7 天 TTL @@ -15,7 +17,7 @@ Phase 6.4e: 連接 IncidentEngine 與 DualMemoryProvider """ from datetime import datetime, timezone, timedelta -from typing import Any +from typing import Any, Protocol, runtime_checkable import structlog @@ -24,6 +26,34 @@ from lewooogo_brain.interfaces.incident_processor import Incident logger = structlog.get_logger(__name__) +# ============================================================================= +# DB Adapter Protocol (依賴注入介面) +# ============================================================================= + +@runtime_checkable +class IIncidentDbAdapter(Protocol): + """ + Incident DB Adapter 協定 + + Phase 16: 解耦 SQLAlchemy Model,由調用者注入實現 + + 使用方式: + class MyDbAdapter: + async def load(self, incident_id: str) -> Incident | None: ... + async def save(self, incident: Incident) -> bool: ... + + memory = DualIncidentMemory(redis, db_adapter=MyDbAdapter()) + """ + + async def load(self, incident_id: str) -> Incident | None: + """從 PostgreSQL 載入 Incident""" + ... + + async def save(self, incident: Incident) -> bool: + """儲存 Incident 到 PostgreSQL (upsert)""" + ... 
+ + # 常量 WORKING_MEMORY_TTL = 604800 # 7 天 AGGREGATION_WINDOW_MINUTES = 30 @@ -45,12 +75,16 @@ class DualIncidentMemory: Key: awoooi:incident_index:{namespace}:{target} Value: incident_id TTL: 30 分鐘 (聚合窗口) + + Phase 16 DI 模式: + 透過 db_adapter 注入 PostgreSQL 操作,解耦 SQLAlchemy Model """ def __init__( self, redis_client: Any, pg_session_factory: Any = None, + db_adapter: IIncidentDbAdapter | None = None, key_prefix: str = "awoooi:incidents", ): """ @@ -58,11 +92,13 @@ class DualIncidentMemory: Args: redis_client: Redis 連線客戶端 - pg_session_factory: PostgreSQL Session 工廠 (可選) + pg_session_factory: PostgreSQL Session 工廠 (已棄用,改用 db_adapter) + db_adapter: PostgreSQL 操作適配器 (Phase 16 DI 模式) key_prefix: Redis Key 前綴 """ self._redis = redis_client - self._pg_session_factory = pg_session_factory + self._pg_session_factory = pg_session_factory # 向後兼容,但已棄用 + self._db_adapter = db_adapter self._key_prefix = key_prefix self._index_prefix = f"{key_prefix}:index" @@ -76,11 +112,12 @@ class DualIncidentMemory: async def load_incident(self, incident_id: str) -> Incident | None: """ - 載入 Incident + 載入 Incident (Cache-Aside 模式) 策略: 1. 從 Redis (Working Memory) 讀取 - 2. 若 miss,從 PostgreSQL (Episodic) 讀取 (TODO) + 2. 若 miss,從 PostgreSQL (Episodic) 讀取 + 3. 
若 PG 有資料,回填到 Redis Args: incident_id: Incident ID @@ -92,13 +129,28 @@ class DualIncidentMemory: key = self._make_key(incident_id) data = await self._redis.get(key) - if data is None: - logger.debug("incident_not_found_in_working", incident_id=incident_id) - # TODO: 從 PostgreSQL 載入 - return None + if data is not None: + # Working Memory hit + if isinstance(data, bytes): + data = data.decode() + return Incident.model_validate_json(data) - # JSON → Incident - return Incident.model_validate_json(data) + # Working Memory miss + logger.debug("incident_not_found_in_working", incident_id=incident_id) + + # 嘗試從 Episodic Memory (PostgreSQL) 載入 + if self._db_adapter is not None: + incident = await self._db_adapter.load(incident_id) + if incident is not None: + # 回填到 Working Memory + await self.save_incident(incident) + logger.debug( + "incident_loaded_from_episodic_backfill", + incident_id=incident_id, + ) + return incident + + return None except Exception as e: logger.error("load_incident_failed", incident_id=incident_id, error=str(e)) @@ -144,38 +196,48 @@ class DualIncidentMemory: """ 持久化到 Episodic Memory (PostgreSQL) + Phase 16: 透過 DI 注入的 db_adapter 執行實際持久化 + Args: incident: Incident 物件 Returns: 是否成功 """ - if self._pg_session_factory is None: - logger.warning("pg_session_factory_not_configured") - return False + # Phase 16 DI 模式: 優先使用 db_adapter + if self._db_adapter is not None: + try: + result = await self._db_adapter.save(incident) + if result: + logger.debug( + "incident_persisted_to_episodic", + incident_id=incident.incident_id, + adapter="db_adapter", + ) + return result + except Exception as e: + logger.error( + "persist_incident_failed", + incident_id=incident.incident_id, + error=str(e), + ) + return False - try: - async with self._pg_session_factory() as session: - # 使用 merge 實現 upsert - # TODO: 需要 SQLAlchemy Model 定義 - # session.add(incident_model) - # await session.commit() - pass - - logger.debug( - "incident_persisted_to_episodic", + # 向後兼容: 
pg_session_factory (已棄用) + if self._pg_session_factory is not None: + logger.warning( + "pg_session_factory_deprecated_use_db_adapter", incident_id=incident.incident_id, ) - return True - - except Exception as e: - logger.error( - "persist_incident_failed", - incident_id=incident.incident_id, - error=str(e), - ) + # 舊模式無法執行,因為沒有 Model 定義 return False + logger.warning( + "no_db_adapter_configured_skip_persist", + incident_id=incident.incident_id, + ) + return False + async def find_related_incident( self, namespace: str,