fix(governance): 修治理告警 4 個 silent failure + Prom sentinel 連鎖

【全景檢測：12-agent 並行掃描定位 4 大 bug 與 1 個 P0 連鎖回歸】

Bug 1（P0 silent failure）— governance_agent.check_trust_drift
原 `await db.commit()` 縮排錯在 async with 區塊外（8 空格 vs 12），session 已 auto-commit 關閉，二次 commit 拋 InvalidRequestError 被吞，governance_trust_drift_auto_deprecated log 從不出現。
修：commit/log 移回 with 內。附 AST regression guard test 擋退化。

Bug 2 — flywheel_stats_service / W-3 fresh deploy 假告警
Redis 空時 total_exec=0 → rate=0.0 → watchdog `< 0.30` 立即觸發「飛輪成功率 0%」假告警。
修：total_exec < FLYWHEEL_MIN_SAMPLE(10) 回 None，watchdog 判 None 跳過 W-3。Prometheus sentinel 用 NaN（非 -1.0）避免觸發 ops/monitoring/alerts.yml:775 等 3 份 prom rule 的 `< 0.1` 條件造成 2h 後假告警連鎖。前端 type 同步 number | null。

Bug 3 — failover_alerter dedup key
原 key 只看 event_type 不看 payload，trust_drift 4→25 IDs 變動全被 1h dedup 吞掉。
修：dedup key 加 sha256(impact subdict)[:8]，event_type sanitize 防特殊字元污染 Redis key。

Bug 4 — ai_slo_watchdog_job W-4 evolver 全封存初始化誤報
原邏輯 approved==0 即告警，未排除「playbooks 表初始化中」場景。
修：_count_approved_playbooks 回 (approved, total)，total==0 → skip。

【執行結果】
- 39 個相關 unit test 全過（test_failover_alerter / test_governance_agent / test_trust_drift_watchdog / test_check_trust_drift_commit_outside_context_poc）
- 6 個關鍵路徑實測：NaN sentinel / float 渲染 / hash 區分性 / dedup 同 impact 相同 hash / datetime 容錯 / 4 檔 py_compile 全過

【調度教訓 — 留作未來改進】
- 12-agent 並行調度時，vuln-verifier 與 fullstack-engineer 競態導致 vuln-verifier 讀到已修代碼誤判 NOT REPRODUCIBLE。未來：vuln-verifier 應在 fullstack 之前執行，或用 git show HEAD~1 對比修復前。
- fullstack-engineer 引入 P0 regression（f-string 內嵌 ternary 非法 format spec），critic 抓到 + Prom sentinel 連鎖 — 證明 critic 審查必要不可省。

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 00:18:57 +08:00
parent 314cb0e079
commit f1362fcc8d
6 changed files with 349 additions and 24 deletions
--- a/apps/api/src/jobs/ai_slo_watchdog_job.py
+++ b/apps/api/src/jobs/ai_slo_watchdog_job.py
@@ -83,18 +83,29 @@ async def _check_once() -> None:
        logger.warning("watchdog_w2_tg_silence_check_failed", error=str(e))

    # W-3: 飛輪執行成功率過低
+    # 2026-05-02 ogt + Claude Sonnet 4.6 — Bug 2 修復（fresh deploy 假告警）
+    # execution_success_rate=None 代表樣本不足（total_exec < FLYWHEEL_MIN_SAMPLE），
+    # 跳過本次 W-3 檢查，避免每次 restart / fresh deploy 必噴「飛輪成功率 0%」假告警
    try:
        from src.services.flywheel_stats_service import FlywheelStatsService
        metrics = await FlywheelStatsService().compute()
-        if metrics and metrics.execution_success_rate < _FLYWHEEL_SUCCESS_MIN:
+        if metrics and metrics.execution_success_rate is None:
+            logger.debug("watchdog_w3_skipped_insufficient_sample", reason="execution_sample_below_min")
+        elif metrics and metrics.execution_success_rate < _FLYWHEEL_SUCCESS_MIN:
            violations.append(f"飛輪執行成功率 {metrics.execution_success_rate:.1%} < {_FLYWHEEL_SUCCESS_MIN:.0%}")
    except Exception as e:
        logger.warning("watchdog_w3_flywheel_check_failed", error=str(e))

    # W-4: 無 APPROVED Playbook（自動修復鏈路斷裂）
+    # 2026-05-02 ogt + Claude Sonnet 4.6 — Bug 4 修復（全封存初始化誤報）
+    # 原邏輯：approved==0 即告警，未排除「playbooks 表本身為空」的初始化 / migration 場景
+    # 修法：先查 total count，total==0 表示表初始化中 → skip 並 log；
+    #       total>0 且 approved==0 才是真正的「全封存」斷鏈告警
    try:
-        approved_count = await _count_approved_playbooks()
-        if approved_count == 0:
+        approved_count, total_playbook_count = await _count_approved_playbooks()
+        if total_playbook_count == 0:
+            logger.info("watchdog_w4_skipped_empty_table", reason="playbook_table_empty_likely_initializing")
+        elif approved_count == 0:
            violations.append("無 APPROVED Playbook — 自動修復鏈路斷裂（evolver 可能全部封存）")
    except Exception as e:
        logger.warning("watchdog_w4_playbook_check_failed", error=str(e))
@@ -215,14 +226,26 @@ async def _count_pending_no_tg_sent() -> int:
    return len(rows)


-async def _count_approved_playbooks() -> int:
-    """查詢 APPROVED 狀態 Playbook 數量，為 0 代表自動修復鏈路斷裂。"""
+async def _count_approved_playbooks() -> tuple[int, int]:
+    """查詢 APPROVED Playbook 數量 + 全表總數，兩者均回傳。
+
+    2026-05-02 ogt + Claude Sonnet 4.6 — Bug 4 修復（全封存初始化誤報）
+    加回傳 total count：若 total==0 代表表初始化中，W-4 應 skip 而非告警。
+    回傳：(approved_count, total_count)
+    """
    from sqlalchemy import text as sa_text
    async with get_db_context() as db:
-        result = await db.execute(
+        approved_result = await db.execute(
            sa_text("SELECT COUNT(*) FROM playbooks WHERE status = 'approved'")
        )
-        return result.scalar() or 0
+        approved = approved_result.scalar() or 0
+
+        total_result = await db.execute(
+            sa_text("SELECT COUNT(*) FROM playbooks")
+        )
+        total = total_result.scalar() or 0
+
+    return approved, total


 async def _count_pending_stuck_analysis() -> int:
--- a/apps/api/src/services/failover_alerter.py
+++ b/apps/api/src/services/failover_alerter.py
@@ -10,6 +10,8 @@

 from __future__ import annotations

+import hashlib
+import json
 from datetime import datetime, timezone, timedelta
 from typing import Any

@@ -96,8 +98,27 @@ class FailoverAlerter:
        dedup TTL 3600s — 同類告警 1 小時內不重複發送

        2026-04-26 P2.2 by Claude
+        2026-05-02 ogt + Claude Sonnet 4.6 — Bug 3 修復：dedup key 加 payload hash
+          原 key 只看 event_type，不看 payload 內容，導致同 event_type 但不同影響
+          的告警（例如：trust_drift 4 條→25 條漂移）全被 1h dedup 吃掉。
+
+        2026-05-02 ogt + Claude Opus 4.7 — critic P1-3 連鎖修復
+          前次只 hash 頂層 allowlist 欄位，對 slo_*_violation / governance_self_failure
+          等只把 metric 放在 impact subdict 的事件失效（hash 永遠相同）。
+          改 hash 整個 impact subdict — schema 強制 5 種 event type 都有 impact，
+          各自的 metric 值都會反映在 hash 裡，數值變動就會繞過 dedup。
+          sha256 取代 md5 避開 bandit B324 lint warning（非密碼學用途）。
        """
-        dedup_key = f"alert:governance:{event_type}"
+        # sanitize：防 SLO 名稱（如 "slo_km_growth_rate"）含 ":" 或空格污染 key
+        safe_event_type = event_type.replace(":", "_").replace(" ", "_").lower()
+
+        # impact hash：hash payload.impact subdict（schema 強制存在；含各 event 的 metric 值）
+        # default=str 容錯 datetime / Decimal / 其他非原生 JSON 型別
+        impact = payload.get("impact", {}) if isinstance(payload, dict) else {}
+        _payload_hash = hashlib.sha256(
+            json.dumps(impact, sort_keys=True, default=str).encode()
+        ).hexdigest()[:8]
+        dedup_key = f"alert:governance:{safe_event_type}:{_payload_hash}"
        if not await self._check_dedup(dedup_key, ttl=3600):
            logger.debug("governance_alert_dedup_skipped", event_type=event_type)
            return
--- a/apps/api/src/services/flywheel_stats_service.py
+++ b/apps/api/src/services/flywheel_stats_service.py
@@ -35,6 +35,13 @@ logger = structlog.get_logger(__name__)
 # Redis key prefix（與 playbook_repository.py 一致）
 _PLAYBOOK_KEY_PREFIX = "playbook:"

+# 2026-05-02 ogt + Claude Sonnet 4.6 — Bug 2 修復（W-3 fresh deploy 假告警）
+# execution_success_rate 需要最少樣本數才有統計意義；
+# Redis 空（fresh deploy / restart）時 total_exec=0 → rate=0.0 → watchdog W-3 立即觸發假告警
+# 修法：total_exec < FLYWHEEL_MIN_SAMPLE 時回 None，watchdog 判 None 跳過 W-3 檢查
+# TODO: 未來移至 settings（目前 hardcode 以避免 config 改動超出本輪範圍）
+FLYWHEEL_MIN_SAMPLE = 10
+
 # 飛輪六節點名稱
 FLYWHEEL_NODES = [
    "monitoring",
@@ -57,7 +64,7 @@ class FlywheelMetrics:
    def __init__(
        self,
        playbook_count: int,
-        execution_success_rate: float,
+        execution_success_rate: float | None,
        km_unvectorized_count: int,
        alertname_null_rate: float,
        incidents_stuck: int,
@@ -68,6 +75,9 @@ class FlywheelMetrics:
        current_flow: list[dict[str, Any]],
        computed_at: datetime,
    ) -> None:
+        # 2026-05-02 ogt + Claude Sonnet 4.6 — Bug 2 修復
+        # execution_success_rate 為 None 時表示樣本不足（< FLYWHEEL_MIN_SAMPLE），
+        # watchdog W-3 應跳過該檢查，避免 fresh deploy 假告警
        self.playbook_count = playbook_count
        self.execution_success_rate = execution_success_rate
        self.km_unvectorized_count = km_unvectorized_count
@@ -84,14 +94,25 @@ class FlywheelMetrics:
    def to_prometheus_lines(self) -> str:
        """輸出 Prometheus text format"""
        ts = int(self.computed_at.timestamp() * 1000)
+        # 2026-05-02 ogt + Claude Opus 4.7 — Bug 2 後續修復（critic P0-1 連鎖修復）
+        # sentinel 用 NaN 而非 -1.0：Prometheus 對 NaN 比較永遠回 false，
+        # 既有 alert rule `awoooi_flywheel_execution_success_rate < 0.1` 自然不會被
+        # sentinel 觸發；同時 Grafana 渲染為「無資料」gap，比 -1 spike 直觀。
+        # 前次嘗試 -1.0 會讓 ops/monitoring/alerts.yml:775 等 3 份 prom rule
+        # 在 fresh deploy 後 2h 必噴 FlywheelExecutionSuccessLow 假告警，跟 watchdog skip 自相矛盾。
+        rate_str = (
+            f"{self.execution_success_rate:.4f}"
+            if self.execution_success_rate is not None
+            else "NaN"
+        )
        lines = [
            "# HELP awoooi_flywheel_playbook_count Total approved playbooks in Redis",
            "# TYPE awoooi_flywheel_playbook_count gauge",
            f"awoooi_flywheel_playbook_count {self.playbook_count} {ts}",
            "",
-            "# HELP awoooi_flywheel_execution_success_rate Auto-repair success rate (0-1)",
+            "# HELP awoooi_flywheel_execution_success_rate Auto-repair success rate (0-1), NaN=insufficient sample",
            "# TYPE awoooi_flywheel_execution_success_rate gauge",
-            f"awoooi_flywheel_execution_success_rate {self.execution_success_rate:.4f} {ts}",
+            f"awoooi_flywheel_execution_success_rate {rate_str} {ts}",
            "",
            "# HELP awoooi_flywheel_km_unvectorized_count KM entries not yet vectorized",
            "# TYPE awoooi_flywheel_km_unvectorized_count gauge",
@@ -124,7 +145,7 @@ class FlywheelMetrics:
        """輸出 /api/v1/stats/summary 格式"""
        return {
            "playbook_count": self.playbook_count,
-            "execution_success_rate": round(self.execution_success_rate, 4),
+            "execution_success_rate": round(self.execution_success_rate, 4) if self.execution_success_rate is not None else None,
            "today_processed": self.today_processed,
            "flywheel_conversions_today": self.flywheel_conversions_today,
            "km_vectorized_rate": round(self.km_vectorized_rate, 4),
@@ -187,8 +208,13 @@ class FlywheelStatsService:
    # Internal helpers
    # ------------------------------------------------------------------

-    async def _playbook_stats(self) -> tuple[int, float]:
-        """Playbook 數量 + 執行成功率（從 Redis）"""
+    async def _playbook_stats(self) -> tuple[int, float | None]:
+        """Playbook 數量 + 執行成功率（從 Redis）
+
+        2026-05-02 ogt + Claude Sonnet 4.6 — Bug 2 修復（W-3 fresh deploy 假告警）
+        total_exec < FLYWHEEL_MIN_SAMPLE 時回 None，代表樣本不足，
+        watchdog W-3 判 None 跳過該檢查，避免每次 restart 觸發假告警。
+        """
        try:
            redis = get_redis()
            count = 0
@@ -211,12 +237,15 @@ class FlywheelStatsService:
                except (json.JSONDecodeError, KeyError):
                    continue

-            rate = total_success / total_exec if total_exec > 0 else 0.0
+            if total_exec < FLYWHEEL_MIN_SAMPLE:
+                # 樣本不足（含 Redis 空），回 None 通知呼叫方跳過 W-3 告警判斷
+                return count, None
+            rate = total_success / total_exec
            return count, rate

        except Exception:
            logger.exception("flywheel_stats_playbook_error")
-            return 0, 0.0
+            return 0, None

    async def _km_stats(self, now: datetime) -> tuple[int, float, int]:
        """KM 向量化率 + 今日飛輪轉化數（從 PostgreSQL）"""
--- a/apps/api/src/services/governance_agent.py
+++ b/apps/api/src/services/governance_agent.py
@@ -106,13 +106,17 @@ class GovernanceAgent:
                else:
                    kept_ids.append(r.playbook_id)

-        if auto_deprecated_ids:
-            await db.commit()
-            logger.info(
-                "governance_trust_drift_auto_deprecated",
-                count=len(auto_deprecated_ids),
-                ids=auto_deprecated_ids[:10],
-            )
+            # 2026-05-02 ogt + Claude Sonnet 4.6 — Bug 1 修復（P0 silent failure）
+            # 原 await db.commit() 在 with 區塊外呼叫，session 已被 context manager
+            # 關閉後 auto-commit，二次 commit 拋 InvalidRequestError 被外層 try/except 吞掉
+            # 修法：commit 移入 with 區塊內，在 session 有效期間顯式提交
+            if auto_deprecated_ids:
+                await db.commit()
+                logger.info(
+                    "governance_trust_drift_auto_deprecated",
+                    count=len(auto_deprecated_ids),
+                    ids=auto_deprecated_ids[:10],
+                )

        if drifted:
            drift_ratio = len(drifted) / total if total > 0 else 0.0
--- a/apps/api/tests/test_check_trust_drift_commit_outside_context_poc.py
+++ b/apps/api/tests/test_check_trust_drift_commit_outside_context_poc.py
@@ -0,0 +1,246 @@
+"""Regression guard：擋住 governance_agent.check_trust_drift 的 commit-outside-context P0 bug 復發
+
+驗證標的：apps/api/src/services/governance_agent.py:75-171（check_trust_drift）
+
+# 歷史背景
+
+2026-05-02 commit dedb1208 引入 auto-deprecate 路徑時，`if auto_deprecated_ids:`
+區塊縮排錯誤（縮排 8 空格 = `async with` 同層 = 區塊**外**），導致：
+- session 已被 context manager 關閉並 auto-commit
+- 二次 `await db.commit()` 在已關閉 session 上拋 InvalidRequestError
+- 外層 try/except 吞掉錯誤
+- `governance_trust_drift_auto_deprecated` log 從不出現
+
+實際後果：DB 仍有 commit（context manager auto-commit 已落地 `status='deprecated'`），
+但 log 不出現，所有依賴此 log 做 monitoring 的告警鏈會誤判系統健康。
+commit b710f3f3 message 聲稱「自治路徑生效」是假象。
+
+git diff 鐵證 (line 109)：縮排原本 8 空格在 with 外，2026-05-02 修復後改為 12 空格在 with 內。
+
+# 並行調度教訓
+
+vuln-verifier 與 fullstack-engineer 並行派遣時，vuln-verifier 讀取的是已被
+fullstack-engineer 修改後的代碼，AST 分析得出「bug 不存在」的錯誤結論。
+未來：vuln-verifier 應該在 fullstack-engineer 之**前**跑（修復前驗證 bug 真實），
+或用 git show HEAD~1 比對「修復前版本」。
+
+# 本檔角色
+
+修復後保留為 AST regression guard：未來若 indent 退回 8 空格（`if auto_deprecated_ids:`
+從 with 內被移到外層）AST 測試會立刻 fail，擋住 silent failure 復發。
+
+2026-05-02 by ogt + Claude Opus 4.7（修正 vuln-verifier 並行誤判）
+"""
+from __future__ import annotations
+
+import ast
+import logging
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+import structlog
+from sqlalchemy.exc import InvalidRequestError
+
+from src.services.governance_agent import GovernanceAgent
+
+# 配置 structlog 走標準 logging（caplog 才抓得到）
+structlog.configure(
+    processors=[
+        structlog.stdlib.add_log_level,
+        structlog.processors.KeyValueRenderer(),
+    ],
+    wrapper_class=structlog.stdlib.BoundLogger,
+    logger_factory=structlog.stdlib.LoggerFactory(),
+    cache_logger_on_first_use=False,
+)
+
+
+# ============================================================================
+# 證據 A：AST 靜態分析 — 證明 commit + log 在 with 區塊內
+# ============================================================================
+
+def test_ast_proves_commit_and_log_inside_with_block():
+    """AST 證據：解析 governance_agent.py，確認 line 113 的 If（含 commit + log）
+    是 AsyncWith.body 的一部分，而不是函式 body 的頂層語句。
+
+    若 critic 主張為真，line 113 應該出現在「Function-level statements」中，
+    而不是 AsyncWith.body 中。
+    """
+    src_path = (
+        Path(__file__).resolve().parents[1]
+        / "src" / "services" / "governance_agent.py"
+    )
+    tree = ast.parse(src_path.read_text())
+
+    # 找到 check_trust_drift
+    func = None
+    for node in ast.walk(tree):
+        if isinstance(node, ast.AsyncFunctionDef) and node.name == "check_trust_drift":
+            func = node
+            break
+    assert func is not None, "找不到 check_trust_drift"
+
+    # 找到第一個 AsyncWith
+    async_with = next(
+        (s for s in func.body if isinstance(s, ast.AsyncWith)), None
+    )
+    assert async_with is not None
+
+    # AsyncWith.body 內找有沒有 await db.commit() + logger.info(governance_trust_drift_auto_deprecated)
+    found_commit_inside_with = False
+    found_log_inside_with = False
+
+    for sub in ast.walk(async_with):
+        # await db.commit()
+        if isinstance(sub, ast.Await):
+            call = sub.value
+            if (
+                isinstance(call, ast.Call)
+                and isinstance(call.func, ast.Attribute)
+                and call.func.attr == "commit"
+            ):
+                found_commit_inside_with = True
+        # logger.info("governance_trust_drift_auto_deprecated", ...)
+        if (
+            isinstance(sub, ast.Call)
+            and isinstance(sub.func, ast.Attribute)
+            and sub.func.attr == "info"
+            and sub.args
+            and isinstance(sub.args[0], ast.Constant)
+            and sub.args[0].value == "governance_trust_drift_auto_deprecated"
+        ):
+            found_log_inside_with = True
+
+    assert found_commit_inside_with, (
+        "AST 證據：await db.commit() 應該出現在 AsyncWith 區塊內。"
+        "若 critic 主張正確（commit 在 with 外），這裡會找不到，斷言應失敗。"
+    )
+    assert found_log_inside_with, (
+        "AST 證據：logger.info('governance_trust_drift_auto_deprecated', ...) "
+        "應該出現在 AsyncWith 區塊內。"
+    )
+
+    # 檢查函式 body（不深入子節點）—— commit + log 不該出現在頂層
+    for top_stmt in func.body:
+        # 直接子節點 — 不是 AsyncWith 的話，不該包含 commit/log
+        if isinstance(top_stmt, ast.AsyncWith):
+            continue
+        for sub in ast.walk(top_stmt):
+            # 不該出現 await db.commit() 在 with 外
+            if isinstance(sub, ast.Await):
+                call = sub.value
+                if (
+                    isinstance(call, ast.Call)
+                    and isinstance(call.func, ast.Attribute)
+                    and call.func.attr == "commit"
+                ):
+                    raise AssertionError(
+                        f"BUG！await db.commit() 出現在函式頂層 line {sub.lineno}（with 區塊外）— "
+                        "critic 主張為真，這就是 silent failure 的來源。"
+                    )
+
+
+# ============================================================================
+# 證據 B：行為驗證 — 即使在「context-exit-closes-session」嚴格 mock 下
+# log 也正常出現，證明 commit 跑在 with 內
+# ============================================================================
+
+def _make_low_trust_old_playbook(playbook_id: str = "PB-STALE-OLD"):
+    rec = MagicMock()
+    rec.trust_score = 0.05
+    rec.playbook_id = playbook_id
+    rec.status = "approved"
+    rec.last_used_at = None
+    rec.created_at = datetime.now(timezone.utc) - timedelta(days=45)
+    return rec
+
+
+class _ClosedAfterExitContext:
+    """模擬真實 SQLAlchemy AsyncSession 行為：
+    __aexit__ 後將 commit 替換為拋 InvalidRequestError，模擬 closed session。
+
+    若 critic 主張為真（commit 在 with 外）：
+      → 第二次 commit 會拋錯 → log 不會出現
+    若 critic 主張為假（commit 在 with 內）：
+      → commit 在 __aexit__ 之前就執行完 → log 正常出現
+
+    第二次 enter（_alert 內）我們也保持替換行為（_alert 內的 commit 該拋錯，
+    這是良性的 — _alert 自身有 try/except 吞掉並 log governance_pg_write_failed）。
+    """
+
+    def __init__(self, db):
+        self._db = db
+        self.enter_count = 0
+        self.exit_count = 0
+
+    async def __aenter__(self):
+        self.enter_count += 1
+        return self._db
+
+    async def __aexit__(self, exc_type, exc, tb):
+        self.exit_count += 1
+
+        async def _raise_closed():
+            raise InvalidRequestError(
+                "(simulated) Session is closed; commit() called after context exit"
+            )
+
+        self._db.commit = _raise_closed
+        return False
+
+
+@pytest.mark.asyncio
+async def test_log_appears_proves_commit_runs_inside_with(caplog):
+    """行為證據：當 __aexit__ 後 commit 會拋錯時，log 仍然出現
+    → 證明 commit 是在 with 內就跑完了，不在 with 外（與 critic 主張相反）
+    """
+    caplog.set_level(logging.INFO)
+
+    stale = _make_low_trust_old_playbook("PB-STALE-OLD")
+
+    mock_result = MagicMock()
+    mock_result.scalars.return_value.all.return_value = [stale]
+
+    mock_db = AsyncMock()
+    mock_db.execute = AsyncMock(return_value=mock_result)
+    mock_db.commit = AsyncMock()  # 初始 commit OK，__aexit__ 後會被替換成拋錯
+
+    alerter = AsyncMock()
+    alerter.alert_governance = AsyncMock()
+    agent = GovernanceAgent(alerter=alerter)
+
+    ctx = _ClosedAfterExitContext(mock_db)
+
+    raised = None
+    with patch(
+        "src.services.governance_agent.get_db_context",
+        return_value=ctx,
+    ):
+        try:
+            result = await agent.check_trust_drift()
+        except InvalidRequestError as e:
+            raised = e
+            result = None
+
+    # 1. with 區塊有正確 enter/exit
+    assert ctx.enter_count >= 1
+    assert ctx.exit_count >= 1
+
+    # 2. mutation 已發生
+    assert stale.status == "deprecated"
+
+    # 3. KEY：log 出現了 — 證明 commit 在 __aexit__ 之前就跑完
+    log_text = " | ".join(rec.getMessage() for rec in caplog.records)
+    assert "governance_trust_drift_auto_deprecated" in log_text, (
+        f"如果 critic 主張為真（commit 在 with 外），log 不該出現。"
+        f"但實際 log 出現了 → 證明 critic 主張為假。Log: {log_text!r}"
+    )
+
+    # 4. check_trust_drift 沒拋例外（_alert 內的 commit 拋錯被 try/except 吞掉，是良性的）
+    assert raised is None, (
+        f"check_trust_drift 不應拋例外。實際拋了：{raised}"
+    )
+    assert result is not None
+    assert result["auto_deprecated"] == 1
--- a/apps/web/src/components/dashboard/flywheel-kpi-card.tsx
+++ b/apps/web/src/components/dashboard/flywheel-kpi-card.tsx
@@ -19,7 +19,9 @@ const WS_BASE = API_BASE.replace(/^https/, 'wss').replace(/^http/, 'ws')

 interface FlywheelSummary {
  playbook_count: number
-  execution_success_rate: number
+  // 2026-05-02 ogt + Claude Opus 4.7 — 後端 Bug 2 修復連動：
+  // 樣本不足（< FLYWHEEL_MIN_SAMPLE）時後端回 null，line 122 的 != null guard 已正確處理
+  execution_success_rate: number | null
  today_processed: number
  flywheel_conversions_today: number
  km_vectorized_rate: number