From 6810ab359d7d62fa956e6332ddae9e3b860b91ae Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 22 Apr 2026 09:03:40 +0800 Subject: [PATCH] =?UTF-8?q?fix(report):=20=E6=97=A5=E5=A0=B1=E9=87=8D?= =?UTF-8?q?=E7=99=BC=20+=20=E8=87=AA=E5=8B=95=E4=BF=AE=E5=BE=A9=200%=20?= =?UTF-8?q?=E5=85=A9=E5=A4=A7=E6=A0=B9=E5=9B=A0=E4=BF=AE=E5=BE=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 問題一:日度巡檢報告重複發送(多 Pod 各自跑 daily job) - 根因:run_daily_report_loop 沒有接 leader lock 其他 scanner(capacity/hermes/compliance)都有呼叫 try_acquire_daily_lock,唯獨日報 loop 缺失 - 修法:asyncio.sleep 後加 try_acquire_daily_lock("daily_report") 搶不到 lock 的 Pod 直接 continue,等下一個 08:00 問題二:自動修復成功率永遠 0.0% - 根因:_collect_repair_stats 查 incidents.outcome->>'execution_success' 但整條執行鏈路(approval_execution.py NO_ACTION + 真實執行) 從未將 execution_success 寫回 incidents.outcome JSON 導致查詢永遠回 0 - 修法:改查 approval_records.status(EXECUTION_SUCCESS / EXECUTION_FAILED) 這是唯一被穩定寫入的 source of truth Co-Authored-By: Claude Sonnet 4.6 --- .../src/services/report_generation_service.py | 44 ++++++++++--------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/apps/api/src/services/report_generation_service.py b/apps/api/src/services/report_generation_service.py index 7a8b83f4..828632ae 100644 --- a/apps/api/src/services/report_generation_service.py +++ b/apps/api/src/services/report_generation_service.py @@ -219,33 +219,29 @@ class ReportGenerationService: async def _collect_repair_stats(self, since: datetime) -> dict: """ - 收集自動修復統計(IncidentRecord.outcome JSON) + 收集自動修復統計 - 2026-04-14 Claude Sonnet 4.6 修復 — 原本引用不存在的 ApprovalRequestRecord, - 實際 execution_success 儲存在 IncidentRecord.outcome JSON 欄位。 + 2026-04-22 Claude Sonnet 4.6 修復 — incidents.outcome JSON 在執行鏈路中從未被寫入 + execution_success,導致永遠查詢到 0。改查 approval_records.status 作為 source of truth + (approval_execution.py 每次執行後都會寫入 EXECUTION_SUCCESS / EXECUTION_FAILED)。 """ - from sqlalchemy import func, select, text + from sqlalchemy import text from src.db.base import get_db_context - from src.db.models import IncidentRecord async with get_db_context() as db: - # PostgreSQL JSON 路徑查詢:outcome->>'execution_success' - success = await db.scalar( - select(func.count()).select_from(IncidentRecord).where( - IncidentRecord.created_at >= since, - text("outcome->>'execution_success' = 'true'"), - ) - ) or 0 - - failed = await db.scalar( - select(func.count()).select_from(IncidentRecord).where( - IncidentRecord.created_at >= since, - text("outcome->>'execution_success' = 'false'"), - ) - ) or 0 - - return {"success": success, "failed": failed} + row = await db.execute( + text(""" + SELECT + COUNT(*) FILTER (WHERE status = 'execution_success') AS success, + COUNT(*) FILTER (WHERE status = 'execution_failed') AS failed + FROM approval_records + WHERE created_at >= :since + """), + {"since": since}, + ) + r = row.one() + return {"success": int(r.success or 0), "failed": int(r.failed or 0)} async def _collect_km_stats(self, since: datetime) -> int: """收集新增 KM 條目數""" @@ -559,6 +555,12 @@ async def run_daily_report_loop() -> None: ) await asyncio.sleep(sleep_seconds) + # 2026-04-22 Claude Sonnet 4.6: 多 Pod 競速保護 — 只有搶到 Redis SETNX 的 Pod 才發報告 + from src.services.ai_advisory_helpers import try_acquire_daily_lock + if not await try_acquire_daily_lock("daily_report"): + logger.info("daily_report_skipped_other_pod") + continue + logger.info("daily_report_triggered") await service.send_daily_report()