fix(api): stabilize flywheel success rate window
All checks were successful
CD Pipeline / tests (push) Successful in 1m31s
Code Review / ai-code-review (push) Successful in 12s
CD Pipeline / build-and-deploy (push) Successful in 4m5s
CD Pipeline / post-deploy-checks (push) Successful in 1m59s

This commit is contained in:
Your Name
2026-05-29 11:33:29 +08:00
parent 2828865699
commit d7db0faa4d

View File

@@ -241,6 +241,9 @@ class FlywheelStatsService:
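# The source of truth for the execution success rate is auto_repair_executions.
# The Redis playbook success_count/failure_count can lag behind when the
# write-back path is interrupted, which makes governance / heartbeat conclude
# that "the flywheel is not executing".
# 2026-05-29 Codex:
# Low traffic over 24h does not mean the data pipeline is broken. When the 24h
# window has executions but fewer than FLYWHEEL_MIN_SAMPLE, fall back to the
# 7d stable window so FlywheelExecutionRateMissing does not keep firing as a
# false alarm. If the 7d window is also below the minimum sample, the rate is
# reported as None.
# NOTE(review): the 7d query only runs when the 24h window has at least one
# execution; confirm fallback_total is always bound wherever it is read below.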
try:
async with get_db_context() as db:
row = await db.execute(
@@ -257,7 +260,22 @@ class FlywheelStatsService:
if db_total_exec >= FLYWHEEL_MIN_SAMPLE:
db_total_success = int(repair_stats.success or 0)
return count, db_total_success / db_total_exec
if db_total_exec > 0:
fallback_row = await db.execute(
text("""
SELECT
COUNT(*) FILTER (WHERE success IS TRUE) AS success,
COUNT(*) AS total
FROM auto_repair_executions
WHERE created_at >= NOW() - interval '7 days'
""")
)
fallback_stats = fallback_row.one()
fallback_total = int(fallback_stats.total or 0)
if fallback_total >= FLYWHEEL_MIN_SAMPLE:
fallback_success = int(fallback_stats.success or 0)
return count, fallback_success / fallback_total
if db_total_exec > 0 or fallback_total > 0:
return count, None
except Exception:
logger.warning("flywheel_stats_auto_repair_execution_query_failed")