fix(api): stabilize flywheel success rate window
This commit is contained in:
@@ -241,6 +241,9 @@ class FlywheelStatsService:
|
||||
# 執行成功率的 source of truth 是 auto_repair_executions。
|
||||
# Redis playbook success_count/failure_count 會因回寫鏈路中斷而落後,
|
||||
# 造成 governance / heartbeat 判定「飛輪沒有執行」。
|
||||
# 2026-05-29 Codex:
|
||||
# 24h 低流量不是資料管線斷流;若 24h 未達最小樣本,改用 7d
|
||||
# 穩定窗口,避免 FlywheelExecutionRateMissing 長期誤報。
|
||||
try:
|
||||
async with get_db_context() as db:
|
||||
row = await db.execute(
|
||||
@@ -257,7 +260,22 @@ class FlywheelStatsService:
|
||||
if db_total_exec >= FLYWHEEL_MIN_SAMPLE:
|
||||
db_total_success = int(repair_stats.success or 0)
|
||||
return count, db_total_success / db_total_exec
|
||||
if db_total_exec > 0:
|
||||
|
||||
fallback_row = await db.execute(
|
||||
text("""
|
||||
SELECT
|
||||
COUNT(*) FILTER (WHERE success IS TRUE) AS success,
|
||||
COUNT(*) AS total
|
||||
FROM auto_repair_executions
|
||||
WHERE created_at >= NOW() - interval '7 days'
|
||||
""")
|
||||
)
|
||||
fallback_stats = fallback_row.one()
|
||||
fallback_total = int(fallback_stats.total or 0)
|
||||
if fallback_total >= FLYWHEEL_MIN_SAMPLE:
|
||||
fallback_success = int(fallback_stats.success or 0)
|
||||
return count, fallback_success / fallback_total
|
||||
if db_total_exec > 0 or fallback_total > 0:
|
||||
return count, None
|
||||
except Exception:
|
||||
logger.warning("flywheel_stats_auto_repair_execution_query_failed")
|
||||
|
||||
Reference in New Issue
Block a user