feat(flywheel-c2-c3): C2 hasType4接真實API + C3 WebSocket指數退避重連
Some checks failed
CD Pipeline / build-and-deploy (push) Failing after 3m41s

C2: flywheel_stats_service 加 type4_count query → API 回傳
    flywheel-diagram.tsx hasType4 改由 type4Count prop 驅動(非 false)
    flywheel-kpi-card.tsx 傳入 type4Count={flowData?.type4_count}

C3: WebSocket onclose 加指數退避重連 (1s→2s→4s→最大30s)
    cancelled 旗標確保 unmount 後不重連
    wsRetryTimer 加入 cleanup

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
OG T
2026-04-12 18:45:40 +08:00
parent 079d0e89b9
commit 65a5220e16
3 changed files with 40 additions and 13 deletions

View File

@@ -79,6 +79,7 @@ class FlywheelMetrics:
self.node_stats = node_stats
self.current_flow = current_flow
self.computed_at = computed_at
self.type4_count: int = 0 # TYPE-4 incidents 數 (ADR-073-C C2)
def to_prometheus_lines(self) -> str:
"""輸出 Prometheus text format"""
@@ -115,6 +116,7 @@ class FlywheelMetrics:
return {
"nodes": self.node_stats,
"current_flow": self.current_flow,
"type4_count": self.type4_count,
"computed_at": self.computed_at.isoformat(),
}
@@ -162,9 +164,10 @@ class FlywheelStatsService:
today_processed,
node_stats,
current_flow,
type4_count,
) = await self._incident_stats(now)
return FlywheelMetrics(
metrics = FlywheelMetrics(
playbook_count=playbook_count,
execution_success_rate=execution_success_rate,
km_unvectorized_count=km_unvectorized_count,
@@ -177,6 +180,8 @@ class FlywheelStatsService:
current_flow=current_flow,
computed_at=now,
)
metrics.type4_count = type4_count
return metrics
# ------------------------------------------------------------------
# Internal helpers
@@ -274,6 +279,16 @@ class FlywheelStatsService:
)
incidents_stuck = stuck_q.scalar_one_or_none() or 0
# TYPE-4 Incident 數 (ADR-073-C C2 — 供前端 hasType4 判斷)
# 2026-04-12 ogt
type4_q = await db.execute(
select(func.count()).where(
IncidentRecord.notification_type == "TYPE-4",
IncidentRecord.status == IncidentStatus.INVESTIGATING.value,
)
)
type4_count = type4_q.scalar_one_or_none() or 0
# 今日處理數
today_q = await db.execute(
select(func.count()).where(
@@ -355,11 +370,11 @@ class FlywheelStatsService:
},
}
return alertname_null_rate, incidents_stuck, today_processed, node_stats, current_flow
return alertname_null_rate, incidents_stuck, today_processed, node_stats, current_flow, type4_count
except Exception:
logger.exception("flywheel_stats_incident_error")
return 0.0, 0, 0, {n: {"status": "unknown"} for n in FLYWHEEL_NODES}, []
return 0.0, 0, 0, {n: {"status": "unknown"} for n in FLYWHEEL_NODES}, [], 0
def _status_to_node(status: str) -> str:

View File

@@ -24,6 +24,8 @@ interface FlywheelDiagramProps {
currentFlow?: FlowItem[]
/** active node names from node_stats */
activeNodes?: Record<string, { status: string }>
/** TYPE-4 active incident count from API (ADR-073-C C2) */
type4Count?: number
}
const NODES = [
@@ -44,7 +46,7 @@ function toSVG(pct: { x: number; y: number }) {
return { cx: (pct.x / 100) * W, cy: (pct.y / 100) * H }
}
export function FlywheelDiagram({ currentFlow = [], activeNodes = {} }: FlywheelDiagramProps) {
export function FlywheelDiagram({ currentFlow = [], activeNodes = {}, type4Count = 0 }: FlywheelDiagramProps) {
// Count active incidents per node
const nodeCounts: Record<string, number> = {}
for (const f of currentFlow) {
@@ -52,7 +54,7 @@ export function FlywheelDiagram({ currentFlow = [], activeNodes = {} }: Flywheel
}
const hasType3 = currentFlow.some(f => f.current_node === 'reasoning' || f.current_node === 'execution')
const hasType4 = false // TYPE-4 shown when diagnosis node has stuck items
const hasType4 = type4Count > 0
// Build arc path through nodes in order
const pts = NODES.map(n => toSVG(n))

View File

@@ -31,7 +31,7 @@ interface FlywheelSummary {
export function FlywheelKPICard() {
const [data, setData] = useState<FlywheelSummary | null>(null)
const [flowData, setFlowData] = useState<{ current_flow: FlowItem[]; nodes: Record<string, { status: string }> } | null>(null)
const [flowData, setFlowData] = useState<{ current_flow: FlowItem[]; nodes: Record<string, { status: string }>; type4_count?: number } | null>(null)
const [error, setError] = useState(false)
const wsRef = useRef<WebSocket | null>(null)
@@ -55,14 +55,18 @@ export function FlywheelKPICard() {
load()
// C3: WebSocket — upgrades from polling when available
// C3: WebSocket — upgrades from polling when available, with reconnect (ADR-073-C C3)
// 指數退避重連: 1s → 2s → 4s → 8s → 最大 30s
let wsRetryDelay = 1000
let wsRetryTimer: ReturnType<typeof setTimeout> | null = null
const connectWS = () => {
if (!WS_BASE) return
if (!WS_BASE || cancelled) return
const ws = new WebSocket(`${WS_BASE}/api/v1/stats/flywheel/ws`)
wsRef.current = ws
ws.onopen = () => {
// WS connected — stop HTTP polling
wsRetryDelay = 1000 // 連線成功 — 重置退避
if (pollId) { clearInterval(pollId); pollId = null }
}
ws.onmessage = (e) => {
@@ -75,10 +79,14 @@ export function FlywheelKPICard() {
} catch { /* ignore malformed */ }
}
ws.onclose = () => {
// WS closed — fall back to polling
if (!cancelled && !pollId) {
pollId = setInterval(load, 30_000)
}
if (cancelled) return
// 退回 HTTP polling
if (!pollId) pollId = setInterval(load, 30_000)
// 指數退避重連
wsRetryTimer = setTimeout(() => {
wsRetryDelay = Math.min(wsRetryDelay * 2, 30_000)
connectWS()
}, wsRetryDelay)
}
ws.onerror = () => ws.close()
}
@@ -90,6 +98,7 @@ export function FlywheelKPICard() {
return () => {
cancelled = true
if (pollId) clearInterval(pollId)
if (wsRetryTimer) clearTimeout(wsRetryTimer)
wsRef.current?.close()
}
}, [])
@@ -175,6 +184,7 @@ export function FlywheelKPICard() {
<FlywheelDiagram
currentFlow={flowData?.current_flow ?? []}
activeNodes={flowData?.nodes ?? {}}
type4Count={flowData?.type4_count ?? 0}
/>
</div>