""" AWOOOI AIOps Phase 4 — Trend Predictor(趨勢預測) =================================================== 職責:numpy 線性回歸,預測 metric 在未來 N 小時是否超越警戒閾值 核心 API: predict_breach(metric_name, current_value, threshold) -> TrendPrediction 設計原則: - 不使用 Prophet(500MB+ Stan 依賴),改用 numpy 線性回歸 - 從 DynamicBaselineRecord 取歷史窗口資料,計算趨勢斜率 - Shadow Mode:預測只記錄 logger.info,不觸發 Alert - 熔斷:numpy 失敗 → fallback 到最近值外推 ADR-084: Phase 4 動態異常偵測源頭升級 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 4 初始建立 """ from __future__ import annotations from dataclasses import dataclass from typing import Any import structlog from src.utils.timezone import now_taipei logger = structlog.get_logger(__name__) # ── 常數 ──────────────────────────────────────────────────────────────────── FORECAST_HOURS = 4 # 預測未來 4 小時 MIN_DATAPOINTS_FOR_TREND = 12 # 至少 12 個資料點才能做回歸(12h) TREND_CONFIDENCE_THRESHOLD = 0.7 # R² > 0.7 → 趨勢可信 REDIS_KEY_HISTORY = "trend:history:" # hash: metric_name → JSON list of (ts, value) REDIS_TTL_HISTORY = 86400 * 2 # 保留 2 天歷史 MAX_HISTORY_POINTS = 336 # 最多 336 個點(14天 × 24h) # ───────────────────────────────────────────────────────────────────────────── # Data Types # ───────────────────────────────────────────────────────────────────────────── @dataclass class TrendPrediction: """趨勢預測結果""" metric_name: str current_value: float threshold: float forecast_hours: int # 預測時間窗口(小時) predicted_value: float # 預測在 forecast_hours 後的值 slope_per_hour: float # 每小時變化量(線性斜率) r_squared: float # 線性回歸 R²(0-1,越高趨勢越清晰) will_breach: bool # 是否預測超越 threshold breach_in_hours: float | None # 預計幾小時後超越(None = 不會超越) confidence: str # high / medium / low / insufficient_data shadow_mode: bool = True detected_at: str = "" # ───────────────────────────────────────────────────────────────────────────── # Main Service # ───────────────────────────────────────────────────────────────────────────── class TrendPredictor: """ 趨勢預測服務 工作流程: 1. 從 Redis 取近期歷史資料點(sliding window) 2. numpy 線性回歸計算趨勢斜率 3. 外推預測 4h 後的值 4. 判斷是否在 4h 內超越警戒閾值 """ async def predict_breach( self, metric_name: str, current_value: float, threshold: float, forecast_hours: int = FORECAST_HOURS, ) -> TrendPrediction: """ 預測 metric 是否在 forecast_hours 內超越 threshold。 Args: metric_name: 基線識別名(需與 DynamicBaselineService 一致) current_value: 當前觀測值 threshold: 警戒閾值(超越 = 預警) forecast_hours: 預測窗口(預設 4h) Returns: TrendPrediction(Shadow Mode 時只記錄不觸發) """ from src.core.feature_flags import aiops_flags if not aiops_flags.AIOPS_P4_TREND_PREDICTOR: return self._no_data_result(metric_name, current_value, threshold, forecast_hours) shadow_mode = aiops_flags.AIOPS_P4_SHADOW_MODE detected_at = now_taipei().isoformat() # 推送當前值到歷史 await self._append_history(metric_name, current_value) # 取歷史資料 history = await self._get_history(metric_name) if len(history) < MIN_DATAPOINTS_FOR_TREND: return TrendPrediction( metric_name=metric_name, current_value=current_value, threshold=threshold, forecast_hours=forecast_hours, predicted_value=current_value, slope_per_hour=0.0, r_squared=0.0, will_breach=current_value >= threshold, breach_in_hours=0.0 if current_value >= threshold else None, confidence="insufficient_data", shadow_mode=shadow_mode, detected_at=detected_at, ) prediction = self._linear_regression_predict( history=history, current_value=current_value, threshold=threshold, forecast_hours=forecast_hours, ) prediction.shadow_mode = shadow_mode prediction.detected_at = detected_at if prediction.will_breach: logger.info( "trend_breach_predicted", metric=metric_name, current=current_value, predicted=prediction.predicted_value, threshold=threshold, breach_in_hours=prediction.breach_in_hours, slope=prediction.slope_per_hour, r2=prediction.r_squared, shadow_mode=shadow_mode, ) return prediction async def push_datapoint( self, metric_name: str, value: float, ) -> None: """主動推送資料點(供 ProactiveInspector 呼叫)。""" await self._append_history(metric_name, value) # ────────────────────────────────────────────────────────────────────────── # Private Helpers # ────────────────────────────────────────────────────────────────────────── def _linear_regression_predict( self, history: list[tuple[float, float]], # (timestamp, value) current_value: float, threshold: float, forecast_hours: int, ) -> TrendPrediction: """ numpy 線性回歸:y = slope * x + intercept x = 相對小時數(0 到 N),y = metric 值 """ metric_name = "" # 呼叫方 fillback try: import numpy as np times = np.array([h[0] for h in history], dtype=float) values = np.array([h[1] for h in history], dtype=float) # 相對化時間(小時為單位) times_rel = (times - times[0]) / 3600.0 # 線性回歸:polyfit degree=1 coeffs = np.polyfit(times_rel, values, 1) slope = float(coeffs[0]) # 每小時斜率 intercept = float(coeffs[1]) # R² 計算 fitted = np.polyval(coeffs, times_rel) ss_res = float(np.sum((values - fitted) ** 2)) ss_tot = float(np.sum((values - np.mean(values)) ** 2)) r2 = 1.0 - ss_res / ss_tot if ss_tot > 0 else 0.0 # 預測 forecast_hours 後的值 current_time_rel = (now_taipei().timestamp() - times[0]) / 3600.0 predicted_value = slope * (current_time_rel + forecast_hours) + intercept # 信心度 if r2 >= TREND_CONFIDENCE_THRESHOLD: confidence = "high" elif r2 >= 0.4: confidence = "medium" else: confidence = "low" # 是否超越閾值 will_breach = predicted_value >= threshold breach_in_hours: float | None = None if will_breach and slope > 0 and current_value < threshold: # 計算幾小時後超越:current_value + slope * h = threshold breach_in_hours = round((threshold - current_value) / slope, 2) breach_in_hours = min(breach_in_hours, float(forecast_hours)) elif current_value >= threshold: breach_in_hours = 0.0 return TrendPrediction( metric_name=metric_name, current_value=current_value, threshold=threshold, forecast_hours=forecast_hours, predicted_value=round(predicted_value, 4), slope_per_hour=round(slope, 6), r_squared=round(r2, 4), will_breach=will_breach, breach_in_hours=breach_in_hours, confidence=confidence, ) except Exception as e: logger.warning("trend_regression_failed", error=str(e)) # Fallback:最近值外推 return TrendPrediction( metric_name=metric_name, current_value=current_value, threshold=threshold, forecast_hours=forecast_hours, predicted_value=current_value, slope_per_hour=0.0, r_squared=0.0, will_breach=current_value >= threshold, breach_in_hours=0.0 if current_value >= threshold else None, confidence="low", ) async def _append_history(self, metric_name: str, value: float) -> None: """推送資料點到 Redis 滑動視窗(最舊的被 trim 掉)。""" try: import json from src.core.redis_client import get_redis r = get_redis() key = f"{REDIS_KEY_HISTORY}{metric_name}" point = json.dumps([now_taipei().timestamp(), value]) await r.rpush(key, point) await r.ltrim(key, -MAX_HISTORY_POINTS, -1) # 保留最新 N 個點 await r.expire(key, REDIS_TTL_HISTORY) except Exception as e: logger.warning("trend_history_append_failed", metric=metric_name, error=str(e)) async def _get_history(self, metric_name: str) -> list[tuple[float, float]]: """從 Redis 取歷史資料點。""" try: import json from src.core.redis_client import get_redis r = get_redis() key = f"{REDIS_KEY_HISTORY}{metric_name}" raw = await r.lrange(key, 0, -1) return [tuple(json.loads(item)) for item in raw] except Exception as e: logger.warning("trend_history_get_failed", metric=metric_name, error=str(e)) return [] def _no_data_result( self, metric_name: str, current_value: float, threshold: float, forecast_hours: int, ) -> TrendPrediction: """Feature flag 關閉時的空結果。""" return TrendPrediction( metric_name=metric_name, current_value=current_value, threshold=threshold, forecast_hours=forecast_hours, predicted_value=current_value, slope_per_hour=0.0, r_squared=0.0, will_breach=False, breach_in_hours=None, confidence="insufficient_data", shadow_mode=True, detected_at=now_taipei().isoformat(), ) # ───────────────────────────────────────────────────────────────────────────── # Singleton # ───────────────────────────────────────────────────────────────────────────── _predictor: TrendPredictor | None = None def get_trend_predictor() -> TrendPredictor: global _predictor if _predictor is None: _predictor = TrendPredictor() return _predictor