Files
awoooi/apps/api/src/core/feature_flags.py
OG T 76558a3cd9
Some checks failed
CD Pipeline / build-and-deploy (push) Has been cancelled
feat(AIOps): 全開 P1-P6 feature flags + Nemotron + offline replay loop
- configmap: 啟用 AIOPS_P1~P6 全部總開關與子開關
- configmap: ENABLE_NEMOTRON_COLLABORATION=true(回歸 120s timeout)
- feature_flags.py: 補齊 AIOPS_P6_GOVERNANCE_ENABLED 缺失欄位
- main.py: 掛載 run_offline_replay_loop(ADR-087 Phase 6)

2026-04-15 ogt + Claude Sonnet 4.6(亞太)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-15 21:59:51 +08:00

251 lines
8.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
AWOOOI AIOps Feature Flags
==========================
AI 自主化飛輪 Phase 0-6 功能開關
ADR-080: AI 自主化飛輪總綱
MASTER: docs/superpowers/specs/2026-04-15-MASTER-ai-autonomous-flywheel-v2.md
安全規則:
- 所有布林開關預設 False(唯一例外:AIOPS_P4_SHADOW_MODE 預設 True,即只記錄不觸發;AIOPS_P2_AGENT_TIMEOUT_SEC 為整數,預設 5)— 任何 Phase 必須明確開啟才生效
- Phase 總開關 = False 時,該 Phase 所有子開關均視為 False
- 自我降級後 (D6) 不得自動反向升級,升級必須人工設定 env var
回滾方式:
kubectl set env deployment/awoooi-api AIOPS_P1_ENABLED=false
# ⚠️ pydantic_settings 在 Pod 啟動時讀取 env var 並快取為 Singleton
# kubectl set env 修改後必須重啟 Pod 才生效(非熱重載)
# 緊急回滾:kubectl rollout restart deployment/awoooi-api
2026-04-15 ogt: Phase 0 — 初始建立,ADR-080 批准後啟用
"""
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class AIOpsFeatureFlags(BaseSettings):
    """
    Feature-flag set for the AI autonomous flywheel (Phases 1-6).

    Each phase has one master switch plus fine-grained sub-switches.
    Values are resolved in this order: environment variables, then the
    ``.env`` file, then the defaults declared below.

    Every boolean switch defaults to ``False`` except
    ``AIOPS_P4_SHADOW_MODE``, which defaults to ``True``.
    ``AIOPS_P2_AGENT_TIMEOUT_SEC`` is an integer setting (default ``5``),
    not an on/off switch.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=True,
        extra="ignore",
    )

    # ----------------------------------------------------------------------
    # Phase master switches — set to True only once Phase N's exit
    # criteria have been met.
    # ----------------------------------------------------------------------
    AIOPS_P1_ENABLED: bool = Field(
        default=False,
        description="Phase 1 感官縱深PreDecisionInvestigator + EvidenceSnapshot + PostExecutionVerifier",
    )
    AIOPS_P2_ENABLED: bool = Field(
        default=False,
        description="Phase 2 多 Agent 協作5 角色全部上線Diagnostician/Solver/Reviewer/Critic/Coordinator",
    )
    AIOPS_P3_ENABLED: bool = Field(
        default=False,
        description="Phase 3 學習閉環重建3 根因修復 + EWMA + Evolver + Fine-tune pipeline",
    )
    AIOPS_P4_ENABLED: bool = Field(
        default=False,
        description="Phase 4 動態異常偵測Holt-Winters + Drain3 + Prophet + 主動巡檢",
    )
    AIOPS_P5_ENABLED: bool = Field(
        default=False,
        description="Phase 5 修復抽象化Declarative + Blast Radius 四級分控 + GitOps PR",
    )
    AIOPS_P6_ENABLED: bool = Field(
        default=False,
        description="Phase 6 自我治理閉環SLO + Trust Drift + KB Rot + 離線回放 + 自我降級",
    )

    # ----------------------------------------------------------------------
    # Phase 1 sub-switches
    # ----------------------------------------------------------------------
    AIOPS_P1_PRE_DECISION_INVESTIGATOR: bool = Field(
        default=False,
        description="P1: PreDecisionInvestigator 是否在決策前執行 MCP 感官蒐集(可獨立關閉)",
    )
    AIOPS_P1_POST_EXECUTION_VERIFIER: bool = Field(
        default=False,
        description="P1: PostExecutionVerifier 是否在每次執行後驗證狀態",
    )

    # ----------------------------------------------------------------------
    # Phase 2 sub-switches
    # ----------------------------------------------------------------------
    AIOPS_P2_CRITIC_ENABLED: bool = Field(
        default=False,
        description="P2: Critic Agent 是否啟用辯證挑戰(關閉可降低延遲但失去質疑機制)",
    )
    # Integer setting (seconds), not a boolean switch.
    AIOPS_P2_AGENT_TIMEOUT_SEC: int = Field(
        default=5,
        description="P2: 單 Agent 熔斷閾值(秒),超時則 Coordinator 降級處理",
    )

    # ----------------------------------------------------------------------
    # Phase 3 sub-switches
    # ----------------------------------------------------------------------
    AIOPS_P3_FINETUNE_EXPORT: bool = Field(
        default=False,
        description="P3: Fine-tune JSONL 每週匯出到 MinIO 是否執行",
    )
    AIOPS_P3_EVOLVER_ENABLED: bool = Field(
        default=False,
        description="P3: Evolver Agent 是否執行 Playbook 自動合併與封存",
    )
    AIOPS_P3_KNOWLEDGE_DECAY: bool = Field(
        default=False,
        description="P3: 30 天知識遺忘 job 是否執行(標 decayed降到 cold index",
    )

    # ----------------------------------------------------------------------
    # Phase 4 sub-switches
    # ----------------------------------------------------------------------
    AIOPS_P4_DYNAMIC_BASELINE: bool = Field(
        default=False,
        description="P4: Holt-Winters 動態基線服務是否啟用",
    )
    AIOPS_P4_LOG_ANOMALY: bool = Field(
        default=False,
        description="P4: Drain3 日誌異常偵測是否啟用",
    )
    AIOPS_P4_TREND_PREDICTOR: bool = Field(
        default=False,
        description="P4: Prophet 趨勢預測是否啟用(預測 4h 內超閾值風險)",
    )
    AIOPS_P4_PROACTIVE_INSPECTOR: bool = Field(
        default=False,
        description="P4: 主動巡檢每 5min 是否執行",
    )
    # The only boolean in this class that defaults to True.
    AIOPS_P4_SHADOW_MODE: bool = Field(
        default=True,
        description="P4: Shadow Mode = True 時動態偵測只記錄不觸發 AlertFalse = 真實觸發(需先觀察噪音率)",
    )

    # ----------------------------------------------------------------------
    # Phase 5 sub-switches
    # ----------------------------------------------------------------------
    AIOPS_P5_BLAST_RADIUS_CHECK: bool = Field(
        default=False,
        description="P5: Blast Radius 評估是否執行False = 全部視為低風險自動執行,危險)",
    )
    AIOPS_P5_GITOPS_PR: bool = Field(
        default=False,
        description="P5: 高風險修復Blast Radius > 50是否走 GitOps Gitea PR 流程",
    )
    AIOPS_P5_DRY_RUN_ENFORCED: bool = Field(
        default=False,
        description="P5: Declarative apply 前是否強制 dry-runFalse = 跳過 dry-run危險",
    )

    # ----------------------------------------------------------------------
    # Phase 6 sub-switches
    # ----------------------------------------------------------------------
    AIOPS_P6_SELF_DEMOTION: bool = Field(
        default=False,
        description="P6: 自我降級邏輯是否啟用SLO 違反 → 自動提高信心閾值)",
    )
    AIOPS_P6_OFFLINE_REPLAY: bool = Field(
        default=False,
        description="P6: 週度離線回放 100 案是否執行",
    )
    AIOPS_P6_KB_ROT_CLEANER: bool = Field(
        default=False,
        description="P6: 月度 KB 腐爛清理 job 是否執行",
    )
    AIOPS_P6_TRUST_DRIFT_DETECTOR: bool = Field(
        default=False,
        description="P6: Playbook trust 分布漂移偵測是否啟用",
    )
    AIOPS_P6_GOVERNANCE_ENABLED: bool = Field(
        default=False,
        description="P6: 治理閉環總開關offline_replay_service / model_rollback_service 守衛)",
    )

    def is_phase_enabled(self, phase: int) -> bool:
        """
        Report whether the master switch of the given phase is on.

        Args:
            phase: Phase number, 1 through 6.

        Returns:
            bool: The value of ``AIOPS_P<phase>_ENABLED``; ``False`` for
            any phase number outside 1-6.

        Usage:
            if flags.is_phase_enabled(1):
                await pre_decision_investigator.investigate(...)
        """
        master_switches = (
            self.AIOPS_P1_ENABLED,
            self.AIOPS_P2_ENABLED,
            self.AIOPS_P3_ENABLED,
            self.AIOPS_P4_ENABLED,
            self.AIOPS_P5_ENABLED,
            self.AIOPS_P6_ENABLED,
        )
        # Key the switches by phase number (1-based) and look the phase up.
        switch_by_phase = dict(enumerate(master_switches, start=1))
        return switch_by_phase.get(phase, False)

    def is_sub_flag_enabled(self, flag_name: str) -> bool:
        """
        Check a fine-grained sub-switch, gated by its parent phase switch.

        The phase number is parsed from the second underscore-separated
        token of ``flag_name`` (for example ``P1`` in
        ``AIOPS_P1_PRE_DECISION_INVESTIGATOR``).

        Args:
            flag_name: Sub-switch attribute name, e.g.
                "AIOPS_P1_PRE_DECISION_INVESTIGATOR".

        Returns:
            bool: ``True`` only when the parent phase switch is on and the
            named attribute is truthy. ``False`` when the name cannot be
            parsed, the phase is off, or the attribute does not exist.

        Usage:
            if flags.is_sub_flag_enabled("AIOPS_P1_PRE_DECISION_INVESTIGATOR"):
                ...
        """
        tokens = flag_name.split("_")
        if len(tokens) < 3:
            return False

        phase_token = tokens[1]
        if not phase_token.startswith("P"):
            return False

        # Everything after the leading "P" must parse as the phase number.
        try:
            phase_number = int(phase_token[1:])
        except ValueError:
            return False

        # The sub-switch only counts while its parent phase is enabled.
        if self.is_phase_enabled(phase_number):
            return bool(getattr(self, flag_name, False))
        return False
# Module-level singleton, created once when this module is imported.
# Per the original author's note, this mirrors the `settings` pattern in core/config.py.
# Usage: from src.core.feature_flags import aiops_flags
aiops_flags = AIOpsFeatureFlags()
def get_aiops_flags() -> AIOpsFeatureFlags:
    """
    Return the module-level ``aiops_flags`` instance.

    Intended for use as a FastAPI dependency.

    Returns:
        AIOpsFeatureFlags: The same shared instance on every call; no new
        object is created here.

    Usage:
        @router.get("/status")
        async def status(flags: AIOpsFeatureFlags = Depends(get_aiops_flags)):
            return {"p1": flags.AIOPS_P1_ENABLED}
    """
    return aiops_flags