""" Drift Detector - Phase 25 P2 Config Drift Detection ===================================================== 職責:比對 Git YAML vs K8s 實際狀態,輸出結構化 DriftItem 列表 不判斷嚴重性,不解釋意圖,只做事實比對 版本: v1.0 建立: 2026-04-04 (台北時區) 建立者: ogt (首席架構師設計) + Claude Code (實作) """ from __future__ import annotations import asyncio import subprocess import uuid from pathlib import Path from typing import Any import structlog import yaml from src.models.drift import DriftItem, DriftLevel, DriftReport logger = structlog.get_logger(__name__) # 白名單欄位(靜默記錄,不告警) _DEFAULT_ALLOWLIST_FIELDS = frozenset([ "spec.replicas", "spec.template.spec.containers[*].resources.requests", "spec.template.spec.containers[*].resources.limits", "metadata.annotations", "metadata.labels.pod-template-hash", "metadata.resourceVersion", "metadata.generation", "metadata.uid", "status", ]) # 關鍵欄位(必須立即告警) _DEFAULT_CRITICAL_FIELDS = frozenset([ "spec.template.spec.containers[*].image", "spec.template.spec.containers[*].env", "spec.template.spec.containers[*].ports", "spec.template.spec.volumes", "spec.template.spec.serviceAccountName", ]) class GitStateReader: """從 Git HEAD 讀取 K8s YAML 狀態""" def __init__(self, k8s_dir: str = "k8s"): self._k8s_dir = Path(k8s_dir) async def read(self, namespace: str) -> dict[str, Any]: """ 讀取 Git HEAD 中指定 namespace 的所有 K8s YAML Returns: {resource_key: parsed_yaml_dict} resource_key 格式: "{kind}/{name}" """ try: result = await asyncio.get_event_loop().run_in_executor( None, self._read_sync, namespace ) return result except Exception as e: logger.warning("git_state_read_failed", namespace=namespace, error=str(e)) return {} def _read_sync(self, namespace: str) -> dict[str, Any]: resources: dict[str, Any] = {} if not self._k8s_dir.exists(): logger.warning("k8s_dir_not_found", path=str(self._k8s_dir)) return resources for yaml_file in self._k8s_dir.rglob("*.yaml"): try: with open(yaml_file) as f: docs = list(yaml.safe_load_all(f)) for doc in docs: if not doc or not isinstance(doc, dict): continue metadata = doc.get("metadata", {}) ns = metadata.get("namespace", "") if ns and ns != namespace: continue kind = doc.get("kind", "") name = metadata.get("name", "") if kind and name: key = f"{kind}/{name}" resources[key] = doc except Exception as e: logger.debug("yaml_parse_failed", file=str(yaml_file), error=str(e)) return resources class K8sStateReader: """從 kubectl 讀取 K8s 實際狀態""" async def read(self, namespace: str) -> dict[str, Any]: """ 透過 kubectl 取得指定 namespace 的實際狀態 Returns: {resource_key: actual_resource_dict} """ try: result = await asyncio.get_event_loop().run_in_executor( None, self._read_sync, namespace ) return result except Exception as e: logger.warning("k8s_state_read_failed", namespace=namespace, error=str(e)) return {} def _read_sync(self, namespace: str) -> dict[str, Any]: resources: dict[str, Any] = {} resource_types = ["deployment", "service", "configmap", "ingress"] for rtype in resource_types: try: proc = subprocess.run( ["kubectl", "get", rtype, "-n", namespace, "-o", "yaml"], capture_output=True, text=True, timeout=30, ) if proc.returncode != 0: logger.debug("kubectl_failed", type=rtype, stderr=proc.stderr[:200]) continue data = yaml.safe_load(proc.stdout) if not data or data.get("kind") != "List": continue for item in data.get("items", []): kind = item.get("kind", rtype.capitalize()) name = item.get("metadata", {}).get("name", "") if name: key = f"{kind}/{name}" resources[key] = item except subprocess.TimeoutExpired: logger.warning("kubectl_timeout", type=rtype, namespace=namespace) except Exception as e: logger.warning("kubectl_error", type=rtype, error=str(e)) return resources class DriftDetector: """ 比對 Git vs K8s 實際狀態,輸出 DriftItem 列表 職責邊界:只做事實比對,不判斷嚴重性,不解釋意圖 """ def __init__( self, k8s_dir: str = "k8s", allowlist_fields: frozenset | None = None, critical_fields: frozenset | None = None, ): self._git_reader = GitStateReader(k8s_dir) self._k8s_reader = K8sStateReader() self._allowlist = allowlist_fields or _DEFAULT_ALLOWLIST_FIELDS self._critical_fields = critical_fields or _DEFAULT_CRITICAL_FIELDS async def scan(self, namespace: str, triggered_by: str = "cron") -> DriftReport: """ 掃描指定 namespace 的漂移 Args: namespace: K8s namespace triggered_by: 觸發來源(cron / webhook / api) Returns: DriftReport(含 DriftItem 列表,尚未分析 intent) """ report_id = str(uuid.uuid4())[:8] logger.info("drift_scan_start", namespace=namespace, report_id=report_id) git_state, k8s_state = await asyncio.gather( self._git_reader.read(namespace), self._k8s_reader.read(namespace), ) items: list[DriftItem] = [] # 比對 Git 中有的資源 for resource_key, git_resource in git_state.items(): actual_resource = k8s_state.get(resource_key) if actual_resource is None: # 資源在 Git 中存在但 K8s 中不存在(可能尚未部署) logger.debug("resource_missing_in_k8s", resource=resource_key) continue kind, name = resource_key.split("/", 1) diffs = self._diff_resources(git_resource, actual_resource, kind, name, namespace) items.extend(diffs) high_count = sum(1 for i in items if i.drift_level == DriftLevel.HIGH) medium_count = sum(1 for i in items if i.drift_level == DriftLevel.MEDIUM) info_count = sum(1 for i in items if i.drift_level == DriftLevel.INFO) logger.info( "drift_scan_done", namespace=namespace, report_id=report_id, high=high_count, medium=medium_count, info=info_count, ) return DriftReport( report_id=report_id, namespace=namespace, items=items, high_count=high_count, medium_count=medium_count, info_count=info_count, triggered_by=triggered_by, ) def _diff_resources( self, git_res: dict, actual_res: dict, kind: str, name: str, namespace: str, ) -> list[DriftItem]: """逐欄位比對兩個資源,回傳差異列表""" items: list[DriftItem] = [] # 只比對 spec 層(metadata 的動態欄位太多) git_spec = git_res.get("spec", {}) actual_spec = actual_res.get("spec", {}) diffs = self._flatten_diff("spec", git_spec, actual_spec) for field_path, (git_val, actual_val) in diffs.items(): is_allowlisted = self._is_allowlisted(field_path) if is_allowlisted: level = DriftLevel.INFO elif self._is_critical(field_path): level = DriftLevel.HIGH else: level = DriftLevel.MEDIUM items.append(DriftItem( resource_kind=kind, resource_name=name, namespace=namespace, field_path=field_path, git_value=git_val, actual_value=actual_val, drift_level=level, is_allowlisted=is_allowlisted, )) return items def _flatten_diff( self, prefix: str, git_dict: Any, actual_dict: Any, ) -> dict[str, tuple[Any, Any]]: """遞迴展開並比對兩個 dict,回傳 {field_path: (git_val, actual_val)}""" diffs: dict[str, tuple[Any, Any]] = {} if not isinstance(git_dict, dict) or not isinstance(actual_dict, dict): if git_dict != actual_dict: diffs[prefix] = (git_dict, actual_dict) return diffs all_keys = set(git_dict.keys()) | set(actual_dict.keys()) for key in all_keys: path = f"{prefix}.{key}" git_val = git_dict.get(key) actual_val = actual_dict.get(key) if git_val == actual_val: continue if isinstance(git_val, dict) and isinstance(actual_val, dict): diffs.update(self._flatten_diff(path, git_val, actual_val)) else: diffs[path] = (git_val, actual_val) return diffs @staticmethod def _pattern_matches(pattern: str, field_path: str) -> bool: """ 匹配 field_path 是否符合 pattern。 支援兩種萬用字元: - [*] → 任意索引 (e.g. containers[*] 匹配 containers[0], containers[1]) - * → 任意字串段 2026-04-05 Claude Code: I4 修正 — 舊邏輯直接 strip [*] 導致 containers[*].image 無法匹配 containers[0].image (首席架構師 Review I4) """ import re as _re # 將 pattern 轉為正則:[*] → \[\d+\],* → [^.]+ regex = _re.escape(pattern) regex = regex.replace(r"\[\*\]", r"\[\d+\]") regex = regex.replace(r"\*", r"[^.]+") # 允許 pattern 是前綴(field_path 可能更深,. 或 [ 或字串結尾均可) return bool(_re.match(f"^{regex}(\\.|\\[|$)", field_path)) def _is_allowlisted(self, field_path: str) -> bool: """判斷欄位是否在白名單(靜默記錄不告警)""" return any(self._pattern_matches(p, field_path) for p in self._allowlist) def _is_critical(self, field_path: str) -> bool: """判斷欄位是否為關鍵欄位(HIGH 等級)""" return any(self._pattern_matches(p, field_path) for p in self._critical_fields) # ============================================================================= # Singleton # ============================================================================= _detector: DriftDetector | None = None def get_drift_detector() -> DriftDetector: global _detector if _detector is None: _detector = DriftDetector() return _detector