awoooi/apps/api/src/services/drift_detector.py

"""
Drift Detector - Phase 25 P2 Config Drift Detection
=====================================================
職責：比對 Git YAML vs K8s 實際狀態，輸出結構化 DriftItem 列表
不判斷嚴重性，不解釋意圖，只做事實比對

版本: v1.0
建立: 2026-04-04 (台北時區)
建立者: ogt (首席架構師設計) + Claude Code (實作)
"""

from __future__ import annotations

import asyncio
import subprocess
import uuid
from copy import deepcopy
from pathlib import Path
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.parse import quote
from urllib.request import Request, urlopen

import structlog
import yaml

from src.models.drift import DriftItem, DriftLevel, DriftReport

logger = structlog.get_logger(__name__)

# Allowlisted field patterns: drift on these is recorded silently (INFO) and
# never alerted on.
_DEFAULT_ALLOWLIST_FIELDS = frozenset([
    "spec.replicas",
    "spec.template.spec.containers[*].resources.requests",
    "spec.template.spec.containers[*].resources.limits",
    "metadata.annotations",
    "metadata.labels.pod-template-hash",
    "metadata.resourceVersion",
    "metadata.generation",
    "metadata.uid",
    "status",
    # Fields Kubernetes fills in at runtime (the Git manifest leaves them unset;
    # K8s injects a default value or runtime information).
    # 2026-04-16 ogt + Claude Sonnet 4.6: fix false drift alerts triggered by
    # `kubectl rollout restart`.
    # Root cause: after an awoooi-web rollout restart, the restartedAt
    # annotation was detected as "medium" drift.
    "spec.template.metadata.annotations",          # kubectl.kubernetes.io/restartedAt
    "spec.template.metadata.creationTimestamp",
    "spec.template.spec.restartPolicy",            # K8s default: Always (not set in Git)
    "spec.template.spec.dnsPolicy",                # K8s default: ClusterFirst
    "spec.template.spec.terminationGracePeriodSeconds",  # K8s default: 30
    "spec.template.spec.schedulerName",            # K8s default: default-scheduler
    "spec.strategy.rollingUpdate",                 # K8s default: maxSurge=25%
    "spec.strategy.type",                          # K8s default: RollingUpdate
    "spec.progressDeadlineSeconds",                # K8s default: 600
    "spec.revisionHistoryLimit",                   # K8s default: 10
    "metadata.creationTimestamp",
    "spec.template.spec.containers[*].terminationMessagePath",
    "spec.template.spec.containers[*].terminationMessagePolicy",
    "spec.template.spec.containers[*].imagePullPolicy",  # K8s default: IfNotPresent
])

# Critical field patterns: drift on these is classified HIGH and must be
# alerted on immediately (unless the path is also allowlisted, which is
# checked first).
_DEFAULT_CRITICAL_FIELDS = frozenset([
    "spec.template.spec.containers[*].image",
    "spec.template.spec.containers[*].env",
    "spec.template.spec.containers[*].ports",
    "spec.template.spec.volumes",
    "spec.template.spec.serviceAccountName",
])


class GitStateReader:
    """從 Git HEAD 讀取 K8s YAML 狀態"""

    def __init__(self, k8s_dir: str = "k8s"):
        self._k8s_dir = Path(k8s_dir)

    async def read(self, namespace: str) -> dict[str, Any]:
        """
        讀取 Git HEAD 中指定 namespace 的所有 K8s YAML

        Returns:
            {resource_key: parsed_yaml_dict}
            resource_key 格式: "{kind}/{name}"
        """
        try:
            result = await asyncio.get_event_loop().run_in_executor(
                None, self._read_sync, namespace
            )
            return result
        except Exception as e:
            logger.warning("git_state_read_failed", namespace=namespace, error=str(e))
            return {}

    def _read_sync(self, namespace: str) -> dict[str, Any]:
        remote_resources = self._read_remote_sync(namespace)
        if remote_resources:
            return remote_resources

        resources: dict[str, Any] = {}
        kustomization_cache: dict[Path, dict[str, Any] | None] = {}

        if not self._k8s_dir.exists():
            logger.warning("k8s_dir_not_found", path=str(self._k8s_dir))
            return resources

        for yaml_file in self._k8s_dir.rglob("*.yaml"):
            try:
                with open(yaml_file) as f:
                    docs = list(yaml.safe_load_all(f))
                kustomization = self._kustomization_for_file(yaml_file, kustomization_cache)
                for doc in docs:
                    if not doc or not isinstance(doc, dict):
                        continue
                    if kustomization:
                        doc = self._apply_kustomization(doc, kustomization)
                    metadata = doc.get("metadata", {})
                    ns = metadata.get("namespace", "")
                    if ns and ns != namespace:
                        continue
                    kind = doc.get("kind", "")
                    name = metadata.get("name", "")
                    if kind and name:
                        key = f"{kind}/{name}"
                        resources[key] = doc
            except Exception as e:
                logger.debug("yaml_parse_failed", file=str(yaml_file), error=str(e))

        return resources

    def _read_remote_sync(self, namespace: str) -> dict[str, Any]:
        """Read drift source of truth from Gitea main when available."""
        if self._k8s_dir.is_absolute():
            return {}

        try:
            from src.core.config import get_settings
            settings = get_settings()
        except Exception as e:
            logger.debug("drift_remote_settings_unavailable", error=str(e))
            return {}

        repo_dir = self._k8s_dir.as_posix().strip("/")
        namespace_dir = f"{repo_dir}/{namespace}"
        kustomization_path = f"{namespace_dir}/kustomization.yaml"
        kustomization_text = self._fetch_gitea_raw(settings, kustomization_path)
        if not kustomization_text:
            return {}

        try:
            config = yaml.safe_load(kustomization_text) or {}
        except Exception as e:
            logger.warning(
                "drift_remote_kustomization_parse_failed",
                path=kustomization_path,
                error=str(e),
            )
            return {}
        if not isinstance(config, dict):
            return {}

        resources: dict[str, Any] = {}
        resource_paths = self._remote_resource_paths(namespace_dir, config)
        for resource_path in resource_paths:
            content = self._fetch_gitea_raw(settings, resource_path)
            if not content:
                continue
            try:
                docs = list(yaml.safe_load_all(content))
            except Exception as e:
                logger.debug(
                    "drift_remote_yaml_parse_failed",
                    path=resource_path,
                    error=str(e),
                )
                continue
            self._collect_resources(resources, docs, namespace, config)

        if resources:
            logger.info(
                "drift_git_state_remote_read",
                namespace=namespace,
                resources=len(resources),
                source="gitea_main",
            )
        return resources

    @staticmethod
    def _remote_resource_paths(namespace_dir: str, config: dict[str, Any]) -> list[str]:
        resources = config.get("resources") or []
        paths: list[str] = []
        for resource in resources:
            resource_str = str(resource)
            if not resource_str.endswith((".yaml", ".yml")):
                continue
            path = (Path(namespace_dir) / resource_str).as_posix()
            paths.append(path)
        return paths

    def _fetch_gitea_raw(self, settings: Any, path: str) -> str | None:
        api_url = str(settings.GITEA_API_URL).rstrip("/")
        owner = quote(str(settings.GITEA_REPO_OWNER), safe="")
        repo = quote(str(settings.GITEA_REPO_NAME), safe="")
        encoded_path = quote(path, safe="/")
        url = f"{api_url}/api/v1/repos/{owner}/{repo}/raw/{encoded_path}?ref=main"
        headers = {"Accept": "text/plain"}
        token = getattr(settings, "GITEA_API_TOKEN", "")
        if token:
            headers["Authorization"] = f"token {token}"

        request = Request(url, headers=headers)
        try:
            with urlopen(request, timeout=10) as response:
                return response.read().decode("utf-8")
        except HTTPError as e:
            logger.debug("drift_remote_raw_http_error", path=path, status=e.code)
        except URLError as e:
            logger.debug("drift_remote_raw_url_error", path=path, error=str(e))
        except Exception as e:
            logger.debug("drift_remote_raw_fetch_failed", path=path, error=str(e))
        return None

    def _collect_resources(
        self,
        resources: dict[str, Any],
        docs: list[Any],
        namespace: str,
        kustomization: dict[str, Any] | None = None,
    ) -> None:
        for doc in docs:
            if not doc or not isinstance(doc, dict):
                continue
            if kustomization:
                doc = self._apply_kustomization(doc, kustomization)
            metadata = doc.get("metadata", {})
            ns = metadata.get("namespace", "")
            if ns and ns != namespace:
                continue
            kind = doc.get("kind", "")
            name = metadata.get("name", "")
            if kind and name:
                key = f"{kind}/{name}"
                resources[key] = doc

    def _kustomization_for_file(
        self,
        yaml_file: Path,
        cache: dict[Path, dict[str, Any] | None],
    ) -> dict[str, Any] | None:
        """Return same-directory Kustomize settings when the file is an included resource."""
        directory = yaml_file.parent
        if yaml_file.name == "kustomization.yaml":
            return None
        if directory not in cache:
            path = directory / "kustomization.yaml"
            if not path.exists():
                cache[directory] = None
            else:
                try:
                    with open(path) as f:
                        config = yaml.safe_load(f) or {}
                    cache[directory] = config if isinstance(config, dict) else None
                except Exception as e:
                    logger.debug("kustomization_parse_failed", file=str(path), error=str(e))
                    cache[directory] = None

        config = cache.get(directory)
        if not config:
            return None

        resources = config.get("resources") or []
        resource_names = {Path(str(resource)).name for resource in resources}
        if resource_names and yaml_file.name not in resource_names:
            return None
        return config

    def _apply_kustomization(self, resource: dict[str, Any], config: dict[str, Any]) -> dict[str, Any]:
        """Apply the Kustomize transforms that affect drift-relevant spec fields.

        The scanner compares Git intent with live ArgoCD output. Reading raw YAML
        skips Kustomize commonLabels and image transforms, which creates repeated
        false drift alerts for selectors, affinity, and image tags.
        """
        transformed = deepcopy(resource)

        namespace = config.get("namespace")
        if namespace and transformed.get("kind") != "Namespace":
            metadata = transformed.setdefault("metadata", {})
            metadata.setdefault("namespace", namespace)

        common_labels = config.get("commonLabels") or {}
        if isinstance(common_labels, dict) and common_labels:
            self._apply_common_labels(transformed, common_labels)

        images = config.get("images") or []
        if isinstance(images, list) and images:
            self._apply_image_overrides(transformed, images)

        return transformed

    def _apply_common_labels(self, resource: dict[str, Any], labels: dict[str, Any]) -> None:
        labels = {str(k): str(v) for k, v in labels.items()}
        kind = resource.get("kind")
        metadata = resource.setdefault("metadata", {})
        self._merge_labels(metadata.setdefault("labels", {}), labels)

        spec = resource.setdefault("spec", {})
        if kind in {"Deployment", "StatefulSet", "DaemonSet", "ReplicaSet"}:
            selector = spec.setdefault("selector", {}).setdefault("matchLabels", {})
            self._merge_labels(selector, labels)
            template_metadata = spec.setdefault("template", {}).setdefault("metadata", {})
            self._merge_labels(template_metadata.setdefault("labels", {}), labels)
        elif kind == "Service":
            selector = spec.setdefault("selector", {})
            if isinstance(selector, dict):
                self._merge_labels(selector, labels)
        elif kind == "PodDisruptionBudget":
            selector = spec.setdefault("selector", {}).setdefault("matchLabels", {})
            self._merge_labels(selector, labels)

        self._apply_label_selector_labels(spec, labels)

    @staticmethod
    def _merge_labels(target: dict[str, Any], labels: dict[str, str]) -> None:
        for key, value in labels.items():
            target.setdefault(key, value)

    def _apply_label_selector_labels(self, value: Any, labels: dict[str, str]) -> None:
        if isinstance(value, dict):
            selector = value.get("labelSelector")
            if isinstance(selector, dict):
                match_labels = selector.setdefault("matchLabels", {})
                if isinstance(match_labels, dict):
                    self._merge_labels(match_labels, labels)
            for child in value.values():
                self._apply_label_selector_labels(child, labels)
        elif isinstance(value, list):
            for child in value:
                self._apply_label_selector_labels(child, labels)

    def _apply_image_overrides(self, resource: dict[str, Any], images: list[Any]) -> None:
        overrides = [image for image in images if isinstance(image, dict)]
        if not overrides:
            return

        for pod_spec in self._iter_pod_specs(resource):
            for key in ("containers", "initContainers"):
                containers = pod_spec.get(key)
                if not isinstance(containers, list):
                    continue
                for container in containers:
                    if not isinstance(container, dict) or "image" not in container:
                        continue
                    image = container.get("image")
                    if isinstance(image, str):
                        container["image"] = self._rewrite_image(image, overrides)

    @staticmethod
    def _iter_pod_specs(resource: dict[str, Any]) -> list[dict[str, Any]]:
        spec = resource.get("spec")
        if not isinstance(spec, dict):
            return []

        pod_specs: list[dict[str, Any]] = []
        template_spec = spec.get("template", {}).get("spec")
        if isinstance(template_spec, dict):
            pod_specs.append(template_spec)

        job_template_spec = (
            spec.get("jobTemplate", {})
            .get("spec", {})
            .get("template", {})
            .get("spec")
        )
        if isinstance(job_template_spec, dict):
            pod_specs.append(job_template_spec)

        if resource.get("kind") == "Pod":
            pod_specs.append(spec)

        return pod_specs

    def _rewrite_image(self, current_image: str, overrides: list[dict[str, Any]]) -> str:
        current_name = self._image_name_without_tag(current_image)
        for override in overrides:
            source = str(override.get("name", ""))
            new_name = str(override.get("newName") or self._image_name_without_tag(source))
            new_tag = override.get("newTag")
            source_name = self._image_name_without_tag(source)
            if current_image != source and current_name != source_name:
                continue
            return f"{new_name}:{new_tag}" if new_tag else new_name
        return current_image

    @staticmethod
    def _image_name_without_tag(image: str) -> str:
        without_digest = image.split("@", 1)[0]
        slash_index = without_digest.rfind("/")
        colon_index = without_digest.rfind(":")
        if colon_index > slash_index:
            return without_digest[:colon_index]
        return without_digest


class K8sStateReader:
    """從 kubectl 讀取 K8s 實際狀態"""

    async def read(self, namespace: str) -> dict[str, Any]:
        """
        透過 kubectl 取得指定 namespace 的實際狀態

        Returns:
            {resource_key: actual_resource_dict}
        """
        try:
            result = await asyncio.get_event_loop().run_in_executor(
                None, self._read_sync, namespace
            )
            return result
        except Exception as e:
            logger.warning("k8s_state_read_failed", namespace=namespace, error=str(e))
            return {}

    def _read_sync(self, namespace: str) -> dict[str, Any]:
        resources: dict[str, Any] = {}
        resource_types = ["deployment", "service", "configmap", "ingress"]

        for rtype in resource_types:
            try:
                proc = subprocess.run(
                    ["kubectl", "get", rtype, "-n", namespace, "-o", "yaml"],
                    capture_output=True,
                    text=True,
                    timeout=30,
                )
                if proc.returncode != 0:
                    logger.debug("kubectl_failed", type=rtype, stderr=proc.stderr[:200])
                    continue

                data = yaml.safe_load(proc.stdout)
                if not data or data.get("kind") != "List":
                    continue

                for item in data.get("items", []):
                    kind = item.get("kind", rtype.capitalize())
                    name = item.get("metadata", {}).get("name", "")
                    if name:
                        key = f"{kind}/{name}"
                        resources[key] = item

            except subprocess.TimeoutExpired:
                logger.warning("kubectl_timeout", type=rtype, namespace=namespace)
            except Exception as e:
                logger.warning("kubectl_error", type=rtype, error=str(e))

        return resources


class DriftDetector:
    """
    比對 Git vs K8s 實際狀態，輸出 DriftItem 列表

    職責邊界：只做事實比對，不判斷嚴重性，不解釋意圖
    """

    def __init__(
        self,
        k8s_dir: str = "k8s",
        allowlist_fields: frozenset | None = None,
        critical_fields: frozenset | None = None,
    ):
        self._git_reader = GitStateReader(k8s_dir)
        self._k8s_reader = K8sStateReader()
        self._allowlist = allowlist_fields or _DEFAULT_ALLOWLIST_FIELDS
        self._critical_fields = critical_fields or _DEFAULT_CRITICAL_FIELDS

    async def scan(self, namespace: str, triggered_by: str = "cron") -> DriftReport:
        """
        掃描指定 namespace 的漂移

        Args:
            namespace: K8s namespace
            triggered_by: 觸發來源（cron / webhook / api）

        Returns:
            DriftReport（含 DriftItem 列表，尚未分析 intent）
        """
        report_id = str(uuid.uuid4())[:8]

        logger.info("drift_scan_start", namespace=namespace, report_id=report_id)

        git_state, k8s_state = await asyncio.gather(
            self._git_reader.read(namespace),
            self._k8s_reader.read(namespace),
        )

        items: list[DriftItem] = []

        # 比對 Git 中有的資源
        for resource_key, git_resource in git_state.items():
            actual_resource = k8s_state.get(resource_key)
            if actual_resource is None:
                # 資源在 Git 中存在但 K8s 中不存在（可能尚未部署）
                logger.debug("resource_missing_in_k8s", resource=resource_key)
                continue

            kind, name = resource_key.split("/", 1)
            diffs = self._diff_resources(git_resource, actual_resource, kind, name, namespace)
            items.extend(diffs)

        high_count = sum(1 for i in items if i.drift_level == DriftLevel.HIGH)
        medium_count = sum(1 for i in items if i.drift_level == DriftLevel.MEDIUM)
        info_count = sum(1 for i in items if i.drift_level == DriftLevel.INFO)

        logger.info(
            "drift_scan_done",
            namespace=namespace,
            report_id=report_id,
            high=high_count,
            medium=medium_count,
            info=info_count,
        )

        return DriftReport(
            report_id=report_id,
            namespace=namespace,
            items=items,
            high_count=high_count,
            medium_count=medium_count,
            info_count=info_count,
            triggered_by=triggered_by,
        )

    def _diff_resources(
        self,
        git_res: dict,
        actual_res: dict,
        kind: str,
        name: str,
        namespace: str,
    ) -> list[DriftItem]:
        """逐欄位比對兩個資源，回傳差異列表"""
        items: list[DriftItem] = []

        # 只比對 spec 層（metadata 的動態欄位太多）
        git_spec = self._normalized_spec(git_res)
        actual_spec = self._normalized_spec(actual_res)

        diffs = self._flatten_diff("spec", git_spec, actual_spec)
        for field_path, (git_val, actual_val) in diffs.items():
            is_allowlisted = self._is_allowlisted(field_path)
            if is_allowlisted:
                level = DriftLevel.INFO
            elif self._is_critical(field_path):
                level = DriftLevel.HIGH
            else:
                level = DriftLevel.MEDIUM

            items.append(DriftItem(
                resource_kind=kind,
                resource_name=name,
                namespace=namespace,
                field_path=field_path,
                git_value=git_val,
                actual_value=actual_val,
                drift_level=level,
                is_allowlisted=is_allowlisted,
            ))

        return items

    def _normalized_spec(self, resource: dict[str, Any]) -> dict[str, Any]:
        """Normalize Kubernetes API defaults before field diffing."""
        spec = deepcopy(resource.get("spec", {}))
        if not isinstance(spec, dict):
            return {}

        kind = resource.get("kind")
        if kind == "Service":
            self._normalize_service_spec(spec)
        if kind in {"Deployment", "StatefulSet", "DaemonSet", "ReplicaSet"}:
            self._normalize_controller_defaults(spec)
        self._normalize_template_defaults(spec)
        return spec

    @staticmethod
    def _normalize_service_spec(spec: dict[str, Any]) -> None:
        for field in (
            "clusterIP",
            "clusterIPs",
            "ipFamilies",
            "ipFamilyPolicy",
            "internalTrafficPolicy",
        ):
            spec.pop(field, None)
        if spec.get("externalTrafficPolicy") == "Cluster":
            spec.pop("externalTrafficPolicy", None)
        if spec.get("sessionAffinity") == "None":
            spec.pop("sessionAffinity", None)
        ports = spec.get("ports")
        if isinstance(ports, list):
            for port in ports:
                if isinstance(port, dict) and port.get("protocol") == "TCP":
                    port.pop("protocol", None)

    def _normalize_controller_defaults(self, spec: dict[str, Any]) -> None:
        if spec.get("progressDeadlineSeconds") == 600:
            spec.pop("progressDeadlineSeconds", None)
        if spec.get("revisionHistoryLimit") == 10:
            spec.pop("revisionHistoryLimit", None)
        strategy = spec.get("strategy")
        if isinstance(strategy, dict):
            rolling_update = strategy.get("rollingUpdate")
            if isinstance(rolling_update, dict):
                if rolling_update.get("maxSurge") == "25%":
                    rolling_update.pop("maxSurge", None)
                if rolling_update.get("maxUnavailable") == "25%":
                    rolling_update.pop("maxUnavailable", None)
                if not rolling_update:
                    strategy.pop("rollingUpdate", None)
            if strategy.get("type") == "RollingUpdate" and len(strategy) == 1:
                spec.pop("strategy", None)

    def _normalize_template_defaults(self, spec: dict[str, Any]) -> None:
        template = spec.get("template")
        if not isinstance(template, dict):
            return

        template_metadata = template.get("metadata")
        if isinstance(template_metadata, dict):
            annotations = template_metadata.get("annotations")
            if isinstance(annotations, dict):
                annotations.pop("kubectl.kubernetes.io/restartedAt", None)
                if not annotations:
                    template_metadata.pop("annotations", None)

        pod_spec = template.get("spec")
        if isinstance(pod_spec, dict):
            self._normalize_pod_spec_defaults(pod_spec)

    def _normalize_pod_spec_defaults(self, pod_spec: dict[str, Any]) -> None:
        defaults = {
            "restartPolicy": "Always",
            "dnsPolicy": "ClusterFirst",
            "schedulerName": "default-scheduler",
            "terminationGracePeriodSeconds": 30,
        }
        for field, default in defaults.items():
            if pod_spec.get(field) == default:
                pod_spec.pop(field, None)

        if pod_spec.get("securityContext") == {}:
            pod_spec.pop("securityContext", None)
        if pod_spec.get("serviceAccount") == pod_spec.get("serviceAccountName"):
            pod_spec.pop("serviceAccount", None)

        for key in ("containers", "initContainers"):
            containers = pod_spec.get(key)
            if isinstance(containers, list):
                for container in containers:
                    if isinstance(container, dict):
                        self._normalize_container_defaults(container)

        volumes = pod_spec.get("volumes")
        if isinstance(volumes, list):
            for volume in volumes:
                if isinstance(volume, dict):
                    self._normalize_volume_defaults(volume)

    def _normalize_container_defaults(self, container: dict[str, Any]) -> None:
        if container.get("terminationMessagePath") == "/dev/termination-log":
            container.pop("terminationMessagePath", None)
        if container.get("terminationMessagePolicy") == "File":
            container.pop("terminationMessagePolicy", None)

        ports = container.get("ports")
        if isinstance(ports, list):
            for port in ports:
                if isinstance(port, dict) and port.get("protocol") == "TCP":
                    port.pop("protocol", None)

        for probe_name in ("livenessProbe", "readinessProbe", "startupProbe"):
            probe = container.get(probe_name)
            if isinstance(probe, dict):
                self._normalize_probe_defaults(probe)

        env = container.get("env")
        if isinstance(env, list):
            for item in env:
                if isinstance(item, dict):
                    self._normalize_env_defaults(item)

    @staticmethod
    def _normalize_probe_defaults(probe: dict[str, Any]) -> None:
        if probe.get("successThreshold") == 1:
            probe.pop("successThreshold", None)
        http_get = probe.get("httpGet")
        if isinstance(http_get, dict) and http_get.get("scheme") == "HTTP":
            http_get.pop("scheme", None)

    @staticmethod
    def _normalize_env_defaults(env_item: dict[str, Any]) -> None:
        value_from = env_item.get("valueFrom")
        if not isinstance(value_from, dict):
            return
        field_ref = value_from.get("fieldRef")
        if isinstance(field_ref, dict) and field_ref.get("apiVersion") == "v1":
            field_ref.pop("apiVersion", None)

    @staticmethod
    def _normalize_volume_defaults(volume: dict[str, Any]) -> None:
        for source_key in ("secret", "configMap"):
            source = volume.get(source_key)
            if isinstance(source, dict) and source.get("defaultMode") in {420, "420"}:
                source.pop("defaultMode", None)

    def _flatten_diff(
        self,
        prefix: str,
        git_dict: Any,
        actual_dict: Any,
    ) -> dict[str, tuple[Any, Any]]:
        """遞迴展開並比對兩個 dict，回傳 {field_path: (git_val, actual_val)}"""
        diffs: dict[str, tuple[Any, Any]] = {}

        if not isinstance(git_dict, dict) or not isinstance(actual_dict, dict):
            if git_dict != actual_dict:
                diffs[prefix] = (git_dict, actual_dict)
            return diffs

        all_keys = set(git_dict.keys()) | set(actual_dict.keys())
        for key in all_keys:
            path = f"{prefix}.{key}"
            git_val = git_dict.get(key)
            actual_val = actual_dict.get(key)

            if git_val == actual_val:
                continue

            if isinstance(git_val, dict) and isinstance(actual_val, dict):
                diffs.update(self._flatten_diff(path, git_val, actual_val))
            else:
                diffs[path] = (git_val, actual_val)

        return diffs

    @staticmethod
    def _pattern_matches(pattern: str, field_path: str) -> bool:
        """
        匹配 field_path 是否符合 pattern。

        支援兩種萬用字元：
          - [*]  → 任意索引 (e.g. containers[*] 匹配 containers[0], containers[1])
          - *    → 任意字串段

        2026-04-05 Claude Code: I4 修正 — 舊邏輯直接 strip [*] 導致
        containers[*].image 無法匹配 containers[0].image (首席架構師 Review I4)
        """
        import re as _re

        # 將 pattern 轉為正則：[*] → \[\d+\]，* → [^.]+
        regex = _re.escape(pattern)
        regex = regex.replace(r"\[\*\]", r"\[\d+\]")
        regex = regex.replace(r"\*", r"[^.]+")
        # 允許 pattern 是前綴（field_path 可能更深，. 或 [ 或字串結尾均可）
        return bool(_re.match(f"^{regex}(\\.|\\[|$)", field_path))

    def _is_allowlisted(self, field_path: str) -> bool:
        """判斷欄位是否在白名單（靜默記錄不告警）"""
        return any(self._pattern_matches(p, field_path) for p in self._allowlist)

    def _is_critical(self, field_path: str) -> bool:
        """判斷欄位是否為關鍵欄位（HIGH 等級）"""
        return any(self._pattern_matches(p, field_path) for p in self._critical_fields)


# =============================================================================
# Singleton
# =============================================================================

_detector: DriftDetector | None = None


def get_drift_detector() -> DriftDetector:
    """Return the module-level ``DriftDetector``, creating it on first use."""
    global _detector
    if _detector is not None:
        return _detector
    _detector = DriftDetector()
    return _detector