788 lines
30 KiB
Python
788 lines
30 KiB
Python
"""
|
||
Drift Detector - Phase 25 P2 Config Drift Detection
|
||
=====================================================
|
||
職責:比對 Git YAML vs K8s 實際狀態,輸出結構化 DriftItem 列表
|
||
不判斷嚴重性,不解釋意圖,只做事實比對
|
||
|
||
版本: v1.0
|
||
建立: 2026-04-04 (台北時區)
|
||
建立者: ogt (首席架構師設計) + Claude Code (實作)
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import subprocess
|
||
import uuid
|
||
from copy import deepcopy
|
||
from pathlib import Path
|
||
from typing import Any
|
||
from urllib.error import HTTPError, URLError
|
||
from urllib.parse import quote
|
||
from urllib.request import Request, urlopen
|
||
|
||
import structlog
|
||
import yaml
|
||
|
||
from src.models.drift import DriftItem, DriftLevel, DriftReport
|
||
|
||
# Module-level structlog logger used by GitStateReader, K8sStateReader and DriftDetector.
logger = structlog.get_logger(__name__)
|
||
|
||
# 白名單欄位(靜默記錄,不告警)
|
||
_DEFAULT_ALLOWLIST_FIELDS = frozenset([
|
||
"spec.replicas",
|
||
"spec.template.spec.containers[*].resources.requests",
|
||
"spec.template.spec.containers[*].resources.limits",
|
||
"metadata.annotations",
|
||
"metadata.labels.pod-template-hash",
|
||
"metadata.resourceVersion",
|
||
"metadata.generation",
|
||
"metadata.uid",
|
||
"status",
|
||
# K8s 執行時自動填入的欄位(Git manifest 不指定,K8s 注入預設值或運行時資訊)
|
||
# 2026-04-16 ogt + Claude Sonnet 4.6: 修復 kubectl rollout restart 觸發假 drift 告警
|
||
# 根因:awoooi-web rollout restart 後 restartedAt annotation 被偵測為 "medium" drift
|
||
"spec.template.metadata.annotations", # kubectl.kubernetes.io/restartedAt
|
||
"spec.template.metadata.creationTimestamp",
|
||
"spec.template.spec.restartPolicy", # K8s 預設: Always(Git 不指定)
|
||
"spec.template.spec.dnsPolicy", # K8s 預設: ClusterFirst
|
||
"spec.template.spec.terminationGracePeriodSeconds", # K8s 預設: 30
|
||
"spec.template.spec.schedulerName", # K8s 預設: default-scheduler
|
||
"spec.strategy.rollingUpdate", # K8s 預設: maxSurge=25%
|
||
"spec.strategy.type", # K8s 預設: RollingUpdate
|
||
"spec.progressDeadlineSeconds", # K8s 預設: 600
|
||
"spec.revisionHistoryLimit", # K8s 預設: 10
|
||
"metadata.creationTimestamp",
|
||
"spec.template.spec.containers[*].terminationMessagePath",
|
||
"spec.template.spec.containers[*].terminationMessagePolicy",
|
||
"spec.template.spec.containers[*].imagePullPolicy", # K8s 預設: IfNotPresent
|
||
])
|
||
|
||
# 關鍵欄位(必須立即告警)
|
||
_DEFAULT_CRITICAL_FIELDS = frozenset([
|
||
"spec.template.spec.containers[*].image",
|
||
"spec.template.spec.containers[*].env",
|
||
"spec.template.spec.containers[*].ports",
|
||
"spec.template.spec.volumes",
|
||
"spec.template.spec.serviceAccountName",
|
||
])
|
||
|
||
|
||
class GitStateReader:
    """Read the desired Kubernetes state from Git-tracked YAML manifests.

    Lookup order: the Gitea ``main`` branch (only when ``k8s_dir`` is relative
    and the project settings can be loaded), then the local ``k8s_dir``
    directory. In both cases the Kustomize ``namespace``, ``commonLabels`` and
    ``images`` settings are applied to each manifest before it is returned.
    """

    def __init__(self, k8s_dir: str = "k8s"):
        self._k8s_dir = Path(k8s_dir)

    async def read(self, namespace: str) -> dict[str, Any]:
        """Read every manifest that belongs to ``namespace``.

        The blocking work runs in the default executor.

        Returns:
            ``{resource_key: parsed_yaml_dict}`` where ``resource_key`` has the
            form ``"{kind}/{name}"``. An empty dict is returned when reading
            fails (the error is logged as a warning).
        """
        try:
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(None, self._read_sync, namespace)
        except Exception as e:
            logger.warning("git_state_read_failed", namespace=namespace, error=str(e))
            return {}

    def _read_sync(self, namespace: str) -> dict[str, Any]:
        """Return remote manifests when available, otherwise scan the local directory."""
        remote_resources = self._read_remote_sync(namespace)
        if remote_resources:
            return remote_resources

        resources: dict[str, Any] = {}
        if not self._k8s_dir.exists():
            logger.warning("k8s_dir_not_found", path=str(self._k8s_dir))
            return resources

        kustomization_cache: dict[Path, dict[str, Any] | None] = {}
        for yaml_file in self._k8s_dir.rglob("*.yaml"):
            # A broken file is skipped (debug log) so one bad manifest does not
            # abort the whole scan.
            try:
                # Manifests may contain non-ASCII text; do not depend on the locale.
                with open(yaml_file, encoding="utf-8") as f:
                    docs = list(yaml.safe_load_all(f))
                kustomization = self._kustomization_for_file(yaml_file, kustomization_cache)
                self._collect_resources(resources, docs, namespace, kustomization)
            except Exception as e:
                logger.debug("yaml_parse_failed", file=str(yaml_file), error=str(e))

        return resources

    def _read_remote_sync(self, namespace: str) -> dict[str, Any]:
        """Read drift source of truth from Gitea main when available.

        Fetches ``{k8s_dir}/{namespace}/kustomization.yaml`` and then every
        ``.yaml``/``.yml`` file listed under its ``resources``. Returns an empty
        dict when ``k8s_dir`` is absolute, settings are unavailable, or the
        kustomization cannot be fetched or parsed.
        """
        if self._k8s_dir.is_absolute():
            return {}

        try:
            from src.core.config import get_settings

            settings = get_settings()
        except Exception as e:
            logger.debug("drift_remote_settings_unavailable", error=str(e))
            return {}

        repo_dir = self._k8s_dir.as_posix().strip("/")
        namespace_dir = f"{repo_dir}/{namespace}"
        kustomization_path = f"{namespace_dir}/kustomization.yaml"
        kustomization_text = self._fetch_gitea_raw(settings, kustomization_path)
        if not kustomization_text:
            return {}

        try:
            config = yaml.safe_load(kustomization_text) or {}
        except Exception as e:
            logger.warning(
                "drift_remote_kustomization_parse_failed",
                path=kustomization_path,
                error=str(e),
            )
            return {}
        if not isinstance(config, dict):
            return {}

        resources: dict[str, Any] = {}
        for resource_path in self._remote_resource_paths(namespace_dir, config):
            content = self._fetch_gitea_raw(settings, resource_path)
            if not content:
                continue
            try:
                docs = list(yaml.safe_load_all(content))
            except Exception as e:
                logger.debug(
                    "drift_remote_yaml_parse_failed",
                    path=resource_path,
                    error=str(e),
                )
                continue
            self._collect_resources(resources, docs, namespace, config)

        if resources:
            logger.info(
                "drift_git_state_remote_read",
                namespace=namespace,
                resources=len(resources),
                source="gitea_main",
            )
        return resources

    @staticmethod
    def _remote_resource_paths(namespace_dir: str, config: dict[str, Any]) -> list[str]:
        """Return repo paths for the ``.yaml``/``.yml`` entries of ``config["resources"]``."""
        paths: list[str] = []
        for resource in config.get("resources") or []:
            resource_str = str(resource)
            # Directory or remote-base entries are not plain files; skip them.
            if resource_str.endswith((".yaml", ".yml")):
                paths.append((Path(namespace_dir) / resource_str).as_posix())
        return paths

    def _fetch_gitea_raw(self, settings: Any, path: str) -> str | None:
        """Fetch one file from the Gitea raw API at ``ref=main``.

        Returns the decoded UTF-8 body, or ``None`` on any HTTP/URL/other error
        (each is logged at debug level).
        """
        api_url = str(settings.GITEA_API_URL).rstrip("/")
        owner = quote(str(settings.GITEA_REPO_OWNER), safe="")
        repo = quote(str(settings.GITEA_REPO_NAME), safe="")
        encoded_path = quote(path, safe="/")
        url = f"{api_url}/api/v1/repos/{owner}/{repo}/raw/{encoded_path}?ref=main"
        headers = {"Accept": "text/plain"}
        token = getattr(settings, "GITEA_API_TOKEN", "")
        if token:
            headers["Authorization"] = f"token {token}"

        request = Request(url, headers=headers)
        try:
            with urlopen(request, timeout=10) as response:
                return response.read().decode("utf-8")
        except HTTPError as e:  # must precede URLError (HTTPError is a subclass)
            logger.debug("drift_remote_raw_http_error", path=path, status=e.code)
        except URLError as e:
            logger.debug("drift_remote_raw_url_error", path=path, error=str(e))
        except Exception as e:
            logger.debug("drift_remote_raw_fetch_failed", path=path, error=str(e))
        return None

    def _collect_resources(
        self,
        resources: dict[str, Any],
        docs: list[Any],
        namespace: str,
        kustomization: dict[str, Any] | None = None,
    ) -> None:
        """Add the documents that belong to ``namespace`` to ``resources`` in place.

        Empty or non-dict documents are skipped, as are documents whose
        ``metadata.namespace`` names a different namespace. Documents without a
        namespace are kept. Keys have the form ``"{kind}/{name}"``.
        """
        for doc in docs:
            if not doc or not isinstance(doc, dict):
                continue
            if kustomization:
                doc = self._apply_kustomization(doc, kustomization)
            # ``metadata:`` may be present but null in hand-written YAML.
            metadata = doc.get("metadata") or {}
            if not isinstance(metadata, dict):
                continue
            ns = metadata.get("namespace", "")
            if ns and ns != namespace:
                continue
            kind = doc.get("kind", "")
            name = metadata.get("name", "")
            if kind and name:
                resources[f"{kind}/{name}"] = doc

    def _kustomization_for_file(
        self,
        yaml_file: Path,
        cache: dict[Path, dict[str, Any] | None],
    ) -> dict[str, Any] | None:
        """Return same-directory Kustomize settings when the file is an included resource.

        ``cache`` memoizes the parsed ``kustomization.yaml`` per directory
        (``None`` when missing or unparsable). ``None`` is returned for the
        kustomization file itself and for files not listed under a non-empty
        ``resources`` list.
        """
        if yaml_file.name == "kustomization.yaml":
            return None

        directory = yaml_file.parent
        if directory not in cache:
            cache[directory] = None
            path = directory / "kustomization.yaml"
            if path.exists():
                try:
                    with open(path, encoding="utf-8") as f:
                        config = yaml.safe_load(f) or {}
                    if isinstance(config, dict):
                        cache[directory] = config
                except Exception as e:
                    logger.debug("kustomization_parse_failed", file=str(path), error=str(e))

        config = cache.get(directory)
        if not config:
            return None

        resource_names = {Path(str(resource)).name for resource in config.get("resources") or []}
        if resource_names and yaml_file.name not in resource_names:
            return None
        return config

    def _apply_kustomization(self, resource: dict[str, Any], config: dict[str, Any]) -> dict[str, Any]:
        """Apply the Kustomize transforms that affect drift-relevant spec fields.

        The scanner compares Git intent with live ArgoCD output. Reading raw YAML
        skips Kustomize commonLabels and image transforms, which creates repeated
        false drift alerts for selectors, affinity, and image tags.

        Returns a transformed deep copy; ``resource`` itself is not modified.
        """
        transformed = deepcopy(resource)

        namespace = config.get("namespace")
        if namespace and transformed.get("kind") != "Namespace":
            metadata = self._child_dict(transformed, "metadata")
            if metadata is not None:
                # An explicit namespace in the manifest is kept.
                metadata.setdefault("namespace", namespace)

        common_labels = config.get("commonLabels") or {}
        if isinstance(common_labels, dict) and common_labels:
            self._apply_common_labels(transformed, common_labels)

        images = config.get("images") or []
        if isinstance(images, list) and images:
            self._apply_image_overrides(transformed, images)

        return transformed

    @staticmethod
    def _child_dict(parent: dict[str, Any] | None, key: str) -> dict[str, Any] | None:
        """Return ``parent[key]`` as a dict, creating it when absent or null.

        Returns ``None`` when ``parent`` is ``None`` or the existing value is
        neither a dict nor null, so callers can skip malformed manifests
        instead of crashing.
        """
        if parent is None:
            return None
        child = parent.get(key)
        if child is None:
            child = parent[key] = {}
        return child if isinstance(child, dict) else None

    def _merge_child_labels(
        self,
        parent: dict[str, Any] | None,
        key: str,
        labels: dict[str, str],
    ) -> None:
        """Merge ``labels`` into the dict at ``parent[key]`` when that location is usable."""
        target = self._child_dict(parent, key)
        if target is not None:
            self._merge_labels(target, labels)

    def _apply_common_labels(self, resource: dict[str, Any], labels: dict[str, Any]) -> None:
        """Add ``commonLabels`` to metadata, selectors and pod templates in place.

        Label keys and values are stringified. Existing label values are never
        overwritten. An empty ``spec`` is created when the resource has none.
        """
        labels = {str(k): str(v) for k, v in labels.items()}
        kind = resource.get("kind")

        self._merge_child_labels(self._child_dict(resource, "metadata"), "labels", labels)

        spec = self._child_dict(resource, "spec")
        if spec is None:
            return

        if kind in {"Deployment", "StatefulSet", "DaemonSet", "ReplicaSet"}:
            self._merge_child_labels(self._child_dict(spec, "selector"), "matchLabels", labels)
            template = self._child_dict(spec, "template")
            self._merge_child_labels(self._child_dict(template, "metadata"), "labels", labels)
        elif kind == "Service":
            self._merge_child_labels(spec, "selector", labels)
        elif kind == "PodDisruptionBudget":
            self._merge_child_labels(self._child_dict(spec, "selector"), "matchLabels", labels)

        self._apply_label_selector_labels(spec, labels)

    @staticmethod
    def _merge_labels(target: dict[str, Any], labels: dict[str, str]) -> None:
        """Add each label to ``target`` unless the key is already present."""
        for key, value in labels.items():
            target.setdefault(key, value)

    def _apply_label_selector_labels(self, value: Any, labels: dict[str, str]) -> None:
        """Recursively merge ``labels`` into every ``labelSelector.matchLabels`` under ``value``."""
        if isinstance(value, dict):
            selector = value.get("labelSelector")
            if isinstance(selector, dict):
                match_labels = selector.setdefault("matchLabels", {})
                if isinstance(match_labels, dict):
                    self._merge_labels(match_labels, labels)
            children = value.values()
        elif isinstance(value, list):
            children = value
        else:
            return
        for child in children:
            self._apply_label_selector_labels(child, labels)

    def _apply_image_overrides(self, resource: dict[str, Any], images: list[Any]) -> None:
        """Rewrite container and initContainer images in place using Kustomize ``images`` entries."""
        overrides = [image for image in images if isinstance(image, dict)]
        if not overrides:
            return

        for pod_spec in self._iter_pod_specs(resource):
            for key in ("containers", "initContainers"):
                containers = pod_spec.get(key)
                if not isinstance(containers, list):
                    continue
                for container in containers:
                    if not isinstance(container, dict):
                        continue
                    image = container.get("image")
                    if isinstance(image, str):
                        container["image"] = self._rewrite_image(image, overrides)

    @staticmethod
    def _iter_pod_specs(resource: dict[str, Any]) -> list[dict[str, Any]]:
        """Return the pod specs embedded in ``resource``.

        Looks at ``spec.template.spec``, ``spec.jobTemplate.spec.template.spec``
        and, for ``kind: Pod``, ``spec`` itself. Null or non-dict intermediate
        values are treated as "no pod spec" rather than raising.
        """
        spec = resource.get("spec")
        if not isinstance(spec, dict):
            return []

        def dig(node: Any, *keys: str) -> dict[str, Any] | None:
            for key in keys:
                if not isinstance(node, dict):
                    return None
                node = node.get(key)
            return node if isinstance(node, dict) else None

        candidates = (
            dig(spec, "template", "spec"),
            dig(spec, "jobTemplate", "spec", "template", "spec"),
            spec if resource.get("kind") == "Pod" else None,
        )
        return [pod_spec for pod_spec in candidates if pod_spec is not None]

    def _rewrite_image(self, current_image: str, overrides: list[dict[str, Any]]) -> str:
        """Apply the first matching Kustomize image override to ``current_image``.

        An override matches when its ``name`` equals the image name with tag
        and digest ignored. ``newName`` replaces the name. ``newTag`` and/or
        ``digest`` replace both the original tag and digest. When neither is
        given the original tag/digest are preserved, so a ``newName``-only
        override no longer strips the tag. Returns ``current_image`` unchanged
        when nothing matches.
        """
        current_name, current_tag, current_digest = self._split_image(current_image)
        for override in overrides:
            source_name = self._image_name_without_tag(str(override.get("name", "")))
            if current_name != source_name:
                continue

            name = str(override.get("newName") or source_name)
            new_tag = override.get("newTag")
            new_digest = override.get("digest")
            if new_tag or new_digest:
                tag = str(new_tag) if new_tag else ""
                digest = str(new_digest) if new_digest else ""
            else:
                tag, digest = current_tag, current_digest

            rewritten = name
            if tag:
                rewritten += f":{tag}"
            if digest:
                rewritten += f"@{digest}"
            return rewritten

        return current_image

    @staticmethod
    def _split_image(image: str) -> tuple[str, str, str]:
        """Split an image reference into ``(name, tag, digest)``; missing parts are ``""``."""
        reference, _, digest = image.partition("@")
        colon_index = reference.rfind(":")
        # A colon before the last slash belongs to a registry port, not a tag.
        if colon_index > reference.rfind("/"):
            return reference[:colon_index], reference[colon_index + 1:], digest
        return reference, "", digest

    @staticmethod
    def _image_name_without_tag(image: str) -> str:
        """Return the image reference with any ``@digest`` and ``:tag`` removed."""
        without_digest = image.split("@", 1)[0]
        slash_index = without_digest.rfind("/")
        colon_index = without_digest.rfind(":")
        if colon_index > slash_index:
            return without_digest[:colon_index]
        return without_digest
|
||
|
||
|
||
class K8sStateReader:
    """Read the live Kubernetes state by shelling out to ``kubectl``."""

    # kubectl resource type -> canonical Kind, used only when a returned item
    # carries no ``kind`` of its own. ``str.capitalize()`` would produce
    # "Configmap", which can never match the Git-side key "ConfigMap/...".
    _RESOURCE_KINDS = {
        "deployment": "Deployment",
        "service": "Service",
        "configmap": "ConfigMap",
        "ingress": "Ingress",
    }

    async def read(self, namespace: str) -> dict[str, Any]:
        """Fetch the actual state of ``namespace`` via kubectl.

        The blocking kubectl calls run in the default executor.

        Returns:
            ``{resource_key: actual_resource_dict}`` keyed by ``"{kind}/{name}"``.
            An empty dict is returned when reading fails (logged as a warning).
        """
        try:
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(None, self._read_sync, namespace)
        except Exception as e:
            logger.warning("k8s_state_read_failed", namespace=namespace, error=str(e))
            return {}

    def _read_sync(self, namespace: str) -> dict[str, Any]:
        """Run ``kubectl get <type> -n <namespace> -o yaml`` for each resource type.

        A failing, timing-out or unparsable type is logged and skipped; the
        other types are still collected.
        """
        resources: dict[str, Any] = {}

        for rtype, default_kind in self._RESOURCE_KINDS.items():
            try:
                proc = subprocess.run(
                    ["kubectl", "get", rtype, "-n", namespace, "-o", "yaml"],
                    capture_output=True,
                    text=True,
                    timeout=30,
                )
                if proc.returncode != 0:
                    logger.debug("kubectl_failed", type=rtype, stderr=proc.stderr[:200])
                    continue

                self._collect_items(resources, yaml.safe_load(proc.stdout), default_kind)

            except subprocess.TimeoutExpired:
                logger.warning("kubectl_timeout", type=rtype, namespace=namespace)
            except Exception as e:
                logger.warning("kubectl_error", type=rtype, error=str(e))

        return resources

    @staticmethod
    def _collect_items(resources: dict[str, Any], data: Any, default_kind: str) -> None:
        """Add the named items of a parsed ``kind: List`` document to ``resources`` in place.

        Anything that is not a ``List`` mapping is ignored, as are non-dict
        items and items without ``metadata.name``. ``default_kind`` is used
        when an item has no ``kind``.
        """
        if not isinstance(data, dict) or data.get("kind") != "List":
            return

        for item in data.get("items") or []:
            if not isinstance(item, dict):
                continue
            metadata = item.get("metadata")
            name = metadata.get("name", "") if isinstance(metadata, dict) else ""
            if name:
                kind = item.get("kind") or default_kind
                resources[f"{kind}/{name}"] = item
|
||
|
||
|
||
class DriftDetector:
    """Compare Git manifests with the live K8s state and emit DriftItem objects.

    Only the ``spec`` of each resource is compared. Well-known API-server
    defaults are stripped from both sides first. Each differing field path is
    labelled INFO (allowlisted), HIGH (critical) or MEDIUM (everything else).
    """

    def __init__(
        self,
        k8s_dir: str = "k8s",
        allowlist_fields: frozenset | None = None,
        critical_fields: frozenset | None = None,
    ):
        """Create a detector.

        Args:
            k8s_dir: Directory holding the Git-tracked manifests.
            allowlist_fields: Field patterns reported as INFO. ``None`` selects
                the module defaults; an empty frozenset disables the allowlist.
            critical_fields: Field patterns reported as HIGH. ``None`` selects
                the module defaults; an empty frozenset disables them.
        """
        self._git_reader = GitStateReader(k8s_dir)
        self._k8s_reader = K8sStateReader()
        # ``is None`` rather than ``or``: an explicitly empty set must be honoured.
        self._allowlist = (
            _DEFAULT_ALLOWLIST_FIELDS if allowlist_fields is None else allowlist_fields
        )
        self._critical_fields = (
            _DEFAULT_CRITICAL_FIELDS if critical_fields is None else critical_fields
        )

    async def scan(self, namespace: str, triggered_by: str = "cron") -> DriftReport:
        """Scan ``namespace`` for drift.

        Git and K8s state are read concurrently. Only resources present in Git
        are examined; a Git resource missing from K8s is logged at debug level
        and skipped.

        Args:
            namespace: K8s namespace.
            triggered_by: Trigger source (cron / webhook / api).

        Returns:
            DriftReport containing the DriftItem list (intent not yet analysed).
        """
        report_id = str(uuid.uuid4())[:8]

        logger.info("drift_scan_start", namespace=namespace, report_id=report_id)

        git_state, k8s_state = await asyncio.gather(
            self._git_reader.read(namespace),
            self._k8s_reader.read(namespace),
        )

        items: list[DriftItem] = []

        # Compare every resource that exists in Git.
        for resource_key, git_resource in git_state.items():
            actual_resource = k8s_state.get(resource_key)
            if actual_resource is None:
                # Present in Git but absent from K8s (possibly not deployed yet).
                logger.debug("resource_missing_in_k8s", resource=resource_key)
                continue

            kind, name = resource_key.split("/", 1)
            items.extend(
                self._diff_resources(git_resource, actual_resource, kind, name, namespace)
            )

        high_count = sum(1 for i in items if i.drift_level == DriftLevel.HIGH)
        medium_count = sum(1 for i in items if i.drift_level == DriftLevel.MEDIUM)
        info_count = sum(1 for i in items if i.drift_level == DriftLevel.INFO)

        logger.info(
            "drift_scan_done",
            namespace=namespace,
            report_id=report_id,
            high=high_count,
            medium=medium_count,
            info=info_count,
        )

        return DriftReport(
            report_id=report_id,
            namespace=namespace,
            items=items,
            high_count=high_count,
            medium_count=medium_count,
            info_count=info_count,
            triggered_by=triggered_by,
        )

    def _diff_resources(
        self,
        git_res: dict,
        actual_res: dict,
        kind: str,
        name: str,
        namespace: str,
    ) -> list[DriftItem]:
        """Compare two resources field by field and return one DriftItem per difference."""
        items: list[DriftItem] = []

        # Only the spec is compared (metadata has too many dynamic fields).
        git_spec = self._normalized_spec(git_res)
        actual_spec = self._normalized_spec(actual_res)

        diffs = self._flatten_diff("spec", git_spec, actual_spec)
        for field_path, (git_val, actual_val) in diffs.items():
            is_allowlisted = self._is_allowlisted(field_path)
            # The allowlist takes precedence over the critical list.
            if is_allowlisted:
                level = DriftLevel.INFO
            elif self._is_critical(field_path):
                level = DriftLevel.HIGH
            else:
                level = DriftLevel.MEDIUM

            items.append(DriftItem(
                resource_kind=kind,
                resource_name=name,
                namespace=namespace,
                field_path=field_path,
                git_value=git_val,
                actual_value=actual_val,
                drift_level=level,
                is_allowlisted=is_allowlisted,
            ))

        return items

    def _normalized_spec(self, resource: dict[str, Any]) -> dict[str, Any]:
        """Normalize Kubernetes API defaults before field diffing.

        Returns a deep copy of ``resource["spec"]`` with known default values
        removed, or ``{}`` when the spec is missing or not a mapping. The input
        is left untouched.
        """
        spec = deepcopy(resource.get("spec", {}))
        if not isinstance(spec, dict):
            return {}

        kind = resource.get("kind")
        if kind == "Service":
            self._normalize_service_spec(spec)
        if kind in {"Deployment", "StatefulSet", "DaemonSet", "ReplicaSet"}:
            self._normalize_controller_defaults(spec)
            self._normalize_template_defaults(spec)
        return spec

    @staticmethod
    def _normalize_service_spec(spec: dict[str, Any]) -> None:
        """Drop cluster-assigned and default-valued Service fields in place."""
        for field in (
            "clusterIP",
            "clusterIPs",
            "ipFamilies",
            "ipFamilyPolicy",
            "internalTrafficPolicy",
        ):
            spec.pop(field, None)
        if spec.get("externalTrafficPolicy") == "Cluster":
            spec.pop("externalTrafficPolicy", None)
        if spec.get("sessionAffinity") == "None":
            spec.pop("sessionAffinity", None)
        ports = spec.get("ports")
        if isinstance(ports, list):
            for port in ports:
                if isinstance(port, dict) and port.get("protocol") == "TCP":
                    port.pop("protocol", None)

    def _normalize_controller_defaults(self, spec: dict[str, Any]) -> None:
        """Drop default rollout settings (deadline, history limit, 25% rolling update) in place."""
        if spec.get("progressDeadlineSeconds") == 600:
            spec.pop("progressDeadlineSeconds", None)
        if spec.get("revisionHistoryLimit") == 10:
            spec.pop("revisionHistoryLimit", None)

        strategy = spec.get("strategy")
        if not isinstance(strategy, dict):
            return
        rolling_update = strategy.get("rollingUpdate")
        if isinstance(rolling_update, dict):
            for knob in ("maxSurge", "maxUnavailable"):
                if rolling_update.get(knob) == "25%":
                    rolling_update.pop(knob, None)
            if not rolling_update:
                strategy.pop("rollingUpdate", None)
        # A strategy reduced to just the default type carries no information.
        if strategy.get("type") == "RollingUpdate" and len(strategy) == 1:
            spec.pop("strategy", None)

    def _normalize_template_defaults(self, spec: dict[str, Any]) -> None:
        """Strip the restartedAt annotation and pod-spec defaults from ``spec.template`` in place."""
        template = spec.get("template")
        if not isinstance(template, dict):
            return

        template_metadata = template.get("metadata")
        if isinstance(template_metadata, dict):
            annotations = template_metadata.get("annotations")
            if isinstance(annotations, dict):
                annotations.pop("kubectl.kubernetes.io/restartedAt", None)
                if not annotations:
                    template_metadata.pop("annotations", None)

        pod_spec = template.get("spec")
        if isinstance(pod_spec, dict):
            self._normalize_pod_spec_defaults(pod_spec)

    def _normalize_pod_spec_defaults(self, pod_spec: dict[str, Any]) -> None:
        """Remove default-valued pod fields, then normalize containers and volumes in place."""
        defaults = {
            "restartPolicy": "Always",
            "dnsPolicy": "ClusterFirst",
            "schedulerName": "default-scheduler",
            "terminationGracePeriodSeconds": 30,
        }
        for field, default in defaults.items():
            if pod_spec.get(field) == default:
                pod_spec.pop(field, None)

        if pod_spec.get("securityContext") == {}:
            pod_spec.pop("securityContext", None)
        # ``serviceAccount`` is dropped when it merely mirrors ``serviceAccountName``.
        if pod_spec.get("serviceAccount") == pod_spec.get("serviceAccountName"):
            pod_spec.pop("serviceAccount", None)

        for key in ("containers", "initContainers"):
            containers = pod_spec.get(key)
            if isinstance(containers, list):
                for container in containers:
                    if isinstance(container, dict):
                        self._normalize_container_defaults(container)

        volumes = pod_spec.get("volumes")
        if isinstance(volumes, list):
            for volume in volumes:
                if isinstance(volume, dict):
                    self._normalize_volume_defaults(volume)

    def _normalize_container_defaults(self, container: dict[str, Any]) -> None:
        """Remove default termination-message, port-protocol, probe and env fields in place."""
        if container.get("terminationMessagePath") == "/dev/termination-log":
            container.pop("terminationMessagePath", None)
        if container.get("terminationMessagePolicy") == "File":
            container.pop("terminationMessagePolicy", None)

        ports = container.get("ports")
        if isinstance(ports, list):
            for port in ports:
                if isinstance(port, dict) and port.get("protocol") == "TCP":
                    port.pop("protocol", None)

        for probe_name in ("livenessProbe", "readinessProbe", "startupProbe"):
            probe = container.get(probe_name)
            if isinstance(probe, dict):
                self._normalize_probe_defaults(probe)

        env = container.get("env")
        if isinstance(env, list):
            for item in env:
                if isinstance(item, dict):
                    self._normalize_env_defaults(item)

    @staticmethod
    def _normalize_probe_defaults(probe: dict[str, Any]) -> None:
        """Drop ``successThreshold: 1`` and ``httpGet.scheme: HTTP`` from a probe in place."""
        if probe.get("successThreshold") == 1:
            probe.pop("successThreshold", None)
        http_get = probe.get("httpGet")
        if isinstance(http_get, dict) and http_get.get("scheme") == "HTTP":
            http_get.pop("scheme", None)

    @staticmethod
    def _normalize_env_defaults(env_item: dict[str, Any]) -> None:
        """Drop ``valueFrom.fieldRef.apiVersion: v1`` from an env entry in place."""
        value_from = env_item.get("valueFrom")
        if not isinstance(value_from, dict):
            return
        field_ref = value_from.get("fieldRef")
        if isinstance(field_ref, dict) and field_ref.get("apiVersion") == "v1":
            field_ref.pop("apiVersion", None)

    @staticmethod
    def _normalize_volume_defaults(volume: dict[str, Any]) -> None:
        """Drop ``defaultMode`` 420 (int or string) from secret/configMap volume sources in place."""
        for source_key in ("secret", "configMap"):
            source = volume.get(source_key)
            if isinstance(source, dict) and source.get("defaultMode") in {420, "420"}:
                source.pop("defaultMode", None)

    def _flatten_diff(
        self,
        prefix: str,
        git_dict: Any,
        actual_dict: Any,
    ) -> dict[str, tuple[Any, Any]]:
        """Recursively compare two values and return ``{field_path: (git_val, actual_val)}``.

        Mappings are walked key by key (``prefix.key``) and lists index by
        index (``prefix[i]``), so paths line up with ``[*]`` patterns such as
        ``containers[*].image``. A key or index missing on one side is reported
        with ``None`` for that side. Keys are visited in sorted order, which
        makes the result order deterministic.
        """
        diffs: dict[str, tuple[Any, Any]] = {}

        if isinstance(git_dict, dict) and isinstance(actual_dict, dict):
            # key=str keeps sorting safe if a YAML mapping has non-string keys.
            for key in sorted(set(git_dict) | set(actual_dict), key=str):
                git_val = git_dict.get(key)
                actual_val = actual_dict.get(key)
                if git_val != actual_val:
                    diffs.update(self._flatten_diff(f"{prefix}.{key}", git_val, actual_val))
        elif isinstance(git_dict, list) and isinstance(actual_dict, list):
            git_len, actual_len = len(git_dict), len(actual_dict)
            for index in range(max(git_len, actual_len)):
                path = f"{prefix}[{index}]"
                if index < git_len and index < actual_len:
                    if git_dict[index] != actual_dict[index]:
                        diffs.update(
                            self._flatten_diff(path, git_dict[index], actual_dict[index])
                        )
                else:
                    # Element exists on one side only.
                    git_val = git_dict[index] if index < git_len else None
                    actual_val = actual_dict[index] if index < actual_len else None
                    diffs[path] = (git_val, actual_val)
        elif git_dict != actual_dict:
            diffs[prefix] = (git_dict, actual_dict)

        return diffs

    @staticmethod
    def _pattern_matches(pattern: str, field_path: str) -> bool:
        """Return whether ``field_path`` matches ``pattern``.

        Two wildcards are supported:
        - ``[*]`` matches any index (e.g. ``containers[*]`` matches
          ``containers[0]`` and ``containers[1]``)
        - ``*`` matches any single path segment

        The pattern may also be a prefix: a deeper ``field_path`` matches as
        long as the pattern ends at a ``.``, a ``[`` or the end of the path.

        2026-04-05 Claude Code: I4 fix - the old logic stripped ``[*]``, so
        ``containers[*].image`` could not match ``containers[0].image``
        (chief architect review I4).
        """
        import re as _re

        # Turn the pattern into a regex: [*] -> \[\d+\], * -> [^.]+
        regex = _re.escape(pattern)
        regex = regex.replace(r"\[\*\]", r"\[\d+\]")
        regex = regex.replace(r"\*", r"[^.]+")
        return bool(_re.match(f"^{regex}(\\.|\\[|$)", field_path))

    def _is_allowlisted(self, field_path: str) -> bool:
        """Return whether the field is allowlisted (recorded silently, no alert)."""
        return any(self._pattern_matches(p, field_path) for p in self._allowlist)

    def _is_critical(self, field_path: str) -> bool:
        """Return whether the field is critical (HIGH level)."""
        return any(self._pattern_matches(p, field_path) for p in self._critical_fields)
|
||
|
||
|
||
# =============================================================================
|
||
# Singleton
|
||
# =============================================================================
|
||
|
||
# Process-wide DriftDetector, created on first use by get_drift_detector().
_detector: DriftDetector | None = None


def get_drift_detector() -> DriftDetector:
    """Return the shared DriftDetector, building it with default settings on first call."""
    global _detector
    detector = _detector
    if detector is None:
        detector = _detector = DriftDetector()
    return detector
|