Files
awoooi/apps/api/src/services/drift_adopt_service.py
Your Name 0367dde686
All checks were successful
Code Review / ai-code-review (push) Successful in 9s
CD Pipeline / tests (push) Successful in 1m4s
CD Pipeline / build-and-deploy (push) Successful in 3m41s
CD Pipeline / post-deploy-checks (push) Successful in 1m39s
fix(drift): dedupe blocked auto-adopt escalations
2026-05-19 00:13:41 +08:00

498 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Drift Adopt Service — ADR-057: Gitea PR API 實作
=================================================
職責:將合法漂移反向寫回 Git透過 Gitea PR API 而非直接 git push
設計邊界(核心原則):
- 不直接操作 git不 git add/commit/push
- 透過 Gitea REST API 建立 branch + commit + PR
- PR 需 SRE 手動 review 後 merge不自動 merge main
- 只修改漂移相關的 YAML 欄位(不 add -A
流程:
adopt() 被呼叫
→ 讀取 k8s/ 目錄對應 YAML 檔
→ 建立 drift/adopt-{report_id[:8]}-{ts} branch
→ 透過 Gitea API commit YAML 更新
→ 建立 PR + 推送 Telegram 通知
版本: v1.0
建立: 2026-04-05 (台北時區)
建立者: Claude Code (ADR-057 實作)
關聯 ADR: ADR-057
"""
from __future__ import annotations
import base64
import pathlib
from typing import TYPE_CHECKING
import httpx
import structlog
from src.core.config import get_settings
from src.utils.timezone import now_taipei
if TYPE_CHECKING:
from src.models.drift import DriftInterpretation, DriftReport
# Module-level structlog logger, named after this module.
logger = structlog.get_logger(__name__)
class DriftAdoptService:
    """
    Write drift back to Git through the Gitea PR API.

    ADR-057 safety design:
    ✅ Uses the Gitea API; git is never executed inside the API Pod
    ✅ Opens a PR for SRE review; never pushes to main directly
    ✅ Commits only YAML under the k8s/ directory (no `git add -A`)
    ✅ The API token is injected from a K8s Secret (not hard-coded)
    """
def __init__(self) -> None:
    """Capture the Gitea connection parameters from the application settings.

    Stores the API base URL (with trailing slashes removed), the API token,
    the repository owner and the repository name, and fixes the manifest
    directory to the relative path ``k8s``.
    """
    cfg = get_settings()
    base_url = cfg.GITEA_API_URL
    self._api_url = base_url.rstrip("/")
    self._token = cfg.GITEA_API_TOKEN
    self._owner, self._repo = cfg.GITEA_REPO_OWNER, cfg.GITEA_REPO_NAME
    # Relative path: resolved against the process working directory.
    self._k8s_dir = pathlib.Path("k8s")
async def auto_adopt_if_safe(self, report: "DriftReport") -> dict:
    """
    Automatically adopt a drift report when it qualifies as low-risk.

    The following checks run in this order; the first one that fails ends
    the call with a "skipped" result:
      1. ``report.high_count == 0``
      2. ``report.medium_count <= 5``
      3. at most 10 actionable items (neither allowlisted nor trivial)
      4. ``report.interpretation`` is present and ``confidence >= 0.70``
      5. ``interpretation.intent.value`` is not ``"unknown"``
      6. ``interpretation.risk`` (upper-cased) is not HIGH or CRITICAL

    When all checks pass, ``adopt()`` is invoked with the "[AUTO]" field
    description and its own notification suppressed; if it succeeds, a
    separate informational Telegram notification is sent from here.

    Returns:
        ``{"success": bool, "skipped": bool, "reason": str, "pr_url": str | None}``
        ``skipped=True``  -> a precondition failed, nothing was attempted.
        ``skipped=False`` -> ``adopt()`` was attempted; ``success`` and
        ``reason`` mirror its result.
    """

    def declined(reason: str) -> dict:
        # Uniform shape for every "precondition not met" outcome.
        return {
            "success": False,
            "skipped": True,
            "reason": reason,
            "pr_url": None,
        }

    interp = report.interpretation

    # Check 1: no HIGH drift at all.
    if report.high_count > 0:
        return declined(f"存在 {report.high_count} 項 HIGH drift不自動採納")

    # Check 2: MEDIUM drift capped at 5.
    if report.medium_count > 5:
        return declined(f"MEDIUM drift {report.medium_count} 項超過上限 5不自動採納")

    # Check 3: count items that are neither allowlisted nor trivial noise.
    actionable_count = sum(
        1
        for entry in report.items
        if not (
            entry.is_allowlisted
            or self._is_trivial_drift(entry.git_value, entry.actual_value)
        )
    )
    if actionable_count > 10:
        return declined(f"可操作漂移 {actionable_count} 項超過上限 10不自動採納")

    # Checks 4-6 all depend on the intent interpretation being available.
    if not interp:
        return declined("尚無 Nemotron 意圖分析,不自動採納")
    if interp.confidence < 0.70:
        return declined(f"意圖信心 {interp.confidence:.0%} < 70%,不自動採納")
    if interp.intent.value == "unknown":
        return declined("Nemotron 意圖為 unknown不自動採納")
    if interp.risk.upper() in ("HIGH", "CRITICAL"):
        return declined(f"Nemotron 風險評估為 {interp.risk},不自動採納")

    logger.info(
        "drift_auto_adopt_eligible",
        report_id=report.report_id,
        medium_count=report.medium_count,
        actionable=actionable_count,
        intent=interp.intent.value,
        confidence=interp.confidence,
        risk=interp.risk,
    )

    # adopt() stays silent here so the notification below is the only one sent.
    outcome = await self.adopt(
        report,
        field_description="[AUTO] 低風險自動採納",
        suppress_notification=True,
    )
    if outcome.get("success"):
        # Button-less informational message asking the SRE to review the PR.
        await self._notify_auto_adopt_telegram(
            outcome["pr_url"], report, actionable_count, interp
        )

    return {
        "success": outcome.get("success", False),
        "skipped": False,
        "reason": outcome.get("message", ""),
        "pr_url": outcome.get("pr_url"),
    }
@staticmethod
def _is_trivial_drift(git_val, actual_val) -> bool:
"""
判斷是否為 K8s controller 自動補齊的噪音
(與 DriftNarratorService._is_trivial_drift 邏輯一致)
"""
def _is_empty(v) -> bool:
if v is None:
return True
s = str(v).strip()
return s in ("", "{}", "[]", "null", "None", "false", "False", "0")
return _is_empty(git_val) and _is_empty(actual_val)
async def _notify_auto_adopt_telegram(
    self,
    pr_url: str,
    report: "DriftReport",
    actionable_count: int,
    interp: "DriftInterpretation",
) -> None:
    """
    Send the button-less informational Telegram message after an auto-adopt.

    The message lists the namespace, the drift summary with the actionable
    item count, the interpreted intent / confidence / risk and the PR URL,
    and asks the SRE to review before merging.

    Best effort: any exception (including a failing gateway import) is
    logged as a warning and swallowed, so a notification problem never
    fails the adoption itself.
    """
    try:
        from src.services.telegram_gateway import get_telegram_gateway

        gateway = get_telegram_gateway()
        lines = [
            f"Namespace: {report.namespace}\n",
            f"漂移摘要: {report.summary}(可操作 {actionable_count} 項)\n",
            f"Nemotron 意圖: {interp.intent.value} | 信心: {interp.confidence:.0%} | 風險: {interp.risk}\n\n",
            f"PR: {pr_url}\n\n",
            "請 SRE 複核後 merge。",
        ]
        await gateway.send_info_notification(
            incident_id=report.report_id,
            title="Config Drift 已自動採納(低風險)",
            message="".join(lines),
            alertname="ConfigDriftAutoAdopt",
            severity="info",
        )
    except Exception as exc:
        logger.warning("drift_auto_adopt_telegram_failed", error=str(exc))
# =========================================================================
# Public: entry point triggered by the manual (button) action
# =========================================================================
async def adopt_drift(self, report_id: str) -> dict:
    """
    Adopt a drift report identified by its id (Telegram button entry point).

    Loads the report from the drift repository and delegates to ``adopt()``.
    According to the original author's note, this is what
    ``telegram_gateway._handle_drift_action`` calls.

    Returns:
        The result of ``adopt()``, or
        ``{"success": False, "message": "Report <id> not found"}`` when the
        repository has no such report.
    """
    # Imported lazily, as in the original implementation.
    from src.repositories.drift_repository import get_drift_repository

    repository = get_drift_repository()
    report = await repository.get(report_id)
    if report:
        return await self.adopt(report)
    return {"success": False, "message": f"Report {report_id} not found"}
async def adopt(self, report: "DriftReport", field_description: str = "", suppress_notification: bool = False) -> dict:
    """
    Write a drift report back to Git: create a branch, commit, open a PR.

    Steps (all through the Gitea REST API):
      1. read the head SHA of ``main`` (fails fast when main is unreachable)
      2. create the branch ``drift/adopt-{report_id[:8]}-{timestamp}``
      3. commit the affected YAML files to that branch
      4. open a PR against ``main``
      5. send a Telegram notification, unless suppressed

    If step 3 commits nothing or step 4 fails, the branch created in step 2
    is deleted again (best effort) so failed attempts do not accumulate
    orphan ``drift/adopt-*`` branches in the repository.

    Args:
        report: drift report, including the individual drift items.
        field_description: text used in the PR title; falls back to
            ``report.summary`` when empty.
        suppress_notification: when True, step 5 is skipped (used by
            ``auto_adopt_if_safe``, which sends its own notification).

    Returns:
        ``{"success": bool, "pr_url": str | None, "message": str}``.
        Never raises: any exception is logged and reported as a failure.
    """
    if not self._token:
        return {
            "success": False,
            "message": "GITEA_API_TOKEN 未設定,請在 K8s Secret 中新增此 key",
            "pr_url": None,
        }

    ts = now_taipei().strftime("%Y%m%d%H%M%S")
    branch_name = f"drift/adopt-{report.report_id[:8]}-{ts}"
    pr_title = f"chore: adopt drift — {report.namespace} {field_description or report.summary}"
    logger.info(
        "drift_adopt_start",
        report_id=report.report_id,
        branch=branch_name,
    )

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            headers = {
                "Authorization": f"token {self._token}",
                "Content-Type": "application/json",
            }

            # Step 1: head SHA of main.
            main_sha = await self._get_main_sha(client, headers)
            if not main_sha:
                return {"success": False, "message": "無法取得 main branch SHA", "pr_url": None}

            # Step 2: create the working branch.
            branch_ok = await self._create_branch(client, headers, branch_name, main_sha)
            if not branch_ok:
                return {"success": False, "message": f"建立 branch {branch_name} 失敗", "pr_url": None}

            # Step 3: commit the affected YAML files.
            committed_files = await self._commit_drift_yaml(client, headers, branch_name, report)
            if not committed_files:
                # Nothing to review: drop the now-useless branch.
                await self._delete_branch(client, headers, branch_name)
                return {
                    "success": False,
                    "message": "無直接匹配的 YAML 檔,未建立零 diff 承認 PR",
                    "pr_url": None,
                }

            # Step 4: open the PR.
            pr_url = await self._create_pr(
                client, headers, branch_name, pr_title, report, committed_files
            )
            if not pr_url:
                # Without a PR nobody would ever look at this branch.
                await self._delete_branch(client, headers, branch_name)
                return {"success": False, "message": "建立 PR 失敗", "pr_url": None}

            # Step 5: notify (auto_adopt_if_safe sends its own message).
            if not suppress_notification:
                await self._notify_telegram(pr_url, report, pr_title)

            logger.info("drift_adopt_pr_created", report_id=report.report_id, pr_url=pr_url)
            return {
                "success": True,
                "message": "PR 已建立,請 SRE review 後 merge",
                "pr_url": pr_url,
            }
    except Exception as e:
        # Top-level boundary: callers expect a result dict, not an exception.
        logger.error("drift_adopt_failed", report_id=report.report_id, error=str(e))
        return {"success": False, "message": f"adopt 失敗: {e}", "pr_url": None}


async def _delete_branch(self, client: httpx.AsyncClient, headers: dict, branch_name: str) -> None:
    """Best-effort removal of a branch this service created; never raises."""
    try:
        resp = await client.delete(
            f"{self._api_url}/api/v1/repos/{self._owner}/{self._repo}/branches/{branch_name}",
            headers=headers,
        )
        if resp.status_code not in (200, 204):
            logger.warning(
                "drift_adopt_branch_cleanup_failed",
                branch=branch_name,
                status=resp.status_code,
            )
    except Exception as e:
        logger.warning("drift_adopt_branch_cleanup_failed", branch=branch_name, error=str(e))
# =========================================================================
# Private helpers
# =========================================================================
async def _get_main_sha(self, client: httpx.AsyncClient, headers: dict) -> str | None:
"""取得 main branch 最新 commit SHA"""
resp = await client.get(
f"{self._api_url}/api/v1/repos/{self._owner}/{self._repo}/branches/main",
headers=headers,
)
if resp.status_code == 200:
return resp.json()["commit"]["id"]
logger.error("drift_adopt_get_main_sha_failed", status=resp.status_code)
return None
async def _create_branch(
self, client: httpx.AsyncClient, headers: dict, branch_name: str, main_sha: str
) -> bool:
"""建立新 branch"""
resp = await client.post(
f"{self._api_url}/api/v1/repos/{self._owner}/{self._repo}/branches",
headers=headers,
json={"new_branch_name": branch_name, "old_branch_name": "main"},
)
if resp.status_code in (200, 201):
return True
logger.error("drift_adopt_create_branch_failed", status=resp.status_code, body=resp.text[:200])
return False
async def _commit_drift_yaml(
self, client: httpx.AsyncClient, headers: dict, branch_name: str, report: "DriftReport"
) -> list[str]:
"""
找出受漂移影響的 YAML 檔,更新實際值後 commit 到 branch
目前策略:標記 YAML 檔為「已承認漂移」(加注解),
等 ADR-057 Phase 2 再實作精確 YAML patch 邏輯。
"""
committed = []
affected_kinds = {
item.resource_kind.lower()
for item in report.items
if not item.is_allowlisted
}
for yaml_file in sorted(self._k8s_dir.rglob("*.yaml")):
if not yaml_file.is_file():
continue
# 判斷此 YAML 是否與漂移相關
file_stem = yaml_file.stem.lower()
if not any(kind in file_stem for kind in affected_kinds):
continue
try:
content = yaml_file.read_text()
# 取得 Gitea 上的 file SHA用於 update API
file_sha = await self._get_file_sha(client, headers, str(yaml_file))
# 在檔案末尾加入漂移承認注解
from src.utils.timezone import now_taipei as _now
ts_str = _now().strftime("%Y-%m-%d %H:%M:%S +0800")
annotation = (
f"\n# [drift-adopted] {ts_str}\n"
f"# Report: {report.report_id}\n"
f"# Namespace: {report.namespace}\n"
f"# Summary: {report.summary}\n"
)
updated_content = content + annotation
encoded = base64.b64encode(updated_content.encode()).decode()
# Commit 到 branch
payload = {
"message": f"chore(drift): adopt {report.namespace} drift — {yaml_file.name}",
"content": encoded,
"branch": branch_name,
}
if file_sha:
payload["sha"] = file_sha
url = f"{self._api_url}/api/v1/repos/{self._owner}/{self._repo}/contents/{yaml_file}"
resp = await client.put(url, headers=headers, json=payload)
if resp.status_code in (200, 201):
committed.append(str(yaml_file))
logger.info("drift_adopt_file_committed", file=str(yaml_file))
else:
logger.warning("drift_adopt_file_commit_failed", file=str(yaml_file), status=resp.status_code)
except Exception as e:
logger.warning("drift_adopt_file_error", file=str(yaml_file), error=str(e))
return committed
async def _get_file_sha(
self, client: httpx.AsyncClient, headers: dict, file_path: str
) -> str | None:
"""取得 Gitea 上檔案的 SHAupdate 時需要)"""
resp = await client.get(
f"{self._api_url}/api/v1/repos/{self._owner}/{self._repo}/contents/{file_path}",
headers=headers,
params={"ref": "main"},
)
if resp.status_code == 200:
return resp.json().get("sha")
return None
async def _create_pr(
self,
client: httpx.AsyncClient,
headers: dict,
branch_name: str,
pr_title: str,
report: "DriftReport",
committed_files: list[str],
) -> str | None:
"""建立 Pull Request回傳 PR URL"""
files_md = "\n".join(f"- `{f}`" for f in committed_files) if committed_files else "(無直接匹配的 YAML 檔)"
intent_label = "❓ 意圖不明"
if report.interpretation:
intent_map = {
"emergency_hotfix": "🚨 緊急 Hotfix",
"human_error": "⚠️ 人為誤操作",
"automated_change": "🤖 系統自動變更",
"unknown": "❓ 意圖不明",
}
intent_label = intent_map.get(report.interpretation.intent.value, "❓ 意圖不明")
body = (
f"## Config Drift 承認\n\n"
f"**Report ID**: `{report.report_id}`\n"
f"**Namespace**: `{report.namespace}`\n"
f"**漂移摘要**: {report.summary}\n"
f"**Nemotron 意圖**: {intent_label}\n"
f"{f'**說明**: {report.interpretation.explanation}' if report.interpretation else ''}\n\n"
f"## 異動檔案\n\n{files_md}\n\n"
f"## 說明\n\n"
f"此 PR 由 AWOOOI Config Drift Detection 系統自動建立。\n"
f"承認此次 K8s 漂移為合法變更,將漂移狀態寫回 Git。\n\n"
f"> **SRE 確認事項**:\n"
f"> - [ ] 確認漂移是預期的合法變更\n"
f"> - [ ] 確認 YAML 注解正確反映變更意圖\n"
f"> - [ ] merge 後手動更新 K8s YAML 的實際差異值\n"
)
resp = await client.post(
f"{self._api_url}/api/v1/repos/{self._owner}/{self._repo}/pulls",
headers=headers,
json={
"title": pr_title,
"body": body,
"head": branch_name,
"base": "main",
},
)
if resp.status_code in (200, 201):
pr_data = resp.json()
return pr_data.get("html_url") or pr_data.get("url")
logger.error("drift_adopt_create_pr_failed", status=resp.status_code, body=resp.text[:300])
return None
async def _notify_telegram(self, pr_url: str, report: "DriftReport", pr_title: str) -> None:
    """Tell the SRE channel on Telegram that an adoption PR was opened.

    The message uses HTML markup (``<b>``), so the interpolated namespace,
    summary and PR URL are HTML-escaped first: a stray ``<`` or ``&`` in
    any of them would otherwise be read as markup.
    NOTE(review): this assumes ``send_text`` sends with HTML parse mode,
    which the ``<b>`` tag implies — confirm against the gateway.

    ``pr_title`` is accepted for interface compatibility but is not part
    of the message.

    Best effort: any exception is logged as a warning and swallowed.
    """
    try:
        from html import escape

        from src.services.telegram_gateway import get_telegram_gateway

        tg = get_telegram_gateway()
        # quote=False: only &, < and > matter outside of attribute values.
        namespace = escape(str(report.namespace), quote=False)
        summary = escape(str(report.summary), quote=False)
        link = escape(str(pr_url), quote=False)
        await tg.send_text(
            f"📋 <b>Config Drift 承認 PR 已建立</b>\n"
            f"Namespace: {namespace}\n"
            f"漂移: {summary}\n\n"
            f"PR: {link}\n\n"
            f"請 SRE review 後 merge。"
        )
    except Exception as e:
        logger.warning("drift_adopt_telegram_failed", error=str(e))
# =============================================================================
# Singleton
# =============================================================================
# Lazily created process-wide instance; populated on first access.
_adopt_service: DriftAdoptService | None = None


def get_drift_adopt_service() -> DriftAdoptService:
    """Return the shared ``DriftAdoptService``, creating it on first use."""
    global _adopt_service
    instance = _adopt_service
    if instance is None:
        instance = _adopt_service = DriftAdoptService()
    return instance