""" AWOOOI AIOps Phase 5 — GitOps PR Service(GitOps 高風險修復 PR) ================================================================ 職責:當 Blast Radius > 50(tier=dual)時,在 Gitea 建立 Issue 記錄修復計畫, 等待雙人審核後方可執行。 設計原則: 1. 只建立 Gitea Issue(不直接推 PR)— 修復計畫在 Issue 描述,含 rollback plan 2. 非阻塞:建立失敗不影響主路徑(fallback → 人工審核 Telegram 通知) 3. Issue 標題含 [AI-Repair] 前綴,方便篩選 4. 連線失敗時記錄 warning,不拋出例外 NOTE: 使用 Issue 而非 PR 的原因—— 修復動作是 kubectl 命令,不是代碼變更,沒有對應 diff 可以 PR。 Issue 提供人類可讀的審計軌跡,並觸發 Gitea 通知。 ADR-086: Phase 5 Declarative 修復與 Blast Radius 分控 2026-04-15 ogt + Claude Sonnet 4.6(亞太): Phase 5 初始建立 """ from __future__ import annotations from dataclasses import dataclass from typing import TYPE_CHECKING import structlog if TYPE_CHECKING: from src.services.declarative_remediation import DeclarativeSpec logger = structlog.get_logger(__name__) # ───────────────────────────────────────────────────────────────────────────── # Data Types # ───────────────────────────────────────────────────────────────────────────── @dataclass class GitOpsPRResult: """GitOps Issue 建立結果""" success: bool issue_url: str | None # Gitea Issue URL issue_number: int | None # Issue 編號 error: str | None # 失敗時的錯誤訊息 # ───────────────────────────────────────────────────────────────────────────── # Main Service # ───────────────────────────────────────────────────────────────────────────── class GitOpsPRService: """ GitOps 高風險修復 Issue 建立器 Usage: svc = GitOpsPRService() result = await svc.create_repair_issue(spec, incident_id="INC-001") if result.success: print(result.issue_url) """ async def create_repair_issue( self, spec: "DeclarativeSpec", incident_id: str, diagnosis: str = "", ) -> GitOpsPRResult: """ 在 Gitea 建立高風險修復審核 Issue。 Args: spec: DeclarativeSpec(必須是 tier=dual) incident_id: 關聯的 Incident ID diagnosis: 診斷摘要(供人類理解上下文) Returns: GitOpsPRResult """ if not spec.requires_gitops_pr: return GitOpsPRResult(success=False, issue_url=None, issue_number=None, error="spec.tier 不是 dual,無需 GitOps PR") from src.core.feature_flags import aiops_flags if not aiops_flags.AIOPS_P5_GITOPS_PR: logger.info( "gitops_pr_skipped_feature_flag", incident_id=incident_id, blast_radius=spec.blast_radius_score, ) return GitOpsPRResult(success=False, issue_url=None, issue_number=None, error="AIOPS_P5_GITOPS_PR=False,跳過 Gitea Issue 建立") title = f"[AI-Repair] 高風險修復審核(Blast={spec.blast_radius_score})— {incident_id}" body = _build_issue_body(spec, incident_id, diagnosis) return await self._create_gitea_issue(title, body, incident_id) async def _create_gitea_issue( self, title: str, body: str, incident_id: str, ) -> GitOpsPRResult: """呼叫 Gitea API 建立 Issue。""" import httpx from src.core.config import settings url = ( f"{settings.GITEA_API_URL}/repos/" f"{settings.GITEA_REPO_OWNER}/{settings.GITEA_REPO_NAME}/issues" ) headers = { "Authorization": f"token {settings.GITEA_API_TOKEN}", "Content-Type": "application/json", } payload = { "title": title[:255], "body": body, "labels": [], } try: async with httpx.AsyncClient(timeout=10.0) as client: resp = await client.post(url, json=payload, headers=headers) resp.raise_for_status() data = resp.json() issue_number = data.get("number") issue_url = data.get("html_url", "") logger.info( "gitops_issue_created", incident_id=incident_id, issue_number=issue_number, issue_url=issue_url, ) return GitOpsPRResult( success=True, issue_url=issue_url, issue_number=issue_number, error=None, ) except Exception as e: logger.warning( "gitops_issue_create_failed", incident_id=incident_id, error=str(e), ) return GitOpsPRResult(success=False, issue_url=None, issue_number=None, error=str(e)) # ───────────────────────────────────────────────────────────────────────────── # Helpers # ───────────────────────────────────────────────────────────────────────────── def _build_issue_body( spec: "DeclarativeSpec", incident_id: str, diagnosis: str, ) -> str: """建立 Gitea Issue 描述(Markdown 格式)。""" constraints_md = "\n".join(f"- {c}" for c in spec.constraints) or "(無額外約束)" return f"""## AI 自主修復審核請求 **Incident ID**: `{incident_id}` **Blast Radius Score**: `{spec.blast_radius_score}` / 100(tier: `{spec.tier}`) **需要**: 雙人審核後方可執行 --- ## 修復計畫 **目標狀態**: {spec.target_state} **執行命令**: ```bash {spec.action} ``` **命名空間**: `{spec.namespace}` **目標資源**: `{spec.target}` **需要 dry-run**: {'✅ 是' if spec.dry_run_required else '⬜ 否'} --- ## 安全約束 {constraints_md} --- ## 回滾計畫 ```bash {spec.rollback_plan} ``` --- ## 爆炸半徑計分依據 {spec.blast_reason} --- ## 診斷摘要 {diagnosis[:1000] if diagnosis else '(未提供診斷摘要)'} --- ## 審核流程 1. SRE-1 確認問題診斷正確,評估修復計畫 2. SRE-2 交叉驗證,確認回滾計畫可行 3. 兩人均在 Telegram 回覆 `/approve {incident_id}` 後,系統自動執行 > 此 Issue 由 AWOOOI AI 自主修復系統(Phase 5 ADR-086)自動建立。 """ # ───────────────────────────────────────────────────────────────────────────── # Singleton # ───────────────────────────────────────────────────────────────────────────── _service: GitOpsPRService | None = None def get_gitops_pr_service() -> GitOpsPRService: global _service if _service is None: _service = GitOpsPRService() return _service