fix(api): connect approval execution truth chain
This commit is contained in:
@@ -27,6 +27,7 @@ from pydantic import BaseModel
|
|||||||
|
|
||||||
from src.core.config import settings
|
from src.core.config import settings
|
||||||
from src.core.logging import get_logger
|
from src.core.logging import get_logger
|
||||||
|
from src.services.approval_action_classifier import is_no_action_approval_action
|
||||||
from src.services.approval_db import get_approval_service
|
from src.services.approval_db import get_approval_service
|
||||||
from src.services.approval_execution import get_execution_service
|
from src.services.approval_execution import get_execution_service
|
||||||
from src.services.incident_approval_service import get_incident_approval_service
|
from src.services.incident_approval_service import get_incident_approval_service
|
||||||
@@ -117,6 +118,15 @@ async def _finalize_telegram_approval(approval, execution_triggered: bool) -> bo
|
|||||||
"""
|
"""
|
||||||
if not execution_triggered:
|
if not execution_triggered:
|
||||||
return False
|
return False
|
||||||
|
approval_action = getattr(approval, "action", None)
|
||||||
|
if approval_action is not None and is_no_action_approval_action(approval_action):
|
||||||
|
logger.warning(
|
||||||
|
"telegram_approval_execution_suppressed_no_repair_action",
|
||||||
|
approval_id=str(getattr(approval, "id", "")),
|
||||||
|
incident_id=getattr(approval, "incident_id", None),
|
||||||
|
action=str(approval_action)[:200],
|
||||||
|
)
|
||||||
|
return False
|
||||||
return _schedule_telegram_approved_execution(approval)
|
return _schedule_telegram_approved_execution(approval)
|
||||||
|
|
||||||
|
|
||||||
@@ -313,6 +323,12 @@ async def telegram_webhook(
|
|||||||
approval=approval,
|
approval=approval,
|
||||||
execution_triggered=execution_triggered,
|
execution_triggered=execution_triggered,
|
||||||
)
|
)
|
||||||
|
approval_action = getattr(approval, "action", None)
|
||||||
|
execution_suppressed = bool(
|
||||||
|
execution_triggered
|
||||||
|
and approval_action is not None
|
||||||
|
and is_no_action_approval_action(approval_action)
|
||||||
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
"telegram_approval_signed",
|
"telegram_approval_signed",
|
||||||
approval_id=approval_id,
|
approval_id=approval_id,
|
||||||
@@ -320,16 +336,22 @@ async def telegram_webhook(
|
|||||||
status=status_value,
|
status=status_value,
|
||||||
execution_triggered=execution_triggered,
|
execution_triggered=execution_triggered,
|
||||||
execution_scheduled=execution_scheduled,
|
execution_scheduled=execution_scheduled,
|
||||||
|
execution_suppressed=execution_suppressed,
|
||||||
)
|
)
|
||||||
await _log_user_action("approve", True, getattr(approval, "incident_id", None))
|
await _log_user_action("approve", True, getattr(approval, "incident_id", None))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"ok": True,
|
"ok": True,
|
||||||
"message": "Approved" if execution_triggered else "Signed",
|
"message": (
|
||||||
|
"ApprovedWithoutExecution"
|
||||||
|
if execution_suppressed
|
||||||
|
else ("Approved" if execution_triggered else "Signed")
|
||||||
|
),
|
||||||
"approval_id": approval_id,
|
"approval_id": approval_id,
|
||||||
"status": status_value,
|
"status": status_value,
|
||||||
"execution_triggered": execution_triggered,
|
"execution_triggered": execution_triggered,
|
||||||
"execution_scheduled": execution_scheduled,
|
"execution_scheduled": execution_scheduled,
|
||||||
|
"execution_suppressed": execution_suppressed,
|
||||||
}
|
}
|
||||||
|
|
||||||
elif action == "reject":
|
elif action == "reject":
|
||||||
|
|||||||
@@ -2254,9 +2254,9 @@ async def _process_new_alert_background(
|
|||||||
"playbook_id": _matched_playbook_id_cs4,
|
"playbook_id": _matched_playbook_id_cs4,
|
||||||
}
|
}
|
||||||
fallback_create = ApprovalRequestCreate(
|
fallback_create = ApprovalRequestCreate(
|
||||||
action="OBSERVE",
|
action="NO_ACTION - REPAIR_CANDIDATE_MISSING: LLM 分析失敗,尚未產生可安全執行的修復指令",
|
||||||
description=f"[LLM Failed] {message}",
|
description=f"[LLM Failed] {message}",
|
||||||
risk_level=RiskLevel.MEDIUM,
|
risk_level=RiskLevel.LOW,
|
||||||
blast_radius=BlastRadius(
|
blast_radius=BlastRadius(
|
||||||
affected_pods=1,
|
affected_pods=1,
|
||||||
estimated_downtime="unknown",
|
estimated_downtime="unknown",
|
||||||
@@ -2277,9 +2277,9 @@ async def _process_new_alert_background(
|
|||||||
# 2026-04-27 Claude Sonnet 4.6: shadow-run Step2 — 只記 log,不改執行決策
|
# 2026-04-27 Claude Sonnet 4.6: shadow-run Step2 — 只記 log,不改執行決策
|
||||||
try:
|
try:
|
||||||
_shadow_proposal_cs4 = {
|
_shadow_proposal_cs4 = {
|
||||||
"risk_level": "medium",
|
"risk_level": "low",
|
||||||
"confidence": 0.0,
|
"confidence": 0.0,
|
||||||
"action": "OBSERVE",
|
"action": fallback_create.action,
|
||||||
"kubectl_command": "",
|
"kubectl_command": "",
|
||||||
"is_rule_based": False,
|
"is_rule_based": False,
|
||||||
"source": "fallback",
|
"source": "fallback",
|
||||||
@@ -2371,10 +2371,10 @@ async def _process_new_alert_background(
|
|||||||
|
|
||||||
await _push_to_telegram_background(
|
await _push_to_telegram_background(
|
||||||
approval_id=str(approval.id),
|
approval_id=str(approval.id),
|
||||||
risk_level="medium",
|
risk_level="low",
|
||||||
resource_name=target_resource,
|
resource_name=target_resource,
|
||||||
root_cause=message,
|
root_cause=message,
|
||||||
suggested_action="OBSERVE",
|
suggested_action=fallback_create.action,
|
||||||
estimated_downtime="unknown",
|
estimated_downtime="unknown",
|
||||||
hit_count=1,
|
hit_count=1,
|
||||||
primary_responsibility="HUMAN",
|
primary_responsibility="HUMAN",
|
||||||
|
|||||||
@@ -24,3 +24,8 @@ def is_no_action_approval_action(action: str | None) -> bool:
|
|||||||
or upper.startswith("OBSERVE")
|
or upper.startswith("OBSERVE")
|
||||||
or upper.startswith("INVESTIGATE")
|
or upper.startswith("INVESTIGATE")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def is_executable_repair_approval_action(action: str | None) -> bool:
    """Tell whether approving ``action`` should schedule a repair executor.

    This is the strict complement of ``is_no_action_approval_action``: every
    action that classifier does not flag as no-action is treated as
    executable.
    """
    flagged_as_no_action = is_no_action_approval_action(action)
    return not flagged_as_no_action
|
||||||
|
|||||||
@@ -34,6 +34,10 @@ from src.models.approval import (
|
|||||||
RiskLevel,
|
RiskLevel,
|
||||||
Signature,
|
Signature,
|
||||||
)
|
)
|
||||||
|
from src.services.approval_action_classifier import (
|
||||||
|
is_executable_repair_approval_action,
|
||||||
|
is_no_action_approval_action,
|
||||||
|
)
|
||||||
|
|
||||||
logger = structlog.get_logger(__name__)
|
logger = structlog.get_logger(__name__)
|
||||||
|
|
||||||
@@ -703,10 +707,21 @@ class ApprovalDBService:
|
|||||||
if new_sig_count >= record.required_signatures:
|
if new_sig_count >= record.required_signatures:
|
||||||
new_status = ApprovalStatus.APPROVED
|
new_status = ApprovalStatus.APPROVED
|
||||||
resolved_at = datetime.now(UTC)
|
resolved_at = datetime.now(UTC)
|
||||||
execution_triggered = True
|
execution_triggered = is_executable_repair_approval_action(
|
||||||
|
record.action
|
||||||
|
)
|
||||||
|
|
||||||
# Phase 5: 樂觀鎖更新 - 使用 WHERE current_signatures = old_value
|
# Phase 5: 樂觀鎖更新 - 使用 WHERE current_signatures = old_value
|
||||||
# 如果其他人已更新,這個 UPDATE 會更新 0 行
|
# 如果其他人已更新,這個 UPDATE 會更新 0 行
|
||||||
|
metadata = dict(record.extra_metadata or {})
|
||||||
|
if is_no_action_approval_action(record.action):
|
||||||
|
metadata["execution_kind"] = metadata.get("execution_kind") or "no_action"
|
||||||
|
metadata["repair_executed"] = False
|
||||||
|
metadata["repair_attempted"] = False
|
||||||
|
metadata["execution_suppressed_reason"] = (
|
||||||
|
"approval_action_has_no_executable_repair"
|
||||||
|
)
|
||||||
|
|
||||||
result = await db.execute(
|
result = await db.execute(
|
||||||
update(ApprovalRecord)
|
update(ApprovalRecord)
|
||||||
.where(and_(
|
.where(and_(
|
||||||
@@ -718,6 +733,7 @@ class ApprovalDBService:
|
|||||||
current_signatures=new_sig_count,
|
current_signatures=new_sig_count,
|
||||||
status=new_status,
|
status=new_status,
|
||||||
resolved_at=resolved_at,
|
resolved_at=resolved_at,
|
||||||
|
extra_metadata=metadata,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -571,6 +571,16 @@ class ApprovalExecutionService:
|
|||||||
repair_executed=repair_executed,
|
repair_executed=repair_executed,
|
||||||
repair_attempted=repair_attempted,
|
repair_attempted=repair_attempted,
|
||||||
)
|
)
|
||||||
|
if repair_attempted:
|
||||||
|
await self._record_approved_repair_execution(
|
||||||
|
approval=approval,
|
||||||
|
success=result.success,
|
||||||
|
error_message=None if result.success else result.error,
|
||||||
|
operation_type=operation_type,
|
||||||
|
resource_name=resource_name,
|
||||||
|
namespace=namespace,
|
||||||
|
duration_ms=result.duration_ms,
|
||||||
|
)
|
||||||
|
|
||||||
# Update approval status based on result
|
# Update approval status based on result
|
||||||
total_attempts = attempt # attempt 在重試迴圈後為最終嘗試次數
|
total_attempts = attempt # attempt 在重試迴圈後為最終嘗試次數
|
||||||
@@ -631,12 +641,33 @@ class ApprovalExecutionService:
|
|||||||
approval_id=str(approval.id),
|
approval_id=str(approval.id),
|
||||||
timeout_sec=30.0,
|
timeout_sec=30.0,
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
|
await asyncio.wait_for(
|
||||||
|
self.write_execution_result_to_km(
|
||||||
|
approval=approval,
|
||||||
|
success=True,
|
||||||
|
error_message=None,
|
||||||
|
),
|
||||||
|
timeout=15.0,
|
||||||
|
)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
logger.warning(
|
||||||
|
"execution_km_write_timeout",
|
||||||
|
approval_id=str(approval.id),
|
||||||
|
timeout_sec=15.0,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning(
|
||||||
|
"execution_km_write_failed",
|
||||||
|
approval_id=str(approval.id),
|
||||||
|
error=str(exc),
|
||||||
|
)
|
||||||
|
|
||||||
# ADR-081 Phase 1 + ADR-090 修復 (2026-04-19 ogt + Claude Opus 4.7):
|
# ADR-081 Phase 1 + ADR-090 修復 (2026-04-19 ogt + Claude Opus 4.7):
|
||||||
# PostExecutionVerifier 改 await + 60s timeout,確保 verification_result 必寫入。
|
# PostExecutionVerifier 改 await + 60s timeout,確保 verification_result 必寫入。
|
||||||
# 之前 fire-and-forget 在 Pod recycle 時 task 被殺,導致 1212 筆 evidence 全 NULL.
|
# 之前 fire-and-forget 在 Pod recycle 時 task 被殺,導致 1212 筆 evidence 全 NULL.
|
||||||
from src.core.feature_flags import aiops_flags
|
from src.core.feature_flags import aiops_flags
|
||||||
if aiops_flags.is_sub_flag_enabled("AIOPS_P1_POST_EXECUTION_VERIFIER"):
|
if repair_executed or aiops_flags.is_sub_flag_enabled("AIOPS_P1_POST_EXECUTION_VERIFIER"):
|
||||||
try:
|
try:
|
||||||
await asyncio.wait_for(
|
await asyncio.wait_for(
|
||||||
self._run_post_execution_verify(
|
self._run_post_execution_verify(
|
||||||
@@ -771,6 +802,28 @@ class ApprovalExecutionService:
|
|||||||
approval_id=str(approval.id),
|
approval_id=str(approval.id),
|
||||||
timeout_sec=30.0,
|
timeout_sec=30.0,
|
||||||
)
|
)
|
||||||
|
if repair_attempted:
|
||||||
|
try:
|
||||||
|
await asyncio.wait_for(
|
||||||
|
self.write_execution_result_to_km(
|
||||||
|
approval=approval,
|
||||||
|
success=False,
|
||||||
|
error_message=result.error,
|
||||||
|
),
|
||||||
|
timeout=15.0,
|
||||||
|
)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
logger.warning(
|
||||||
|
"execution_km_write_timeout",
|
||||||
|
approval_id=str(approval.id),
|
||||||
|
timeout_sec=15.0,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning(
|
||||||
|
"execution_km_write_failed",
|
||||||
|
approval_id=str(approval.id),
|
||||||
|
error=str(exc),
|
||||||
|
)
|
||||||
|
|
||||||
# ADR-090 修復 (2026-04-19 ogt + Claude Opus 4.7):
|
# ADR-090 修復 (2026-04-19 ogt + Claude Opus 4.7):
|
||||||
# 失敗時也跑 verifier,把 verification_result='failed' 回寫 evidence。
|
# 失敗時也跑 verifier,把 verification_result='failed' 回寫 evidence。
|
||||||
@@ -1477,6 +1530,93 @@ class ApprovalExecutionService:
|
|||||||
return None
|
return None
|
||||||
return getattr(risk_level, "value", str(risk_level))
|
return getattr(risk_level, "value", str(risk_level))
|
||||||
|
|
||||||
|
async def _record_approved_repair_execution(
    self,
    *,
    approval: "ApprovalRequest",
    success: bool,
    error_message: str | None,
    operation_type: OperationType | None,
    resource_name: str | None,
    namespace: str | None,
    duration_ms: int | None = None,
) -> None:
    """Persist the repair evidence for an approved executable action.

    Writes one row through the auto-repair execution repository unless the
    approval has no incident id, the action is observation-only, or a row
    with the same playbook id, trigger source and executed step already
    exists for the incident.

    The routine is best-effort: any exception raised while deriving the row
    or while talking to the repository is logged as
    ``approved_repair_execution_record_failed`` and swallowed, so evidence
    bookkeeping cannot abort the caller's own post-execution handling.

    Args:
        approval: Approval whose ``action`` is recorded as the executed step.
        success: Stored as the row's ``success`` flag.
        error_message: Stored as the row's ``error_message``.
        operation_type: Executor operation; its ``value`` (when present)
            labels the row, otherwise ``str(operation_type or "unknown")``.
        resource_name: Target resource; ``"unknown"`` is used when falsy.
        namespace: Only included in the success log entry.
        duration_ms: Stored as ``execution_time_ms``.
    """
    incident_id = getattr(approval, "incident_id", None)
    if not incident_id:
        logger.info(
            "approved_repair_execution_record_skipped_no_incident",
            approval_id=str(getattr(approval, "id", "")),
            action=str(getattr(approval, "action", ""))[:160],
        )
        return

    try:
        # Everything below runs inside the try on purpose: the helper calls
        # and the ``approval.id`` read can raise, and this recorder must
        # never propagate an error into the execution flow that awaits it.
        if self._is_observation_only_action(getattr(approval, "action", None)):
            return

        if operation_type is not None and hasattr(operation_type, "value"):
            operation_label = operation_type.value
        else:
            operation_label = str(operation_type or "unknown")
        target = resource_name or "unknown"

        # Fall back to the approval id when no playbook was matched; the
        # value is capped at 36 characters.
        playbook_id = str(
            getattr(approval, "matched_playbook_id", None) or approval.id
        )[:36]
        requested_by = str(
            getattr(approval, "requested_by", None) or "telegram_approval"
        )
        if self._is_auto_approved_request(approval):
            triggered_by = requested_by[:50]
        else:
            triggered_by = "human_approved"
        playbook_name = f"approval_execute:{operation_label}:{target}"[:200]
        step = str(getattr(approval, "action", "") or "")

        from src.repositories.audit_log_repository import get_auto_repair_execution_repository

        repo = get_auto_repair_execution_repository()
        existing = await repo.list_by_incident(incident_id)

        # Skip the write when an equivalent row is already stored for this
        # incident.
        already_recorded = any(
            str(getattr(row, "playbook_id", "")) == playbook_id
            and getattr(row, "triggered_by", "") == triggered_by
            and step in list(getattr(row, "executed_steps", []) or [])
            for row in existing
        )
        if already_recorded:
            logger.info(
                "approved_repair_execution_record_already_exists",
                approval_id=str(approval.id),
                incident_id=incident_id,
                playbook_id=playbook_id,
            )
            return

        await repo.create(
            incident_id=incident_id,
            playbook_id=playbook_id,
            playbook_name=playbook_name,
            success=success,
            executed_steps=[step],
            error_message=error_message,
            triggered_by=triggered_by,
            risk_level=self._approval_risk_value(approval),
            execution_time_ms=duration_ms,
        )
        logger.info(
            "approved_repair_execution_recorded",
            approval_id=str(approval.id),
            incident_id=incident_id,
            operation_type=operation_label,
            target=target,
            namespace=namespace,
            success=success,
        )
    except Exception as exc:
        logger.warning(
            "approved_repair_execution_record_failed",
            approval_id=str(getattr(approval, "id", "")),
            incident_id=incident_id,
            error=str(exc),
        )
|
||||||
|
|
||||||
async def finalize_auto_approved_execution(
|
async def finalize_auto_approved_execution(
|
||||||
self,
|
self,
|
||||||
approval: "ApprovalRequest",
|
approval: "ApprovalRequest",
|
||||||
|
|||||||
@@ -45,6 +45,10 @@ from src.services.awooop_deeplinks import (
|
|||||||
incident_truth_chain_button_row,
|
incident_truth_chain_button_row,
|
||||||
incident_truth_chain_reply_markup,
|
incident_truth_chain_reply_markup,
|
||||||
)
|
)
|
||||||
|
from src.services.approval_action_classifier import (
|
||||||
|
is_executable_repair_approval_action,
|
||||||
|
is_no_action_approval_action,
|
||||||
|
)
|
||||||
from src.services.chat_manager import get_chat_manager
|
from src.services.chat_manager import get_chat_manager
|
||||||
from src.services.operator_outcome import build_operator_outcome
|
from src.services.operator_outcome import build_operator_outcome
|
||||||
from src.services.security_interceptor import (
|
from src.services.security_interceptor import (
|
||||||
@@ -3625,6 +3629,7 @@ class TelegramGateway:
|
|||||||
include_auto_tuning: bool = True,
|
include_auto_tuning: bool = True,
|
||||||
auto_tuning_command: str = "",
|
auto_tuning_command: str = "",
|
||||||
incident_id: str = "",
|
incident_id: str = "",
|
||||||
|
suggested_action: str = "",
|
||||||
# ADR-071-E: TYPE-3 動態按鈕 (2026-04-11 Claude Sonnet 4.6)
|
# ADR-071-E: TYPE-3 動態按鈕 (2026-04-11 Claude Sonnet 4.6)
|
||||||
alert_category: str = "",
|
alert_category: str = "",
|
||||||
notification_type: str = "",
|
notification_type: str = "",
|
||||||
@@ -3661,13 +3666,38 @@ class TelegramGateway:
|
|||||||
approve_nonce = self._security.generate_callback_nonce(approval_id, "approve")
|
approve_nonce = self._security.generate_callback_nonce(approval_id, "approve")
|
||||||
reject_nonce = self._security.generate_callback_nonce(approval_id, "reject")
|
reject_nonce = self._security.generate_callback_nonce(approval_id, "reject")
|
||||||
silence_nonce = self._security.generate_callback_nonce(approval_id, "silence")
|
silence_nonce = self._security.generate_callback_nonce(approval_id, "silence")
|
||||||
|
approval_buttons_enabled = (
|
||||||
|
True
|
||||||
|
if not str(suggested_action or "").strip()
|
||||||
|
else is_executable_repair_approval_action(suggested_action)
|
||||||
|
)
|
||||||
|
|
||||||
# 第一排永遠置頂(HARD RULE,任何路徑不得改動)
|
# 可執行修復卡第一排置頂批准/拒絕;純觀察卡不得提供誤導性的執行批准。
|
||||||
first_row: list[dict] = [
|
first_row: list[dict] = [
|
||||||
{"text": "✅ 批准", "callback_data": approve_nonce},
|
{"text": "✅ 批准", "callback_data": approve_nonce},
|
||||||
{"text": "❌ 拒絕", "callback_data": reject_nonce},
|
{"text": "❌ 拒絕", "callback_data": reject_nonce},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if not approval_buttons_enabled:
|
||||||
|
info_row: list[dict] = []
|
||||||
|
if incident_id:
|
||||||
|
info_row.extend([
|
||||||
|
{"text": "📋 詳情", "callback_data": f"detail:{incident_id}"},
|
||||||
|
{"text": "📊 歷史", "callback_data": f"history:{incident_id}"},
|
||||||
|
])
|
||||||
|
info_row.append({"text": "🔕 靜默", "callback_data": silence_nonce})
|
||||||
|
buttons: list[list[dict]] = [info_row]
|
||||||
|
awooop_row = _awooop_truth_chain_button_row(incident_id)
|
||||||
|
if awooop_row:
|
||||||
|
buttons.append(awooop_row)
|
||||||
|
logger.info(
|
||||||
|
"telegram_keyboard_built",
|
||||||
|
source="non_repair_action",
|
||||||
|
approval_id=approval_id,
|
||||||
|
incident_id=incident_id,
|
||||||
|
)
|
||||||
|
return {"inline_keyboard": buttons}
|
||||||
|
|
||||||
# ── B3: LLM 動態路徑 ─────────────────────────────────────────────────
|
# ── B3: LLM 動態路徑 ─────────────────────────────────────────────────
|
||||||
# 2026-04-27 Claude Sonnet 4.6: B3 — USE_LLM_DYNAMIC_BUTTONS=true 且
|
# 2026-04-27 Claude Sonnet 4.6: B3 — USE_LLM_DYNAMIC_BUTTONS=true 且
|
||||||
# action_plan.recommended_actions 非空時走此路徑,否則 fallback 到 YAML。
|
# action_plan.recommended_actions 非空時走此路徑,否則 fallback 到 YAML。
|
||||||
@@ -3723,7 +3753,7 @@ class TelegramGateway:
|
|||||||
_dynamic_buttons = _build_category_buttons_for(alert_category) if alert_category else []
|
_dynamic_buttons = _build_category_buttons_for(alert_category) if alert_category else []
|
||||||
|
|
||||||
if is_type3 and _dynamic_buttons:
|
if is_type3 and _dynamic_buttons:
|
||||||
# TYPE-3 動態按鈕:批准/拒絕永遠置頂第一行
|
# TYPE-3 動態按鈕:可執行修復卡把批准/拒絕置頂第一行
|
||||||
# 2026-04-17 ogt + Claude Sonnet 4.6 (BUG-C): 強制置頂批准/拒絕
|
# 2026-04-17 ogt + Claude Sonnet 4.6 (BUG-C): 強制置頂批准/拒絕
|
||||||
# 舊:批准/拒絕列在最後且受 requires_human_approval 控制 → K8s 按鈕蓋台 → 死卡
|
# 舊:批准/拒絕列在最後且受 requires_human_approval 控制 → K8s 按鈕蓋台 → 死卡
|
||||||
# 新:[批准][拒絕] 永遠第一行,K8s 類別按鈕置後,SRE 第一眼就看到審核扳機
|
# 新:[批准][拒絕] 永遠第一行,K8s 類別按鈕置後,SRE 第一眼就看到審核扳機
|
||||||
@@ -4098,6 +4128,7 @@ class TelegramGateway:
|
|||||||
include_auto_tuning=bool(auto_tuning_command),
|
include_auto_tuning=bool(auto_tuning_command),
|
||||||
auto_tuning_command=auto_tuning_command,
|
auto_tuning_command=auto_tuning_command,
|
||||||
incident_id=incident_id,
|
incident_id=incident_id,
|
||||||
|
suggested_action=suggested_action,
|
||||||
alert_category=alert_category,
|
alert_category=alert_category,
|
||||||
notification_type=notification_type,
|
notification_type=notification_type,
|
||||||
)
|
)
|
||||||
@@ -4293,6 +4324,7 @@ class TelegramGateway:
|
|||||||
_group_keyboard = await self._build_inline_keyboard(
|
_group_keyboard = await self._build_inline_keyboard(
|
||||||
approval_id=approval_id,
|
approval_id=approval_id,
|
||||||
incident_id=incident_id,
|
incident_id=incident_id,
|
||||||
|
suggested_action=suggested_action,
|
||||||
alert_category=alert_category,
|
alert_category=alert_category,
|
||||||
notification_type=notification_type,
|
notification_type=notification_type,
|
||||||
)
|
)
|
||||||
@@ -8654,6 +8686,7 @@ class TelegramGateway:
|
|||||||
action: str,
|
action: str,
|
||||||
username: str,
|
username: str,
|
||||||
execution_triggered: bool,
|
execution_triggered: bool,
|
||||||
|
approval_action: str | None = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
2026-04-09 Claude Sonnet 4.6: 批准/拒絕後立即更新 Telegram 訊息狀態。
|
2026-04-09 Claude Sonnet 4.6: 批准/拒絕後立即更新 Telegram 訊息狀態。
|
||||||
@@ -8689,7 +8722,11 @@ class TelegramGateway:
|
|||||||
if action == "approve":
|
if action == "approve":
|
||||||
status_emoji = "✅"
|
status_emoji = "✅"
|
||||||
status_text = f"<b>已批准</b> by {_html.escape(username)}"
|
status_text = f"<b>已批准</b> by {_html.escape(username)}"
|
||||||
suffix = "⚡ 執行中..." if execution_triggered else "已簽核,等待更多簽核"
|
if approval_action is not None and is_no_action_approval_action(approval_action):
|
||||||
|
status_emoji = "🟠"
|
||||||
|
suffix = "已記錄;此卡沒有可執行修復,等待補修復候選"
|
||||||
|
else:
|
||||||
|
suffix = "⚡ 執行中..." if execution_triggered else "已簽核,等待更多簽核"
|
||||||
else:
|
else:
|
||||||
status_emoji = "❌"
|
status_emoji = "❌"
|
||||||
status_text = f"<b>已拒絕</b> by {_html.escape(username)}"
|
status_text = f"<b>已拒絕</b> by {_html.escape(username)}"
|
||||||
@@ -8806,13 +8843,29 @@ class TelegramGateway:
|
|||||||
# 非 PENDING 狀態下 sign_approval early-return → approval 是舊 record
|
# 非 PENDING 狀態下 sign_approval early-return → approval 是舊 record
|
||||||
# 此時不應發「執行中...」,應告知用戶告警已處理過
|
# 此時不應發「執行中...」,應告知用戶告警已處理過
|
||||||
if approval.status == ApprovalStatus.APPROVED and execution_triggered:
|
if approval.status == ApprovalStatus.APPROVED and execution_triggered:
|
||||||
|
_execution_allowed = not is_no_action_approval_action(
|
||||||
|
getattr(approval, "action", None)
|
||||||
|
)
|
||||||
# 2026-04-09 Claude Sonnet 4.6: 回應 Telegram — 更新訊息狀態 + answer callback
|
# 2026-04-09 Claude Sonnet 4.6: 回應 Telegram — 更新訊息狀態 + answer callback
|
||||||
await self._notify_approval_result(
|
await self._notify_approval_result(
|
||||||
message_id=message_id,
|
message_id=message_id,
|
||||||
incident_id=approval_id,
|
incident_id=approval_id,
|
||||||
action="approve",
|
action="approve",
|
||||||
username=username,
|
username=username,
|
||||||
execution_triggered=execution_triggered,
|
execution_triggered=execution_triggered and _execution_allowed,
|
||||||
|
approval_action=getattr(approval, "action", None),
|
||||||
|
)
|
||||||
|
elif (
|
||||||
|
approval.status == ApprovalStatus.APPROVED
|
||||||
|
and is_no_action_approval_action(getattr(approval, "action", None))
|
||||||
|
):
|
||||||
|
await self._notify_approval_result(
|
||||||
|
message_id=message_id,
|
||||||
|
incident_id=approval_id,
|
||||||
|
action="approve",
|
||||||
|
username=username,
|
||||||
|
execution_triggered=False,
|
||||||
|
approval_action=getattr(approval, "action", None),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# 告警已是 execution_failed / execution_success / rejected 等終態
|
# 告警已是 execution_failed / execution_success / rejected 等終態
|
||||||
@@ -8830,7 +8883,11 @@ class TelegramGateway:
|
|||||||
# 原本 gate 用 execution_triggered,race condition 時失效(樂觀鎖失敗)
|
# 原本 gate 用 execution_triggered,race condition 時失效(樂觀鎖失敗)
|
||||||
# 改用 approval.status == APPROVED(與 REST API 路徑 approvals.py:360 對齊)
|
# 改用 approval.status == APPROVED(與 REST API 路徑 approvals.py:360 對齊)
|
||||||
# 用 Redis lock exec:{approval_id} 防重入(REST + Telegram 同時簽核)
|
# 用 Redis lock exec:{approval_id} 防重入(REST + Telegram 同時簽核)
|
||||||
if approval.status == ApprovalStatus.APPROVED and execution_triggered:
|
if (
|
||||||
|
approval.status == ApprovalStatus.APPROVED
|
||||||
|
and execution_triggered
|
||||||
|
and not is_no_action_approval_action(getattr(approval, "action", None))
|
||||||
|
):
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
from src.core.redis_client import get_redis
|
from src.core.redis_client import get_redis
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ from unittest.mock import AsyncMock
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from src.models.approval import RiskLevel
|
from src.models.approval import RiskLevel
|
||||||
|
from src.services.executor import OperationType
|
||||||
from src.services.approval_execution import ApprovalExecutionService
|
from src.services.approval_execution import ApprovalExecutionService
|
||||||
|
|
||||||
|
|
||||||
@@ -128,3 +129,47 @@ async def test_finalize_auto_approved_execution_skips_no_action(monkeypatch):
|
|||||||
assert repo.created == []
|
assert repo.created == []
|
||||||
write_km.assert_not_awaited()
|
write_km.assert_not_awaited()
|
||||||
run_verify.assert_not_awaited()
|
run_verify.assert_not_awaited()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_record_approved_repair_execution_persists_human_approved_trace(monkeypatch):
    """A recorded restart for this approval is stored with ``human_approved`` as trigger."""
    fake_repo = _FakeAutoRepairRepo()

    def _repo_factory():
        return fake_repo

    monkeypatch.setattr(
        "src.repositories.audit_log_repository.get_auto_repair_execution_repository",
        _repo_factory,
    )

    human_approval = SimpleNamespace(
        id="77777777-7777-7777-7777-777777777777",
        incident_id="INC-20260611-HUMAN",
        action="kubectl rollout restart deployment/api -n awoooi-prod",
        requested_by="OpenClaw (ollama_gcp_a)",
        matched_playbook_id="pb-human-001",
        risk_level=RiskLevel.MEDIUM,
    )

    service = ApprovalExecutionService()
    await service._record_approved_repair_execution(
        approval=human_approval,
        success=True,
        error_message=None,
        operation_type=OperationType.RESTART_DEPLOYMENT,
        resource_name="api",
        namespace="awoooi-prod",
        duration_ms=1234,
    )

    expected_row = {
        "incident_id": "INC-20260611-HUMAN",
        "playbook_id": "pb-human-001",
        "playbook_name": "approval_execute:RESTART_DEPLOYMENT:api",
        "success": True,
        "executed_steps": [
            "kubectl rollout restart deployment/api -n awoooi-prod"
        ],
        "error_message": None,
        "triggered_by": "human_approved",
        "risk_level": "medium",
        "execution_time_ms": 1234,
    }
    assert fake_repo.created == [expected_row]
|
||||||
|
|||||||
@@ -742,6 +742,36 @@ async def test_build_inline_keyboard_includes_awooop_deep_link() -> None:
|
|||||||
} in buttons
|
} in buttons
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_build_inline_keyboard_hides_approval_for_no_action() -> None:
    """OBSERVE / NO_ACTION cards must not offer an approve entry that reads as executing a repair."""
    markup = await TelegramGateway()._build_inline_keyboard(
        approval_id="approval-no-repair-1",
        include_auto_tuning=False,
        incident_id="INC-20260611-NOOP",
        suggested_action="NO_ACTION - REPAIR_CANDIDATE_MISSING: LLM 分析失敗",
    )

    # Flatten the keyboard rows into one list of button dicts.
    flat_buttons = []
    for keyboard_row in markup["inline_keyboard"]:
        flat_buttons.extend(keyboard_row)
    labels = {entry["text"] for entry in flat_buttons}

    assert "✅ 批准" not in labels
    assert "❌ 拒絕" not in labels
    assert "🔕 靜默" in labels

    runs_button = {
        "text": "🧭 Runs",
        "url": (
            "https://awoooi.wooo.work/zh-TW/awooop/runs"
            "?project_id=awoooi&incident_id=INC-20260611-NOOP"
        ),
    }
    assert runs_button in flat_buttons
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_send_request_strips_awooop_callback_metadata_before_telegram_api(monkeypatch):
|
async def test_send_request_strips_awooop_callback_metadata_before_telegram_api(monkeypatch):
|
||||||
"""AwoooP truth-chain metadata must be mirrored, not sent to Telegram Bot API."""
|
"""AwoooP truth-chain metadata must be mirrored, not sent to Telegram Bot API."""
|
||||||
|
|||||||
@@ -117,6 +117,56 @@ async def test_telegram_approval_schedules_executor_after_required_signature(mon
|
|||||||
assert op_log_repo.rows[0]["kwargs"]["action_detail"] == "approve"
|
assert op_log_repo.rows[0]["kwargs"]["action_detail"] == "approve"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_telegram_approval_suppresses_executor_for_no_action(monkeypatch):
    """Approving a NO_ACTION card via the webhook reports ``ApprovedWithoutExecution``.

    The fake approval service reports ``execution_triggered=True`` and the
    stubbed finalizer returns ``False``; the webhook response must then flag
    the execution as suppressed and not scheduled.

    NOTE(review): the ApprovalDBService change in this same commit derives
    ``execution_triggered`` from ``is_executable_repair_approval_action``,
    which presumably yields ``False`` for this action — confirm whether the
    webhook can still observe ``execution_triggered=True`` outside this fake.
    """
    approval_id = "55555555-5555-5555-5555-555555555555"
    no_action_approval = SimpleNamespace(
        id=UUID(approval_id),
        status=SimpleNamespace(value="approved"),
        incident_id="INC-20260611-NOEXEC",
        action="NO_ACTION - REPAIR_CANDIDATE_MISSING: LLM 分析失敗",
    )
    finalizer_calls: list[dict] = []
    op_log_repo = _FakeAlertOperationLogRepository()

    async def fake_finalize(*, approval, execution_triggered: bool) -> bool:
        # Record what the webhook handed to the finalizer, then report
        # that nothing was scheduled.
        observed = {
            "approval_id": str(approval.id),
            "execution_triggered": execution_triggered,
        }
        finalizer_calls.append(observed)
        return False

    callback_result = {
        "success": True,
        "action": "approve",
        "approval_id": approval_id,
        "user": {"id": 42, "username": "ops"},
    }
    fake_gateway = _FakeGateway(callback_result)

    def _approval_service_factory():
        return _FakeApprovalService(no_action_approval, execution_triggered=True)

    monkeypatch.setattr(telegram_api, "get_telegram_gateway", lambda: fake_gateway)
    monkeypatch.setattr(telegram_api, "get_approval_service", _approval_service_factory)
    monkeypatch.setattr(telegram_api, "_finalize_telegram_approval", fake_finalize)
    monkeypatch.setattr(
        "src.repositories.alert_operation_log_repository.get_alert_operation_log_repository",
        lambda: op_log_repo,
    )

    update = _callback_update(f"approve:{approval_id}:ts:nonce")
    result = await telegram_api.telegram_webhook(update)

    assert result["ok"] is True
    assert result["message"] == "ApprovedWithoutExecution"
    assert result["execution_triggered"] is True
    assert result["execution_scheduled"] is False
    assert result["execution_suppressed"] is True

    expected_calls = [{
        "approval_id": approval_id,
        "execution_triggered": True,
    }]
    assert finalizer_calls == expected_calls
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_telegram_approval_duplicate_does_not_schedule_executor(monkeypatch):
|
async def test_telegram_approval_duplicate_does_not_schedule_executor(monkeypatch):
|
||||||
approval_id = "33333333-3333-3333-3333-333333333333"
|
approval_id = "33333333-3333-3333-3333-333333333333"
|
||||||
@@ -242,3 +292,33 @@ async def test_finalize_telegram_approval_runs_executor_task(monkeypatch):
|
|||||||
assert scheduled is True
|
assert scheduled is True
|
||||||
await telegram_api.asyncio.sleep(0)
|
await telegram_api.asyncio.sleep(0)
|
||||||
assert executed == ["33333333-3333-3333-3333-333333333333"]
|
assert executed == ["33333333-3333-3333-3333-333333333333"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_finalize_telegram_approval_does_not_schedule_no_action(monkeypatch):
    """Finalizing an ``OBSERVE`` approval schedules nothing and never calls the executor."""
    executed_ids: list[str] = []
    observe_approval = SimpleNamespace(
        id=UUID("66666666-6666-6666-6666-666666666666"),
        incident_id="INC-20260611-NOOP",
        action="OBSERVE",
    )

    class _FakeExecutionService:
        async def execute_approved_action(self, received_approval):
            # Any call here would mean the executor was scheduled.
            executed_ids.append(str(received_approval.id))
            return True

    def _execution_service_factory():
        return _FakeExecutionService()

    monkeypatch.setattr(
        telegram_api,
        "get_execution_service",
        _execution_service_factory,
    )

    was_scheduled = await telegram_api._finalize_telegram_approval(
        approval=observe_approval,
        execution_triggered=True,
    )

    assert was_scheduled is False
    # Yield once to the event loop so a wrongly scheduled task would get to run.
    await telegram_api.asyncio.sleep(0)
    assert executed_ids == []
|
||||||
|
|||||||
Reference in New Issue
Block a user