chore(governance,watchdog): enrich alerts and enable Prometheus multiprocess mode
Some checks failed
CD Pipeline / tests (push) Failing after 1m22s
CD Pipeline / build-and-deploy (push) Has been skipped
CD Pipeline / post-deploy-checks (push) Has been skipped
Code Review / ai-code-review (push) Successful in 43s
Deploy Alert Rules / Deploy Prometheus Alert Rules (push) Successful in 57s

This commit is contained in:
Your Name
2026-05-02 23:44:03 +08:00
parent b371edb70c
commit dedb12085b
7 changed files with 191 additions and 56 deletions

View File

@@ -1093,9 +1093,10 @@ async def receive_alert(
"is_rule_based": False,
"playbook_id": None,
}
_cmd_cs1 = (analysis_result.kubectl_command or "").strip()
approval_create = ApprovalRequestCreate(
action=f"{analysis_result.action_title} | {analysis_result.kubectl_command}",
description=f"[AI: {ai_provider}] {analysis_result.description}",
action=(_cmd_cs1 or f"{analysis_result.action_title} | NO_ACTION"),
description=f"[AI: {ai_provider}] {analysis_result.action_title} | {analysis_result.description}",
risk_level=risk_level,
blast_radius=BlastRadius(
affected_pods=blast.affected_pods,
@@ -1179,6 +1180,13 @@ async def receive_alert(
status=ApprovalStatus.APPROVED,
risk_level=risk_level.value,
matched_playbook_id=None,
metadata={
**_approval_metadata_cs1,
"is_high_confidence": True,
"policy_reason": _shadow_result.reason.value
if hasattr(_shadow_result, "reason")
else "cs1_auto_confident_execution",
},
)
_cs1_auto_approval.id = approval.id
@@ -1194,13 +1202,19 @@ async def receive_alert(
error=str(_cs1_upd_err),
)
logger.info(
"llm_high_confidence_auto_executed",
approval_id=str(approval.id),
confidence=analysis_result.confidence,
exec_success=_cs1_exec_success,
action=_cs1_kubectl[:80],
)
logger.info(
"llm_high_confidence_auto_executed",
approval_id=str(approval.id),
confidence=analysis_result.confidence,
exec_success=_cs1_exec_success,
action=_cs1_kubectl[:80],
is_high_confidence=True,
policy_reason=(
_shadow_result.reason.value
if hasattr(_shadow_result, "reason")
else "cs1_auto_confident_execution"
),
)
except Exception as _cs1_auto_err:
logger.warning(
"llm_high_confidence_auto_execute_failed",
@@ -1419,7 +1433,7 @@ async def _process_new_alert_background(
rule_kubectl = str(rule_response.get("kubectl_command", "")).strip()
rule_description = str(rule_response.get("description", message))
rule_action = (
f"{rule_action_title} | {rule_kubectl}"
rule_kubectl
if rule_kubectl else
f"NO_ACTION - {rule_description[:120]}"
)
@@ -1656,9 +1670,10 @@ async def _process_new_alert_background(
"is_rule_based": False,
"playbook_id": None,
}
_cmd_cs3 = (analysis_result.kubectl_command or "").strip()
approval_create = ApprovalRequestCreate(
action=f"{analysis_result.action_title} | {analysis_result.kubectl_command}",
description=f"[AI: {ai_provider}] {analysis_result.description}",
action=(_cmd_cs3 or f"{analysis_result.action_title} | NO_ACTION"),
description=f"[AI: {ai_provider}] {analysis_result.action_title} | {analysis_result.description}",
risk_level=risk_level,
blast_radius=BlastRadius(
affected_pods=blast.affected_pods if blast else 1,
@@ -1719,6 +1734,13 @@ async def _process_new_alert_background(
status=ApprovalStatus.APPROVED,
risk_level=risk_level.value,
matched_playbook_id=None,
metadata={
**_approval_metadata_cs3,
"is_high_confidence": True,
"policy_reason": _shadow_result_cs3.reason.value
if hasattr(_shadow_result_cs3, "reason")
else "cs3_auto_confident_execution",
},
)
_cs3_executor = ApprovalExecutionService()
_cs3_exec_success = await _cs3_executor.execute_approved_action(_cs3_auto_approval)
@@ -1729,6 +1751,12 @@ async def _process_new_alert_background(
confidence=analysis_result.confidence,
success=_cs3_exec_success,
provider=ai_provider,
is_high_confidence=True,
policy_reason=(
_shadow_result_cs3.reason.value
if hasattr(_shadow_result_cs3, "reason")
else "cs3_auto_confident_execution"
),
)
except Exception as _cs3_exec_err:
logger.warning("cs3_llm_auto_execute_failed", error=str(_cs3_exec_err))