fix(aiops): 補齊 Elephant Alpha trigger context
All checks were successful
CD Pipeline / deploy (push) Successful in 1m39s

部署閘門修復:補 autonomous engine 的 confidence/max decision 初始化與 _build_trigger_context,避免 scheduler autonomous monitoring 持續噴 AttributeError。
This commit is contained in:
OoO
2026-04-29 21:48:41 +08:00
parent 4d5a995718
commit 4cbd775c1b

View File

@@ -43,6 +43,8 @@ SSH_COMMAND_TIMEOUT = int(os.getenv("ELEPHANT_ALPHA_SSH_COMMAND_TIMEOUT", "60"))
# Tunables read from the environment once, when this module is imported.
# The int()/float() conversions raise ValueError at import time if an
# override is set to a non-numeric string.

# Cache database location; defaults to the literal ":memory:"
# (presumably an in-memory SQLite database -- confirm at the point of use).
CACHE_DB_PATH = os.getenv("ELEPHANT_ALPHA_CACHE_DB", ":memory:")
# Escalation cooldown, default 30. The `_MIN` suffix suggests minutes -- TODO confirm.
ESCALATION_COOLDOWN_MIN = int(os.getenv("ELEPHANT_ALPHA_ESCALATION_COOLDOWN_MIN", "30"))
# Confidence threshold, default 0.7; the engine copies it to `self.confidence_threshold`.
CONFIDENCE_THRESHOLD = float(os.getenv("ELEPHANT_ALPHA_CONFIDENCE_THRESHOLD", "0.7"))
# Hourly decision cap, default 10; the engine copies it to
# `self.max_autonomous_decisions_per_hour`.
MAX_AUTONOMOUS_DECISIONS_PER_HOUR = int(os.getenv("ELEPHANT_ALPHA_MAX_AUTONOMOUS_DECISIONS_PER_HOUR", "10"))
# ---- Constants ----
_ALLOWED_ACTION_TYPES = frozenset({
@@ -181,6 +183,8 @@ class ElephantAlphaAutonomousEngine:
self._init_ssh_key_once()
self.decision_history: List[DecisionOutcome] = []
self.triggers: List[AutonomousTrigger] = []
self.confidence_threshold = CONFIDENCE_THRESHOLD
self.max_autonomous_decisions_per_hour = MAX_AUTONOMOUS_DECISIONS_PER_HOUR
self._initialize_triggers()
self._circuit_breaker_state = {"failures": 0, "last_failure": None}
self._cb_threshold = 5
@@ -469,6 +473,48 @@ class ElephantAlphaAutonomousEngine:
session.close()
# ---- Decision execution ----
async def _build_trigger_context(self, trigger: AutonomousTrigger) -> Dict[str, Any]:
    """Build the business context passed to Elephant Alpha.

    Declared ``async`` because the caller awaits it; nothing is awaited
    inside.

    Args:
        trigger: The trigger being evaluated. ``trigger_type``,
            ``conditions``, ``threshold``, ``enabled`` and
            ``last_triggered`` are read directly. ``temp_error_msg`` and
            ``temp_target_file`` are treated as optional and may be absent
            from the object altogether.

    Returns:
        A dict holding the trigger's fields, a ``generated_at`` timestamp,
        a ``system_state`` snapshot, and fixed ``objectives`` and
        ``constraints`` lists. ``error_context`` (sliced to at most 4000
        elements, i.e. characters for a string) and ``target_file`` are
        added only when the corresponding trigger value is truthy.
    """
    last_triggered = trigger.last_triggered
    context = {
        "trigger_type": trigger.trigger_type,
        "trigger_label": _zh_trigger(trigger.trigger_type),
        # NOTE(review): passed by reference, not copied -- confirm nothing
        # downstream mutates it.
        "conditions": trigger.conditions,
        "threshold": trigger.threshold,
        "enabled": trigger.enabled,
        "last_triggered": last_triggered.isoformat() if last_triggered else None,
        # NOTE(review): now() is called without a tz argument, so the
        # timestamp carries no UTC offset -- confirm consumers expect that.
        "generated_at": datetime.now().isoformat(),
        "system_state": {
            # Metric probes are best-effort: a failing probe yields its
            # default instead of aborting context construction.
            "action_queue_size": self._safe_metric(self._get_action_queue_size, default=0),
            "system_load_pct": self._safe_metric(self._get_system_load_percentage, default=0.0),
            "circuit_breaker_failures": self._circuit_breaker_state.get("failures", 0),
            "confidence_threshold": self.confidence_threshold,
            "max_autonomous_decisions_per_hour": self.max_autonomous_decisions_per_hour,
        },
        "objectives": [
            "維持 MOMO 商品監控與業績分析服務可用性",
            "降低重複告警與人工排查成本",
            "在低風險範圍內產生可稽核的自動化行動建議",
        ],
        "constraints": [
            "不得使用 docker compose down 或 --remove-orphans",
            "不得操作 momo-db 容器生命週期",
            "P1/P2 行動需保留 audit trail 與 Telegram 稽核通知",
        ],
    }

    # The caller builds this context before entering its try block, so an
    # AttributeError raised here would propagate out of the autonomous
    # execution path. Read the optional fields defensively.
    error_msg = getattr(trigger, "temp_error_msg", None)
    if error_msg:
        context["error_context"] = error_msg[:4000]

    target_file = getattr(trigger, "temp_target_file", None)
    if target_file:
        context["target_file"] = target_file

    return context
@staticmethod
def _safe_metric(fn, default):
    """Call ``fn()`` and return its result, or ``default`` if it raises.

    This is a best-effort probe wrapper: any ``Exception`` raised by ``fn``
    is caught so that one failing metric cannot abort the caller. The
    failure is logged at DEBUG level with its traceback, so a permanently
    broken probe can be told apart from a genuine default value.

    Args:
        fn: Zero-argument callable that produces the metric.
        default: Value returned when ``fn`` raises.

    Returns:
        ``fn()`` on success, otherwise ``default``.
    """
    try:
        return fn()
    except Exception:
        # Imported locally so this helper does not depend on a module-level
        # logger being defined elsewhere in the file.
        import logging

        logging.getLogger(__name__).debug(
            "metric probe %s failed; falling back to default %r",
            getattr(fn, "__name__", repr(fn)),
            default,
            exc_info=True,
        )
        return default
async def _execute_autonomous_decision(self, trigger: AutonomousTrigger) -> None:
context = await self._build_trigger_context(trigger)
try: