From 4cbd775c1b11396a36463d341ebe6daa99efe484 Mon Sep 17 00:00:00 2001 From: OoO Date: Wed, 29 Apr 2026 21:48:41 +0800 Subject: [PATCH] =?UTF-8?q?fix(aiops):=20=E8=A3=9C=E9=BD=8A=20Elephant=20A?= =?UTF-8?q?lpha=20trigger=20context?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 部署閘門修復:補 autonomous engine 的 confidence/max decision 初始化與 _build_trigger_context,避免 scheduler autonomous monitoring 持續噴 AttributeError。 --- services/elephant_alpha_autonomous_engine.py | 46 ++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/services/elephant_alpha_autonomous_engine.py b/services/elephant_alpha_autonomous_engine.py index 4749528..46999d6 100644 --- a/services/elephant_alpha_autonomous_engine.py +++ b/services/elephant_alpha_autonomous_engine.py @@ -43,6 +43,8 @@ SSH_COMMAND_TIMEOUT = int(os.getenv("ELEPHANT_ALPHA_SSH_COMMAND_TIMEOUT", "60")) CACHE_DB_PATH = os.getenv("ELEPHANT_ALPHA_CACHE_DB", ":memory:") ESCALATION_COOLDOWN_MIN = int(os.getenv("ELEPHANT_ALPHA_ESCALATION_COOLDOWN_MIN", "30")) +CONFIDENCE_THRESHOLD = float(os.getenv("ELEPHANT_ALPHA_CONFIDENCE_THRESHOLD", "0.7")) +MAX_AUTONOMOUS_DECISIONS_PER_HOUR = int(os.getenv("ELEPHANT_ALPHA_MAX_AUTONOMOUS_DECISIONS_PER_HOUR", "10")) # ---- Constants ---- _ALLOWED_ACTION_TYPES = frozenset({ @@ -181,6 +183,8 @@ class ElephantAlphaAutonomousEngine: self._init_ssh_key_once() self.decision_history: List[DecisionOutcome] = [] self.triggers: List[AutonomousTrigger] = [] + self.confidence_threshold = CONFIDENCE_THRESHOLD + self.max_autonomous_decisions_per_hour = MAX_AUTONOMOUS_DECISIONS_PER_HOUR self._initialize_triggers() self._circuit_breaker_state = {"failures": 0, "last_failure": None} self._cb_threshold = 5 @@ -469,6 +473,48 @@ class ElephantAlphaAutonomousEngine: session.close() # ---- Decision execution ---- + async def _build_trigger_context(self, trigger: AutonomousTrigger) -> Dict[str, Any]: + """Build the business context passed to Elephant Alpha.""" + context = { + "trigger_type": trigger.trigger_type, + "trigger_label": _zh_trigger(trigger.trigger_type), + "conditions": trigger.conditions, + "threshold": trigger.threshold, + "enabled": trigger.enabled, + "last_triggered": trigger.last_triggered.isoformat() if trigger.last_triggered else None, + "generated_at": datetime.now().isoformat(), + "system_state": { + "action_queue_size": self._safe_metric(self._get_action_queue_size, default=0), + "system_load_pct": self._safe_metric(self._get_system_load_percentage, default=0.0), + "circuit_breaker_failures": self._circuit_breaker_state.get("failures", 0), + "confidence_threshold": self.confidence_threshold, + "max_autonomous_decisions_per_hour": self.max_autonomous_decisions_per_hour, + }, + "objectives": [ + "維持 MOMO 商品監控與業績分析服務可用性", + "降低重複告警與人工排查成本", + "在低風險範圍內產生可稽核的自動化行動建議", + ], + "constraints": [ + "不得使用 docker compose down 或 --remove-orphans", + "不得操作 momo-db 容器生命週期", + "P1/P2 行動需保留 audit trail 與 Telegram 稽核通知", + ], + } + + if trigger.temp_error_msg: + context["error_context"] = trigger.temp_error_msg[:4000] + if trigger.temp_target_file: + context["target_file"] = trigger.temp_target_file + return context + + @staticmethod + def _safe_metric(fn, default): + try: + return fn() + except Exception: + return default + async def _execute_autonomous_decision(self, trigger: AutonomousTrigger) -> None: context = await self._build_trigger_context(trigger) try: