fix(aiops): 補齊 Elephant Alpha trigger context
All checks were successful
CD Pipeline / deploy (push) Successful in 1m39s
All checks were successful
CD Pipeline / deploy (push) Successful in 1m39s
部署閘門修復:補 autonomous engine 的 confidence/max decision 初始化與 _build_trigger_context,避免 scheduler autonomous monitoring 持續噴 AttributeError。
This commit is contained in:
@@ -43,6 +43,8 @@ SSH_COMMAND_TIMEOUT = int(os.getenv("ELEPHANT_ALPHA_SSH_COMMAND_TIMEOUT", "60"))
|
||||
|
||||
CACHE_DB_PATH = os.getenv("ELEPHANT_ALPHA_CACHE_DB", ":memory:")
|
||||
ESCALATION_COOLDOWN_MIN = int(os.getenv("ELEPHANT_ALPHA_ESCALATION_COOLDOWN_MIN", "30"))
|
||||
CONFIDENCE_THRESHOLD = float(os.getenv("ELEPHANT_ALPHA_CONFIDENCE_THRESHOLD", "0.7"))
|
||||
MAX_AUTONOMOUS_DECISIONS_PER_HOUR = int(os.getenv("ELEPHANT_ALPHA_MAX_AUTONOMOUS_DECISIONS_PER_HOUR", "10"))
|
||||
|
||||
# ---- Constants ----
|
||||
_ALLOWED_ACTION_TYPES = frozenset({
|
||||
@@ -181,6 +183,8 @@ class ElephantAlphaAutonomousEngine:
|
||||
self._init_ssh_key_once()
|
||||
self.decision_history: List[DecisionOutcome] = []
|
||||
self.triggers: List[AutonomousTrigger] = []
|
||||
self.confidence_threshold = CONFIDENCE_THRESHOLD
|
||||
self.max_autonomous_decisions_per_hour = MAX_AUTONOMOUS_DECISIONS_PER_HOUR
|
||||
self._initialize_triggers()
|
||||
self._circuit_breaker_state = {"failures": 0, "last_failure": None}
|
||||
self._cb_threshold = 5
|
||||
@@ -469,6 +473,48 @@ class ElephantAlphaAutonomousEngine:
|
||||
session.close()
|
||||
|
||||
# ---- Decision execution ----
|
||||
async def _build_trigger_context(self, trigger: AutonomousTrigger) -> Dict[str, Any]:
    """Build the business context passed to Elephant Alpha.

    The body awaits nothing; it is a coroutine because the caller
    (``_execute_autonomous_decision``) awaits it.

    Args:
        trigger: The trigger that fired. Its ``trigger_type``,
            ``conditions``, ``threshold``, ``enabled`` and
            ``last_triggered`` attributes are always read. The transient
            ``temp_error_msg`` / ``temp_target_file`` attributes are
            optional and are read defensively.

    Returns:
        A dict describing the trigger, a snapshot of engine state, and the
        fixed objectives/constraints lists. ``error_context`` and
        ``target_file`` keys are present only when the trigger carries a
        truthy value for the corresponding ``temp_*`` attribute.
    """
    context: Dict[str, Any] = {
        "trigger_type": trigger.trigger_type,
        "trigger_label": _zh_trigger(trigger.trigger_type),
        "conditions": trigger.conditions,
        "threshold": trigger.threshold,
        "enabled": trigger.enabled,
        # A trigger that has never fired has a falsy last_triggered.
        "last_triggered": trigger.last_triggered.isoformat() if trigger.last_triggered else None,
        "generated_at": datetime.now().isoformat(),
        "system_state": {
            # Metric probes are best-effort: a failing probe yields its
            # default instead of aborting context construction.
            "action_queue_size": self._safe_metric(self._get_action_queue_size, default=0),
            "system_load_pct": self._safe_metric(self._get_system_load_percentage, default=0.0),
            "circuit_breaker_failures": self._circuit_breaker_state.get("failures", 0),
            "confidence_threshold": self.confidence_threshold,
            "max_autonomous_decisions_per_hour": self.max_autonomous_decisions_per_hour,
        },
        # Runtime strings below are emitted verbatim; English glosses are
        # given in the comments only.
        "objectives": [
            # Keep the MOMO product-monitoring and sales-analysis services available.
            "維持 MOMO 商品監控與業績分析服務可用性",
            # Reduce duplicate alerts and manual troubleshooting cost.
            "降低重複告警與人工排查成本",
            # Produce auditable automated action suggestions within a low-risk scope.
            "在低風險範圍內產生可稽核的自動化行動建議",
        ],
        "constraints": [
            # Never use `docker compose down` or `--remove-orphans`.
            "不得使用 docker compose down 或 --remove-orphans",
            # Never touch the momo-db container lifecycle.
            "不得操作 momo-db 容器生命週期",
            # P1/P2 actions must keep an audit trail and a Telegram audit notification.
            "P1/P2 行動需保留 audit trail 與 Telegram 稽核通知",
        ],
    }

    # getattr with a default: a trigger object that was never given these
    # transient attributes must not raise AttributeError here.
    error_msg = getattr(trigger, "temp_error_msg", None)
    if error_msg:
        # Keep only the first 4000 characters of the error text.
        context["error_context"] = error_msg[:4000]

    target_file = getattr(trigger, "temp_target_file", None)
    if target_file:
        context["target_file"] = target_file

    return context
|
||||
|
||||
@staticmethod
|
||||
def _safe_metric(fn, default):
|
||||
try:
|
||||
return fn()
|
||||
except Exception:
|
||||
return default
|
||||
|
||||
async def _execute_autonomous_decision(self, trigger: AutonomousTrigger) -> None:
|
||||
context = await self._build_trigger_context(trigger)
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user