Files
ewoooc/services/elephant_alpha_decision_router.py
ogt ba86f98514
Some checks failed
CD Pipeline / deploy (push) Has been cancelled
feat: integrate Elephant Alpha ecosystem with full ADR-012/013 compliance
- Add ElephantService, AutonomousEngine, Orchestrator, DecisionRouter (EA 4-file stack)
- Fix 10 bugs: URL typo, SQL schema mismatches (price_records JOIN), enum mapping,
  metadata_json, NemoTron PriceThreat dispatch, async/await mismatch, broken imports
- Wire ADR-012 Agent Action Ladder: EventRouter L2 → EA first + AIOrch fallback;
  all decisions dual-write DB + triaged_alert Telegram; momo: callback prefix
- Wire ADR-013 AutoHeal: resource_optimization trigger → AutoHealService
- Add W3 guards: connection cache 300s TTL, $5/hr cost hard limit
- Add W4 persistence: routing decisions + agent performance snapshots → ai_insights
- Add Migration 015: confidence + created_by columns on ai_insights
- Fix run_scheduler.py broken imports (DecisionTracker service didn't exist)
- Fix verify_elephant_integration.py: check_status() → check_connection()

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-20 04:28:26 +08:00

626 lines
25 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Elephant Alpha Intelligent Decision Router
AI 3.0 Decision Intelligence:
- Multi-agent coordination routing
- Dynamic task allocation
- Performance-based routing
- Adaptive decision flows
"""
import asyncio
import json
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
from enum import Enum
import numpy as np
from services.logger_manager import SystemLogger
from services.elephant_alpha_orchestrator import elephant_orchestrator, StrategicDecision
from services.elephant_alpha_autonomous_engine import autonomous_engine
from database.manager import get_session
from sqlalchemy import text
# Module-level logger, obtained from the project's SystemLogger wrapper under
# the name "ElephantAlphaRouter"; every log call in this module goes through it.
logger = SystemLogger("ElephantAlphaRouter").get_logger()
class RoutingStrategy(Enum):
    """Strategies the router can apply when scoring agents for a task.

    The member values are the strings written to routing logs.
    """

    # Score = success rate x quality score.
    PERFORMANCE_BASED = "performance_based"
    # Score = inverse cost per decision, weighted by success rate.
    COST_OPTIMIZED = "cost_optimized"
    # Score = inverse average response time, weighted by reliability.
    SPEED_PRIORITY = "speed_priority"
    # Score = quality score, weighted by success rate.
    QUALITY_FOCUS = "quality_focus"
    # Score = weighted blend of success, quality, reliability and speed.
    ADAPTIVE = "adaptive"
@dataclass
class AgentPerformance:
    """Mutable record of the performance metrics tracked for one agent."""

    # Name of the agent; also the key under which the router stores the record.
    agent_name: str
    # Fraction of successful executions (formatted as a percentage in reports).
    success_rate: float
    # Average response time in seconds.
    avg_response_time: float
    # Cost attributed to a single decision (unit is not specified in this module).
    cost_per_decision: float
    # Quality weight used by the quality-focused and adaptive scoring.
    quality_score: float
    # Reliability weight used by the speed-priority and adaptive scoring.
    reliability_score: float
    # Moment the metrics were last written.
    last_updated: datetime
@dataclass
class RoutingDecision:
    """Outcome of routing one request: who handles it and what to expect."""

    # Identifier of the routed task.
    task_id: str
    # Task type string taken from the request analysis.
    task_type: str
    # Names of the agents chosen for the task, best-scoring first.
    selected_agents: List[str]
    # Strategy that was used to rank the agents.
    routing_strategy: RoutingStrategy
    # Estimated confidence in the routing outcome.
    confidence: float
    # Estimated time needed to run all selected agents.
    expected_duration: timedelta
    # Sum of the selected agents' per-decision cost.
    estimated_cost: float
    # Human-readable explanation of the routing choice.
    reasoning: str
class ElephantAlphaDecisionRouter:
"""
Intelligent Decision Router for AI Agent Orchestration.
Holds an in-memory performance table for the agents "hermes", "nemotron"
and "openclaw", scores them for each incoming request, runs the selected
agents, and folds the outcome back into the table. Routing decisions and
performance snapshots are also written to the ai_insights table on a
best-effort basis (database errors are logged, not raised).
Features:
- Dynamic agent selection based on performance
- Multi-agent task coordination
- Adaptive routing strategies
- Performance monitoring and optimization
- Cost-aware routing decisions
"""
def __init__(self):
    """Create a router with an empty history and baseline agent metrics."""
    # In-memory log of routing decisions, appended to by _log_routing_decision.
    self.performance_history: List[Dict[str, Any]] = []
    # Router-wide default strategy attribute.
    self.routing_strategy = RoutingStrategy.ADAPTIVE
    # Starts empty; the call below replaces it with the baseline table.
    self.agent_performance: Dict[str, AgentPerformance] = {}
    self._initialize_agent_performance()
def _initialize_agent_performance(self):
    """Replace ``self.agent_performance`` with hard-coded baseline metrics.

    One record is created per known agent (hermes, nemotron, openclaw), each
    with a zero cost per decision and a ``last_updated`` of "now".
    """
    # Columns: name, success_rate, avg_response_time (s), quality, reliability.
    baselines = (
        ("hermes", 0.85, 120.0, 0.88, 0.92),   # 2 minutes
        ("nemotron", 0.90, 30.0, 0.82, 0.95),  # 30 seconds
        ("openclaw", 0.87, 90.0, 0.91, 0.89),  # 1.5 minutes
    )
    self.agent_performance = {
        name: AgentPerformance(
            agent_name=name,
            success_rate=success_rate,
            avg_response_time=response_time,
            cost_per_decision=0.0,
            quality_score=quality,
            reliability_score=reliability,
            last_updated=datetime.now(),
        )
        for name, success_rate, response_time, quality, reliability in baselines
    }
async def route_decision_request(self, request: Dict[str, Any]) -> RoutingDecision:
    """Pick the agents that should handle a decision request.

    Args:
        request: Decision request with context and requirements. An optional
            ``task_id`` key is used as the decision id; otherwise an id is
            derived from the current timestamp.

    Returns:
        RoutingDecision: the chosen agents, strategy, confidence, expected
        duration, estimated cost and a textual explanation.
    """
    # Requirements -> strategy -> agents -> metrics, each step feeding the next.
    analysis = self._analyze_task_requirements(request)
    chosen_strategy = self._select_routing_strategy(analysis)
    agents = await self._select_agents(analysis, chosen_strategy)
    expected_duration, estimated_cost, confidence = self._calculate_routing_metrics(
        agents, analysis
    )

    fallback_id = f"task_{datetime.now().timestamp()}"
    explanation = self._generate_routing_reasoning(agents, chosen_strategy, analysis)
    decision = RoutingDecision(
        task_id=request.get("task_id", fallback_id),
        task_type=analysis["task_type"],
        selected_agents=agents,
        routing_strategy=chosen_strategy,
        confidence=confidence,
        expected_duration=expected_duration,
        estimated_cost=estimated_cost,
        reasoning=explanation,
    )

    # Record the decision (in-memory history + best-effort DB write).
    await self._log_routing_decision(decision, request)
    return decision
def _analyze_task_requirements(self, request: Dict[str, Any]) -> Dict[str, Any]:
"""Analyze task requirements and characteristics"""
task_type = request.get("task_type", "general_analysis")
urgency = request.get("urgency", "normal")
complexity = request.get("complexity", "medium")
quality_requirement = request.get("quality_requirement", "standard")
budget_constraint = request.get("budget_constraint", "none")
# Determine required capabilities
required_capabilities = []
if "price" in task_type.lower() or "competition" in task_type.lower():
required_capabilities.extend(["market_analysis", "price_intelligence"])
if "threat" in task_type.lower() or "alert" in task_type.lower():
required_capabilities.extend(["threat_detection", "rapid_response"])
if "strategy" in task_type.lower() or "planning" in task_type.lower():
required_capabilities.extend(["strategic_thinking", "long_term_planning"])
if "action" in task_type.lower() or "dispatch" in task_type.lower():
required_capabilities.extend(["tool_calling", "execution"])
return {
"task_type": task_type,
"urgency": urgency,
"complexity": complexity,
"quality_requirement": quality_requirement,
"budget_constraint": budget_constraint,
"required_capabilities": required_capabilities,
"estimated_data_size": request.get("data_size", "medium"),
"requires_human_oversight": request.get("requires_human_oversight", False)
}
def _select_routing_strategy(self, task_analysis: Dict[str, Any]) -> RoutingStrategy:
    """Choose the routing strategy for a task.

    Precedence: critical urgency, then premium quality, then any budget
    constraint other than "none"; otherwise the adaptive strategy.

    Args:
        task_analysis: Output of ``_analyze_task_requirements``.

    Returns:
        The strategy to use when scoring agents.
    """
    is_critical = task_analysis["urgency"] == "critical"
    wants_premium = task_analysis["quality_requirement"] == "premium"
    has_budget = task_analysis["budget_constraint"] != "none"

    if is_critical:
        return RoutingStrategy.SPEED_PRIORITY
    if wants_premium:
        return RoutingStrategy.QUALITY_FOCUS
    if has_budget:
        return RoutingStrategy.COST_OPTIMIZED
    return RoutingStrategy.ADAPTIVE
async def _select_agents(self, task_analysis: Dict[str, Any],
strategy: RoutingStrategy) -> List[str]:
"""Select optimal agents based on strategy and task requirements"""
required_capabilities = task_analysis["required_capabilities"]
complexity = task_analysis["complexity"]
# Score each agent for this task
agent_scores = {}
for agent_name, performance in self.agent_performance.items():
score = self._calculate_agent_score(
agent_name, performance, task_analysis, strategy
)
agent_scores[agent_name] = score
# Sort agents by score
sorted_agents = sorted(agent_scores.items(), key=lambda x: x[1], reverse=True)
# Select agents based on complexity and requirements
if complexity == "simple":
selected = [sorted_agents[0][0]] # Best single agent
elif complexity == "medium":
selected = [sorted_agents[0][0], sorted_agents[1][0]] # Top 2 agents
else: # complex
selected = [agent for agent, _ in sorted_agents] # All agents
return selected
def _calculate_agent_score(self, agent_name: str, performance: AgentPerformance,
                           task_analysis: Dict[str, Any],
                           strategy: RoutingStrategy) -> float:
    """Score one agent for a task under the given strategy.

    The strategy determines the base score; a capability bonus of up to 50
    points (see ``_calculate_capability_bonus``) is added on top.

    Args:
        agent_name: Name of the agent being scored.
        performance: Current metrics of that agent.
        task_analysis: Task analysis; its "required_capabilities" entry
            drives the capability bonus.
        strategy: Scoring strategy.

    Returns:
        Base score plus capability bonus; higher is better.
    """
    # Divisors are floored at this value so a zero metric cannot raise
    # ZeroDivisionError. The cost branch already used this floor; the
    # response-time divisions now share it.
    min_divisor = 0.01

    if strategy == RoutingStrategy.SPEED_PRIORITY:
        base_score = (1.0 / max(performance.avg_response_time, min_divisor)) * 100
        base_score *= performance.reliability_score
    elif strategy == RoutingStrategy.QUALITY_FOCUS:
        base_score = performance.quality_score * 100
        base_score *= performance.success_rate
    elif strategy == RoutingStrategy.COST_OPTIMIZED:
        base_score = (1.0 / max(performance.cost_per_decision, min_divisor)) * 100
        base_score *= performance.success_rate
    elif strategy == RoutingStrategy.PERFORMANCE_BASED:
        base_score = (performance.success_rate * performance.quality_score) * 100
    else:  # ADAPTIVE: balanced blend of all metrics
        base_score = (
            performance.success_rate * 30 +
            performance.quality_score * 25 +
            performance.reliability_score * 25 +
            (1.0 / max(performance.avg_response_time, min_divisor)) * 20
        )

    capability_bonus = self._calculate_capability_bonus(
        agent_name, task_analysis["required_capabilities"]
    )
    return base_score + capability_bonus
def _calculate_capability_bonus(self, agent_name: str,
required_capabilities: List[str]) -> float:
"""Calculate bonus for agent capabilities matching requirements"""
agent_capabilities = {
"hermes": ["market_analysis", "price_intelligence", "threat_detection"],
"nemotron": ["tool_calling", "execution", "rapid_response"],
"openclaw": ["strategic_thinking", "long_term_planning", "insight_generation"]
}
if agent_name not in agent_capabilities:
return 0.0
capabilities = agent_capabilities[agent_name]
matches = sum(1 for cap in required_capabilities if cap in capabilities)
if not required_capabilities:
return 0.0
return (matches / len(required_capabilities)) * 50 # Up to 50 point bonus
def _calculate_routing_metrics(self, selected_agents: List[str],
task_analysis: Dict[str, Any]) -> Tuple[timedelta, float, float]:
"""Calculate routing decision metrics"""
# Duration estimation
total_response_time = sum(
self.agent_performance[agent].avg_response_time
for agent in selected_agents
)
# Add coordination overhead for multiple agents
if len(selected_agents) > 1:
coordination_overhead = (len(selected_agents) - 1) * 30 # 30s per additional agent
total_response_time += coordination_overhead
duration = timedelta(seconds=total_response_time)
# Cost estimation
total_cost = sum(
self.agent_performance[agent].cost_per_decision
for agent in selected_agents
)
# Confidence calculation
avg_success_rate = np.mean([
self.agent_performance[agent].success_rate
for agent in selected_agents
])
# Adjust confidence based on task complexity
complexity_penalty = {
"simple": 0.0,
"medium": -0.1,
"complex": -0.2
}.get(task_analysis["complexity"], 0.0)
confidence = max(0.5, min(0.95, avg_success_rate + complexity_penalty))
return duration, total_cost, confidence
def _generate_routing_reasoning(self, selected_agents: List[str],
                                strategy: RoutingStrategy,
                                task_analysis: Dict[str, Any]) -> str:
    """Build the human-readable explanation attached to a routing decision.

    The text has three parts joined by " | ": the strategy explanation, a
    per-agent summary (success rate and average response time), and a note
    about the task complexity.

    Args:
        selected_agents: Names of agents present in ``self.agent_performance``.
        strategy: Strategy that was used for scoring.
        task_analysis: Task analysis; must contain the "complexity" key.

    Returns:
        The explanation string.
    """
    strategy_explanations = {
        RoutingStrategy.SPEED_PRIORITY: "Prioritized speed due to urgent requirements",
        RoutingStrategy.QUALITY_FOCUS: "Prioritized quality for premium requirements",
        RoutingStrategy.COST_OPTIMIZED: "Optimized for cost efficiency",
        RoutingStrategy.PERFORMANCE_BASED: "Selected based on historical performance",
        RoutingStrategy.ADAPTIVE: "Adaptive selection balancing multiple factors"
    }
    complexity_notes = {
        "simple": "Single agent sufficient for straightforward task",
        "medium": "Two agents provide balanced capability for moderate complexity",
        "complex": "Full agent coordination required for comprehensive analysis"
    }

    agent_reasoning = []
    for agent in selected_agents:
        perf = self.agent_performance[agent]
        agent_reasoning.append(
            f"{agent}: {perf.success_rate:.1%} success rate, "
            f"{perf.avg_response_time:.0f}s avg response"
        )

    # Agent selection sends every complexity other than "simple"/"medium" down
    # the all-agents path, so an unrecognised label gets the "complex" note
    # here instead of raising KeyError and aborting the whole routing call.
    complexity_note = complexity_notes.get(
        task_analysis["complexity"], complexity_notes["complex"]
    )

    reasoning_parts = [
        f"Strategy: {strategy_explanations[strategy]}",
        f"Selected agents: {', '.join(agent_reasoning)}",
        f"Complexity: {complexity_note}",
    ]
    return " | ".join(reasoning_parts)
async def _log_routing_decision(self, decision: RoutingDecision,
                                request: Dict[str, Any]):
    """Record a routing decision in memory and, best-effort, in the database.

    The entry is appended to ``self.performance_history`` (capped at the most
    recent 1000 entries) and inserted into ``ai_insights`` with insight type
    "routing_decision". Database errors are logged as warnings and swallowed.

    Args:
        decision: The routing decision to record.
        request: The originating request (not used by this method).
    """
    history_limit = 1000
    strategy_value = decision.routing_strategy.value
    log_entry = dict(
        timestamp=datetime.now().isoformat(),
        task_id=decision.task_id,
        task_type=decision.task_type,
        selected_agents=decision.selected_agents,
        strategy=strategy_value,
        confidence=decision.confidence,
        estimated_duration=decision.expected_duration.total_seconds(),
        estimated_cost=decision.estimated_cost,
        reasoning=decision.reasoning,
    )

    self.performance_history.append(log_entry)
    if len(self.performance_history) > history_limit:
        # Rebind to a trimmed copy holding only the newest entries.
        self.performance_history = self.performance_history[-history_limit:]

    # W4: ADR-007 dual-write → ai_insights DB (non-fatal)
    insert_sql = (
        "\n"
        "INSERT INTO ai_insights\n"
        "(insight_type, content, confidence, created_by, status, metadata_json)\n"
        "VALUES\n"
        "(:itype, :content, :confidence, 'elephant_router', 'active', :metadata)\n"
    )
    try:
        with get_session() as session:
            # NOTE(review): task type and agent list are concatenated without
            # a separator in the stored content — confirm this is intended.
            params = {
                "itype": "routing_decision",
                "content": f"[{decision.routing_strategy.value}] {decision.task_type}{decision.selected_agents}",
                "confidence": decision.confidence,
                "metadata": json.dumps(log_entry),
            }
            session.execute(text(insert_sql), params)
            session.commit()
    except Exception as e:
        logger.warning("[ElephantAlphaRouter] DB routing log failed (non-fatal): %s", e)
async def execute_routed_decision(self, routing_decision: RoutingDecision,
                                  request: Dict[str, Any]) -> Dict[str, Any]:
    """Run a routed request through the orchestrator and the selected agents.

    The request, extended with a summary of the routing decision, is handed
    to the orchestrator for a strategic decision. Each selected agent is then
    executed in turn, the results are aggregated, and the performance metrics
    are updated.

    Args:
        routing_decision: Decision produced by ``route_decision_request``.
        request: The original decision request.

    Returns:
        The aggregated result dict, or on any exception a dict with
        ``success=False``, the error text and the routing decision's fields.
    """
    start_time = datetime.now()
    try:
        # Business context for Elephant Alpha: the request plus routing summary.
        context = dict(request)
        context["routing_decision"] = {
            "selected_agents": routing_decision.selected_agents,
            "strategy": routing_decision.routing_strategy.value,
            "confidence": routing_decision.confidence
        }
        strategic_decision = await elephant_orchestrator.analyze_and_coordinate(context)

        # Agents are awaited one after another, in selection order.
        results = {
            agent_name: await self._execute_agent_task(
                agent_name, strategic_decision, request
            )
            for agent_name in routing_decision.selected_agents
        }

        final_result = self._aggregate_agent_results(results, strategic_decision)
        await self._update_performance_metrics(
            routing_decision, final_result, start_time
        )
        return final_result
    except Exception as e:
        logger.error(f"[ElephantAlphaRouter] Execution failed: {e}")
        return {
            "success": False,
            "error": str(e),
            "routing_decision": vars(routing_decision)
        }
async def _execute_agent_task(self, agent_name: str,
strategic_decision: StrategicDecision,
request: Dict[str, Any]) -> Dict[str, Any]:
"""Execute task through specific agent"""
try:
if agent_name == "hermes":
return await self._execute_hermes_task(strategic_decision, request)
elif agent_name == "nemotron":
return await self._execute_nemotron_task(strategic_decision, request)
elif agent_name == "openclaw":
return await self._execute_openclaw_task(strategic_decision, request)
else:
raise ValueError(f"Unknown agent: {agent_name}")
except Exception as e:
logger.error(f"[ElephantAlphaRouter] Agent {agent_name} execution failed: {e}")
return {"success": False, "error": str(e), "agent": agent_name}
# B7 FIX: all three execute methods must be async (called with await above)
async def _execute_hermes_task(self, strategic_decision: StrategicDecision,
                               request: Dict[str, Any]) -> Dict[str, Any]:
    """Run the Hermes analyst service and wrap its outcome in a result dict.

    Args:
        strategic_decision: Orchestrator decision (not used by this method).
        request: The original decision request (not used by this method).

    Returns:
        On success a dict with the service's ``success`` flag, its attributes
        (or its string form) under "result", and its
        ``analysis_duration_sec`` (0 when absent) as "execution_time".
        If ``run()`` or reading the outcome raises, a failure dict with the
        error text.
    """
    from services.hermes_analyst_service import HermesAnalystService

    analyst = HermesAnalystService()
    try:
        # NOTE(review): run() is called synchronously, so this coroutine does
        # not yield to the event loop until it returns — confirm acceptable.
        outcome = analyst.run()
        succeeded = outcome.success
        payload = getattr(outcome, '__dict__', None)
        if payload is None:
            payload = str(outcome)
        duration = getattr(outcome, 'analysis_duration_sec', 0)
        return {
            "success": succeeded,
            "agent": "hermes",
            "result": payload,
            "execution_time": duration
        }
    except Exception as e:
        return {"success": False, "agent": "hermes", "error": str(e)}
async def _execute_nemotron_task(self, strategic_decision: StrategicDecision,
request: Dict[str, Any]) -> Dict[str, Any]:
"""Execute task through NemoTron agent (routed via Orchestrator plan)"""
return {
"success": True,
"agent": "nemotron",
"message": "NemoTron action dispatched via Orchestrator plan",
"execution_time": 5.0
}
async def _execute_openclaw_task(self, strategic_decision: StrategicDecision,
                                 request: Dict[str, Any]) -> Dict[str, Any]:
    """Generate the OpenClaw weekly strategy report and summarise the outcome.

    Args:
        strategic_decision: Orchestrator decision (not used by this method).
        request: The original decision request (not used by this method).

    Returns:
        On success a dict carrying the report length and a nominal execution
        time of 45.0; if report generation (or measuring it) raises, a
        failure dict with the error text.
    """
    from services.openclaw_strategist_service import generate_weekly_strategy_report

    try:
        # len() stays inside the try so an unusable report counts as failure.
        report_length = len(generate_weekly_strategy_report())
    except Exception as e:
        return {"success": False, "agent": "openclaw", "error": str(e)}
    return {
        "success": True,
        "agent": "openclaw",
        "result": {"report_length": report_length},
        "execution_time": 45.0
    }
def _aggregate_agent_results(self, results: Dict[str, Any],
strategic_decision: StrategicDecision) -> Dict[str, Any]:
"""Aggregate results from multiple agents"""
successful_agents = [
agent for agent, result in results.items()
if result.get("success", False)
]
failed_agents = [
agent for agent, result in results.items()
if not result.get("success", False)
]
# Calculate overall success
overall_success = len(successful_agents) > 0 and len(failed_agents) == 0
# Aggregate execution times
total_execution_time = sum(
result.get("execution_time", 0)
for result in results.values()
)
return {
"success": overall_success,
"strategic_decision": strategic_decision.__dict__,
"agent_results": results,
"successful_agents": successful_agents,
"failed_agents": failed_agents,
"total_execution_time": total_execution_time,
"agents_used": len(successful_agents) + len(failed_agents)
}
async def _update_performance_metrics(self, routing_decision: RoutingDecision,
result: Dict[str, Any], start_time: datetime):
"""Update agent performance metrics based on execution results"""
execution_time = (datetime.now() - start_time).total_seconds()
success = result.get("success", False)
# Update metrics for each used agent
for agent_name in routing_decision.selected_agents:
if agent_name in result.get("agent_results", {}):
await self._update_single_agent_performance(
agent_name, success, execution_time
)
async def _update_single_agent_performance(self, agent_name: str,
                                           success: bool, execution_time: float):
    """Blend one execution outcome into an agent's running metrics.

    Success rate and average response time are updated as exponential moving
    averages with a weight of 0.1 for the new observation. The updated
    metrics are logged and, best-effort, written to ``ai_insights`` as an
    "agent_performance_snapshot" row; database errors are logged as warnings
    and swallowed. Unknown agent names are ignored.

    Args:
        agent_name: Agent whose metrics to update.
        success: Whether the execution counted as successful.
        execution_time: Observed execution time in seconds.
    """
    if agent_name not in self.agent_performance:
        return
    perf = self.agent_performance[agent_name]

    # Exponential moving average: new observation weighs 10%, history 90%.
    smoothing = 0.1
    observed_success = 1.0 if success else 0.0
    perf.success_rate = smoothing * observed_success + (1 - smoothing) * perf.success_rate
    perf.avg_response_time = (
        smoothing * execution_time + (1 - smoothing) * perf.avg_response_time
    )
    perf.last_updated = datetime.now()

    logger.info("[ElephantAlphaRouter] Updated %s performance: success_rate=%.2f, response_time=%.1fs",
                agent_name, perf.success_rate, perf.avg_response_time)

    # W4: ADR-007 — persist agent performance snapshot to DB (non-fatal)
    insert_sql = (
        "\n"
        "INSERT INTO ai_insights\n"
        "(insight_type, content, confidence, created_by, status, metadata_json)\n"
        "VALUES\n"
        "(:itype, :content, :confidence, 'elephant_router', 'active', :metadata)\n"
    )
    try:
        with get_session() as session:
            snapshot = {
                "agent": agent_name,
                "success_rate": perf.success_rate,
                "avg_response_time": perf.avg_response_time,
                "quality_score": perf.quality_score,
                "reliability_score": perf.reliability_score,
                "timestamp": datetime.now().isoformat(),
            }
            params = {
                "itype": "agent_performance_snapshot",
                "content": f"{agent_name}: success={perf.success_rate:.3f} rt={perf.avg_response_time:.1f}s",
                "confidence": perf.success_rate,
                "metadata": json.dumps(snapshot),
            }
            session.execute(text(insert_sql), params)
            session.commit()
    except Exception as e:
        logger.warning("[ElephantAlphaRouter] DB perf snapshot failed (non-fatal): %s", e)
# Global router instance: constructed once when this module is imported, so
# the agent performance table and routing history live on this one object.
decision_router = ElephantAlphaDecisionRouter()