All checks were successful
CD Pipeline / deploy (push) Successful in 1m35s
## Telegram Bot 功能修復 - 補全 16 個 await: 按鈕的 handler(日期選擇/目標設定/促銷追蹤等), 新增 _handle_await_callback + _process_await_input 完整狀態機 - cmd: 按鈕加入 ⏳ 即時回饋 + try/except 防 BadRequest - handle_callback 加頂層 try/except 錯誤兜底 - 補 momo:cmd:suggestion + momo:menu:main callback handler - 修復 _enhanced_keyword_matching context NameError ## AI 模型遷移(hermes3@111 → qwen2.5@188) - hermes_analyst_service: URL 192.168.0.111→188, hermes3→qwen2.5:7b-instruct - code_review_pipeline: 改用 HERMES_URL/HERMES_MODEL 常數 - elephant_alpha_orchestrator / nemoton_dispatcher: registry/footprint 同步 - aider_heal_executor: OLLAMA_API_BASE fallback 改 188 - ai_routes: footprint display 字串改 qwen2.5:7b-instruct ## ElephantAlpha 404 修復 - elephant_service: openrouter→NVIDIA NIM, nvidia/llama-3.1-nemotron-ultra-253b-v1 - ai_provider: 模型 ID 同步更新 ## TELEGRAM_CHAT_ID 環境變數修正 - cicd_routes + aider_heal_executor: 優先讀 TELEGRAM_CHAT_IDS[0], fallback TELEGRAM_CHAT_ID,修復通知靜默失敗 ## AI 對話 logging 改善 - telegram_ai_integration: Hermes 降級改 WARNING,OpenClaw 失敗加 exc_info - hermes_analyst_service: 連線失敗 log 加 host/model context ## DB Schema 修復 - migrations/019: action_plans 補齊全欄位,DROP NOT NULL action_type - autoheal_models: ActionPlan ORM 同步為超集 schema Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
163 lines
5.4 KiB
Python
163 lines
5.4 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Elephant Alpha AI service module.

Talks to the Elephant Alpha model through the NVIDIA NIM chat-completions API and provides efficient, long-context worker AI functionality.
|
||
"""
|
||
|
||
import os
|
||
import time
|
||
import json
|
||
import logging
|
||
import requests
|
||
from typing import Optional, Dict, Any, List
|
||
from dataclasses import dataclass
|
||
|
||
logger = logging.getLogger(__name__)

# Elephant Alpha settings (NVIDIA NIM API)
NVIDIA_API_KEY = os.getenv('NVIDIA_API_KEY', '')
ELEPHANT_ALPHA_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
DEFAULT_ELEPHANT_MODEL = "nvidia/llama-3.1-nemotron-ultra-253b-v1"

# Request timeout in seconds (default: 2 minutes).  A malformed or
# non-positive ELEPHANT_TIMEOUT falls back to the default instead of aborting
# the import or producing a timeout value that the HTTP client rejects.
try:
    ELEPHANT_TIMEOUT = int(os.getenv('ELEPHANT_TIMEOUT', '120'))
except ValueError:
    logger.warning(
        "[Elephant] invalid ELEPHANT_TIMEOUT %r, falling back to 120s",
        os.getenv('ELEPHANT_TIMEOUT'),
    )
    ELEPHANT_TIMEOUT = 120
if ELEPHANT_TIMEOUT <= 0:
    logger.warning(
        "[Elephant] non-positive ELEPHANT_TIMEOUT %d, falling back to 120s",
        ELEPHANT_TIMEOUT,
    )
    ELEPHANT_TIMEOUT = 120

# Elephant Alpha pricing (USD per 1M tokens) - NVIDIA NIM pricing
ELEPHANT_PRICING = {
    'nvidia/llama-3.1-nemotron-ultra-253b-v1': {'input': 0.10, 'output': 0.40},
}
|
||
|
||
@dataclass
class ElephantResponse:
    """Result of one Elephant Alpha generation request.

    Field order is part of the interface: instances may be built positionally.
    """

    # True when the API call completed and the reply was parsed.
    success: bool
    # Generated text; empty string on failure.
    content: str
    # Name of the model the request was sent to.
    model: str
    # Error description when ``success`` is False, otherwise None.
    error: Optional[str] = None
    # Wall-clock seconds spent on the HTTP request, when measured.
    total_duration: Optional[float] = None
    # Token usage as reported by the API.
    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0
    # Estimated cost in USD derived from the pricing table.
    input_cost: float = 0.0
    output_cost: float = 0.0
    total_cost: float = 0.0
|
||
|
||
class ElephantService:
    """Elephant Alpha AI service backed by the NVIDIA NIM chat-completions API.

    ``generate`` always returns an ``ElephantResponse``; request and parsing
    failures are reported through ``success=False`` / ``error`` instead of
    being raised.
    """

    # W3-A guardrail 2: result of the most recent connectivity probe.  It is a
    # class attribute, so every instance shares it and the API is not pinged
    # on each check (300 s TTL by default).
    _connection_cache: Dict[str, Any] = {"ok": False, "checked_at": 0.0}

    def __init__(self, api_key: Optional[str] = None, model: Optional[str] = None):
        """Initialise the service.

        Args:
            api_key: NVIDIA API key; falls back to ``NVIDIA_API_KEY``.
            model: Default model name; falls back to
                ``DEFAULT_ELEPHANT_MODEL``.
        """
        self.api_key = api_key or NVIDIA_API_KEY
        self.model = model or DEFAULT_ELEPHANT_MODEL

    def check_connection(self, cache_seconds: int = 300) -> bool:
        """Return whether the API is usable, caching the result.

        The default of 300 seconds is aligned with the Anthropic prompt-cache
        TTL so that the per-minute EA loop does not call the API every time.

        Args:
            cache_seconds: Maximum age, in seconds, of a cached probe result
                that may be returned without contacting the API.

        Returns:
            False when no API key is configured; otherwise the cached or
            freshly probed availability.
        """
        if not self.api_key:
            return False

        now = time.time()
        cache = ElephantService._connection_cache
        if cache["checked_at"] and (now - cache["checked_at"]) < cache_seconds:
            return cache["ok"]

        try:
            # A handful of tokens is enough to prove reachability, so the
            # probe does not request the full default completion budget.
            result = self.generate("hi", timeout=10, max_tokens=16).success
        except Exception:
            # generate() reports failures in its return value; this guard only
            # ensures a probe can never crash the caller.
            result = False

        ElephantService._connection_cache = {"ok": result, "checked_at": now}
        return result

    @staticmethod
    def calculate_cost(model: str, input_tokens: int, output_tokens: int) -> Dict[str, float]:
        """Compute the cost of a request in USD.

        Args:
            model: Model name used to look up pricing; unknown models are
                billed at the nemotron-ultra rate.
            input_tokens: Number of prompt tokens.
            output_tokens: Number of completion tokens.

        Returns:
            Dict with ``input_cost``, ``output_cost`` and ``total_cost``, each
            rounded to 6 decimal places.
        """
        pricing = ELEPHANT_PRICING.get(
            model, ELEPHANT_PRICING['nvidia/llama-3.1-nemotron-ultra-253b-v1'])
        input_cost = (input_tokens / 1_000_000) * pricing['input']
        output_cost = (output_tokens / 1_000_000) * pricing['output']
        return {
            'input_cost': round(input_cost, 6),
            'output_cost': round(output_cost, 6),
            'total_cost': round(input_cost + output_cost, 6),
        }

    def generate(self, prompt: str, model: Optional[str] = None,
                 system_prompt: Optional[str] = None, temperature: float = 0.3,
                 json_mode: bool = False, timeout: Optional[int] = None,
                 max_tokens: int = 8000) -> ElephantResponse:
        """Generate text (main entry point).

        Args:
            prompt: User message content.
            model: Model override; defaults to the instance model.
            system_prompt: Optional system message placed before the prompt.
            temperature: Sampling temperature sent to the API.
            json_mode: When True, request a JSON-object response format.
            timeout: Request timeout in seconds; ``None`` (or 0) uses
                ``ELEPHANT_TIMEOUT``.
            max_tokens: Upper bound on generated tokens sent to the API.

        Returns:
            An ``ElephantResponse``.  On any failure ``success`` is False and
            ``error`` carries the message; no exception is propagated.
        """
        model_name = model or self.model
        request_timeout = timeout or ELEPHANT_TIMEOUT

        if not self.api_key:
            return ElephantResponse(success=False, content='', model=model_name, error="API Key 未設定")

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        payload = {
            "model": model_name,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if json_mode:
            payload["response_format"] = {"type": "json_object"}

        try:
            start_time = time.time()
            response = requests.post(
                ELEPHANT_ALPHA_URL,
                json=payload,
                headers=headers,
                timeout=request_timeout,
            )
            response.raise_for_status()
            end_time = time.time()

            data = response.json()
            # The API may return a null content; keep the field a str.
            content = data["choices"][0]["message"]["content"] or ''

            # Token usage.  ``usage`` and its counters may be missing or null,
            # which must not turn a successful completion into a failure.
            usage = data.get("usage") or {}
            input_tokens = usage.get("prompt_tokens") or 0
            output_tokens = usage.get("completion_tokens") or 0

            costs = self.calculate_cost(model_name, input_tokens, output_tokens)

            return ElephantResponse(
                success=True,
                content=content,
                model=model_name,
                total_duration=end_time - start_time,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                total_tokens=input_tokens + output_tokens,
                input_cost=costs['input_cost'],
                output_cost=costs['output_cost'],
                total_cost=costs['total_cost'],
            )

        except Exception as e:
            # Boundary handler: callers rely on generate() never raising.
            logger.error(f"[Elephant] 生成失敗: {e}")
            return ElephantResponse(success=False, content='', model=model_name, error=str(e))
|
||
|
||
# 單例實例
|
||
# Module-level instance created at import time with the default API key
# (NVIDIA_API_KEY) and default model (DEFAULT_ELEPHANT_MODEL).
elephant_service = ElephantService()
|