Files
ewoooc/services/elephant_service.py
ogt d5c0feab5e
All checks were successful
CD Pipeline / deploy (push) Successful in 1m35s
fix: Telegram bot 全功能修復 — 16個await按鈕/AI對話/模型遷移/DB schema
## Telegram Bot 功能修復
- 補全 16 個 await: 按鈕的 handler(日期選擇/目標設定/促銷追蹤等),
  新增 _handle_await_callback + _process_await_input 完整狀態機
- cmd: 按鈕加入  即時回饋 + try/except 防 BadRequest
- handle_callback 加頂層 try/except 錯誤兜底
- 補 momo:cmd:suggestion + momo:menu:main callback handler
- 修復 _enhanced_keyword_matching context NameError

## AI 模型遷移(hermes3@111 → qwen2.5@188)
- hermes_analyst_service: URL 192.168.0.111→188, hermes3→qwen2.5:7b-instruct
- code_review_pipeline: 改用 HERMES_URL/HERMES_MODEL 常數
- elephant_alpha_orchestrator / nemoton_dispatcher: registry/footprint 同步
- aider_heal_executor: OLLAMA_API_BASE fallback 改 188
- ai_routes: footprint display 字串改 qwen2.5:7b-instruct

## ElephantAlpha 404 修復
- elephant_service: openrouter→NVIDIA NIM, nvidia/llama-3.1-nemotron-ultra-253b-v1
- ai_provider: 模型 ID 同步更新

## TELEGRAM_CHAT_ID 環境變數修正
- cicd_routes + aider_heal_executor: 優先讀 TELEGRAM_CHAT_IDS[0],
  fallback TELEGRAM_CHAT_ID,修復通知靜默失敗

## AI 對話 logging 改善
- telegram_ai_integration: Hermes 降級改 WARNING,OpenClaw 失敗加 exc_info
- hermes_analyst_service: 連線失敗 log 加 host/model context

## DB Schema 修復
- migrations/019: action_plans 補齊全欄位,DROP NOT NULL action_type
- autoheal_models: ActionPlan ORM 同步為超集 schema

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 03:30:14 +08:00

163 lines
5.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Elephant Alpha AI 服務模組
負責與 OpenRouter / Elephant Alpha API 互動,提供高效率、長上下文的 Worker AI 功能
"""
import os
import time
import json
import logging
import requests
from typing import Optional, Dict, Any, List
from dataclasses import dataclass
logger = logging.getLogger(__name__)

# Elephant Alpha configuration (NVIDIA NIM API).
NVIDIA_API_KEY = os.environ.get("NVIDIA_API_KEY", "")
ELEPHANT_ALPHA_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
DEFAULT_ELEPHANT_MODEL = "nvidia/llama-3.1-nemotron-ultra-253b-v1"
# Request timeout in seconds; falls back to 120 (two minutes) when the
# ELEPHANT_TIMEOUT environment variable is not set.
ELEPHANT_TIMEOUT = int(os.environ.get("ELEPHANT_TIMEOUT", "120"))

# Elephant Alpha pricing in USD per one million tokens (NVIDIA NIM rates).
ELEPHANT_PRICING = {
    DEFAULT_ELEPHANT_MODEL: {"input": 0.10, "output": 0.40},
}
@dataclass
class ElephantResponse:
    """Result of a single Elephant Alpha generation request.

    The first three fields are required; every other field has a default so a
    failure can be reported with only ``success``, ``content`` and ``model``
    (plus ``error``).
    """

    # --- outcome ---------------------------------------------------------
    success: bool                           # True when the request completed
    content: str                            # generated text ('' on failure)
    model: str                              # model identifier that was requested
    error: Optional[str] = None             # error description when success is False
    total_duration: Optional[float] = None  # wall-clock seconds spent on the HTTP call

    # --- token usage -----------------------------------------------------
    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0

    # --- estimated cost in USD -------------------------------------------
    input_cost: float = 0.0
    output_cost: float = 0.0
    total_cost: float = 0.0
class ElephantService:
    """Client for the Elephant Alpha worker model served via the NVIDIA NIM API.

    Sends chat-completion requests to ``ELEPHANT_ALPHA_URL`` and returns an
    ``ElephantResponse`` carrying the generated text, token usage and the
    estimated cost. ``generate`` never raises: every failure is reported
    through ``ElephantResponse.success`` / ``ElephantResponse.error``.
    """

    # W3-A guard rail 2 (degrade on disconnect): result of the most recent
    # connectivity probe, kept so the API is not pinged on every check.
    # This is a class attribute, so the cached result is shared by all instances.
    _connection_cache: Dict[str, Any] = {"ok": False, "checked_at": 0.0}

    def __init__(self, api_key: Optional[str] = None, model: Optional[str] = None):
        """Initialise the service.

        Args:
            api_key: API key to use; falls back to ``NVIDIA_API_KEY``.
            model: Default model identifier; falls back to
                ``DEFAULT_ELEPHANT_MODEL``.
        """
        self.api_key = api_key or NVIDIA_API_KEY
        self.model = model or DEFAULT_ELEPHANT_MODEL

    def check_connection(self, cache_seconds: int = 300) -> bool:
        """Report whether the API is currently usable.

        The probe result is cached for ``cache_seconds`` so that a frequently
        running caller does not hit the API on every iteration.
        NOTE(review): the original note says the 300 s default was chosen to
        line up with the Anthropic prompt-cache TTL — confirm this still applies.

        Args:
            cache_seconds: Maximum age, in seconds, of a cached probe result.

        Returns:
            ``False`` immediately when no API key is configured; otherwise the
            cached result if it is fresh enough, else the outcome of a new
            probe request.
        """
        if not self.api_key:
            return False

        now = time.time()
        cache = ElephantService._connection_cache
        # ``checked_at`` is 0.0 until the first probe, which forces a real check.
        if cache["checked_at"] and (now - cache["checked_at"]) < cache_seconds:
            return cache["ok"]

        try:
            result = self.generate("hi", timeout=10).success
        except Exception:
            # generate() reports failures through its return value; this is a
            # defensive net so a health check can never raise.
            result = False

        ElephantService._connection_cache = {"ok": result, "checked_at": now}
        return result

    @staticmethod
    def calculate_cost(model: str, input_tokens: int, output_tokens: int) -> Dict[str, float]:
        """Estimate the USD cost of a request.

        Args:
            model: Model identifier used to look up pricing; unknown models
                are billed at the Nemotron Ultra rate.
            input_tokens: Number of prompt tokens.
            output_tokens: Number of completion tokens.

        Returns:
            A dict with ``input_cost``, ``output_cost`` and ``total_cost``,
            each rounded to six decimal places.
        """
        pricing = ELEPHANT_PRICING.get(
            model, ELEPHANT_PRICING['nvidia/llama-3.1-nemotron-ultra-253b-v1']
        )
        # Prices are quoted per one million tokens.
        input_cost = (input_tokens / 1_000_000) * pricing['input']
        output_cost = (output_tokens / 1_000_000) * pricing['output']
        return {
            'input_cost': round(input_cost, 6),
            'output_cost': round(output_cost, 6),
            'total_cost': round(input_cost + output_cost, 6),
        }

    @staticmethod
    def _parse_completion(data: Dict[str, Any]) -> tuple:
        """Extract ``(content, input_tokens, output_tokens)`` from a response payload.

        Tolerates a missing or null ``usage`` object, null token counts and a
        null message ``content`` so that an otherwise successful completion is
        not turned into a failure by absent accounting data.

        Raises:
            KeyError, IndexError, TypeError: If the ``choices`` structure
                itself is malformed.
        """
        content = data["choices"][0]["message"]["content"]
        if content is None:
            # Keep ElephantResponse.content a str even when the API sends null.
            content = ''
        # ``usage`` may be absent or explicitly null; treat both as "no data".
        usage = data.get("usage") or {}
        input_tokens = usage.get("prompt_tokens") or 0
        output_tokens = usage.get("completion_tokens") or 0
        return content, input_tokens, output_tokens

    def generate(self, prompt: str, model: Optional[str] = None,
                 system_prompt: Optional[str] = None, temperature: float = 0.3,
                 json_mode: bool = False, timeout: Optional[int] = None) -> "ElephantResponse":
        """Generate text (main entry point).

        Args:
            prompt: User message to send.
            model: Model identifier; defaults to the instance's model.
            system_prompt: Optional system message placed before the prompt.
            temperature: Sampling temperature.
            json_mode: When true, request a JSON-object response format.
            timeout: Request timeout in seconds; defaults to
                ``ELEPHANT_TIMEOUT``.

        Returns:
            An ``ElephantResponse``. On any failure (missing API key, HTTP
            error, malformed payload, ...) ``success`` is ``False`` and
            ``error`` holds the reason; no exception is propagated.
        """
        model_name = model or self.model
        request_timeout = timeout or ELEPHANT_TIMEOUT

        if not self.api_key:
            return ElephantResponse(success=False, content='', model=model_name, error="API Key 未設定")

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        payload = {
            "model": model_name,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": 8000,
        }
        if json_mode:
            payload["response_format"] = {"type": "json_object"}

        try:
            start_time = time.time()
            response = requests.post(
                ELEPHANT_ALPHA_URL,
                json=payload,
                headers=headers,
                timeout=request_timeout,
            )
            response.raise_for_status()
            # Duration covers the HTTP round trip only, not payload parsing.
            total_duration = time.time() - start_time

            content, input_tokens, output_tokens = self._parse_completion(response.json())
            costs = self.calculate_cost(model_name, input_tokens, output_tokens)

            return ElephantResponse(
                success=True,
                content=content,
                model=model_name,
                total_duration=total_duration,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                total_tokens=input_tokens + output_tokens,
                input_cost=costs['input_cost'],
                output_cost=costs['output_cost'],
                total_cost=costs['total_cost'],
            )
        except Exception as e:
            # Boundary handler: callers rely on generate() never raising.
            logger.error("[Elephant] 生成失敗: %s", e)
            return ElephantResponse(success=False, content='', model=model_name, error=str(e))
# Module-level shared instance, built with default arguments when this module is imported.
elephant_service = ElephantService()