#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
services/deepseek_service.py
Operation Ollama-First v5.0 / Phase 15 — DeepSeek API direct-connection fallback

Design principles (ADR-030 multi-provider strategy):
- Direct connection to the DeepSeek API (api.deepseek.com), OpenAI-compatible interface
- Replaces part of the OpenRouter path (direct is ~30-50% cheaper, lower latency)
- Main fallback scenarios: PPT NIM deepseek-v3.2 failure / third provider for Code Review
- Feature flag DEEPSEEK_DIRECT_ENABLED defaults to OFF
- On failure the caller falls back to OpenRouter (backward compatible)

Models (2026-05):
- deepseek-chat (V3.2) $0.014/$0.28 per M tokens — general purpose
- deepseek-reasoner (R1-0528) $0.14/$2.19 per M tokens — enhanced reasoning
"""
from __future__ import annotations

import os
import time
import logging
from dataclasses import dataclass
from typing import Optional, Dict, Any

import requests

logger = logging.getLogger(__name__)


def _read_timeout_seconds(default: int = 60) -> int:
    """Parse DEEPSEEK_TIMEOUT from the environment without crashing the import.

    An unset, empty, non-numeric or non-positive value yields ``default``.
    """
    raw = os.getenv('DEEPSEEK_TIMEOUT', '').strip()
    if not raw:
        return default
    try:
        seconds = int(raw)
    except ValueError:
        logger.warning('Ignoring invalid DEEPSEEK_TIMEOUT=%r; using %ss', raw, default)
        return default
    if seconds <= 0:
        logger.warning('Ignoring non-positive DEEPSEEK_TIMEOUT=%r; using %ss', raw, default)
        return default
    return seconds


# NOTE: these values are captured once, at import time; only the feature flag
# (see is_deepseek_direct_enabled) is re-read on every call.
DEEPSEEK_API_KEY = os.getenv('DEEPSEEK_API_KEY', '')
DEEPSEEK_BASE_URL = os.getenv('DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')
DEEPSEEK_DEFAULT_MODEL = os.getenv('DEEPSEEK_MODEL', 'deepseek-chat')
DEEPSEEK_TIMEOUT = _read_timeout_seconds()

# Values of DEEPSEEK_DIRECT_ENABLED (after strip + lower) that switch the flag ON.
_TRUTHY_FLAG_VALUES = ('true', '1', 'yes', 'on')


def is_deepseek_direct_enabled() -> bool:
    """Return whether the DEEPSEEK_DIRECT_ENABLED flag is ON.

    The environment is read on every call (a runtime check) so the flag is not
    frozen at import time. The default is OFF.
    """
    flag = os.getenv('DEEPSEEK_DIRECT_ENABLED', 'false')
    return flag.strip().lower() in _TRUTHY_FLAG_VALUES


def _elapsed_ms(start: float) -> int:
    """Whole milliseconds elapsed since ``start`` (a ``time.monotonic()`` reading)."""
    return int((time.monotonic() - start) * 1000)


def _as_dict(value: Any) -> Dict[str, Any]:
    """Return ``value`` when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


@dataclass
class DeepSeekResponse:
    """Outcome of one DeepSeek chat-completion call."""

    success: bool                 # False means the caller should fall back
    content: str                  # assistant message text ('' on failure)
    model: str                    # model reported by the API, else the requested one
    input_tokens: int = 0         # usage.prompt_tokens
    output_tokens: int = 0        # usage.completion_tokens
    duration_ms: int = 0          # wall time from generate() entry to the result
    error: Optional[str] = None   # short description when success is False


class DeepSeekService:
    """Direct DeepSeek API client — OpenAI-compatible chat completions."""

    def __init__(self, model: str = DEEPSEEK_DEFAULT_MODEL):
        self.model = model

    def is_available(self) -> bool:
        """True when an API key was configured and the feature flag is ON."""
        return bool(DEEPSEEK_API_KEY) and is_deepseek_direct_enabled()

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        max_tokens: int = 4096,
        temperature: float = 0.4,
    ) -> DeepSeekResponse:
        """Send one non-streaming chat completion to ``{base_url}/chat/completions``.

        Fail-safe: this method does not raise for ``Exception`` subclasses.
        A missing API key, a disabled flag, a non-200 status, a timeout, a
        response without choices, or any other exception produces a
        ``DeepSeekResponse`` with ``success=False`` so the caller can fall back.

        Args:
            prompt: User message content.
            system_prompt: Optional system message placed before the user message.
            max_tokens: Forwarded as ``max_tokens``.
            temperature: Forwarded as ``temperature``.

        Returns:
            A ``DeepSeekResponse``; inspect ``success`` before using ``content``.
        """
        start = time.monotonic()

        # Flag OFF is the default state, so this path is deliberately not logged.
        if not self.is_available():
            return DeepSeekResponse(
                success=False,
                content='',
                model=self.model,
                error='DEEPSEEK_DIRECT_ENABLED=false or DEEPSEEK_API_KEY 未設',
            )

        messages = self._build_messages(prompt, system_prompt)
        # Tolerate a configured base URL that ends with '/'.
        url = f"{DEEPSEEK_BASE_URL.rstrip('/')}/chat/completions"

        try:
            resp = requests.post(
                url,
                headers={
                    'Authorization': f'Bearer {DEEPSEEK_API_KEY}',
                    'Content-Type': 'application/json',
                },
                json={
                    'model': self.model,
                    'messages': messages,
                    'max_tokens': max_tokens,
                    'temperature': temperature,
                    'stream': False,
                },
                timeout=DEEPSEEK_TIMEOUT,
            )
            duration_ms = _elapsed_ms(start)

            if resp.status_code != 200:
                return self._failure(
                    f'HTTP {resp.status_code}: {resp.text[:200]}',
                    duration_ms,
                )

            # JSON decoding / token-count conversion errors are handled by
            # the generic handler below.
            return self._parse_completion(resp.json(), duration_ms)

        except requests.Timeout:
            return self._failure(f'timeout ({DEEPSEEK_TIMEOUT}s)', _elapsed_ms(start))
        except Exception as e:
            # Deliberately broad: this is the fail-safe boundary for callers.
            return self._failure(
                f'{type(e).__name__}: {str(e)[:200]}',
                _elapsed_ms(start),
            )

    @staticmethod
    def _build_messages(prompt: str, system_prompt: Optional[str]) -> list:
        """Build the ``messages`` list: optional system message, then the user prompt."""
        messages = []
        if system_prompt:
            messages.append({'role': 'system', 'content': system_prompt})
        messages.append({'role': 'user', 'content': prompt})
        return messages

    def _failure(self, error: str, duration_ms: int = 0) -> DeepSeekResponse:
        """Log ``error`` and wrap it in a ``success=False`` response."""
        logger.warning('DeepSeek request failed (model=%s): %s', self.model, error)
        return DeepSeekResponse(
            success=False,
            content='',
            model=self.model,
            duration_ms=duration_ms,
            error=error,
        )

    def _parse_completion(self, data: Any, duration_ms: int) -> DeepSeekResponse:
        """Convert a decoded HTTP 200 body into a ``DeepSeekResponse``.

        A body without a non-empty ``choices`` list is reported as a failure so
        the caller falls back instead of accepting an empty answer. An empty
        ``content`` inside an existing choice is still a success.
        """
        payload = _as_dict(data)
        choices = payload.get('choices')
        if not isinstance(choices, list) or not choices:
            return self._failure('HTTP 200 but response contained no choices', duration_ms)

        # `message`, `content`, `usage` and `model` may each be absent or null.
        message = _as_dict(_as_dict(choices[0]).get('message'))
        usage = _as_dict(payload.get('usage'))
        return DeepSeekResponse(
            success=True,
            content=message.get('content') or '',
            model=payload.get('model') or self.model,
            input_tokens=int(usage.get('prompt_tokens', 0) or 0),
            output_tokens=int(usage.get('completion_tokens', 0) or 0),
            duration_ms=duration_ms,
        )

    def check_connection(self) -> bool:
        """Lightweight probe: send a very short message and report whether it succeeded."""
        if not self.is_available():
            return False
        try:
            r = self.generate('ping', max_tokens=10, temperature=0)
            return r.success
        except Exception:
            # Best-effort probe: any unexpected error counts as "not connected".
            return False


# Module-level singleton
deepseek_service = DeepSeekService()

__all__ = [
    'DeepSeekService',
    'DeepSeekResponse',
    'deepseek_service',
    'is_deepseek_direct_enabled',
]