All checks were successful
CD Pipeline / deploy (push) Successful in 2m50s
Operation Ollama-First v5.0 / Phase 14-18 全套(statesman 批准全部) Phase 14 — services/ppt_vision_service.py (新檔, 200+ 行) - minicpm-v:latest(GCP Primary 已拉 5.5GB,代替 qwen2-vl 不存在) - check_image(image_path) → VisionResult.issues_found 視覺異常清單 - 走 resolve_ollama_host 三主機 retry + mark_unhealthy - 繁中強制 system prompt + 結構化解析 ⚠️ marker - feature flag PPT_VISION_ENABLED 預設 OFF Phase 15 — services/deepseek_service.py (新檔, 170+ 行) - DeepSeek API 直連 (api.deepseek.com/v1),OpenAI-compatible - 取代部分 OpenRouter 路徑(直連便宜 ~30-50% + 延遲低) - deepseek-chat ($0.014/$0.28) / deepseek-reasoner ($0.14/$2.19) - feature flag DEEPSEEK_DIRECT_ENABLED 預設 OFF - DeepSeekResponse 含 input_tokens/output_tokens/duration_ms Phase 16 — services/llm_caller_registry.py (新檔, 130+ 行) - CALLER_REGISTRY frozenset 集中管理 35+ 個 caller 名(ADR-028 白名單) - assert_known_caller(strict=False) 整合到 ai_call_logger __init__ - 不在 registry → log warning(不 raise,保留擴展彈性) - list_callers_by_service() 分組除錯 - 解 critic-A11 第 3 輪 L4 修補(命名分散三層) Phase 17 — _is_low_quality_response 4 條新規則(A2 警訊深化) - 規則 5:純英文回應(中文字元 < 30%) - 規則 6:thinking-mode 漏洞(<think>...</think> 洩漏) - 規則 7:重複迴圈偵測(前 50 字出現 ≥ 3 次) - 規則 8:佔位符未填充({{var}} / [TODO] / <待填>) Phase 18 — docs/operation_ollama_first_v5_postmortem.md (新檔) - 戰役完整時間軸(Day 1-2) - 3 大決策替代分析 - 4 個 critical hotfix 教訓 - Owen 三護欄落地對照 - KPI 達成度(Wave 1 提前 4 天 / Wave 2 提前 10 天) - 統帥手動清單 + 7 條未來戰役教訓 Phase 13 補強(合併本 commit): - ai_call_logger COST_TABLE 補 7 個新模型(qwen3:14b / qwen2.5:7b-instruct / qwen2.5-coder:32b / qwen2-vl:7b / deepseek-r1:14b / gemma3:4b / minicpm-v) regression: 214 unit tests 全綠(4:02 跑完),2 skipped Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
163 lines
5.2 KiB
Python
163 lines
5.2 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
services/deepseek_service.py
Operation Ollama-First v5.0 / Phase 15 — direct DeepSeek API fallback

Design principles (ADR-030 multi-provider strategy):
- Direct connection to the DeepSeek API (api.deepseek.com), OpenAI-compatible interface
- Replaces part of the OpenRouter path (direct access is ~30-50% cheaper, with lower latency)
- Main fallback scenarios: PPT NIM deepseek-v3.2 failure / third provider for Code Review
- Feature flag DEEPSEEK_DIRECT_ENABLED defaults to OFF
- On failure, automatically falls back to OpenRouter (backward compatible)

Models (2026-05):
- deepseek-chat (V3.2) $0.014/$0.28 per M tokens — general purpose
- deepseek-reasoner (R1-0528) $0.14/$2.19 per M tokens — enhanced reasoning
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
import os
|
||
import time
|
||
import logging
|
||
from dataclasses import dataclass
|
||
from typing import Optional, Dict, Any
|
||
|
||
import requests
|
||
|
||
logger = logging.getLogger(__name__)

# Connection settings, read from the environment once at import time.
DEEPSEEK_API_KEY = os.getenv('DEEPSEEK_API_KEY', '')
DEEPSEEK_BASE_URL = os.getenv('DEEPSEEK_BASE_URL', 'https://api.deepseek.com/v1')
DEEPSEEK_DEFAULT_MODEL = os.getenv('DEEPSEEK_MODEL', 'deepseek-chat')

# Request timeout in seconds. A malformed value (e.g. "60s" or an empty
# string) used to raise ValueError while importing this module, and a value
# <= 0 is rejected by the HTTP layer on every call; both now fall back to
# the 60 s default so an optional setting cannot break the importer.
try:
    DEEPSEEK_TIMEOUT = int(os.getenv('DEEPSEEK_TIMEOUT', '60'))
except ValueError:
    DEEPSEEK_TIMEOUT = 0  # handled by the range check below
if DEEPSEEK_TIMEOUT <= 0:
    logger.warning(
        'Invalid DEEPSEEK_TIMEOUT=%r; falling back to 60 seconds',
        os.getenv('DEEPSEEK_TIMEOUT'),
    )
    DEEPSEEK_TIMEOUT = 60
|
||
|
||
|
||
def is_deepseek_direct_enabled() -> bool:
    """Return whether the ``DEEPSEEK_DIRECT_ENABLED`` feature flag is on.

    The environment variable is read on every call rather than cached at
    import time, so changing it takes effect without re-importing the
    module. The values ``true``, ``1``, ``yes`` and ``on`` enable the flag
    (case-insensitive, surrounding whitespace ignored); anything else,
    including an unset variable, leaves it off.
    """
    raw_flag = os.getenv('DEEPSEEK_DIRECT_ENABLED', 'false')
    return raw_flag.strip().lower() in ('true', '1', 'yes', 'on')
|
||
|
||
|
||
@dataclass
class DeepSeekResponse:
    """Result of a single DeepSeek chat-completion attempt.

    Failures are reported through ``success=False`` plus ``error`` so the
    caller can decide whether to fall back to another provider.
    """

    # Whether the attempt produced a usable HTTP 200 response.
    success: bool
    # Text of the first returned choice; empty string on failure.
    content: str
    # Model name reported for the call.
    model: str
    # Token counts taken from the response's ``usage`` section.
    input_tokens: int = 0
    output_tokens: int = 0
    # Elapsed time of the request in milliseconds (0 if no request was sent).
    duration_ms: int = 0
    # Human-readable failure description; ``None`` on success.
    error: Optional[str] = None
|
||
|
||
|
||
class DeepSeekService:
    """Client for DeepSeek's OpenAI-compatible chat-completions endpoint.

    Problems are reported through return values (``DeepSeekResponse.success``
    or a plain ``False``) rather than exceptions, so a caller can fall back
    to another provider.
    """

    def __init__(self, model: str = DEEPSEEK_DEFAULT_MODEL):
        """Bind the service to *model* (default: ``DEEPSEEK_DEFAULT_MODEL``)."""
        self.model = model

    def is_available(self) -> bool:
        """Return True only when an API key is set and the feature flag is on."""
        return bool(DEEPSEEK_API_KEY) and is_deepseek_direct_enabled()

    @staticmethod
    def _elapsed_ms(start: float) -> int:
        """Return whole milliseconds elapsed since the monotonic time *start*."""
        return int((time.monotonic() - start) * 1000)

    def _failure(self, error: str, duration_ms: int = 0) -> DeepSeekResponse:
        """Log *error* and wrap it in an unsuccessful ``DeepSeekResponse``."""
        logger.warning(
            'DeepSeek request failed (model=%s, %d ms): %s',
            self.model, duration_ms, error,
        )
        return DeepSeekResponse(
            success=False, content='', model=self.model,
            duration_ms=duration_ms, error=error,
        )

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        max_tokens: int = 4096,
        temperature: float = 0.4,
    ) -> DeepSeekResponse:
        """Send one non-streaming chat completion to ``/chat/completions``.

        Args:
            prompt: Text sent as the ``user`` message.
            system_prompt: Optional text sent first as a ``system`` message.
            max_tokens: Value forwarded as ``max_tokens``.
            temperature: Value forwarded as ``temperature``.

        Returns:
            A ``DeepSeekResponse``. ``success`` is False when the service is
            unavailable (missing key or flag off), the HTTP status is not
            200, the request times out, or any other exception occurs; the
            reason is placed in ``error``. This method does not raise for
            those cases.
        """
        if not self.is_available():
            # Expected while the feature flag is off, so this is not logged.
            return DeepSeekResponse(
                success=False, content='', model=self.model,
                error='DEEPSEEK_DIRECT_ENABLED=false or DEEPSEEK_API_KEY 未設',
            )

        messages = []
        if system_prompt:
            messages.append({'role': 'system', 'content': system_prompt})
        messages.append({'role': 'user', 'content': prompt})

        # Tolerate a trailing slash in the configured base URL; without this
        # the request would go to ".../v1//chat/completions".
        url = f"{DEEPSEEK_BASE_URL.rstrip('/')}/chat/completions"

        start = time.monotonic()
        try:
            resp = requests.post(
                url,
                headers={
                    'Authorization': f'Bearer {DEEPSEEK_API_KEY}',
                    'Content-Type': 'application/json',
                },
                json={
                    'model': self.model,
                    'messages': messages,
                    'max_tokens': max_tokens,
                    'temperature': temperature,
                    'stream': False,
                },
                timeout=DEEPSEEK_TIMEOUT,
            )
            duration_ms = self._elapsed_ms(start)

            if resp.status_code != 200:
                return self._failure(
                    f'HTTP {resp.status_code}: {resp.text[:200]}', duration_ms,
                )

            data = resp.json()
            choices = data.get('choices', [])
            content = ''
            if choices:
                msg = choices[0].get('message', {})
                content = msg.get('content', '') or ''

            usage = data.get('usage', {}) or {}
            return DeepSeekResponse(
                success=True,
                content=content,
                # A null "model" in the payload must not leak None into a
                # field typed as str.
                model=data.get('model') or self.model,
                input_tokens=int(usage.get('prompt_tokens', 0) or 0),
                output_tokens=int(usage.get('completion_tokens', 0) or 0),
                duration_ms=duration_ms,
            )

        except requests.Timeout:
            return self._failure(
                f'timeout ({DEEPSEEK_TIMEOUT}s)', self._elapsed_ms(start),
            )
        except Exception as e:
            # Boundary catch-all: connection errors, invalid JSON and
            # unexpected payload shapes all become a failed response.
            return self._failure(
                f'{type(e).__name__}: {str(e)[:200]}', self._elapsed_ms(start),
            )

    def check_connection(self) -> bool:
        """Send a tiny ``'ping'`` prompt and report whether it succeeded.

        Returns False without sending anything when the service is not
        available. Otherwise this issues a real API request through
        ``generate``.
        """
        if not self.is_available():
            return False
        try:
            return self.generate('ping', max_tokens=10, temperature=0).success
        except Exception:
            # generate() is written not to raise; this guard keeps the
            # health check boolean even if that ever changes.
            return False
|
||
|
||
|
||
# Module-level singleton, created at import time with the default model.
deepseek_service = DeepSeekService()


# Names exported by ``from ... import *``.
__all__ = [
    'DeepSeekService',
    'DeepSeekResponse',
    'deepseek_service',
    'is_deepseek_direct_enabled',
]
|