[V-New] Ollama 主機切換:GCP 優先 / 111 自動備援架構 | services/ollama_service.py, docker-compose.yml
All checks were successful
CD Pipeline / deploy (push) Successful in 12m19s
All checks were successful
CD Pipeline / deploy (push) Successful in 12m19s
This commit is contained in:
@@ -85,8 +85,11 @@ services:
|
||||
- POSTGRES_USER=${POSTGRES_USER:-momo}
|
||||
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
|
||||
- POSTGRES_DB=${POSTGRES_DB:-momo_analytics}
|
||||
# Embedding 服務:bge-m3 on Hermes (ADR-003),永遠走內網免 auth
|
||||
- EMBEDDING_HOST=${EMBEDDING_HOST:-http://192.168.0.111:11434}
|
||||
# Ollama 主機:GCP 優先 / 111 自動備援(ADR-003)
|
||||
- OLLAMA_HOST_PRIMARY=${OLLAMA_HOST_PRIMARY:-http://34.21.145.224:11434}
|
||||
- OLLAMA_HOST_FALLBACK=${OLLAMA_HOST_FALLBACK:-http://192.168.0.111:11434}
|
||||
# EMBEDDING_HOST 若未設定,由 resolve_ollama_host() 自動決定(GCP 優先)
|
||||
- EMBEDDING_HOST=${EMBEDDING_HOST:-}
|
||||
# ADR-020: Code Review 全自動修復主開關
|
||||
# 預設 true(任何 finding 一律觸發 AiderHeal),可在 .env 顯式設 false 即時切斷
|
||||
- CODE_REVIEW_AUTO_FIX_ENABLED=${CODE_REVIEW_AUTO_FIX_ENABLED:-true}
|
||||
@@ -207,8 +210,10 @@ services:
|
||||
# H7 (2026-04-24): POSTGRES_* 改由 env_file: .env 唯一來源,移除 compose 層插值避免空值覆蓋
|
||||
- USE_POSTGRESQL=true
|
||||
- POSTGRES_PORT=5432
|
||||
# Embedding 服務:bge-m3 on Hermes (ADR-003),永遠走內網免 auth
|
||||
- EMBEDDING_HOST=${EMBEDDING_HOST:-http://192.168.0.111:11434}
|
||||
# Ollama 主機:GCP 優先 / 111 自動備援(ADR-003)
|
||||
- OLLAMA_HOST_PRIMARY=${OLLAMA_HOST_PRIMARY:-http://34.21.145.224:11434}
|
||||
- OLLAMA_HOST_FALLBACK=${OLLAMA_HOST_FALLBACK:-http://192.168.0.111:11434}
|
||||
- EMBEDDING_HOST=${EMBEDDING_HOST:-}
|
||||
env_file:
|
||||
- .env
|
||||
command: ["python", "run_scheduler.py"]
|
||||
@@ -260,7 +265,10 @@ services:
|
||||
# H7 (2026-04-24): POSTGRES_* 改由 env_file: .env 唯一來源,移除 compose 層插值避免空值覆蓋
|
||||
- USE_POSTGRESQL=true
|
||||
- POSTGRES_PORT=5432
|
||||
- EMBEDDING_HOST=${EMBEDDING_HOST:-http://192.168.0.111:11434}
|
||||
# Ollama 主機:GCP 優先 / 111 自動備援(ADR-003)
|
||||
- OLLAMA_HOST_PRIMARY=${OLLAMA_HOST_PRIMARY:-http://34.21.145.224:11434}
|
||||
- OLLAMA_HOST_FALLBACK=${OLLAMA_HOST_FALLBACK:-http://192.168.0.111:11434}
|
||||
- EMBEDDING_HOST=${EMBEDDING_HOST:-}
|
||||
env_file:
|
||||
- .env
|
||||
command: ["python", "run_telegram_bot.py"]
|
||||
|
||||
@@ -15,14 +15,61 @@ from dataclasses import dataclass
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Ollama settings - every value below can be overridden through environment variables
|
||||
# Original comment: defaults to the external URL (through the Nginx reverse proxy);
# local development can point at the intranet host via an environment variable.
# NOTE(review): the first OLLAMA_HOST default below is an intranet address
# (192.168.0.111), so this comment looks stale - confirm.
|
||||
# Original comment: when accessed externally, the API path lives under /ollama/
# (not verifiable from this section - confirm).
|
||||
OLLAMA_HOST = os.getenv('OLLAMA_HOST', 'http://192.168.0.111:11434')
|
||||
# V-New: GCP-first architecture with automatic fallback to 111
|
||||
# OLLAMA_HOST_PRIMARY => GCP Ollama host (34.21.145.224)
|
||||
# OLLAMA_HOST_FALLBACK => 111 intranet Hermes fallback
|
||||
OLLAMA_HOST_PRIMARY = os.getenv('OLLAMA_HOST_PRIMARY', 'http://34.21.145.224:11434')
|
||||
OLLAMA_HOST_FALLBACK = os.getenv('OLLAMA_HOST_FALLBACK', 'http://192.168.0.111:11434')
|
||||
# OLLAMA_HOST prefers the legacy environment variable (backward compatible);
# when it is unset, OLLAMA_HOST_PRIMARY is used instead.
# NOTE(review): OLLAMA_HOST is assigned twice in this section; this later
# assignment is the one that takes effect.
|
||||
OLLAMA_HOST = os.getenv('OLLAMA_HOST', OLLAMA_HOST_PRIMARY)
|
||||
DEFAULT_MODEL = os.getenv('OLLAMA_MODEL', 'llama3.1:8b') # faster model
|
||||
TIMEOUT = int(os.getenv('OLLAMA_TIMEOUT', '120')) # seconds - 2 minutes
|
||||
COPY_TIMEOUT = int(os.getenv('OLLAMA_COPY_TIMEOUT', '180')) # timeout dedicated to copy generation - 3 minutes
|
||||
# Embedding timeout: OLLAMA_EMBED_TIMEOUT, else EMBEDDING_TIMEOUT, else 45.
EMBED_TIMEOUT = int(os.getenv('OLLAMA_EMBED_TIMEOUT', os.getenv('EMBEDDING_TIMEOUT', '45')))
|
||||
|
||||
# ── GCP first / 111 fallback: resolve the Ollama host that is actually usable ──
|
||||
# Cache written by resolve_ollama_host(): 'host' is the last selected host URL,
# 'ts' is the time.time() value recorded for that selection.
_resolved_host_cache: dict = {'host': None, 'ts': 0}
|
||||
_RESOLVE_TTL = 120 # cache the host health result for 120 seconds
|
||||
|
||||
def resolve_ollama_host(primary: str = OLLAMA_HOST_PRIMARY,
                        fallback: str = OLLAMA_HOST_FALLBACK) -> str:
    """Return the Ollama base URL to use: ``primary`` if reachable, else ``fallback``.

    Reachability is decided by a plain TCP connect to the host and port of
    ``primary`` (port 11434 when the URL carries none). The decision is stored
    in the module-level ``_resolved_host_cache`` and reused for
    ``_RESOLVE_TTL`` seconds so that not every request triggers a probe.

    Args:
        primary: Preferred Ollama base URL (e.g. ``http://host:11434``).
        fallback: Base URL returned when ``primary`` cannot be reached.
            It is returned as-is; it is never probed.

    Returns:
        Either ``primary`` or ``fallback``, unchanged.
    """
    import socket
    import time
    from urllib.parse import urlparse

    def _is_reachable(url: str, timeout: float = 3.0) -> bool:
        """Return True when a TCP connection to the URL's host:port succeeds."""
        try:
            parsed = urlparse(url)
            host = parsed.hostname
            if not host:
                # A URL without a scheme parses to hostname=None, and
                # create_connection((None, port)) would probe the loopback
                # interface instead - treat such a URL as unreachable.
                return False
            # parsed.port raises ValueError for an out-of-range port.
            port = parsed.port or 11434
            # The context manager closes the socket even if leaving the
            # block raises.
            with socket.create_connection((host, port), timeout=timeout):
                return True
        except (OSError, ValueError):
            # OSError covers refused connections, timeouts and DNS failures;
            # ValueError covers malformed ports and un-encodable host names.
            return False

    endpoints = (primary, fallback)
    cached_host = _resolved_host_cache.get('host')
    # A cached decision is only valid for the endpoints it was made for.
    # An entry without a 'key' (e.g. seeded by other code) is accepted for
    # any endpoints, which matches the previous behaviour.
    if (cached_host is not None
            and _resolved_host_cache.get('key', endpoints) == endpoints
            and time.time() - _resolved_host_cache.get('ts', 0) < _RESOLVE_TTL):
        return cached_host

    if _is_reachable(primary):
        selected = primary
        logger.info(f"[OllamaHost] GCP 主機可用,使用 Primary: {primary}")
    else:
        selected = fallback
        logger.warning(f"[OllamaHost] GCP 主機無法連線,自動切換 Fallback: {fallback}")

    _resolved_host_cache['host'] = selected
    _resolved_host_cache['key'] = endpoints
    # Stamp after the probe so the TTL counts from when the result was known.
    _resolved_host_cache['ts'] = time.time()
    return selected
|
||||
|
||||
|
||||
@dataclass
|
||||
class OllamaResponse:
|
||||
@@ -41,8 +88,9 @@ class OllamaService:
|
||||
_connection_cache = {'status': None, 'timestamp': 0}
|
||||
_CACHE_TTL = 60 # 快取 60 秒
|
||||
|
||||
def __init__(self, host: str = OLLAMA_HOST, model: str = DEFAULT_MODEL):
|
||||
self.host = host
|
||||
def __init__(self, host: str = None, model: str = DEFAULT_MODEL):
|
||||
# V-New: 若未指定 host,使用 resolve_ollama_host() 自動選擇 GCP 或 111
|
||||
self.host = host or resolve_ollama_host()
|
||||
self.model = model
|
||||
self.available_models = []
|
||||
|
||||
@@ -534,7 +582,9 @@ class OllamaService:
|
||||
避免 self.host 若指向公開 ollama.wooo.work 時回 401。
|
||||
可透過 host 參數 override。
|
||||
"""
|
||||
target_host = (host or os.getenv("EMBEDDING_HOST", "http://192.168.0.111:11434")).rstrip("/")
|
||||
# V-New: Embedding 也遵循 GCP 優先、111 備援邏輯
|
||||
# EMBEDDING_HOST 若有明確設定則優先使用;否則透過 resolve_ollama_host 自動決定
|
||||
target_host = (host or os.getenv("EMBEDDING_HOST") or resolve_ollama_host()).rstrip("/")
|
||||
request_timeout = timeout or EMBED_TIMEOUT
|
||||
try:
|
||||
payload = {"model": model, "input": text}
|
||||
|
||||
Reference in New Issue
Block a user