fix: route telegram vision through ollama first
All checks were successful
CD Pipeline / deploy (push) Successful in 1m4s

This commit is contained in:
OoO
2026-05-18 14:07:49 +08:00
parent b361ca7723
commit c021945047
10 changed files with 283 additions and 48 deletions

View File

@@ -315,6 +315,9 @@ RAG_EMBED_NORMALIZE=true
PPT_VISION_ENABLED=false
PPT_VISION_MODEL=minicpm-v:latest
PPT_VISION_TIMEOUT=60
PPT_AUTO_GENERATION_ENABLED=true
PPT_AUTO_REPORT_TYPES=all
PPT_AUTO_DEFAULT_CATEGORY=美妝保養
DEEPSEEK_DIRECT_ENABLED=false
DEEPSEEK_API_KEY=
DEEPSEEK_BASE_URL=https://api.deepseek.com/v1
@@ -344,6 +347,9 @@ OLLAMA_EMBED_TIMEOUT=45
OPENCLAW_QA_OLLAMA_FIRST=true
OPENCLAW_QA_OLLAMA_MODEL=qwen3:14b
OPENCLAW_QA_OLLAMA_TIMEOUT=60
OPENCLAW_IMAGE_VISION_MODEL=minicpm-v:latest
OPENCLAW_IMAGE_OLLAMA_TIMEOUT=45
OPENCLAW_IMAGE_GEMINI_MODEL=gemini-1.5-flash
NEMOTRON_OLLAMA_FIRST=true
NEMOTRON_OLLAMA_MODEL=qwen3:14b
NEMOTRON_OLLAMA_TIMEOUT=180

12
app.py
View File

@@ -29,7 +29,7 @@ except OSError as e:
# ================= 🔧 2. 核心模組導入 =================
try:
from flask import Flask, render_template, jsonify, request, send_file, redirect, url_for, send_from_directory, flash, session
from flask import Flask, render_template, jsonify, request, send_file, redirect, url_for, flash, session
from werkzeug.utils import secure_filename
from pyngrok import ngrok, conf
import schedule
@@ -146,16 +146,6 @@ def add_static_asset_version(endpoint, values):
values['v'] = SYSTEM_VERSION
@app.route('/favicon.ico')
def favicon():
    """Answer the browser's default favicon probe with the existing brand icon.

    Serving the SVG logo here keeps the probe from producing 404 noise
    across the whole site.
    """
    icon_dir = os.path.join(STATIC_DIR, 'images')
    # 604800 seconds = 7 days of client-side caching.
    send_options = {'mimetype': 'image/svg+xml', 'max_age': 604800}
    return send_from_directory(icon_dir, 'logo_circle.svg', **send_options)
# ==========================================
# 🔒 Flask 安全配置
# ==========================================

View File

@@ -18,6 +18,7 @@
- PPT vision、PPT 文案 final fallback、MCP 離線 final fallback 等特殊 Ollama 路徑也不得只打單一 host;如需 `/api/generate`,一律透過 `OllamaService.generate()`。
- Code Review pipeline 也必須 Ollama-first:Hermes scan 與 OpenClaw assessment 都走 `OllamaService` 三主機 retry;Gemini telemetry 只能以 `code_review_openclaw_gemini` 出現,表示 Ollama/可選 Claude 備援都失敗後才啟用。
- OpenClaw Telegram Q&A 主路徑也不得綁單一 host;`_call_qwen3_qa()` 必須透過 `OllamaService` 跑 GCP-A → GCP-B → 111,並把實際落點寫入 `ai_calls.provider`。
- OpenClaw Telegram 圖片商品辨識也必須 Ollama-first:`_identify_product_name_with_ollama_vision()` 透過 `OllamaService` 嘗試 GCP-A → GCP-B → 111;Gemini 只允許以 `openclaw_bot_image_gemini` caller 作為失敗後備援。
## 一、四 AI Agent 路由架構

View File

@@ -56,6 +56,7 @@
- PPT vision、PPT 文案 final fallback 與 MCP 離線 final fallback 已改走 `OllamaService.generate()`;`OllamaService.generate()` 支援 `options`、`keep_alive` 與 vision `images`,特殊 `/api/generate` 路徑同樣取得三主機 retry。
- OpenClaw QA / daily Hermes template / NemoTron qwen3 的 flag 文件與測試已對齊 Ollama-first 預設 ON;顯式 `false` 才是 Gemini/NIM legacy 緊急退路。OpenClaw QA 已移除單一 `OPENCLAW_QA_OLLAMA_HOST` 主機覆寫,`_call_qwen3_qa()` 改走 `OllamaService` 的 GCP-A → GCP-B → 111 retry 並回寫實際 provider。
- Code Review pipeline 已對齊 Ollama-first:`_hermes_scan()` 與 `_openclaw_assess()` 都先走 `OllamaService` 的 GCP-A → GCP-B → 111 retry;Gemini 僅在 Ollama(與可選 Claude)失敗後以 `code_review_openclaw_gemini` caller 記錄備援,不再以 `code_review_openclaw` 直接 Gemini-first。
- Telegram 圖片商品辨識已對齊 Ollama-first:`routes/openclaw_bot_routes.py` 會先用 `OPENCLAW_IMAGE_VISION_MODEL` 透過 `OllamaService` retry GCP-A → GCP-B → 111;Gemini 只以 `openclaw_bot_image_gemini` caller 作為圖片辨識備援。
- `.env.example` 已補齊 Python runtime 實際讀取的環境變數,`tests/test_phase3f_cleanup_contracts.py::test_env_example_documents_runtime_os_env_keys` 會掃 `app.py/config.py/scheduler.py/run_scheduler.py/routes/services/utils` 的 `os.getenv()` / `os.environ.get()`;只允許 `PYTEST_CURRENT_TEST`、`MOMO_ALLOW_INSECURE_CONFIG_FOR_TESTS` 兩個測試內部 key 不進範例。
- `docker-compose*.yml` 使用的 `${VAR}` 也已納入 `.env.example` 契約,包含 MCP compose 的 `TAVILY_API_KEY`、`EXA_API_KEY`、`MCP_POSTGRES_PASSWORD`、`FIRECRAWL_AUTH_KEY`,以及 image tag / Grafana / pgAdmin / Metabase / Grist 變數;`test_env_example_documents_docker_compose_variables` 會守住。
- Market Intel `seed_writer_cli_status` route 已補 API 層回歸:即使 `execute=true` 且環境有 `MARKET_INTEL_SEED_WRITE_APPROVAL`,API 仍不得回吐 token / `approval_token_hint` / 固定 token 文案,且不得 ready 或寫入;`tests/test_market_intel_skeleton.py::test_seed_writer_cli_status_route_never_leaks_approval_token` 會守住。

View File

@@ -4,7 +4,7 @@
OpenClaw Telegram 群組智能助理 v5
─────────────────────────────────────────
核心功能:
• 群組自然對話,Gemini Flash 主引擎2~5s
• 群組自然對話,Ollama-first 三主機級聯,Gemini 僅備援
• Inline Keyboard 15 個功能入口
• 全商品查詢帶出商品ID
• AI 分析強制比對內部DB + 外部MCP情報
@@ -91,7 +91,7 @@ except ImportError:
# V-New: 引入 Ollama 探測機制
try:
from services.ollama_service import OllamaService
from services.ollama_service import OllamaService, get_host_label, get_provider_tag
_OLLAMA_AVAILABLE = True
except ImportError:
_OLLAMA_AVAILABLE = False
@@ -103,6 +103,11 @@ except ImportError:
# Gemini REST settings. The key comes from the environment and is an empty
# string when GEMINI_API_KEY is unset.
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY', '')
GEMINI_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta/models'
GEMINI_MODEL = 'gemini-2.0-flash'
# Ollama vision model used for image product recognition. Lookup order:
# OPENCLAW_IMAGE_VISION_MODEL, then PPT_VISION_MODEL, then 'minicpm-v:latest'.
IMAGE_VISION_OLLAMA_MODEL = os.getenv(
    'OPENCLAW_IMAGE_VISION_MODEL',
    os.getenv('PPT_VISION_MODEL', 'minicpm-v:latest'),
)
# Gemini model used for the image-recognition fallback.
# NOTE(review): the default here differs from GEMINI_MODEL above — confirm the
# 1.5 model is still served by the API, otherwise the fallback fails by default.
IMAGE_VISION_GEMINI_MODEL = os.getenv('OPENCLAW_IMAGE_GEMINI_MODEL', 'gemini-1.5-flash')
# Cache TTL in hours, clamped to at least 1. int() raises ValueError at import
# time if OPENCLAW_PPT_CACHE_TTL_HOURS is set to a non-integer value.
PPT_CACHE_TTL_HOURS = max(1, int(os.getenv('OPENCLAW_PPT_CACHE_TTL_HOURS', '24')))
# Fixed UTC+8 offset.
TAIPEI_TZ = timezone(timedelta(hours=8))
@@ -8678,6 +8683,123 @@ def _handle_event_ignore_callback(data: str, cq: dict, chat_id, message_id) -> N
sys_log.info(f"[EA HITL] event_ignore event_id={event_id} by={user_label_raw}")
def _clean_vision_product_name(raw: str) -> str:
"""把 vision 模型回應收斂成可直接丟給比價查詢的商品名稱。"""
text = (raw or '').strip()
if not text:
return ''
text = re.sub(r"^```(?:text)?\s*", "", text, flags=re.IGNORECASE).strip()
text = re.sub(r"\s*```$", "", text).strip()
first_line = next((line.strip() for line in text.splitlines() if line.strip()), '')
first_line = re.sub(r"^(商品名稱|品名|辨識結果|結果)\s*[:]\s*", "", first_line).strip()
first_line = first_line.strip("`*_ - ")
if not first_line:
return ''
refusal_patterns = ('無法辨識', '看不清', '無法確認', '不確定', 'unknown', 'not sure')
lowered = first_line.lower()
if any(pattern in lowered for pattern in refusal_patterns):
return ''
return first_line[:60]
def _identify_product_name_with_ollama_vision(img_b64: str, request_id: str) -> str:
    """Primary image-recognition path for photo price comparison: Ollama vision.

    Sends the image to ``OllamaService.generate()`` using
    ``IMAGE_VISION_OLLAMA_MODEL``. Any retry across Ollama hosts is delegated
    to ``OllamaService`` and is not visible in this function. The call is
    recorded through ``log_ai_call`` under the ``openclaw_bot_image`` caller;
    on every failure path the telemetry context is marked as falling back to
    ``openclaw_bot_image_gemini``.

    Args:
        img_b64: Base64-encoded image data.
        request_id: Identifier attached to the telemetry record.

    Returns:
        The cleaned product name, or ``''`` when Ollama is unavailable, the
        call fails, or the reply is unusable. This function never raises for
        a bad timeout setting, so the caller can always try its fallback.
    """
    if not _OLLAMA_AVAILABLE:
        return ''
    prompt = (
        "這是一張商品圖片。請辨識商品名稱,包含品牌、型號、規格。"
        "只回商品名稱,不要解釋,不要 markdown不超過 30 字;"
        "如果是多個商品,只取最顯眼的一個。必須使用繁體中文。"
    )
    # Parse the timeout defensively: a blank or non-numeric value used to raise
    # ValueError from here, which skipped the Gemini fallback in the caller.
    default_timeout = 45
    raw_timeout = os.getenv('OPENCLAW_IMAGE_OLLAMA_TIMEOUT', '') or ''
    try:
        timeout = max(1, int(raw_timeout)) if raw_timeout.strip() else default_timeout
    except ValueError:
        sys_log.warning(
            f"[VisionSearch] invalid OPENCLAW_IMAGE_OLLAMA_TIMEOUT={raw_timeout!r}; "
            f"using {default_timeout}s"
        )
        timeout = default_timeout
    with log_ai_call(
        caller='openclaw_bot_image',
        provider='gcp_ollama',
        model=IMAGE_VISION_OLLAMA_MODEL,
        request_id=request_id,
        meta={'route': 'ollama_first', 'task': 'image_product_recognition'},
    ) as ctx:
        try:
            resp = OllamaService(model=IMAGE_VISION_OLLAMA_MODEL).generate(
                prompt=prompt,
                model=IMAGE_VISION_OLLAMA_MODEL,
                temperature=0.1,
                timeout=timeout,
                # A product name is short; cap the generated tokens.
                options={'num_predict': 64},
                images=[img_b64],
            )
            # Record which host actually answered, even on failure.
            ctx.set_provider(get_provider_tag(resp.host or ''))
            ctx.set_tokens(input=resp.input_tokens, output=resp.output_tokens)
            ctx.add_meta('host', resp.host)
            ctx.add_meta('host_label', get_host_label(resp.host or ''))
            if not resp.success:
                ctx.set_error(resp.error or 'ollama vision failed')
                ctx.fallback_to_caller('openclaw_bot_image_gemini')
                return ''
            product_name = _clean_vision_product_name(resp.content)
            if not product_name:
                # The model answered, but with nothing usable as a query.
                ctx.set_error('empty_or_unusable_vision_response')
                ctx.fallback_to_caller('openclaw_bot_image_gemini')
            return product_name
        except Exception as exc:
            # Best-effort path: any failure must let the caller fall back.
            ctx.set_error(f"{type(exc).__name__}: {exc}")
            ctx.fallback_to_caller('openclaw_bot_image_gemini')
            sys_log.warning(f"[VisionSearch] Ollama vision failed: {exc}")
            return ''
def _identify_product_name_with_gemini_vision(img_b64: str, request_id: str) -> str:
    """Cloud fallback for photo price comparison, meant to run after Ollama vision fails.

    Posts the image to the Gemini ``generateContent`` endpoint for
    ``IMAGE_VISION_GEMINI_MODEL`` and records the call through ``log_ai_call``
    under the ``openclaw_bot_image_gemini`` caller.

    The API key is sent in the ``x-goog-api-key`` header rather than the URL
    query string. ``requests`` exception messages include the request URL, and
    those messages are written to telemetry and logs below, so a key in the
    URL would be leaked on every HTTP or connection error.

    Args:
        img_b64: Base64-encoded image data, sent with MIME type ``image/jpeg``.
        request_id: Identifier attached to the telemetry record.

    Returns:
        The cleaned product name, or ``''`` when no API key is configured, the
        request fails, or the reply is empty or unusable.
    """
    if not GEMINI_API_KEY:
        return ''
    vision_payload = {
        'contents': [{
            'parts': [
                {'text': (
                    '這是一張商品圖片。請辨識商品名稱(品牌、型號、規格),'
                    '輸出格式:只回商品名稱,不超過 30 字,繁體中文。'
                    '如果是多個商品,只取最顯眼的一個。'
                )},
                {'inline_data': {'mime_type': 'image/jpeg', 'data': img_b64}},
            ],
        }],
    }
    with log_ai_call(
        caller='openclaw_bot_image_gemini',
        provider='gemini',
        model=IMAGE_VISION_GEMINI_MODEL,
        request_id=request_id,
        meta={'fallback_from': 'openclaw_bot_image', 'task': 'image_product_recognition'},
    ) as ctx:
        try:
            vis_r = requests.post(
                f"{GEMINI_BASE_URL}/{IMAGE_VISION_GEMINI_MODEL}:generateContent",
                # Header auth keeps the secret out of URLs and error messages.
                headers={'x-goog-api-key': GEMINI_API_KEY},
                json=vision_payload, timeout=20,
            )
            vis_r.raise_for_status()
            body = vis_r.json()
            usage = body.get('usageMetadata', {}) or {}
            ctx.set_tokens(
                input=usage.get('promptTokenCount', 0),
                output=usage.get('candidatesTokenCount', 0),
            )
            # An empty `candidates` or `parts` list is a normal "no answer";
            # treat it as an unusable reply instead of raising IndexError.
            candidates = body.get('candidates') or [{}]
            parts = (candidates[0].get('content') or {}).get('parts') or [{}]
            raw = parts[0].get('text', '')
            product_name = _clean_vision_product_name(raw)
            if not product_name:
                ctx.set_error('empty_or_unusable_vision_response')
            return product_name
        except Exception as exc:
            # Best-effort fallback: report the failure and return no result.
            ctx.set_error(f"{type(exc).__name__}: {exc}")
            sys_log.warning(f"[VisionSearch] Gemini vision fallback failed: {exc}")
            return ''
# ── Webhook ───────────────────────────────────────────────────
@openclaw_bot_bp.route('/bot/telegram/webhook', methods=['POST'])
def telegram_webhook():
@@ -8943,7 +9065,7 @@ def telegram_webhook():
# 已通過授權的 private chat
question = text_raw
# ── 圖片訊息:Gemini Vision 商品辨識 ─────────────────────
# ── 圖片訊息:Ollama-first Vision 商品辨識 ─────────────────
if not question and msg.get('photo'):
send_typing(chat_id)
try:
@@ -8962,43 +9084,22 @@ def telegram_webhook():
img_data = requests.get(img_url, timeout=15).content
import base64 as _b64
img_b64 = _b64.b64encode(img_data).decode()
# Gemini Vision 辨識商品名稱
vision_payload = {
'contents': [{
'parts': [
{'text': (
'這是一張商品圖片。請辨識商品名稱(品牌、型號、規格),'
'輸出格式:只回商品名稱,不超過 30 字,繁體中文。'
'如果是多個商品,只取最顯眼的一個。'
)},
{'inline_data': {'mime_type': 'image/jpeg', 'data': img_b64}}
]
}]
}
vis_r = requests.post(
f"{GEMINI_BASE_URL}/gemini-1.5-flash:generateContent?key={GEMINI_API_KEY}",
json=vision_payload, timeout=20
)
if vis_r.ok:
product_name = (
vis_r.json()
.get('candidates', [{}])[0]
.get('content', {})
.get('parts', [{}])[0]
.get('text', '').strip()
)
if product_name:
send_message(chat_id,
f"🔍 辨識到商品:*{product_name}*\n正在搜尋 momo 比價...",
msg_id, parse_mode='Markdown')
# 直接執行比價
handle_cmd('competitor', product_name, chat_id, msg_id)
else:
send_message(chat_id, "⚠️ 無法辨識圖片中的商品,請嘗試更清晰的圖片", msg_id)
req_id = f"img-{chat_id or 0}-{msg_id or 0}"
product_name = _identify_product_name_with_ollama_vision(img_b64, req_id)
if not product_name:
product_name = _identify_product_name_with_gemini_vision(img_b64, req_id)
if product_name:
send_message(chat_id,
f"🔍 辨識到商品:*{product_name}*\n正在搜尋 momo 比價...",
msg_id, parse_mode='Markdown')
# 直接執行比價
handle_cmd('competitor', product_name, chat_id, msg_id)
else:
send_message(
chat_id,
"⚠️ 圖片辨識失敗,請直接輸入商品名稱搜尋",
"⚠️ 無法辨識圖片中的商品,請直接輸入商品名稱搜尋",
msg_id,
[_row(('🔍 文字搜尋', 'await:search_compare'))],
)

View File

@@ -27,6 +27,18 @@ sys_log = SystemLogger("SystemPublicRoutes").get_logger()
# Fixed UTC+8 offset.
TAIPEI_TZ = timezone(timedelta(hours=8))
# Location of the system log file under BASE_DIR.
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
# Public URL read from the environment; the default is a user-facing
# placeholder string shown while PUBLIC_URL is not set.
public_url = os.getenv('PUBLIC_URL', '服務啟動中...')
# Root of the static web assets under BASE_DIR.
STATIC_DIR = os.path.join(BASE_DIR, 'web/static')
@system_public_bp.route('/favicon.ico')
def favicon():
    """Answer the browser's default favicon probe with the existing brand icon.

    Serving the SVG logo here keeps the probe from producing 404 noise
    across the whole site.
    """
    icon_dir = os.path.join(STATIC_DIR, 'images')
    # 604800 seconds = 7 days of client-side caching.
    send_options = {'mimetype': 'image/svg+xml', 'max_age': 604800}
    return send_from_directory(icon_dir, 'logo_circle.svg', **send_options)
@system_public_bp.route('/health')

View File

@@ -80,6 +80,8 @@ CALLER_REGISTRY: frozenset = frozenset({
'tg_bot_copy_v2', # second copy entrance
'openclaw_bot_main', # OpenClaw Bot 主鏈 Ollama
'openclaw_bot_gemini', # Bot Gemini fallback
'openclaw_bot_image', # Bot 圖片商品辨識 Ollama-first
'openclaw_bot_image_gemini', # Bot 圖片商品辨識 Gemini fallback
'openclaw_bot_nim', # Bot NIM fallback
# 其他

View File

@@ -41,6 +41,7 @@ def test_registry_contains_core_callers():
'sales_copy', 'trend_match', 'trend_qa', 'product_insights',
# Bot
'openclaw_bot_main', 'openclaw_bot_gemini', 'openclaw_bot_nim',
'openclaw_bot_image', 'openclaw_bot_image_gemini',
}
missing = must_have - CALLER_REGISTRY

View File

@@ -149,6 +149,124 @@ def test_is_authorized_private_mode_switch(monkeypatch):
assert bot._is_authorized("private", 777, 42) is False
def test_photo_message_uses_ollama_vision_before_gemini(monkeypatch):
    """A photo message resolved by Ollama vision must never reach Gemini."""
    from routes import openclaw_bot_routes as bot

    sent = []
    handled = []

    class FakeResponse:
        """Minimal stand-in for the response object returned by ``requests.get``."""

        def __init__(self, json_data=None, content=b"fake-image"):
            self.content = content
            self._json_data = json_data or {}

        def json(self):
            return self._json_data

    def fake_get(url, **_kwargs):
        # Anything other than the getFile lookup is the image download itself.
        if "getFile" not in url:
            return FakeResponse(content=b"fake-image")
        return FakeResponse({"result": {"file_path": "photos/product.jpg"}})

    def always_authorized(_chat_type, _chat_id, _uid):
        return True

    def skip_typing(_chat_id):
        return None

    def record_message(*args, **kwargs):
        sent.append((args, kwargs))

    def record_command(cmd, arg, chat_id, reply_to):
        handled.append((cmd, arg, chat_id, reply_to))

    def ollama_recognises(img_b64, request_id):
        return "理膚寶水 B5 修復霜"

    def gemini_must_not_run(img_b64, request_id):
        raise AssertionError("Gemini should not run first")

    monkeypatch.setattr(bot.requests, "get", fake_get)
    bot_stubs = {
        "_is_authorized": always_authorized,
        "send_typing": skip_typing,
        "send_message": record_message,
        "handle_cmd": record_command,
        "_identify_product_name_with_ollama_vision": ollama_recognises,
        "_identify_product_name_with_gemini_vision": gemini_must_not_run,
    }
    for attr_name, stub in bot_stubs.items():
        monkeypatch.setattr(bot, attr_name, stub)

    app = _build_request_app()
    message = {
        "message_id": 80,
        "chat": {"id": 777, "type": "private"},
        "from": {"id": 777777},
        "photo": [{"file_id": "small"}, {"file_id": "large"}],
    }
    payload = {"update_id": 10030, "message": message}

    with app.test_request_context("/bot/telegram/webhook", method="POST", json=payload):
        bot.telegram_webhook()

    assert handled == [("competitor", "理膚寶水 B5 修復霜", 777, 80)]
    first_args, _first_kwargs = sent[0]
    assert "理膚寶水 B5 修復霜" in first_args[1]
def test_photo_message_falls_back_to_gemini_when_ollama_empty(monkeypatch):
    """An empty Ollama vision result must hand the image over to Gemini."""
    from routes import openclaw_bot_routes as bot

    handled = []
    calls = []

    class FakeResponse:
        """Minimal stand-in for the response object returned by ``requests.get``."""

        def __init__(self, json_data=None, content=b"fake-image"):
            self.content = content
            self._json_data = json_data or {}

        def json(self):
            return self._json_data

    def fake_get(url, **_kwargs):
        # Anything other than the getFile lookup is the image download itself.
        if "getFile" not in url:
            return FakeResponse(content=b"fake-image")
        return FakeResponse({"result": {"file_path": "photos/product.jpg"}})

    def fake_ollama(_img_b64, _request_id):
        calls.append("ollama")
        return ""

    def fake_gemini(_img_b64, _request_id):
        calls.append("gemini")
        return "飛利浦 Sonicare"

    def always_authorized(_chat_type, _chat_id, _uid):
        return True

    def skip_typing(_chat_id):
        return None

    def drop_message(*args, **kwargs):
        return None

    def record_command(cmd, arg, chat_id, reply_to):
        handled.append((cmd, arg, chat_id, reply_to))

    monkeypatch.setattr(bot.requests, "get", fake_get)
    bot_stubs = {
        "_is_authorized": always_authorized,
        "send_typing": skip_typing,
        "send_message": drop_message,
        "handle_cmd": record_command,
        "_identify_product_name_with_ollama_vision": fake_ollama,
        "_identify_product_name_with_gemini_vision": fake_gemini,
    }
    for attr_name, stub in bot_stubs.items():
        monkeypatch.setattr(bot, attr_name, stub)

    app = _build_request_app()
    message = {
        "message_id": 81,
        "chat": {"id": 777, "type": "private"},
        "from": {"id": 777777},
        "photo": [{"file_id": "small"}, {"file_id": "large"}],
    }
    payload = {"update_id": 10031, "message": message}

    with app.test_request_context("/bot/telegram/webhook", method="POST", json=payload):
        bot.telegram_webhook()

    # Order matters: Ollama is consulted first, Gemini only afterwards.
    assert calls == ["ollama", "gemini"]
    assert handled == [("competitor", "飛利浦 Sonicare", 777, 81)]
def test_obs_heal_audit_uses_current_callback_user(monkeypatch):
from types import SimpleNamespace
from routes import openclaw_bot_routes as bot

View File

@@ -142,6 +142,9 @@ def test_env_example_documents_runtime_and_ai_automation_variables():
"OPENCLAW_DAILY_HERMES_TEMPLATE",
"OPENCLAW_OLLAMA_MODEL",
"OPENCLAW_PPT_CACHE_TTL_HOURS",
"OPENCLAW_IMAGE_GEMINI_MODEL",
"OPENCLAW_IMAGE_OLLAMA_TIMEOUT",
"OPENCLAW_IMAGE_VISION_MODEL",
"OPENCLAW_QA_OLLAMA_FIRST",
"OPENCLAW_QA_OLLAMA_MODEL",
"OPENCLAW_QA_OLLAMA_TIMEOUT",