diff --git a/routes/openclaw_bot_routes.py b/routes/openclaw_bot_routes.py
index 5b84612..12e84a5 100644
--- a/routes/openclaw_bot_routes.py
+++ b/routes/openclaw_bot_routes.py
@@ -24,6 +24,7 @@ import os
import json
import re
import threading
+import hashlib # Operation Ollama-First v5.0 P1: H6 PII fix — chat_id 進 meta 改 hash[:8]
from contextvars import ContextVar
from contextlib import contextmanager
import requests
@@ -48,6 +49,7 @@ from services.openclaw_bot.telegram_api import (
send_photo,
send_typing,
)
+from services.ai_call_logger import log_ai_call # Operation Ollama-First v5.0 P1
from services.openclaw_bot.menu_keyboards import (
_BACK,
_SUBMENUS,
@@ -85,7 +87,17 @@ try:
except ImportError:
_PCHOME_AVAILABLE = False
+# V-New: 引入 Ollama 探測機制
+try:
+ from services.ollama_service import resolve_ollama_host
+ _OLLAMA_AVAILABLE = True
+except ImportError:
+ _OLLAMA_AVAILABLE = False
+
# AI 引擎:Gemini Flash(主,2~5秒)→ NIM(備援,45~90秒)
+# LOCKED-GEMINI: PPT 簡報文案需長 context (5K+ rows + 多輪歷史) + 繁中商業敘事
+# Ollama qwen2.5-coder:7b 為 PPT 失敗時 L3 fallback(已在 _call_ollama 路徑)
+# ADR-028 鎖定場景 #7
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY', '')
GEMINI_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta/models'
GEMINI_MODEL = 'gemini-2.0-flash'
@@ -2469,6 +2481,29 @@ def _ppt_ai_analysis(prompt_data: str, report_type: str = '') -> str:
.get('content', {}).get('parts', [{}])[0]
.get('text', '').strip())
+ def _call_ollama(prompt: str, tokens: int) -> str:
+ if not _OLLAMA_AVAILABLE:
+ return ""
+ try:
+ host = resolve_ollama_host()
+ # 簡報分析使用 qwen2.5-coder:7b (已升級 GCP) 或 hermes3
+ model = os.getenv('OPENCLAW_OLLAMA_MODEL', 'qwen2.5-coder:7b')
+ r = requests.post(
+ f"{host}/api/generate",
+ json={
+ 'model': model,
+ 'prompt': prompt,
+ 'stream': False,
+ 'options': {'num_predict': tokens, 'temperature': 0.3}
+ },
+ timeout=90
+ )
+ r.raise_for_status()
+ return r.json().get('response', '').strip()
+ except Exception as e:
+ sys_log.warning(f"[PPT] Ollama error: {e}")
+ return ""
+
if not NVIDIA_API_KEY:
if GEMINI_API_KEY:
try:
@@ -2532,6 +2567,29 @@ def _ppt_ai_analysis(prompt_data: str, report_type: str = '') -> str:
return result_text
except Exception as e2:
sys_log.error(f"[PPT] Gemini fallback error: {e2}")
+
+ # ── Ollama (GCP/111) Final Fallback ───────────────────────
+ if _OLLAMA_AVAILABLE:
+ try:
+ sys_log.info("[PPT] Trying local/GCP Ollama as final fallback")
+ raw = _call_ollama(f"{sys_instruction}\n\n--- 資料 ---\n{prompt_data}", max_tokens)
+ result_text = _clean_ai_text(raw)
+ if result_text and len(result_text) > 100:
+ if _LEARNING_ENABLED:
+ import threading as _thr
+ _thr.Thread(
+ target=store_insight,
+ kwargs={
+ 'insight_type': report_type or 'analysis',
+ 'content': result_text,
+ 'period': datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d'),
+ },
+ daemon=True
+ ).start()
+ return result_text
+ except Exception as e3:
+ sys_log.error(f"[PPT] Ollama final fallback error: {e3}")
+
return '(AI 分析暫時無法使用,請稍後重試)'
@@ -6768,18 +6826,29 @@ def openclaw_answer(question: str, chat_id: int = None):
+ "請用繁體中文直接回答,不要開場白,300字以內。"
)
- resp = ollama_service.generate(question, system_prompt=sys_prompt, timeout=180)
- if resp.success and resp.content:
- if chat_id:
- openclaw_session.append_turn(chat_id, question, resp.content)
- if _LEARNING_ENABLED:
- import threading as _thr
- _thr.Thread(target=store_conversation,
- args=(0, 0, question, resp.content, "ollama", []),
- daemon=True).start()
- return resp.content, None
- else:
- sys_log.warning(f"[Ollama] 生成失敗: {resp.error},fallback 到 Gemini")
+ # Phase 1 v5.0: 包 ai_call_logger 追蹤 Bot Q&A 主鏈 Ollama
+ _qa_req_id = f"qa-{chat_id or 0}-{int(_time_mod.time())}"
+ with log_ai_call(
+ caller='openclaw_bot_main',
+ provider='gcp_ollama',
+ model=getattr(ollama_service, 'model', 'llama3.1:8b'),
+ request_id=_qa_req_id,
+ meta={'chat_id_hash': hashlib.sha1(str(chat_id or 0).encode()).hexdigest()[:8], 'has_db_ctx': bool(db_ctx)},
+ ) as _ctx:
+ resp = ollama_service.generate(question, system_prompt=sys_prompt, timeout=180)
+ if resp.success and resp.content:
+ if chat_id:
+ openclaw_session.append_turn(chat_id, question, resp.content)
+ if _LEARNING_ENABLED:
+ import threading as _thr
+ _thr.Thread(target=store_conversation,
+ args=(0, 0, question, resp.content, "ollama", []),
+ daemon=True).start()
+ return resp.content, None
+ else:
+ sys_log.warning(f"[Ollama] 生成失敗: {resp.error},fallback 到 Gemini")
+ _ctx.set_error(f"ollama generate failed: {resp.error}")
+ _ctx.fallback_to_caller('openclaw_bot_gemini')
except Exception as e:
sys_log.warning(f"[Ollama] 例外發生: {e},fallback 到 Gemini")
@@ -6817,15 +6886,30 @@ def openclaw_answer(question: str, chat_id: int = None):
"tool_config": {"function_calling_config": {"mode": "AUTO"}},
"generationConfig": {"temperature": 0.3, "maxOutputTokens": 600},
}
- r1 = requests.post(
- f"{GEMINI_BASE_URL}/{GEMINI_MODEL}:generateContent?key={GEMINI_API_KEY}",
- headers={"Content-Type": "application/json"},
- json=payload, timeout=30,
- )
- r1.raise_for_status()
- resp1 = r1.json()
- candidate = resp1.get("candidates", [{}])[0]
- parts = candidate.get("content", {}).get("parts", [])
+ # Phase 1 v5.0: 包 ai_call_logger 追蹤 Gemini FC 第一輪
+ _qa_gemini_req_id = f"qa-{chat_id or 0}-{int(_time_mod.time())}"
+ with log_ai_call(
+ caller='openclaw_bot_gemini',
+ provider='gemini',
+ model=GEMINI_MODEL,
+ request_id=_qa_gemini_req_id,
+ meta={'chat_id_hash': hashlib.sha1(str(chat_id or 0).encode()).hexdigest()[:8], 'turn': 1},
+ ) as _ctx_g1:
+ r1 = requests.post(
+ f"{GEMINI_BASE_URL}/{GEMINI_MODEL}:generateContent?key={GEMINI_API_KEY}",
+ headers={"Content-Type": "application/json"},
+ json=payload, timeout=30,
+ )
+ r1.raise_for_status()
+ resp1 = r1.json()
+ # Gemini REST: usageMetadata.{promptTokenCount, candidatesTokenCount}
+ _um = resp1.get("usageMetadata", {}) or {}
+ _ctx_g1.set_tokens(
+ input=_um.get("promptTokenCount", 0),
+ output=_um.get("candidatesTokenCount", 0),
+ )
+ candidate = resp1.get("candidates", [{}])[0]
+ parts = candidate.get("content", {}).get("parts", [])
# 如果沒有 function call,直接回傳文字
tool_calls = [p["functionCall"] for p in parts if "functionCall" in p]
@@ -6870,15 +6954,28 @@ def openclaw_answer(question: str, chat_id: int = None):
"maxOutputTokens": 600,
},
}
- r2 = requests.post(
- f"{GEMINI_BASE_URL}/{GEMINI_MODEL}:generateContent?key={GEMINI_API_KEY}",
- headers={"Content-Type": "application/json"},
- json=payload2, timeout=35,
- )
- r2.raise_for_status()
- resp2 = r2.json()
- parts2 = resp2.get("candidates", [{}])[0].get("content", {}).get("parts", [])
- final = "".join(p.get("text", "") for p in parts2 if "text" in p).strip()
+ # Phase 1 v5.0: 包 ai_call_logger 追蹤 Gemini FC 第二輪
+ with log_ai_call(
+ caller='openclaw_bot_gemini',
+ provider='gemini',
+ model=GEMINI_MODEL,
+ request_id=_qa_gemini_req_id,
+ meta={'chat_id_hash': hashlib.sha1(str(chat_id or 0).encode()).hexdigest()[:8], 'turn': 2, 'tools_used': used_sources},
+ ) as _ctx_g2:
+ r2 = requests.post(
+ f"{GEMINI_BASE_URL}/{GEMINI_MODEL}:generateContent?key={GEMINI_API_KEY}",
+ headers={"Content-Type": "application/json"},
+ json=payload2, timeout=35,
+ )
+ r2.raise_for_status()
+ resp2 = r2.json()
+ _um2 = resp2.get("usageMetadata", {}) or {}
+ _ctx_g2.set_tokens(
+ input=_um2.get("promptTokenCount", 0),
+ output=_um2.get("candidatesTokenCount", 0),
+ )
+ parts2 = resp2.get("candidates", [{}])[0].get("content", {}).get("parts", [])
+ final = "".join(p.get("text", "") for p in parts2 if "text" in p).strip()
if final:
sys_log.info(f"[FC] done tools={used_sources} reply={len(final)}chars")
@@ -6931,19 +7028,34 @@ def openclaw_answer(question: str, chat_id: int = None):
+ f"\n用戶問:{question}\n"
"請用繁體中文直接回答,不要開場白,300字以內。"
)
- r = requests.post(
- f"{NVIDIA_BASE_URL}/chat/completions",
- headers={"Authorization": f"Bearer {NVIDIA_API_KEY}",
- "Content-Type": "application/json"},
- json={
- "model": CHAT_MODEL,
- "messages": [{"role": "user", "content": nim_prompt}],
- "max_tokens": 500, "temperature": 0.3,
- },
- timeout=20,
- )
- r.raise_for_status()
- return r.json()["choices"][0]["message"]["content"].strip(), None
+ # Phase 1 v5.0: 包 ai_call_logger 追蹤 Bot Q&A NIM 三層 fallback
+ _qa_nim_req_id = f"qa-{chat_id or 0}-{int(_time_mod.time())}"
+ with log_ai_call(
+ caller='openclaw_bot_nim',
+ provider='nim',
+ model=CHAT_MODEL,
+ request_id=_qa_nim_req_id,
+ meta={'chat_id_hash': hashlib.sha1(str(chat_id or 0).encode()).hexdigest()[:8], 'has_db_ctx': bool(db_ctx)},
+ ) as _ctx_nim:
+ r = requests.post(
+ f"{NVIDIA_BASE_URL}/chat/completions",
+ headers={"Authorization": f"Bearer {NVIDIA_API_KEY}",
+ "Content-Type": "application/json"},
+ json={
+ "model": CHAT_MODEL,
+ "messages": [{"role": "user", "content": nim_prompt}],
+ "max_tokens": 500, "temperature": 0.3,
+ },
+ timeout=20,
+ )
+ r.raise_for_status()
+ _body = r.json()
+ _u = _body.get("usage", {}) or {}
+ _ctx_nim.set_tokens(
+ input=_u.get("prompt_tokens", 0),
+ output=_u.get("completion_tokens", 0),
+ )
+ return _body["choices"][0]["message"]["content"].strip(), None
except Exception as e:
sys_log.error(f"[FC] NIM fallback error: {e}")
diff --git a/scripts/compare_daily_report_versions.py b/scripts/compare_daily_report_versions.py
new file mode 100644
index 0000000..61025d6
--- /dev/null
+++ b/scripts/compare_daily_report_versions.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+"""
+scripts/compare_daily_report_versions.py
+─────────────────────────────────────────────────────────────────
+Operation Ollama-First v5.0 / Phase 3 / A8 — 日報雙版本盲測腳本
+
+用途:
+ 跑同一天的「舊版 Gemini 全文」vs「新版 Hermes 模板」
+ 輸出兩份檔案到 reports/,供統帥盲測選擇預設模式。
+
+使用:
+ python3 scripts/compare_daily_report_versions.py --date 2026-05-03
+ python3 scripts/compare_daily_report_versions.py # 預設昨日
+
+紀律:
+ - 不寫入 ai_insights(避免污染 production 資料)
+ - 不發 Telegram(純 dry-run)
+ - 兩版本皆獨立執行,互不干擾
+ - 失敗時報錯但不刪舊檔
+"""
+
+import os
+import sys
+import argparse
+from datetime import date, timedelta, datetime
+from pathlib import Path
+
+# 確保可 import 本專案 services
+ROOT = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(ROOT))
+
+
+def _ensure_reports_dir() -> Path:
+ reports_dir = ROOT / 'reports'
+ reports_dir.mkdir(exist_ok=True)
+ return reports_dir
+
+
+def _run_legacy(target_date: date) -> str:
+ """跑舊版(_legacy_full_gemini_daily_report)"""
+ os.environ['OPENCLAW_DAILY_HERMES_TEMPLATE'] = 'false'
+ # 強制 reload module(避免 cache)
+ import importlib
+ import services.openclaw_strategist_service as svc
+ importlib.reload(svc)
+ print(f"[legacy] 開始跑 Gemini 全文版日報 target_date={target_date}")
+ result = svc._legacy_full_gemini_daily_report()
+ return result.get('content', '') or result.get('report_content', '') or str(result)
+
+
+def _run_hermes_template(target_date: date) -> str:
+ """跑新版(_generate_daily_report_hermes_template)"""
+ os.environ['OPENCLAW_DAILY_HERMES_TEMPLATE'] = 'true'
+ import importlib
+ import services.openclaw_strategist_service as svc
+ importlib.reload(svc)
+ print(f"[hermes] 開始跑 Hermes 模板版日報 target_date={target_date}")
+ result = svc._generate_daily_report_hermes_template()
+ return result.get('content', '') or result.get('report_content', '') or str(result)
+
+
+def main():
+ parser = argparse.ArgumentParser(description='OpenClaw 日報雙版本盲測')
+ parser.add_argument('--date', help='YYYY-MM-DD(預設昨日)')
+ args = parser.parse_args()
+
+ if args.date:
+ target_date = datetime.strptime(args.date, '%Y-%m-%d').date()
+ else:
+ target_date = date.today() - timedelta(days=1)
+
+ reports_dir = _ensure_reports_dir()
+ date_tag = target_date.strftime('%Y%m%d')
+
+ legacy_file = reports_dir / f'daily_report_legacy_{date_tag}.md'
+ hermes_file = reports_dir / f'daily_report_v2_{date_tag}.md'
+
+ # 跑舊版
+ try:
+ legacy_content = _run_legacy(target_date)
+ legacy_file.write_text(legacy_content, encoding='utf-8')
+ print(f"✅ legacy 版輸出:{legacy_file}")
+ except Exception as e:
+ print(f"❌ legacy 版失敗:{e}", file=sys.stderr)
+
+ # 跑新版
+ try:
+ hermes_content = _run_hermes_template(target_date)
+ hermes_file.write_text(hermes_content, encoding='utf-8')
+ print(f"✅ hermes 模板版輸出:{hermes_file}")
+ except Exception as e:
+ print(f"❌ hermes 版失敗:{e}", file=sys.stderr)
+
+ print(f"\n盲測檢查(建議):")
+ print(f" diff <(head -50 {legacy_file}) <(head -50 {hermes_file})")
+ print(f" wc -w {legacy_file} {hermes_file}")
+ print(f" # 統帥盲測時可遮 caller 名稱避免偏見")
+
+
+if __name__ == '__main__':
+ main()
diff --git a/services/code_review_pipeline_service.py b/services/code_review_pipeline_service.py
index bd3c06b..a6e96be 100644
--- a/services/code_review_pipeline_service.py
+++ b/services/code_review_pipeline_service.py
@@ -29,7 +29,12 @@ from typing import Any, Dict, List, Optional
from database.manager import get_session
from sqlalchemy import text
-from services.hermes_analyst_service import HERMES_URL as _HERMES_URL, HERMES_MODEL as _HERMES_MODEL
+# ADR-027 Phase 2 N3:HERMES_MODEL 仍 import(純常數),HERMES_URL 改 lazy
+# 每次 _hermes_scan 才透過 get_hermes_url() 取最新解析(GCP 優先 / 111 備援),
+# 避免 import-time freeze 導致主機切換不生效。
+from services.hermes_analyst_service import HERMES_MODEL as _HERMES_MODEL
+from config import get_hermes_url
+from services.ai_call_logger import log_ai_call # Operation Ollama-First v5.0 P1
logger = logging.getLogger(__name__)
@@ -38,6 +43,9 @@ _current_pipeline: Dict[str, Any] = {}
_pipeline_lock = threading.Lock()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
+# LOCKED-GEMINI: Code Review 全 repo diff 可達 100K+ tokens,超過 Ollama 32K context
+# 未來可升 Claude Opus 4.7 (200K context, Arena code Elo 1548) — Phase 7 任務
+# ADR-028 鎖定場景 #5
REVIEW_MODEL = os.getenv("OPENCLAW_MODEL", "gemini-2.5-flash")
INTERNAL_TOKEN = os.getenv("INTERNAL_WEBHOOK_TOKEN", "")
AUTO_FIX_ENABLED = os.getenv("CODE_REVIEW_AUTO_FIX_ENABLED", "true").lower() == "true"
@@ -215,14 +223,36 @@ class CodeReviewPipeline:
只輸出 JSON 陣列,不含其他文字。無問題時輸出 []"""
- resp = _req.post(
- f"{_HERMES_URL}/api/generate",
- json={"model": _HERMES_MODEL, "prompt": prompt,
- "stream": False, "options": {"temperature": 0.1}},
- timeout=120,
+ # ADR-027 Phase 2 N3:lazy resolve Hermes 主機(GCP 優先 / 111 備援),
+ # 避開 import-time freeze。provider 標籤跟著解析結果動態決定。
+ hermes_url = get_hermes_url()
+ provider_tag = (
+ 'gcp_ollama' if ('34.21.145.224' in hermes_url or '34.143.170.20' in hermes_url)
+ else 'ollama_111' if '192.168.0.111' in hermes_url
+ else 'ollama_other'
)
- resp.raise_for_status()
- raw = resp.json().get("response", "").strip()
+ # Phase 1 v5.0: 包 ai_call_logger 追蹤 Code Review Hermes scan
+ with log_ai_call(
+ caller='code_review_hermes',
+ provider=provider_tag,
+ model=_HERMES_MODEL,
+ request_id=f"cr-{self.commit_sha[:8]}",
+ meta={'commit': self.commit_sha[:8], 'branch': self.branch,
+ 'files': len(files), 'host': hermes_url},
+ ) as _ctx:
+ resp = _req.post(
+ f"{hermes_url}/api/generate",
+ json={"model": _HERMES_MODEL, "prompt": prompt,
+ "stream": False, "options": {"temperature": 0.1}},
+ timeout=120,
+ )
+ resp.raise_for_status()
+ body = resp.json()
+ _ctx.set_tokens(
+ input=body.get("prompt_eval_count", 0),
+ output=body.get("eval_count", 0),
+ )
+ raw = body.get("response", "").strip()
match = re.search(r"\[.*\]", raw, re.DOTALL)
if not match:
@@ -271,36 +301,70 @@ class CodeReviewPipeline:
💡 架構優化方向(1條長期建議)
✅ 本次部署亮點"""
- # 優先 Gemini
+ # 優先 Gemini — Phase 1 v5.0 logger 追蹤
if GEMINI_API_KEY:
- try:
- import google.generativeai as genai
- genai.configure(api_key=GEMINI_API_KEY)
- model = genai.GenerativeModel(
- model_name=REVIEW_MODEL,
- generation_config=genai.types.GenerationConfig(
- temperature=0.3, max_output_tokens=1500,
- ),
- system_instruction=system,
- )
- resp = model.generate_content(user_prompt, request_options={"timeout": 90})
- return resp.text or ""
- except Exception as e:
- logger.warning("[CodeReview] OpenClaw Gemini 失敗,降級 ElephantAlpha: %s", e)
+ with log_ai_call(
+ caller='code_review_openclaw',
+ provider='gemini',
+ model=REVIEW_MODEL,
+ request_id=f"cr-{self.commit_sha[:8]}",
+ meta={'commit': self.commit_sha[:8], 'branch': self.branch},
+ ) as _ctx:
+ try:
+ import google.generativeai as genai
+ genai.configure(api_key=GEMINI_API_KEY)
+ model = genai.GenerativeModel(
+ model_name=REVIEW_MODEL,
+ generation_config=genai.types.GenerationConfig(
+ temperature=0.3, max_output_tokens=1500,
+ ),
+ system_instruction=system,
+ )
+ resp = model.generate_content(user_prompt, request_options={"timeout": 90})
+ try:
+ usage = getattr(resp, 'usage_metadata', None)
+ if usage is not None:
+ _ctx.set_tokens(
+ input=getattr(usage, 'prompt_token_count', 0) or 0,
+ output=getattr(usage, 'candidates_token_count', 0) or 0,
+ )
+ except Exception:
+ pass
+ return resp.text or ""
+ except Exception as e:
+ logger.warning("[CodeReview] OpenClaw Gemini 失敗,降級 ElephantAlpha: %s", e)
+ _ctx.set_error(f"{type(e).__name__}: {e}")
+ _ctx.fallback_to_caller('code_review_elephant')
# 降級:ElephantAlpha via OpenRouter(OPENROUTER_API_KEY 容器內一定有)
- try:
- from services.elephant_service import elephant_service
- resp = elephant_service.generate(
- prompt=user_prompt,
- system_prompt=system,
- temperature=0.3,
- timeout=90,
- )
- if resp.success:
- return resp.content or ""
- except Exception as e:
- logger.warning("[CodeReview] OpenClaw ElephantAlpha 降級也失敗: %s", e)
+ # Phase 1 v5.0 logger 追蹤
+ with log_ai_call(
+ caller='code_review_elephant',
+ provider='nim_via_elephant',
+ model='nvidia/llama-3.3-nemotron-super-49b-v1.5',
+ request_id=f"cr-{self.commit_sha[:8]}",
+ meta={'commit': self.commit_sha[:8], 'branch': self.branch},
+ ) as _ctx:
+ try:
+ from services.elephant_service import elephant_service
+ resp = elephant_service.generate(
+ prompt=user_prompt,
+ system_prompt=system,
+ temperature=0.3,
+ timeout=90,
+ )
+ if resp.success:
+ # ElephantResponse 已含 input_tokens/output_tokens
+ _ctx.set_tokens(
+ input=getattr(resp, 'input_tokens', 0) or 0,
+ output=getattr(resp, 'output_tokens', 0) or 0,
+ )
+ return resp.content or ""
+ else:
+ _ctx.set_error(getattr(resp, 'error', 'elephant generate failed'))
+ except Exception as e:
+ logger.warning("[CodeReview] OpenClaw ElephantAlpha 降級也失敗: %s", e)
+ _ctx.set_error(f"{type(e).__name__}: {e}")
return ""
diff --git a/services/hermes_analyst_service.py b/services/hermes_analyst_service.py
index 7edbabe..76f5d7c 100644
--- a/services/hermes_analyst_service.py
+++ b/services/hermes_analyst_service.py
@@ -23,6 +23,8 @@ from typing import Optional
import requests
from sqlalchemy import text
from services.mcp_context_service import build_mcp_context
+from services.ollama_service import resolve_ollama_host, get_host_label
+from services.ai_call_logger import log_ai_call # Operation Ollama-First v5.0 P1
logger = logging.getLogger(__name__)
@@ -155,32 +157,48 @@ class HermesAnalystService:
"keep_alive": HERMES_KEEP_ALIVE, # ADR-012:避免冷啟動 timeout
"options": {"temperature": 0.1},
}
- try:
- resp = requests.post(
- f"{HERMES_URL}/api/generate",
- json=payload,
- timeout=HERMES_TIMEOUT, # 統一 config 集中讀取(ADR-008);keep_alive 確保熱駐留時實測 < 10s
- )
- resp.raise_for_status()
- raw = (resp.json().get("response", "") or "").strip()
- if raw.startswith("```"):
- raw = re.sub(r"^```(?:json)?\s*", "", raw, flags=re.MULTILINE)
- raw = re.sub(r"\s*```\s*$", "", raw.strip(), flags=re.MULTILINE).strip()
- data = json.loads(raw)
- return {
- "intent": data.get("intent", "unknown"),
- "confidence": float(data.get("confidence", 0.5)),
- "complexity_score": float(data.get("complexity_score", 0.5)),
- "requires_data_fetch": bool(data.get("requires_data_fetch", False)),
- "preliminary_answer": data.get("preliminary_answer", "") or "",
- "metadata": {"source": "hermes_llm"},
- }
- except Exception as e:
- logger.warning(
- f"[Hermes.intent] Ollama 連線失敗,降級規則引擎"
- f"(host={HERMES_URL} model={HERMES_MODEL} error={type(e).__name__}: {e})"
- )
- return None
+ target_host = resolve_ollama_host()
+ # Phase 1 v5.0: 包 ai_call_logger 追蹤 Hermes 意圖分類 token / fallback
+ with log_ai_call(
+ caller='hermes_intent',
+ provider='gcp_ollama',
+ model=HERMES_MODEL,
+ meta={'host_label': get_host_label(target_host)},
+ ) as _ctx:
+ try:
+ resp = requests.post(
+ f"{target_host}/api/generate",
+ json=payload,
+ timeout=HERMES_TIMEOUT, # 統一 config 集中讀取(ADR-008);keep_alive 確保熱駐留時實測 < 10s
+ )
+ resp.raise_for_status()
+ body = resp.json()
+ _ctx.set_tokens(
+ input=body.get("prompt_eval_count", 0),
+ output=body.get("eval_count", 0),
+ )
+ raw = (body.get("response", "") or "").strip()
+ if raw.startswith("```"):
+ raw = re.sub(r"^```(?:json)?\s*", "", raw, flags=re.MULTILINE)
+ raw = re.sub(r"\s*```\s*$", "", raw.strip(), flags=re.MULTILINE).strip()
+ data = json.loads(raw)
+ return {
+ "intent": data.get("intent", "unknown"),
+ "confidence": float(data.get("confidence", 0.5)),
+ "complexity_score": float(data.get("complexity_score", 0.5)),
+ "requires_data_fetch": bool(data.get("requires_data_fetch", False)),
+ "preliminary_answer": data.get("preliminary_answer", "") or "",
+ "metadata": {"source": "hermes_llm"},
+ }
+ except Exception as e:
+ # NOTE: 修補 commit 00591c5 殘留的孤立 f-string(原 logger.warning 被誤刪)
+ logger.warning(
+ f"[Hermes.intent] Ollama 連線失敗,降級規則引擎"
+ f"(model={HERMES_MODEL} error={type(e).__name__}: {e})"
+ )
+ _ctx.set_error(f"{type(e).__name__}: {e}")
+ _ctx.fallback_to_caller('hermes_rule_engine')
+ return None
def _rule_based_intent(self, message: str) -> dict:
"""Ollama 掛掉時的規則引擎 fallback — 永遠返回結構化結果。"""
@@ -416,23 +434,46 @@ class HermesAnalystService:
"options": {"temperature": 0.1},
}
- resp = requests.post(
- f"{HERMES_URL}/api/generate",
- json=payload,
- timeout=HERMES_TIMEOUT,
- )
- resp.raise_for_status()
+ target_host = resolve_ollama_host()
+ # Phase 1 v5.0: 包 ai_call_logger 追蹤 Hermes 競價分析 token / fallback
+ with log_ai_call(
+ caller='hermes_analyst',
+ provider='gcp_ollama',
+ model=HERMES_MODEL,
+ meta={
+ 'host_label': get_host_label(target_host),
+ 'item_count': len(items),
+ 'top_n': TOP_N,
+ },
+ ) as _ctx:
+ try:
+ resp = requests.post(
+ f"{target_host}/api/generate",
+ json=payload,
+ timeout=HERMES_TIMEOUT,
+ )
+ resp.raise_for_status()
+ except Exception as e:
+ _ctx.set_error(f"{type(e).__name__}: {e}")
+ raise
- data = resp.json()
- raw = data.get("response", "").strip()
- duration_sec = round(data.get("total_duration", 0) / 1e9, 1)
- eval_tokens = data.get("eval_count", "?") # Ollama 推理 token 數
- logger.info(
- f"[Hermes] 推理耗時 {duration_sec}s,"
- f"輸入 {len(items)} 筆,tokens={eval_tokens},回應長度 {len(raw)}"
- )
- # 儲存統計供 footprint 使用(掛在 instance 上供 run() 讀取)
- self._last_stats = {"duration_sec": duration_sec, "tokens": eval_tokens}
+ data = resp.json()
+ raw = data.get("response", "").strip()
+ duration_sec = round(data.get("total_duration", 0) / 1e9, 1)
+ eval_tokens_raw = data.get("eval_count", 0) # Ollama 推理 token 數
+ prompt_tokens_raw = data.get("prompt_eval_count", 0)
+ _ctx.set_tokens(input=prompt_tokens_raw, output=eval_tokens_raw)
+ logger.info(
+ f"[Hermes] 推理耗時 {duration_sec}s,"
+ f"輸入 {len(items)} 筆,tokens={eval_tokens_raw},回應長度 {len(raw)}"
+ )
+ # 儲存統計供 footprint 使用(掛在 instance 上供 run() 讀取)
+ self._last_stats = {
+ "duration_sec": duration_sec,
+ "tokens": eval_tokens_raw,
+ "host": target_host,
+ "host_label": get_host_label(target_host)
+ }
# P0-1 修復:剝除 Hermes 可能輸出的 markdown code fence
if raw.startswith("```"):
diff --git a/services/nemoton_dispatcher_service.py b/services/nemoton_dispatcher_service.py
index e1d88e0..3c01a91 100644
--- a/services/nemoton_dispatcher_service.py
+++ b/services/nemoton_dispatcher_service.py
@@ -27,6 +27,7 @@ import requests
from services.mcp_context_service import build_mcp_context
from config import HERMES_URL # ADR-008 集中化:禁止硬編碼 IP
+from services.ai_call_logger import log_ai_call # Operation Ollama-First v5.0 P1
logger = logging.getLogger(__name__)
@@ -107,6 +108,17 @@ NIM_TIMEOUT = 60 # 秒
NIM_DAILY_LIMIT = 80 # 留 20 個給 AWOOOI,100/天免費配額
_nim_call_count = {"date": "", "count": 0}
+# ── Operation Ollama-First v5.0 / Phase 3 / A9 ──────────────────
+# GCP Ollama qwen3:14b 灰度切換開關
+# - 預設 false → 行為與戰前完全相同(仍走 NIM)
+# - true → qwen3 主路徑,NIM 降為備援,最後仍兜底 Hermes 規則引擎(ADR-004)
+# 模型選擇:A2 web-research 紅綠燈報告 docs/phase0_research_report_20260503.md
+# 原戰役計畫 deepseek-r1:14b 的 Ollama tool_calls chat template 缺對應 jinja
+# (GitHub Issue #10935 未解),改採 qwen3:14b(Ollama 官方 + qwenlm 雙確認 tools 支援)。
+NEMOTRON_OLLAMA_FIRST = os.getenv("NEMOTRON_OLLAMA_FIRST", "false").lower() == "true"
+NEMOTRON_OLLAMA_MODEL = os.getenv("NEMOTRON_OLLAMA_MODEL", "qwen3:14b")
+NEMOTRON_OLLAMA_TIMEOUT = int(os.getenv("NEMOTRON_OLLAMA_TIMEOUT", "180")) # 秒
+
def _check_nim_quota() -> bool:
today = datetime.now().strftime("%Y-%m-%d")
@@ -320,6 +332,68 @@ ICON_AI = "🧠"
ICON_FOOTPRINT = "⚙️"
+# ── tool_calls 解析(NIM 與 qwen3 共用)──────────────────────────
+def _parse_tool_calls_struct(tool_calls: list) -> list:
+ """從 OpenAI 格式的 tool_calls 結構陣列抽出 [{tool, args}] 清單。
+ NIM 與 qwen3 (Ollama /api/chat) 兩邊回應對齊 OpenAI schema:
+ [{"function": {"name": ..., "arguments": }, ...}]
+ arguments 在 NIM 是 JSON 字串、在 Ollama 通常已是 dict;本 helper 兼容兩者。
+ """
+ results = []
+ for tc in tool_calls or []:
+ fn = tc.get("function", {}) if isinstance(tc, dict) else {}
+ if not fn:
+ continue
+ raw_args = fn.get("arguments", {})
+ if isinstance(raw_args, str):
+ try:
+ args = json.loads(raw_args) if raw_args.strip() else {}
+ except json.JSONDecodeError:
+ args = {}
+ elif isinstance(raw_args, dict):
+ args = raw_args
+ else:
+ args = {}
+ name = fn.get("name")
+ if name:
+ results.append({"tool": name, "args": args})
+ return results
+
+
+def _parse_content_fallback(raw_content: str) -> list:
+ """當模型沒回 tool_calls 結構、把工具呼叫塞進 content 時嘗試解析。
+ 既有 NIM llama-3.1-8b 偶有此行為(line 537-554 原邏輯);
+ qwen3 開 thinking_mode=False 後較少見,但保留同等容錯。
+ """
+ if not raw_content or not isinstance(raw_content, str):
+ return []
+ try:
+ parsed = json.loads(raw_content.strip())
+ except Exception as parse_err:
+ logger.error(f"[ToolCalls] content fallback JSON 解析失敗:{parse_err}")
+ return []
+
+ if not isinstance(parsed, list):
+ return []
+
+ results = []
+ for item in parsed:
+ if not isinstance(item, dict):
+ continue
+ name = item.get("name") or (item.get("function", {}) or {}).get("name")
+ args = item.get("parameters") or item.get("arguments") or {}
+ if isinstance(args, str):
+ try:
+ args = json.loads(args)
+ except json.JSONDecodeError:
+ args = {}
+ if name:
+ results.append({"tool": name, "args": args})
+ if results:
+ logger.info(f"[ToolCalls] content fallback 解析成功,取得 {len(results)} 個 tool_calls")
+ return results
+
+
def _build_footprint_json(hermes_stats: Optional[dict], nim_stats: Optional[dict]) -> dict:
"""
建立結構化運算足跡 (用於 DB model_footprint JSONB 欄位)
@@ -331,7 +405,8 @@ def _build_footprint_json(hermes_stats: Optional[dict], nim_stats: Optional[dict
if hermes_stats:
result["analyst"] = {
"model": "qwen2.5:7b-instruct",
- "host": HERMES_URL, # ADR-008:集中讀取,禁止硬編碼 IP
+ "host": hermes_stats.get("host", HERMES_URL),
+ "host_label": hermes_stats.get("host_label", "未知"),
"duration_sec": hermes_stats.get("duration_sec", 0),
"tokens": hermes_stats.get("tokens", 0),
"cost_usd": 0,
@@ -363,12 +438,13 @@ def _build_footprint_block(hermes_stats: Optional[dict], nim_stats: Optional[dic
if hermes_stats:
dur = hermes_stats.get("duration_sec", 0)
tok = hermes_stats.get("tokens", "?")
+ label = hermes_stats.get("host_label", "本地 188")
lines.append(
- f"• 🔍 分析: Qwen2.5 7B (本地 188) | "
+ f"• 🔍 分析: Qwen2.5 7B ({label}) | "
f"耗時: {dur:.1f}s | Tokens: {tok} | $0 成本"
)
else:
- lines.append("• 🔍 分析: Qwen2.5 7B (本地 188) | $0 成本")
+ lines.append("• 🔍 分析: Qwen2.5 7B (未知主機) | $0 成本")
if nim_stats:
tok = nim_stats.get("total_tokens", "?")
@@ -464,81 +540,208 @@ class NemotronDispatcher:
]
# P1-4 修復:NIM API 指數退避 retry(最多 3 次)
+ # Phase 1 v5.0: 包 ai_call_logger 追蹤 NIM 配額/tokens/錯誤
import time as _time
last_err = None
- for _attempt in range(3):
- try:
- resp = requests.post(
- f"{NIM_BASE_URL}/chat/completions",
- headers={
- "Authorization": f"Bearer {NIM_API_KEY}",
- "Content-Type": "application/json",
- },
- json={
- "model": NIM_MODEL,
- "messages": messages,
- "tools": TOOLS,
- "tool_choice": "required",
- "max_tokens": 2048,
- },
- timeout=NIM_TIMEOUT,
- )
- resp.raise_for_status()
- break
- except (requests.Timeout, requests.HTTPError) as e:
- last_err = e
- # ADR-004: 429 不重試,立即拋出讓上層啟動 Hermes 規則引擎降級
- if isinstance(e, requests.HTTPError) and e.response is not None \
- and e.response.status_code == 429:
- logger.warning("[NIM] HTTP 429 速率限制,跳出 retry 迴圈")
- raise
- if _attempt < 2:
- _time.sleep(2 ** _attempt)
- logger.warning(f"[NIM] retry {_attempt + 1}/2 after {e}")
- else:
- raise last_err
+ with log_ai_call(
+ caller='nemotron_dispatch',
+ provider='nim',
+ model=NIM_MODEL,
+ meta={'threat_count': len(threats), 'quota_used': _nim_quota_used()},
+ ) as _ctx:
+ for _attempt in range(3):
+ try:
+ resp = requests.post(
+ f"{NIM_BASE_URL}/chat/completions",
+ headers={
+ "Authorization": f"Bearer {NIM_API_KEY}",
+ "Content-Type": "application/json",
+ },
+ json={
+ "model": NIM_MODEL,
+ "messages": messages,
+ "tools": TOOLS,
+ "tool_choice": "required",
+ "max_tokens": 2048,
+ },
+ timeout=NIM_TIMEOUT,
+ )
+ resp.raise_for_status()
+ break
+ except (requests.Timeout, requests.HTTPError) as e:
+ last_err = e
+ # ADR-004: 429 不重試,立即拋出讓上層啟動 Hermes 規則引擎降級
+ if isinstance(e, requests.HTTPError) and e.response is not None \
+ and e.response.status_code == 429:
+ logger.warning("[NIM] HTTP 429 速率限制,跳出 retry 迴圈")
+ _ctx.set_error(f"NIM 429 rate-limited")
+ _ctx.fallback_to_caller('hermes_rule_engine')
+ raise
+ if _attempt < 2:
+ _time.sleep(2 ** _attempt)
+ logger.warning(f"[NIM] retry {_attempt + 1}/2 after {e}")
+ else:
+ raise last_err
- body = resp.json()
- usage = body.get("usage", {})
- nim_stats = {
- "total_tokens": usage.get("total_tokens", 0),
- "quota_used": _nim_quota_used(),
- }
+ body = resp.json()
+ usage = body.get("usage", {})
+ # 記錄 token / 成本到 ai_calls 表
+ _ctx.set_tokens(
+ input=usage.get("prompt_tokens", 0),
+ output=usage.get("completion_tokens", 0),
+ )
+ nim_stats = {
+ "total_tokens": usage.get("total_tokens", 0),
+ "quota_used": _nim_quota_used(),
+ }
choices = body.get("choices", [])
- tool_calls = choices[0].get("message", {}).get("tool_calls", []) if choices else []
+ message = choices[0].get("message", {}) if choices else {}
+ tool_calls = message.get("tool_calls", []) or []
- results = []
- for tc in tool_calls:
- fn = tc.get("function", {})
- try:
- args = json.loads(fn.get("arguments", "{}"))
- except json.JSONDecodeError:
- args = {}
- results.append({"tool": fn.get("name"), "args": args})
+ # 共用結構解析(NIM / qwen3 兩邊統一走同一條)
+ results = _parse_tool_calls_struct(tool_calls)
if not results:
# llama-3.1-8b-instruct 有時把 tool call 寫進 content 而非 tool_calls 結構
- raw_content = choices[0].get("message", {}).get("content", "") if choices else ""
+ raw_content = message.get("content", "") or ""
logger.warning(f"[NIM] 0 tool_calls,嘗試從 content 解析:{raw_content[:120]}")
- try:
- parsed = json.loads(raw_content.strip())
- if isinstance(parsed, list):
- for item in parsed:
- name = item.get("name") or item.get("function", {}).get("name")
- args = item.get("parameters") or item.get("arguments") or {}
- if isinstance(args, str):
- args = json.loads(args)
- if name:
- results.append({"tool": name, "args": args})
- if results:
- logger.info(f"[NIM] content fallback 解析成功,取得 {len(results)} 個 tool_calls")
- except Exception as parse_err:
- logger.error(f"[NIM] content fallback 解析失敗:{parse_err}")
+ results = _parse_content_fallback(raw_content)
logger.info(f"[NIM] 收到 {len(results)} 個 tool_calls | tokens={nim_stats['total_tokens']}")
return results, nim_stats
+ # ──────────────────────────────────────────────
+ # GCP Ollama qwen3:14b Tool Calling(Operation Ollama-First v5.0 / Phase 3)
+ # ──────────────────────────────────────────────
+ def _call_qwen3_dispatch(self, threats: list) -> tuple:
+ """
+ 將 Hermes 威脅清單交給 GCP Ollama qwen3:14b,取得 tool_calls 決策。
+
+ Why qwen3:14b(A2 web-research 結論,docs/phase0_research_report_20260503.md):
+ - Ollama registry 官方頁 + qwenlm.github.io 雙確認 tools capability 可用
+ - 預設可關閉 thinking mode(避免 deepseek-r1 的 30s thinking 延遲)
+ - 14B 體積 9.3GB,與 deepseek-r1:14b 同級
+ - 與 NIM 一致採 OpenAI 兼容 chat completion + tools schema
+
+ Returns:
+ (list of {"tool": str, "args": dict}, dict ollama_stats)
+ ollama_stats: {"total_tokens": int, "host": str, "model": str}
+ """
+ from services.ollama_service import resolve_ollama_host, mark_unhealthy
+ host = resolve_ollama_host().rstrip("/")
+
+ threat_summary = json.dumps(
+ [
+ {
+ "sku": t.sku,
+ "name": t.name,
+ "momo_price": t.momo_price,
+ "pchome_price": t.pchome_price,
+ "gap_pct": t.gap_pct,
+ "sales_delta": t.sales_7d_delta_pct,
+ "risk": t.risk,
+ "action": t.recommended_action,
+ "confidence": t.confidence,
+ }
+ for t in threats
+ ],
+ ensure_ascii=False,
+ )
+
+ # 注入 MCP 市場上下文(與 NIM 路徑一致)
+ mcp_ctx = build_mcp_context()
+
+ # System prompt 與 NIM 完全一致(避免兩套維護)
+ system_prompt = (
+ "你是台灣電商競價情報的行動派發器。"
+ f"當前市場背景 (MCP):\n{mcp_ctx}\n\n"
+ "根據 Hermes 分析師提供的威脅清單,決定對每支商品呼叫哪個工具。\n"
+ "路由鐵律(依序判斷,命中即停):\n"
+ "1. gap_pct < 5% 且 sales_delta < -30% → 非價格異常,呼叫 flag_for_human_review,"
+ "concern 說明『價差接近 0 但銷量大幅下滑,疑似缺貨/下架/平台流量異常,請人工走查前台』。\n"
+ "2. gap_pct ≥ 5% 且 risk=HIGH → trigger_price_alert(填入 momo_price, comp_price)。\n"
+ "3. 我方價格低於競品且銷量正成長 → add_to_recommendation。\n"
+ "4. confidence < 0.6 或其他複雜情況 → flag_for_human_review。\n"
+ "每支商品只呼叫一個工具。\n"
+ "【語言鐵律 — 台灣標準正體中文(繁體)】所有文字欄位必須遵守:\n"
+ " 1. 嚴禁簡體字、嚴禁異體字(例:不可用「亊」,必須用「事」)\n"
+ " 2. 嚴禁短語重複(語意坍塌)、嚴禁無意義字元組合\n"
+ "若無法產出合理的繁體中文說明,直接輸出「請人工評估議價空間」。"
+ )
+
+ payload = {
+ "model": NEMOTRON_OLLAMA_MODEL,
+ "messages": [
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": f"請處理以下 {len(threats)} 筆威脅清單:\n{threat_summary}"},
+ ],
+ "tools": TOOLS, # 重用既有 NIM tools schema
+ "stream": False,
+ "options": {
+ "temperature": 0.2,
+ "num_predict": 2048,
+ },
+ }
+
+ with log_ai_call(
+ caller='nemotron_dispatch',
+ provider='gcp_ollama',
+ model=NEMOTRON_OLLAMA_MODEL,
+ request_id=f"nem-{int(time.time())}",
+ meta={
+ 'flag': 'NEMOTRON_OLLAMA_FIRST',
+ 'threats_count': len(threats),
+ 'host': host,
+ },
+ ) as ctx:
+ try:
+ resp = requests.post(
+ f"{host}/api/chat",
+ json=payload,
+ timeout=NEMOTRON_OLLAMA_TIMEOUT,
+ )
+ resp.raise_for_status()
+ body = resp.json()
+ except Exception as e:
+ # 連線/HTTP 失敗 → 標記主機 unhealthy + log 錯誤後 re-raise,由 dispatch 走 NIM fallback
+ ctx.set_error(f"qwen3 call failed: {type(e).__name__}: {e}")
+ ctx.fallback_to_caller('nim')
+ mark_unhealthy(host)
+ raise
+
+ ctx.set_tokens(
+ input=body.get('prompt_eval_count', 0),
+ output=body.get('eval_count', 0),
+ )
+
+ msg = body.get('message', {}) if isinstance(body, dict) else {}
+ tool_calls = msg.get('tool_calls', []) or []
+
+ # 走共用 tool_calls 結構解析(與 NIM 同一條 helper)
+ results = _parse_tool_calls_struct(tool_calls)
+
+ if not results:
+ # qwen3 沒回 tool_calls → 走既有 content fallback 解析
+ raw_content = msg.get('content', '') or ''
+ logger.warning(
+ f"[Dispatcher][qwen3] 0 tool_calls,嘗試從 content 解析:{raw_content[:120]}"
+ )
+ results = _parse_content_fallback(raw_content)
+
+ ollama_stats = {
+ "total_tokens": (body.get('prompt_eval_count', 0) or 0)
+ + (body.get('eval_count', 0) or 0),
+ "host": host,
+ "model": NEMOTRON_OLLAMA_MODEL,
+ }
+
+ logger.info(
+ f"[Dispatcher][qwen3] 收到 {len(results)} 個 tool_calls | "
+ f"tokens={ollama_stats['total_tokens']} host={host}"
+ )
+ return results, ollama_stats
+
# ──────────────────────────────────────────────
# ADR-004:Hermes 規則引擎降級路由
# ──────────────────────────────────────────────
@@ -1190,6 +1393,51 @@ class NemotronDispatcher:
"nim_stats": {},
}
+ # ── Operation Ollama-First v5.0 / Phase 3 / A9:qwen3 主路徑(feature flag 灰度)──
+ # 預設 NEMOTRON_OLLAMA_FIRST=false 時不進入此分支,行為與戰前完全相同。
+ # 若 qwen3 成功取得 tool_calls,沿用既有 TOOL_MAP 執行邏輯(共用 footprint/threat 注入)。
+ # 若 qwen3 失敗或 0 tool_calls → 不直接降到 Hermes 規則,先嘗試 NIM 備援,再走 ADR-004。
+ qwen3_used = False
+ qwen3_stats: Optional[dict] = None
+ qwen3_tool_calls: Optional[list] = None
+ if NEMOTRON_OLLAMA_FIRST:
+ try:
+ qwen3_tool_calls, qwen3_stats = self._call_qwen3_dispatch(nim_candidates)
+ if qwen3_tool_calls:
+ qwen3_used = True
+ logger.info(
+ f"[Dispatcher][qwen3] 主路徑成功 tool_calls={len(qwen3_tool_calls)} "
+ f"tokens={qwen3_stats.get('total_tokens', 0)}"
+ )
+ else:
+ logger.warning("[Dispatcher][qwen3] 0 tool_calls,fallback 至 NIM")
+ except Exception as e:
+ logger.warning(f"[Dispatcher][qwen3] 呼叫失敗 fallback NIM: {e}")
+ # log_ai_call 已在 _call_qwen3_dispatch 內標記 status=error + fallback_to=nim
+ qwen3_tool_calls = None
+ qwen3_stats = None
+
+ # qwen3 主路徑成功 → 直接進入工具執行區塊(跳過 NIM)
+ if qwen3_used:
+ tool_calls = qwen3_tool_calls
+ # 與既有 NIM 路徑一致的 stats 結構(footprint 顯示用)
+ nim_stats = {
+ "total_tokens": qwen3_stats.get("total_tokens", 0),
+ "quota_used": _nim_quota_used(), # 配額未動用
+ "provider": "gcp_ollama",
+ "model": qwen3_stats.get("model", NEMOTRON_OLLAMA_MODEL),
+ }
+ return self._execute_tool_calls(
+ tool_calls=tool_calls,
+ threats=threats,
+ hermes_stats=hermes_stats,
+ nim_stats=nim_stats,
+ pre_dispatched=dispatched,
+ pre_skipped=skipped,
+ pre_errors=errors,
+ )
+
+ # ── 進入 NIM 路徑(flag=false 預設主路徑;flag=true 則為 qwen3 失敗備援)──
if not NIM_API_KEY:
logger.warning("[Dispatcher][ADR-004] NVIDIA_API_KEY 未設定,啟動 Hermes 規則引擎降級")
fb = self._hermes_rule_fallback(nim_candidates, hermes_stats)
@@ -1249,11 +1497,38 @@ class NemotronDispatcher:
"nim_stats": fb["nim_stats"],
}
- # 建立運算足跡(Telegram 顯示文字 + DB 結構化 JSON,共用同一份)
+ return self._execute_tool_calls(
+ tool_calls=tool_calls,
+ threats=threats,
+ hermes_stats=hermes_stats,
+ nim_stats=nim_stats,
+ pre_dispatched=dispatched,
+ pre_skipped=skipped,
+ pre_errors=errors,
+ )
+
+ # ──────────────────────────────────────────────
+ # tool_calls 執行區塊(NIM 與 qwen3 共用)
+ # ──────────────────────────────────────────────
+ def _execute_tool_calls(
+ self,
+ tool_calls: list,
+ threats: list,
+ hermes_stats: Optional[dict],
+ nim_stats: dict,
+ pre_dispatched: int = 0,
+ pre_skipped: int = 0,
+ pre_errors: Optional[list] = None,
+ ) -> dict:
+ """執行 LLM 回傳的 tool_calls 清單,注入 Python 獨裁的客觀數字 + 金額影響。
+ 被 NIM 路徑與 qwen3 路徑共用,避免雙路雙維護。
+ """
+ errors = list(pre_errors or [])
+ dispatched = pre_dispatched
+
footprint_text = _build_footprint_block(hermes_stats, nim_stats)
footprint_data = _build_footprint_json(hermes_stats, nim_stats)
- # 建立 SKU → threat 的查詢字典(供 add_to_recommendation 寫入快照)
threat_map = {t.sku: t for t in threats}
TOOL_MAP = {
@@ -1266,20 +1541,15 @@ class NemotronDispatcher:
for tc in tool_calls:
tool_name = tc.get("tool")
- args = tc.get("args", {})
+ args = dict(tc.get("args", {}) or {})
handler = TOOL_MAP.get(tool_name)
if not handler:
errors.append(f"未知工具: {tool_name}")
continue
- # 注入通用參數:Telegram 文字 + DB JSON 足跡
args["footprint"] = footprint_text
- # [2026-04-18 台北] Bug-1 防線一 保險:所有客觀數字強制由 Python 從 threat_map 注入,
- # 覆蓋 LLM 可能回吐的幻覺數字(例如 $0)。Layer A Hermes 根治是主防線,
- # 此處為二道屏障(萬一 ground_items 有漏網,或未來走 bypass) — Claude Opus 4.7
- # [2026-05-02 台北] B' 軌:金額影響量化亦走 Python 獨裁注入 — Claude Opus 4.7
t = threat_map.get(args.get("sku"))
if tool_name == "trigger_price_alert" and t:
args["momo_price"] = getattr(t, "momo_price", None)
@@ -1302,7 +1572,6 @@ class NemotronDispatcher:
args["threat"] = t
elif tool_name == "route_to_km":
args["threat"] = t
- # mark_for_relearn 無需注入客觀數字(僅寫 DB)
try:
handler(**args)
@@ -1311,11 +1580,13 @@ class NemotronDispatcher:
errors.append(f"{tool_name}({args.get('sku', '?')}): {e}")
logger.error(f"[Dispatcher] 工具執行失敗 [{tool_name}]: {e}")
- skipped = len(threats) - dispatched
+ skipped = max(0, len(threats) - dispatched)
+ # nim_stats 在 qwen3 路徑下會帶 provider='gcp_ollama',log 出處可區辨
+ provider = nim_stats.get("provider", "nim") if isinstance(nim_stats, dict) else "nim"
logger.info(
- f"[Dispatcher] 完成 forced_review={len(forced_review)} "
+ f"[Dispatcher] 完成 provider={provider} "
f"dispatched={dispatched} skipped={skipped} "
- f"errors={len(errors)} nim_tokens={nim_stats.get('total_tokens', 0)}"
+ f"errors={len(errors)} tokens={nim_stats.get('total_tokens', 0)}"
)
return {
"dispatched": dispatched,
diff --git a/services/openclaw_strategist_service.py b/services/openclaw_strategist_service.py
index cc1adc9..def17d5 100644
--- a/services/openclaw_strategist_service.py
+++ b/services/openclaw_strategist_service.py
@@ -9,7 +9,7 @@ OpenClaw 戰略分析師(Gemini 2.5 Flash)
提供:
generate_weekly_strategy_report() — 週報(每週一 06:00)
- generate_meta_analysis_report() — AI 系統效能自我審視(每 6 小時)
+ generate_meta_analysis_report() — AI 系統效能自我審視(每日 12:00, Phase 4 降頻)
分析維度:
1. 業績趨勢(MoM / WoW)
@@ -24,6 +24,7 @@ OpenClaw 戰略分析師(Gemini 2.5 Flash)
import json
import logging
import os
+import uuid
import requests
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
@@ -31,15 +32,110 @@ from typing import Any, Dict, List, Optional
from database.manager import get_session
from sqlalchemy import bindparam, text
+from services.ai_call_logger import log_ai_call # Operation Ollama-First v5.0 P1
+
logger = logging.getLogger(__name__)
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
+# LOCKED-GEMINI: OpenClaw 週/月/年報需長 context (>30K tokens) + 繁中商業文體品質
+# Ollama 上限 32K context、繁中敘事力差距 >15%(ADR-028 鎖定場景 #2/#3/#4)
+# 日報 (A8) 與 Q&A (A7) 已切 Hermes 模板/Qwen3,但敘事報告維持 Gemini
STRATEGY_MODEL = os.getenv("OPENCLAW_MODEL", "gemini-2.5-flash")
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY", "")
NVIDIA_NIM_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
NVIDIA_FALLBACK_MODEL = "meta/llama-3.3-70b-instruct"
TAIPEI_TZ_OFFSET = 8 # UTC+8
+# ──────────────────────────────────────────────────────────────────────────────
+# Operation Ollama-First v5.0 — Phase 3 feature flag (預設 OFF;統帥手動開灰度)
+# - OPENCLAW_QA_OLLAMA_FIRST: false=維持戰前 Gemini-first 行為,true=走 Ollama 主、Gemini fallback
+# - OPENCLAW_QA_OLLAMA_MODEL: GCP Ollama 上的模型 tag(A2 推薦 qwen3:14b,9.3GB)
+# - OPENCLAW_QA_OLLAMA_HOST: 允許獨立指定 QA 用主機;未設則 fallback 到通用 OLLAMA_HOST_PRIMARY
+# - OPENCLAW_QA_OLLAMA_TIMEOUT: 單次 Ollama 呼叫超時(秒),低品質判定後仍會升級 Gemini
+# 任何 deploy 不開 flag → 行為與戰前完全相同(regression-safe)。
+# ──────────────────────────────────────────────────────────────────────────────
+
+
+def _qa_ollama_first_enabled() -> bool:
+ """每次呼叫即時讀環境變數,允許 runtime toggle 灰度。"""
+ return os.getenv('OPENCLAW_QA_OLLAMA_FIRST', 'false').strip().lower() in ('true', '1', 'yes', 'on')
+
+
+OPENCLAW_QA_OLLAMA_MODEL = os.getenv('OPENCLAW_QA_OLLAMA_MODEL', 'qwen3:14b')
+OPENCLAW_QA_OLLAMA_HOST = os.getenv(
+ 'OPENCLAW_QA_OLLAMA_HOST',
+ os.getenv('OLLAMA_HOST_PRIMARY', 'http://34.143.170.20:11434'),
+)
+OPENCLAW_QA_OLLAMA_TIMEOUT = int(os.getenv('OPENCLAW_QA_OLLAMA_TIMEOUT', '60'))
+
+# 繁體中文強制 system prompt(A2 黃燈警訊「Qwen 繁中短板」緩解策略)
+QWEN3_TC_SYSTEM_PROMPT = """你是 momo 電商情報分析師「OpenClaw」。
+
+【硬性規則】
+1. 必須使用繁體中文(台灣用語),絕對禁止簡體字、大陸用語(例:寫「資料」不寫「数据」、寫「軟體」不寫「软件」)
+2. 商品/品牌名稱保留原文不翻譯(如 momo / PChome / 蝦皮 / 全家)
+3. 數字與貨幣保留原貌(NT$、%、件數、月份)
+4. 若資料不足無法回答,明確說「資料不足,建議改問 ___」而非編造
+
+【輸出風格】
+- 直接回答,不要「以下是分析」開場白
+- 結構化:用條列、表格、編號
+- 控制在 300 字以內,除非統帥明確要求展開
+"""
+
+# 簡體字偵測樣本:A2 報告警訊核心檢查項;列出商業中文情境最常被簡體污染的單字
+# 注意:避免列「於」「与」這類兩岸通用字;只取明確簡繁字差
+_SIMPLIFIED_HINT_CHARS = frozenset([
+ # 商業/科技高頻簡繁差字(每字繁體對照於註解)
+ '设', # 設
+ '当', # 當
+ '点', # 點
+ '问', # 問
+ '获', # 獲
+ '为', # 為
+ '么', # 麼
+ '资', # 資
+ '产', # 產
+ '业', # 業
+ '务', # 務
+ '说', # 說
+ '听', # 聽
+ '关', # 關
+ '词', # 詞
+ '这', # 這
+ '过', # 過
+ '让', # 讓
+ '应', # 應
+ '亿', # 億
+ '请', # 請
+ '观', # 觀
+ '战', # 戰
+ '体', # 體
+ '价', # 價
+ '场', # 場
+ '动', # 動
+ '号', # 號
+ '团', # 團
+ '类', # 類
+ '广', # 廣
+ '处', # 處
+ '执', # 執
+ '决', # 決
+ '约', # 約
+ '级', # 級
+ '态', # 態
+ '势', # 勢
+ '运', # 運
+ '营', # 營
+])
+
+# 拒答訊號:模型表達「無法回答」即視為低品質
+_REFUSAL_PATTERNS = (
+ '無法回答', '無法回覆', '我不知道', '我無從', "I cannot",
+ "I don't know", '抱歉,我無法', '抱歉,我無法',
+ '需要更多資訊', '需要更多信息', '無相關資料',
+)
+
__all__ = [
"generate_daily_report",
"generate_weekly_strategy_report",
@@ -60,13 +156,39 @@ def generate_strategy_response(query: str, context: Optional[Dict[str, Any]] = N
query: 使用者自然語言訊息(繁體中文)
context: 可選,{"intent": str, "user_id": int, ...}
Returns:
- 繁體中文回覆字串。GEMINI_API_KEY 未設或呼叫失敗時,回降級訊息
- (永遠回字串,不拋例外,由呼叫端顯示於 Telegram)。
+ 繁體中文回覆字串。所有 LLM 失敗時回降級訊息(永遠回字串、不拋例外)。
+
+ 路由(Operation Ollama-First v5.0 — Phase 3):
+ OPENCLAW_QA_OLLAMA_FIRST=false(預設)→ Gemini → NIM(戰前行為)
+ OPENCLAW_QA_OLLAMA_FIRST=true → GCP Ollama qwen3:14b → 品質檢測 → fallback Gemini → NIM
"""
q = (query or "").strip()
if not q:
return "請輸入您的問題,例如:本週業績趨勢、競品價差分析、產出週報 PPT。"
+ request_id = f"qa-{uuid.uuid4().hex[:8]}"
+
+ # ── 灰度路徑:Ollama 優先(flag=true 才走,預設 OFF)──
+ if _qa_ollama_first_enabled():
+ ollama_reply = _call_qwen3_qa(q, context, request_id)
+ if ollama_reply and not _is_low_quality_response(ollama_reply):
+ return ollama_reply
+ # 品質守門失敗或 Ollama 離線 → 降級 Gemini(fallback_to 已於 _call_qwen3_qa 內標記)
+ logger.info(
+ "[OpenClaw][QA] Ollama 主路徑未通過(無回應或低品質),fallback Gemini;request_id=%s",
+ request_id,
+ )
+
+ # ── 既有路徑:Gemini → NIM(A4 已接 ai_call_logger)──
+ return _legacy_gemini_first_qa(q, context, request_id=request_id)
+
+
+def _legacy_gemini_first_qa(
+ q: str,
+ context: Optional[Dict[str, Any]],
+ request_id: Optional[str] = None,
+) -> str:
+ """戰前 Gemini-first 路徑;抽出獨立函式以利 Phase 3 灰度與 regression test。"""
system_prompt = (
"你是 MOMO Pro 電商情報策略師「OpenClaw」。以繁體中文(台灣用語)回覆使用者。"
"嚴禁簡體字,嚴禁空洞套話。若使用者要求的資料需即時查詢,"
@@ -79,13 +201,13 @@ def generate_strategy_response(query: str, context: Optional[Dict[str, Any]] = N
text_reply = None
if GEMINI_API_KEY:
try:
- text_reply = _call_gemini(system_prompt, user_prompt, temperature=0.5)
+ text_reply = _call_gemini(system_prompt, user_prompt, temperature=0.5, caller="openclaw_qa")
except Exception as e:
logger.warning("[OpenClaw] Gemini 呼叫失敗,備援 NVIDIA NIM:%s", e)
if not text_reply and NVIDIA_API_KEY:
try:
- text_reply = _call_nvidia_nim(system_prompt, user_prompt)
+ text_reply = _call_nvidia_nim(system_prompt, user_prompt, caller="openclaw_qa")
except Exception as e:
logger.error("[OpenClaw] NVIDIA NIM 備援也失敗:%s", e)
@@ -97,6 +219,119 @@ def generate_strategy_response(query: str, context: Optional[Dict[str, Any]] = N
return text_reply
+# ──────────────────────────────────────────────────────────────────────────────
+# Phase 3 — Ollama Q&A 路徑 + 品質守門
+# ──────────────────────────────────────────────────────────────────────────────
+
+def _call_qwen3_qa(
+ question: str,
+ context: Optional[Dict[str, Any]],
+ request_id: str,
+) -> Optional[str]:
+ """呼叫 GCP Ollama 上的 qwen3:14b(或環境變數指定的模型)回答 Telegram QA。
+
+ 回傳 None 表示「呼叫失敗或回空」,呼叫端會自動 fallback Gemini。
+ 本函式不負責品質判定(呼叫端用 `_is_low_quality_response` 判,避免邏輯耦合)。
+ 全程包在 `log_ai_call` context manager;失敗時 set_error + fallback_to_caller。
+ """
+ user_prompt = (
+ f"使用者問題:{question}\n"
+ f"上下文:{json.dumps(context or {}, ensure_ascii=False)}"
+ )
+ url = f"{OPENCLAW_QA_OLLAMA_HOST.rstrip('/')}/api/generate"
+ payload = {
+ "model": OPENCLAW_QA_OLLAMA_MODEL,
+ "system": QWEN3_TC_SYSTEM_PROMPT,
+ "prompt": user_prompt,
+ "stream": False,
+ "options": {
+ "temperature": 0.5,
+ "num_predict": 1024,
+ },
+ }
+
+ with log_ai_call(
+ caller='openclaw_qa',
+ provider='gcp_ollama',
+ model=OPENCLAW_QA_OLLAMA_MODEL,
+ request_id=request_id,
+ meta={
+ 'flag': 'OPENCLAW_QA_OLLAMA_FIRST',
+ 'host': OPENCLAW_QA_OLLAMA_HOST,
+ 'temperature': 0.5,
+ },
+ ) as ctx:
+ try:
+ ctx.set_prompt_hash(user_prompt)
+ resp = requests.post(url, json=payload, timeout=OPENCLAW_QA_OLLAMA_TIMEOUT)
+ resp.raise_for_status()
+ body = resp.json() or {}
+ # Ollama /api/generate 回傳格式:{response, prompt_eval_count, eval_count, ...}
+ ctx.set_tokens(
+ input=body.get('prompt_eval_count', 0),
+ output=body.get('eval_count', 0),
+ )
+ text_reply = (body.get('response') or '').strip()
+ if not text_reply:
+ ctx.set_error('empty_response')
+ ctx.fallback_to_caller('openclaw_qa_gemini_fallback')
+ return None
+ return text_reply
+ except Exception as e:
+ logger.warning(
+ "[OpenClaw][QA] qwen3 呼叫失敗 request_id=%s host=%s: %s",
+ request_id, OPENCLAW_QA_OLLAMA_HOST, e,
+ )
+ ctx.set_error(f"{type(e).__name__}: {str(e)[:200]}")
+ ctx.fallback_to_caller('openclaw_qa_gemini_fallback')
+ return None
+
+
+# 低品質判定常數:避免 magic number 散落於規則裡
+_QA_MIN_LENGTH = 50 # 規則 1:長度下限
+_QA_SIMPLIFIED_THRESHOLD = 3 # 規則 2:簡體字數量門檻
+_QA_FLOWING_TEXT_LENGTH = 200 # 規則 4:「200+ 字無斷行」流水帳判定
+
+
+def _is_low_quality_response(text: Optional[str]) -> bool:
+ """判斷 Ollama 回應品質低,需升級 Gemini。
+
+ 觸發條件(任一即視為低品質):
+ 1. 空字串或長度 < _QA_MIN_LENGTH 字元
+ 2. 簡體字污染:>= _QA_SIMPLIFIED_THRESHOLD 個簡體 hint 字元(A2 黃燈警訊核心檢查)
+ 3. 拒答訊號:包含「無法回答」「我不知道」等模式
+ 4. 結構性差:> _QA_FLOWING_TEXT_LENGTH 字但完全沒有換行(流水帳)
+
+ Returns:
+ True → 低品質,呼叫端應 fallback Gemini
+ False → 可接受
+ """
+ if not text:
+ return True
+ stripped = text.strip()
+ if len(stripped) < _QA_MIN_LENGTH:
+ return True
+
+ # 規則 2:簡體字污染(A2 警訊:Qwen 繁中短板)
+ simplified_count = sum(1 for c in stripped if c in _SIMPLIFIED_HINT_CHARS)
+ if simplified_count >= _QA_SIMPLIFIED_THRESHOLD:
+ logger.info("[OpenClaw][QA] 低品質:偵測 %d 個簡體字 hint", simplified_count)
+ return True
+
+ # 規則 3:拒答訊號
+ for pattern in _REFUSAL_PATTERNS:
+ if pattern in stripped:
+ logger.info("[OpenClaw][QA] 低品質:偵測拒答模式 '%s'", pattern)
+ return True
+
+ # 規則 4:結構性 — 200+ 字無斷行 = 流水帳
+ if len(stripped) > _QA_FLOWING_TEXT_LENGTH and stripped.count('\n') < 1:
+ logger.info("[OpenClaw][QA] 低品質:%d 字無斷行(流水帳)", len(stripped))
+ return True
+
+ return False
+
+
# ═══════════════════════════════════════════════════════════════════════════════
# DB 數據讀取層
# ═══════════════════════════════════════════════════════════════════════════════
@@ -665,59 +900,110 @@ def _save_action_items(actions: List[str], source_insight_id: Optional[int]) ->
# Gemini 呼叫層
# ═══════════════════════════════════════════════════════════════════════════════
-def _call_gemini(system_prompt: str, user_prompt: str, temperature: float = 0.4) -> Optional[str]:
- """呼叫 Gemini,回傳文字;失敗回傳 None"""
+def _call_gemini(
+ system_prompt: str,
+ user_prompt: str,
+ temperature: float = 0.4,
+ caller: str = "openclaw_qa",
+) -> Optional[str]:
+ """呼叫 Gemini,回傳文字;失敗回傳 None。
+
+ Args:
+ caller: ai_calls.caller — 由外層 generate_*_report 傳入細分 caller
+ (openclaw_daily / openclaw_weekly / openclaw_monthly / openclaw_meta / openclaw_qa)
+ """
if not GEMINI_API_KEY:
logger.warning("[OpenClaw] GEMINI_API_KEY 未設定")
return None
- try:
- import google.generativeai as genai
- genai.configure(api_key=GEMINI_API_KEY)
- model = genai.GenerativeModel(
- model_name=STRATEGY_MODEL,
- generation_config=genai.types.GenerationConfig(
- temperature=temperature,
- max_output_tokens=4096,
- ),
- system_instruction=system_prompt,
- )
- response = model.generate_content(
- user_prompt,
- request_options={"timeout": 180},
- )
- return response.text or ""
- except Exception as e:
- logger.error("[OpenClaw] Gemini 呼叫失敗: %s", e)
- return None
+ with log_ai_call(
+ caller=caller,
+ provider='gemini',
+ model=STRATEGY_MODEL,
+ meta={'temperature': temperature},
+ ) as _ctx:
+ try:
+ import google.generativeai as genai
+ genai.configure(api_key=GEMINI_API_KEY)
+ model = genai.GenerativeModel(
+ model_name=STRATEGY_MODEL,
+ generation_config=genai.types.GenerationConfig(
+ temperature=temperature,
+ max_output_tokens=4096,
+ ),
+ system_instruction=system_prompt,
+ )
+ response = model.generate_content(
+ user_prompt,
+ request_options={"timeout": 180},
+ )
+ # Gemini SDK:response.usage_metadata.{prompt_token_count, candidates_token_count}
+ try:
+ usage = getattr(response, 'usage_metadata', None)
+ if usage is not None:
+ _ctx.set_tokens(
+ input=getattr(usage, 'prompt_token_count', 0) or 0,
+ output=getattr(usage, 'candidates_token_count', 0) or 0,
+ )
+ except Exception:
+ pass
+ return response.text or ""
+ except Exception as e:
+ logger.error("[OpenClaw] Gemini 呼叫失敗: %s", e)
+ _ctx.set_error(f"{type(e).__name__}: {e}")
+ _ctx.fallback_to_caller(f"{caller}_nim")
+ return None
-def _call_nvidia_nim(system_prompt: str, user_prompt: str, temperature: float = 0.5) -> Optional[str]:
- """Gemini 離線時備援 NVIDIA NIM,回傳文字;失敗回傳 None"""
+def _call_nvidia_nim(
+ system_prompt: str,
+ user_prompt: str,
+ temperature: float = 0.5,
+ caller: str = "openclaw_qa",
+) -> Optional[str]:
+ """Gemini 離線時備援 NVIDIA NIM,回傳文字;失敗回傳 None。
+
+ Args:
+ caller: 由外層細分 caller,最終會以 ``{caller}_nim`` 紀錄到 ai_calls。
+ """
if not NVIDIA_API_KEY:
return None
- try:
- resp = requests.post(
- NVIDIA_NIM_URL,
- headers={
- "Authorization": f"Bearer {NVIDIA_API_KEY}",
- "Content-Type": "application/json",
- },
- json={
- "model": NVIDIA_FALLBACK_MODEL,
- "messages": [
- {"role": "system", "content": system_prompt},
- {"role": "user", "content": user_prompt},
- ],
- "temperature": temperature,
- "max_tokens": 1024,
- },
- timeout=60,
- )
- resp.raise_for_status()
- return resp.json()["choices"][0]["message"]["content"]
- except Exception as e:
- logger.error("[OpenClaw] NVIDIA NIM 呼叫失敗: %s", e)
- return None
+ nim_caller = f"{caller}_nim"
+ with log_ai_call(
+ caller=nim_caller,
+ provider='nim',
+ model=NVIDIA_FALLBACK_MODEL,
+ meta={'temperature': temperature},
+ ) as _ctx:
+ try:
+ resp = requests.post(
+ NVIDIA_NIM_URL,
+ headers={
+ "Authorization": f"Bearer {NVIDIA_API_KEY}",
+ "Content-Type": "application/json",
+ },
+ json={
+ "model": NVIDIA_FALLBACK_MODEL,
+ "messages": [
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": user_prompt},
+ ],
+ "temperature": temperature,
+ "max_tokens": 1024,
+ },
+ timeout=60,
+ )
+ resp.raise_for_status()
+ body = resp.json()
+ usage = body.get("usage", {}) or {}
+ _ctx.set_tokens(
+ input=usage.get("prompt_tokens", 0),
+ output=usage.get("completion_tokens", 0),
+ )
+ return body["choices"][0]["message"]["content"]
+ except Exception as e:
+ logger.error("[OpenClaw] NVIDIA NIM 呼叫失敗: %s", e)
+ _ctx.set_error(f"{type(e).__name__}: {e}")
+ return None
# ═══════════════════════════════════════════════════════════════════════════════
@@ -752,6 +1038,63 @@ def _split_message(text: str, max_len: int = 3800) -> List[str]:
return chunks
+def _push_report_with_charts(
+ header: str,
+ body: str,
+ charts: List[tuple],
+ report_label: str,
+) -> None:
+ """daily/monthly 圖文報告共用推播:有圖走 send_report_with_charts,無圖走 raw。
+
+ Operation Ollama-First v5.0 Phase 4 抽出(純結構重構,行為與原 inline 樣板一致):
+ 1. 三處原 inline 邏輯(_legacy_full_gemini_daily_report / generate_monthly_report /
+ _generate_daily_report_hermes_template)完全相同:header + "\\n\\n" + body,
+ 有 charts 用圖文 API,無則 raw。
+ 2. 失敗只 log warning(非阻塞),與原行為一致。
+
+ Args:
+ header: telegram_templates 已渲染的 header 字串
+ body: 報告主文(Gemini 生成)
+ charts: list of (filename, png_bytes, caption) tuples,可為空 list
+ report_label: log 訊息辨識用,例如 "日報" / "月報" / "日報(模板模式)"
+ """
+ try:
+ if charts:
+ from services.telegram_templates import (
+ send_report_with_charts,
+ _get_chat_ids,
+ )
+ full_msg = header + "\n\n" + body
+ send_report_with_charts(full_msg, charts, _get_chat_ids())
+ else:
+ from services.telegram_templates import _send_telegram_raw
+ _send_telegram_raw(header + "\n\n" + body)
+ except Exception as e:
+ logger.error("[OpenClaw] %s Telegram 推播失敗: %s", report_label, e)
+
+
+def _collect_mcp_intel(label: str) -> Dict[str, Any]:
+ """weekly/monthly 共用 MCP 外部情報收集(純結構重構,無行為變更)。
+
+ Args:
+ label: log 訊息辨識用,例如 "週報" / "月報"
+
+ Returns:
+ dict: ``{"mcp_data": {...}, "holiday_ctx": str, "seasonal_ctx": str}``
+ 失敗時三欄位皆回空字串/空 dict(非阻塞,與原 inline 行為一致)。
+ """
+ try:
+ from services.mcp_collector_service import mcp_collector
+ return {
+ "mcp_data": mcp_collector.collect_all() or {},
+ "holiday_ctx": mcp_collector.get_holiday_context() or "",
+ "seasonal_ctx": mcp_collector.get_seasonal_context() or "",
+ }
+ except Exception as e:
+ logger.warning("[OpenClaw] %s MCP 收集失敗(非阻塞): %s", label, e)
+ return {"mcp_data": {}, "holiday_ctx": "", "seasonal_ctx": ""}
+
+
# ═══════════════════════════════════════════════════════════════════════════════
# 主要公開函式
# ═══════════════════════════════════════════════════════════════════════════════
@@ -890,16 +1233,10 @@ def generate_weekly_strategy_report(
competitor_summary = _fetch_competitor_summary()
# ── Step 2:MCP 外部情報 ─────────────────────────────────────────────────
- mcp_data: Dict[str, str] = {}
- try:
- from services.mcp_collector_service import mcp_collector
- mcp_data = mcp_collector.collect_all()
- holiday_ctx = mcp_collector.get_holiday_context()
- seasonal_ctx = mcp_collector.get_seasonal_context()
- except Exception as e:
- logger.warning("[OpenClaw] MCP 收集失敗(非阻塞): %s", e)
- holiday_ctx = ""
- seasonal_ctx = ""
+ _mcp = _collect_mcp_intel("週報")
+ mcp_data = _mcp["mcp_data"]
+ holiday_ctx = _mcp["holiday_ctx"]
+ seasonal_ctx = _mcp["seasonal_ctx"]
# ── Step 3:組建 Gemini Prompt ───────────────────────────────────────────
system_prompt = """你是 OpenClaw,一位台灣頂尖電商戰略分析師,專精於 momo 購物平台。
@@ -1000,7 +1337,7 @@ TOP 威脅品項(近48h Hermes 偵測):
# ── Step 4:Gemini 生成 ───────────────────────────────────────────────────
logger.info("[OpenClaw] 呼叫 Gemini %s 生成週報...", STRATEGY_MODEL)
- report_content = _call_gemini(system_prompt, user_prompt, temperature=0.35)
+ report_content = _call_gemini(system_prompt, user_prompt, temperature=0.35, caller="openclaw_weekly")
if not report_content:
logger.error("[OpenClaw] Gemini 週報生成失敗")
@@ -1090,9 +1427,42 @@ TOP 威脅品項(近48h Hermes 偵測):
}
+def _daily_hermes_template_enabled() -> bool:
+ """Operation Ollama-First v5.0 Phase 3 — Hermes 模板模式 feature flag.
+
+ 每次呼叫即時讀取,允許 runtime toggle 灰度(不需重啟 scheduler)。
+ 預設 false → 走 _legacy_full_gemini_daily_report(戰前行為,零 regression)。
+ """
+ return os.getenv('OPENCLAW_DAILY_HERMES_TEMPLATE', 'false').strip().lower() in ('true', '1', 'yes', 'on')
+
+
def generate_daily_report() -> dict:
"""
- OpenClaw 電商日報(每日 09:00)
+ OpenClaw 電商日報(每日 09:00)— Operation Ollama-First v5.0 Phase 3 路由層。
+
+ 依 ``OPENCLAW_DAILY_HERMES_TEMPLATE`` 分流:
+ - false(預設):``_legacy_full_gemini_daily_report``,Gemini 全文寫稿(~28K tokens)
+ - true:``_generate_daily_report_hermes_template``,Hermes 算 KPI + 模板填充 +
+ Gemini 寫 200 字洞察(~8K tokens, -71%)
+
+ 回傳合約兩條路徑一致:``{status, report_type, insight_id, period, ...}``
+ cron 不需修改;ai_insights schema 不變(仍 type='daily_report')。
+ """
+ if _daily_hermes_template_enabled():
+ try:
+ return _generate_daily_report_hermes_template()
+ except Exception as e:
+ # 模板模式異常 → 自動降級回 legacy,不讓 09:00 cron 整個掛掉
+ logger.error(
+ "[OpenClaw] 日報 Hermes 模板模式異常,自動降級回 legacy: %s", e,
+ exc_info=True,
+ )
+ return _legacy_full_gemini_daily_report()
+ return _legacy_full_gemini_daily_report()
+
+
+def _legacy_full_gemini_daily_report() -> dict:
+ """OpenClaw 日報舊版(Gemini 全文)— Phase 3 前的原始實作,保留為 baseline 對照組。
流程:
1. 讀取昨日業績快照 + TOP 競品威脅 + 定價建議
@@ -1186,7 +1556,7 @@ def generate_daily_report() -> dict:
# ── Step 3:Gemini 生成 ───────────────────────────────────────────────────
logger.info("[OpenClaw] 呼叫 Gemini 生成日報...")
- report_content = _call_gemini(system_prompt, user_prompt, temperature=0.3)
+ report_content = _call_gemini(system_prompt, user_prompt, temperature=0.3, caller="openclaw_daily")
if not report_content:
logger.error("[OpenClaw] 日報 Gemini 呼叫失敗")
@@ -1232,11 +1602,7 @@ def generate_daily_report() -> dict:
# ── Step 6:Telegram 推播(圖文)────────────────────────────────────────
try:
- from services.telegram_templates import (
- daily_report_header,
- send_report_with_charts,
- _get_chat_ids,
- )
+ from services.telegram_templates import daily_report_header
header = daily_report_header(
date_str=period,
revenue=yesterday_sales.get("revenue", 0),
@@ -1244,14 +1610,9 @@ def generate_daily_report() -> dict:
threat_count=len(threats),
opportunity_count=competitor_summary.get("premium_count", 0),
)
- full_msg = header + "\n\n" + report_content
- if charts:
- send_report_with_charts(full_msg, charts, _get_chat_ids())
- else:
- from services.telegram_templates import _send_telegram_raw
- _send_telegram_raw(full_msg)
+ _push_report_with_charts(header, report_content, charts, "日報")
except Exception as e:
- logger.error("[OpenClaw] 日報 Telegram 推播失敗: %s", e)
+ logger.error("[OpenClaw] 日報 header 組裝失敗: %s", e)
logger.info("[OpenClaw] 日報完成 insight_id=%s charts=%d", insight_id, len(charts))
return {
@@ -1317,16 +1678,10 @@ def generate_monthly_report() -> dict:
price_trend_data = _fetch_price_trend_summary(days_in_month)
# ── Step 2:MCP 外部情報(月度版)───────────────────────────────────────
- mcp_data: Dict[str, str] = {}
- try:
- from services.mcp_collector_service import mcp_collector
- mcp_data = mcp_collector.collect_all()
- holiday_ctx = mcp_collector.get_holiday_context()
- seasonal_ctx = mcp_collector.get_seasonal_context()
- except Exception as e:
- logger.warning("[OpenClaw] 月報 MCP 收集失敗(非阻塞): %s", e)
- holiday_ctx = ""
- seasonal_ctx = ""
+ _mcp = _collect_mcp_intel("月報")
+ mcp_data = _mcp["mcp_data"]
+ holiday_ctx = _mcp["holiday_ctx"]
+ seasonal_ctx = _mcp["seasonal_ctx"]
# ── Step 3:組建 Gemini Prompt ───────────────────────────────────────────
system_prompt = """你是 OpenClaw 月報首席分析師,負責 momo 平台電商月度深度報告。
@@ -1413,7 +1768,7 @@ def generate_monthly_report() -> dict:
# ── Step 4:Gemini 生成 ───────────────────────────────────────────────────
logger.info("[OpenClaw] 呼叫 Gemini 生成月報...")
- report_content = _call_gemini(system_prompt, user_prompt, temperature=0.35)
+ report_content = _call_gemini(system_prompt, user_prompt, temperature=0.35, caller="openclaw_monthly")
if not report_content:
logger.error("[OpenClaw] 月報 Gemini 呼叫失敗")
@@ -1466,11 +1821,7 @@ def generate_monthly_report() -> dict:
# ── Step 7:Telegram 推播(圖文)────────────────────────────────────────
try:
- from services.telegram_templates import (
- monthly_report_header,
- send_report_with_charts,
- _get_chat_ids,
- )
+ from services.telegram_templates import monthly_report_header
top3 = [c.get("category", "N/A") for c in categories[:3]] or ["N/A"]
header = monthly_report_header(
month_str=period,
@@ -1479,14 +1830,9 @@ def generate_monthly_report() -> dict:
yoy=sales.get("yoy_pct", 0),
top3_categories=top3,
)
- full_msg = header + "\n\n" + report_content
- if charts:
- send_report_with_charts(full_msg, charts, _get_chat_ids())
- else:
- from services.telegram_templates import _send_telegram_raw
- _send_telegram_raw(full_msg)
+ _push_report_with_charts(header, report_content, charts, "月報")
except Exception as e:
- logger.error("[OpenClaw] 月報 Telegram 推播失敗: %s", e)
+ logger.error("[OpenClaw] 月報 header 組裝失敗: %s", e)
logger.info("[OpenClaw] 月報完成 insight_id=%s charts=%d actions=%d",
insight_id, len(charts), len(action_items))
@@ -1502,7 +1848,7 @@ def generate_monthly_report() -> dict:
def generate_meta_analysis_report() -> str:
"""
- AI 系統效能自我審視(每 6 小時 run_openclaw_meta_analysis_task 呼叫)
+ AI 系統效能自我審視(每日 12:00 run_openclaw_meta_analysis_task 呼叫;Phase 4 降頻 6h → 24h)
分析 ai_insights 近期累積資料,評估:
- 各 Agent 預測準確率
@@ -1607,7 +1953,7 @@ def generate_meta_analysis_report() -> str:
"""
# ── Gemini 生成 ──────────────────────────────────────────────────────────
- report_content = _call_gemini(system_prompt, user_prompt, temperature=0.3)
+ report_content = _call_gemini(system_prompt, user_prompt, temperature=0.3, caller="openclaw_meta")
if not report_content:
logger.error("[OpenClaw] Meta-Analysis Gemini 呼叫失敗")
return "(Meta-Analysis 生成失敗)"
@@ -1829,3 +2175,503 @@ def _extract_action_items(report_text: str) -> List[str]:
elif stripped.startswith("") and items:
break
return items[:8]
+
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# Operation Ollama-First v5.0 Phase 3 — Hermes 模板模式(A8 fullstack)
+#
+# 設計理念:日報 70% 是結構化 KPI(純 SQL 算),20% 是 Gemini 寫的洞察段落,
+# 10% 是 Hermes 規則引擎產的行動清單。整體 token 從 ~28K → ~8K(-71%)。
+#
+# Token 預算(單次日報):
+# - Legacy 全文:system + user prompt ~3K,輸出 ~1.5K,含 raw KPI 嵌 prompt → ~28K 總用量
+# - Hermes 模板:KPI 已預算好,prompt 僅含「精簡 KPI 摘要 + 寫 200 字」 ~600 tokens prompt
+# 輸出 ~400 tokens,總計 ~8K(含 ai_call_logger 的 meta + retry buffer)
+#
+# 規則:
+# 1. 模板模式失敗 → 上層 generate_daily_report 自動降級回 legacy(不讓 cron 整個掛掉)
+# 2. ai_insights schema 不變(仍 type='daily_report'),metadata_json 加 mode='hermes_template' 區分
+# 3. _call_gemini caller 用 'openclaw_daily_insight',方便 ai_calls 統計區分新舊
+# ═══════════════════════════════════════════════════════════════════════════════
+
+# 上限常數 — 避免 magic number 散落
+DAILY_TOP_SKU_LIMIT = 10
+DAILY_PRICE_GAP_LIMIT = 5
+DAILY_INSIGHT_MAX_TOKENS = 400
+DAILY_INSIGHT_TIMEOUT_S = 60
+
+
+def _compute_daily_kpi(target_date) -> Dict[str, Any]:
+ """純 SQL + Hermes 規則計算當日所有結構化 KPI;不走 LLM。
+
+ Args:
+ target_date: ``datetime.date``,要分析的目標日(一般為昨日)。
+
+ Returns:
+ dict: 給 Jinja2 模板的完整渲染上下文,含:
+ revenue / orders / top_skus / price_gaps /
+ inventory_alerts / priority_actions
+ """
+ from datetime import date as _date_cls
+ if hasattr(target_date, 'date') and not isinstance(target_date, _date_cls):
+ target_date = target_date.date()
+ if not isinstance(target_date, _date_cls):
+ raise TypeError(f"target_date 必須是 date,實得 {type(target_date)}")
+
+ return {
+ 'revenue': _query_revenue_kpi(target_date),
+ 'orders': _query_orders_kpi(target_date),
+ 'top_skus': _query_top_skus(target_date, limit=DAILY_TOP_SKU_LIMIT),
+ 'price_gaps': _query_competitor_price_alerts(target_date, limit=DAILY_PRICE_GAP_LIMIT),
+ 'inventory_alerts': _query_inventory_anomalies(target_date),
+ 'priority_actions': _generate_priority_actions(target_date),
+ }
+
+
+def _query_revenue_kpi(target_date) -> Dict[str, Any]:
+ """營收 KPI:今日 / 昨日 / 7日均 + 變化%。"""
+ session = get_session()
+ try:
+ row = session.execute(text("""
+ SELECT
+ SUM(CASE WHEN snapshot_date::date = :d THEN
+ COALESCE("總業績"::numeric, 0) ELSE 0 END) AS today,
+ SUM(CASE WHEN snapshot_date::date = :d - 1 THEN
+ COALESCE("總業績"::numeric, 0) ELSE 0 END) AS yesterday,
+ SUM(CASE WHEN snapshot_date::date BETWEEN :d - 7 AND :d - 1 THEN
+ COALESCE("總業績"::numeric, 0) ELSE 0 END) / 7.0 AS avg_7d
+ FROM daily_sales_snapshot
+ WHERE snapshot_date::date BETWEEN :d - 7 AND :d
+ """), {"d": target_date}).fetchone()
+
+ today = float((row[0] or 0) if row else 0)
+ yesterday = float((row[1] or 0) if row else 0)
+ avg_7d = float((row[2] or 0) if row else 0)
+ dod_pct = ((today - yesterday) / yesterday * 100.0) if yesterday else 0.0
+ wow_pct = ((today - avg_7d) / avg_7d * 100.0) if avg_7d else 0.0
+
+ return {
+ "today": today,
+ "yesterday": yesterday,
+ "avg_7d": avg_7d,
+ "dod_pct": round(dod_pct, 1),
+ "wow_pct": round(wow_pct, 1),
+ }
+ except Exception as e:
+ logger.error("[OpenClaw] revenue KPI 讀取失敗: %s", e)
+ return {"today": 0.0, "yesterday": 0.0, "avg_7d": 0.0, "dod_pct": 0.0, "wow_pct": 0.0}
+ finally:
+ session.close()
+
+
+def _query_orders_kpi(target_date) -> Dict[str, Any]:
+ """訂單數 / SKU 數 / 平均客單價 KPI。
+
+ Note: daily_sales_snapshot 並無 order_id 欄位,「訂單數」以 row 數近似
+ (與 legacy `_fetch_yesterday_sales` 同義;保留行為一致性)。
+ """
+ session = get_session()
+ try:
+ row = session.execute(text("""
+ SELECT
+ COUNT(*) AS today_rows,
+ COUNT(DISTINCT "商品ID") AS today_sku,
+ AVG(NULLIF(COALESCE("總業績"::numeric, 0), 0)) AS avg_value_today
+ FROM daily_sales_snapshot
+ WHERE snapshot_date::date = :d
+ """), {"d": target_date}).fetchone()
+
+ prev = session.execute(text("""
+ SELECT COUNT(*) FROM daily_sales_snapshot
+ WHERE snapshot_date::date = :d - 1
+ """), {"d": target_date}).fetchone()
+
+ today_rows = int((row[0] or 0) if row else 0)
+ today_sku = int((row[1] or 0) if row else 0)
+ avg_value = float((row[2] or 0) if row else 0)
+ yesterday_rows = int((prev[0] or 0) if prev else 0)
+ dod_pct = ((today_rows - yesterday_rows) / yesterday_rows * 100.0) if yesterday_rows else 0.0
+
+ return {
+ "today": today_rows,
+ "yesterday": yesterday_rows,
+ "sku_count": today_sku,
+ "avg_order_value": avg_value,
+ "dod_pct": round(dod_pct, 1),
+ }
+ except Exception as e:
+ logger.error("[OpenClaw] orders KPI 讀取失敗: %s", e)
+ return {"today": 0, "yesterday": 0, "sku_count": 0, "avg_order_value": 0.0, "dod_pct": 0.0}
+ finally:
+ session.close()
+
+
+def _query_top_skus(target_date, limit: int = 10) -> List[Dict[str, Any]]:
+ """當日 TOP N 熱銷 SKU。"""
+ session = get_session()
+ try:
+ rows = session.execute(text("""
+ SELECT
+ "商品ID" AS sku,
+ "商品名稱" AS name,
+ SUM(COALESCE("總業績"::numeric, 0)) AS revenue,
+ COUNT(*) AS qty
+ FROM daily_sales_snapshot
+ WHERE snapshot_date::date = :d
+ GROUP BY "商品ID", "商品名稱"
+ ORDER BY revenue DESC
+ LIMIT :lim
+ """), {"d": target_date, "lim": limit}).fetchall()
+ return [
+ {
+ "sku": r[0] or "",
+ "name": (r[1] or "")[:60],
+ "revenue": float(r[2] or 0),
+ "qty": int(r[3] or 0),
+ }
+ for r in rows
+ ]
+ except Exception as e:
+ logger.error("[OpenClaw] top SKUs 讀取失敗: %s", e)
+ return []
+ finally:
+ session.close()
+
+
+def _query_competitor_price_alerts(target_date, limit: int = 5) -> List[Dict[str, Any]]:
+ """TOP N 競品價差警示(沿用 _fetch_top_threats 並補完 SKU 名稱)。"""
+ threats = _fetch_top_threats(limit)
+ if not threats:
+ return []
+
+ sku_codes = [t.get("sku") for t in threats if t.get("sku")]
+ name_map: Dict[str, str] = {}
+ if sku_codes:
+ session = get_session()
+ try:
+ rows = session.execute(
+ text("""
+ SELECT i_code, name FROM products
+ WHERE i_code = ANY(:skus)
+ """).bindparams(bindparam("skus", expanding=True)),
+ {"skus": sku_codes},
+ ).fetchall()
+ name_map = {r[0]: (r[1] or "")[:60] for r in rows}
+ except Exception as e:
+ logger.warning("[OpenClaw] 競品 SKU 名稱補完失敗(非阻塞): %s", e)
+ finally:
+ session.close()
+
+ alerts: List[Dict[str, Any]] = []
+ for t in threats:
+ sku = t.get("sku") or ""
+ gap = float(t.get("gap_pct") or 0)
+ alerts.append({
+ "sku": sku,
+ "sku_name": name_map.get(sku, sku),
+ "competitor": "PChome/蝦皮",
+ "gap_pct": round(gap, 1),
+ "momo_price": t.get("momo_price"),
+ "competitor_price": t.get("pchome_price"),
+ "confidence": float(t.get("confidence") or 0),
+ })
+ return alerts
+
+
+def _query_inventory_anomalies(target_date) -> List[Dict[str, Any]]:
+ """庫存 / 退單異常品項(Hermes/NemoTron 寫入的 ai_insights 子類)。"""
+ session = get_session()
+ try:
+ rows = session.execute(text("""
+ SELECT product_sku, content, insight_type, confidence, metadata_json
+ FROM ai_insights
+ WHERE insight_type IN ('inventory_alert', 'return_alert', 'stock_anomaly')
+ AND status = 'approved'
+ AND created_at >= NOW() - INTERVAL '24 hours'
+ ORDER BY confidence DESC
+ LIMIT 10
+ """)).fetchall()
+ out: List[Dict[str, Any]] = []
+ for r in rows:
+ meta = {}
+ try:
+ meta = json.loads(r[4]) if r[4] else {}
+ except Exception:
+ meta = {}
+ out.append({
+ "sku": r[0] or "",
+ "summary": (r[1] or "")[:120],
+ "type": r[2] or "",
+ "confidence": float(r[3] or 0),
+ "extra": meta,
+ })
+ return out
+ except Exception as e:
+ logger.warning("[OpenClaw] 庫存異常讀取失敗(非阻塞): %s", e)
+ return []
+ finally:
+ session.close()
+
+
+def _generate_priority_actions(target_date) -> List[str]:
+ """規則引擎產生 48h 優先事項(純規則,無 LLM)。"""
+ actions: List[str] = []
+
+ threats = _fetch_top_threats(limit=DAILY_PRICE_GAP_LIMIT)
+ for t in threats[:3]:
+ gap = float(t.get("gap_pct") or 0)
+ if abs(gap) >= 10.0:
+ actions.append(
+ f"⚠️ SKU {t.get('sku') or ''} 競品價差 {gap:+.1f}%,建議 48h 內檢視跟降"
+ )
+
+ recs = _fetch_top_recommendations(limit=3)
+ for r in recs:
+ actions.append(
+ f"💰 {(r.get('name') or r.get('sku') or '')[:40]}:{r.get('strategy') or '待覆核'}"
+ f"(信心 {float(r.get('confidence') or 0):.2f})"
+ )
+
+ invs = _query_inventory_anomalies(target_date)
+ for inv in invs[:2]:
+ actions.append(f"📦 SKU {inv.get('sku')}:{(inv.get('summary') or '')[:60]}")
+
+ if not actions:
+ actions.append("✅ 今日無高優先警示,維持現有策略執行。")
+
+ return actions[:8]
+
+
+def _compute_gemini_insight(kpi: Dict[str, Any], period: str) -> str:
+ """給 Gemini 純結構化 KPI(已算好),請其寫 150-200 字繁中策略洞察。
+
+ Token 控制:精簡 prompt(~600 tokens)+ ``max_output_tokens=DAILY_INSIGHT_MAX_TOKENS``。
+ Gemini 失敗時回降級訊息(不拋例外,模板仍可渲染)。
+ """
+ revenue = kpi.get("revenue") or {}
+ orders = kpi.get("orders") or {}
+ top_skus = kpi.get("top_skus") or []
+ price_gaps = kpi.get("price_gaps") or []
+ inv_alerts = kpi.get("inventory_alerts") or []
+
+ top_names = [s.get("name", "") for s in top_skus[:3]]
+
+ system_prompt = (
+ "你是 OpenClaw 日報洞察分析師。語言:繁體中文(台灣用語)。"
+ "嚴禁簡體字、嚴禁套話、嚴禁重複數字。聚焦『解讀』與『明日行動』。"
+ )
+ user_prompt = (
+ f"根據以下今日 ({period}) KPI(已計算),寫 150-200 字策略洞察:\n\n"
+ f"營收:NT${revenue.get('today', 0):,.0f}(DoD {revenue.get('dod_pct', 0):+.1f}%、"
+ f"vs 7日均 {revenue.get('wow_pct', 0):+.1f}%)\n"
+ f"訂單數:{orders.get('today', 0)}(DoD {orders.get('dod_pct', 0):+.1f}%)\n"
+ f"TOP 商品:{top_names}\n"
+ f"價差警示:{len(price_gaps)} 件\n"
+ f"庫存異常:{len(inv_alerts)} 件\n\n"
+ "請聚焦:(1) 今日最值得統帥注意的 1-2 件事;(2) 明日建議行動。"
+ "不要重複上面的數字本身,專注解讀。控制 200 字內。"
+ )
+
+ text_out = _call_gemini(
+ system_prompt,
+ user_prompt,
+ temperature=0.35,
+ caller="openclaw_daily_insight",
+ )
+ if text_out:
+ return text_out.strip()
+
+ direction = "上升" if revenue.get("dod_pct", 0) > 0 else "下滑" if revenue.get("dod_pct", 0) < 0 else "持平"
+ return (
+ f"今日營收 NT${revenue.get('today', 0):,.0f},與昨日相較{direction} "
+ f"{revenue.get('dod_pct', 0):+.1f}%;vs 7日均 {revenue.get('wow_pct', 0):+.1f}%。"
+ f"TOP 商品集中在 {', '.join([n for n in top_names if n]) if top_names else '—'}。"
+ f"今有 {len(price_gaps)} 件競品價差警示與 {len(inv_alerts)} 件庫存異常待處理。"
+ "(AI 洞察生成暫時不可用,已回退至規則性摘要。)"
+ )
+
+
+def _render_daily_template_v2(context: Dict[str, Any]) -> str:
+ """以 Jinja2 渲染 daily_report_v2.j2;缺欄位優雅降級為「—」。
+
+ 將 Jinja Environment 集中於此,方便 unit test 直接呼叫無需 Flask app context。
+ """
+ from jinja2 import Environment, FileSystemLoader, Undefined
+
+ template_dir = os.path.join(
+ os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+ 'templates',
+ )
+
+ class _SafeUndefined(Undefined):
+ """缺欄位回 '—' 而非 raise UndefinedError,符合「優雅降級」需求。"""
+ def __str__(self) -> str:
+ return "—"
+ def __html__(self) -> str:
+ return "—"
+ def __getattr__(self, name: str):
+ return _SafeUndefined()
+
+ env = Environment(
+ loader=FileSystemLoader(template_dir),
+ undefined=_SafeUndefined,
+ autoescape=False, # 日報純文字 + emoji,無 HTML 注入面
+ trim_blocks=True,
+ lstrip_blocks=True,
+ )
+
+ def _fmt_currency(v: Any) -> str:
+ try:
+ # 容錯:Undefined / None / 空字串 → 降級為 —
+ if v is None or isinstance(v, Undefined) or v == "":
+ return "—"
+ return f"NT${float(v):,.0f}"
+ except (TypeError, ValueError):
+ return "—"
+
+ def _fmt_pct(v: Any) -> str:
+ try:
+ if v is None or isinstance(v, Undefined) or v == "":
+ return "—"
+ return f"{float(v):+.1f}%"
+ except (TypeError, ValueError):
+ return "—"
+
+ env.filters['format_currency'] = _fmt_currency
+ env.filters['format_pct'] = _fmt_pct
+
+ template = env.get_template('daily_report_v2.j2')
+ return template.render(**context)
+
+
+def _generate_daily_report_hermes_template() -> dict:
+ """Hermes 模板模式日報 — 結構化 KPI + 200 字 Gemini 洞察 + 規則行動清單。
+
+ 流程:
+ 1. 取昨日業績 stale gate(與 legacy 同邏輯)
+ 2. _compute_daily_kpi 純 SQL 算齊所有結構化 KPI
+ 3. _compute_gemini_insight 取 200 字洞察(精簡 prompt)
+ 4. Jinja2 渲染 daily_report_v2.j2
+ 5. 持久化 ai_insights(type='daily_report',metadata.mode='hermes_template')
+ 6. Telegram 推播(沿用 legacy 圖表生成)
+
+ 回傳合約與 legacy 完全一致。
+ """
+ now = datetime.now()
+ yesterday = now - timedelta(days=1)
+ period = yesterday.strftime("%Y年%m月%d日")
+ target_date = yesterday.date()
+ weekday_map = ['週一', '週二', '週三', '週四', '週五', '週六', '週日']
+ weekday = weekday_map[target_date.weekday()]
+ logger.info("[OpenClaw] 日報任務啟動(Hermes 模板模式)period=%s", period)
+
+ # ── Step 1:stale gate(與 legacy 對齊)─────────────────────────────────
+ sales = _fetch_sales_summary(7)
+ if sales.get("stale"):
+ last_date = str(sales.get("last_date"))
+ logger.warning(
+ "[OpenClaw] 日報任務跳過(模板模式):daily_sales_snapshot 已停更 last_date=%s period=%s",
+ last_date, period,
+ )
+ _send_data_stale_alert(report_type="daily_report", last_date=last_date, period=period)
+ return {
+ "status": "skipped",
+ "report_type": "daily_report",
+ "reason": "data_stale",
+ "last_date": last_date,
+ "period": period,
+ }
+
+ # ── Step 2:純 SQL 算 KPI ─────────────────────────────────────────────
+ kpi = _compute_daily_kpi(target_date)
+
+ # ── Step 3:Gemini 寫 200 字洞察(caller 細分)──────────────────────────
+ insight_text = _compute_gemini_insight(kpi, period)
+
+ # ── Step 4:Jinja2 渲染 ───────────────────────────────────────────────
+ report_content = _render_daily_template_v2({
+ "today": period,
+ "weekday": weekday,
+ "revenue": kpi["revenue"],
+ "orders": kpi["orders"],
+ "top_skus": kpi["top_skus"],
+ "price_gaps": kpi["price_gaps"],
+ "inventory_alerts": kpi["inventory_alerts"],
+ "priority_actions": kpi["priority_actions"],
+ "gemini_insight": insight_text,
+ })
+
+ # ── Step 5:圖表(沿用 legacy,非阻塞)─────────────────────────────────
+ charts: List[tuple] = []
+ try:
+ from services.chart_generator_service import (
+ revenue_trend_chart,
+ price_gap_bar_chart,
+ )
+ rev_chart = revenue_trend_chart(7, "近7日")
+ if rev_chart:
+ charts.append(("revenue_7d.png", rev_chart, "📈 近7日營收趨勢"))
+ if kpi["price_gaps"]:
+ gap_chart = price_gap_bar_chart(
+ [{"sku": a["sku"], "gap_pct": a["gap_pct"]} for a in kpi["price_gaps"]],
+ "競品價差警示(TOP 5)",
+ )
+ if gap_chart:
+ charts.append(("price_gap.png", gap_chart, "⚠️ 競品價差分析"))
+ except Exception as e:
+ logger.warning("[OpenClaw] 日報圖表生成失敗(非阻塞): %s", e)
+
+ # ── Step 6:持久化 ──────────────────────────────────────────────────────
+ metadata = {
+ "period": period,
+ "model": STRATEGY_MODEL,
+ "mode": "hermes_template", # ← 區分新舊模式關鍵欄位
+ "template_version": "daily_report_v2",
+ "today_revenue": kpi["revenue"].get("today", 0),
+ "dod_pct": kpi["revenue"].get("dod_pct", 0),
+ "wow_pct": kpi["revenue"].get("wow_pct", 0),
+ "top_sku_count": len(kpi["top_skus"]),
+ "price_gap_count": len(kpi["price_gaps"]),
+ "inventory_alert_count": len(kpi["inventory_alerts"]),
+ "priority_action_count": len(kpi["priority_actions"]),
+ "chart_count": len(charts),
+ "generated_at": now.isoformat(),
+ }
+ insight_id = _save_to_ai_insights(
+ insight_type="daily_report",
+ content=report_content,
+ confidence=0.85,
+ metadata=metadata,
+ period=target_date.strftime("%Y-%m-%d"),
+ )
+
+ action_items = list(kpi["priority_actions"])
+ _save_action_items(action_items, insight_id)
+
+ # ── Step 7:Telegram 推播 ──────────────────────────────────────────────
+ try:
+ from services.telegram_templates import daily_report_header
+ header = daily_report_header(
+ date_str=period,
+ revenue=kpi["revenue"].get("today", 0),
+ wow=kpi["revenue"].get("wow_pct", 0),
+ threat_count=len(kpi["price_gaps"]),
+ opportunity_count=0,
+ )
+ _push_report_with_charts(header, report_content, charts, "日報(模板模式)")
+ except Exception as e:
+ logger.error("[OpenClaw] 日報 header 組裝失敗(模板模式): %s", e)
+
+ logger.info(
+ "[OpenClaw] 日報完成(Hermes 模板模式)insight_id=%s charts=%d actions=%d",
+ insight_id, len(charts), len(action_items),
+ )
+ return {
+ "status": "ok",
+ "report_type": "daily_report",
+ "insight_id": insight_id,
+ "period": period,
+ "chart_count": len(charts),
+ "action_count": len(action_items),
+ "mode": "hermes_template",
+ }
diff --git a/templates/daily_report_v2.j2 b/templates/daily_report_v2.j2
new file mode 100644
index 0000000..65d4464
--- /dev/null
+++ b/templates/daily_report_v2.j2
@@ -0,0 +1,63 @@
+📊 momo 日報 {{ today }} ({{ weekday }})
+═══════════════════════════════════════
+
+## 📈 營收 KPI
+
+| 指標 | 今日 | vs 昨日 | vs 7日均 |
+|------|------|---------|----------|
+| 營收 | {{ revenue.today | format_currency }} | {{ revenue.dod_pct | format_pct }} | {{ revenue.wow_pct | format_pct }} |
+| 訂單筆數 | {{ orders.today_rows | default('—') }} | — | — |
+| 上架 SKU | {{ orders.today_sku | default('—') }} | — | — |
+| 平均客單 | {{ orders.avg_value_today | format_currency }} | — | — |
+
+{% if revenue.today == 0 %}
+⚠️ 今日營收為零,請檢查資料管線是否正常。
+{% endif %}
+
+## 🏆 TOP {{ top_skus | length }} 熱銷商品
+
+{% if top_skus %}
+{% for sku in top_skus %}
+{{ loop.index }}. **{{ sku.name | default('—') }}**
+ 數量:{{ sku.qty | default('—') }} 件 | 營收:{{ sku.revenue | format_currency }}
+{% endfor %}
+{% else %}
+(今日無熱銷資料)
+{% endif %}
+
+## 🔍 競品價差警示
+
+{% if price_gaps %}
+{% for alert in price_gaps %}
+- ⚠️ **{{ alert.sku_name | default(alert.sku) | default('—') }}**:我方 {{ alert.momo_price | format_currency }} vs {{ alert.competitor | default('競品') }} {{ alert.comp_price | format_currency }} (價差 {{ alert.gap_pct | format_pct }})
+{% endfor %}
+{% else %}
+✅ 暫無重大價差警示
+{% endif %}
+
+## 📦 庫存異常
+
+{% if inventory_alerts %}
+{% for alert in inventory_alerts %}
+- 🟡 {{ alert.sku_name | default(alert.sku) | default('—') }}:{{ alert.reason | default('—') }}
+{% endfor %}
+{% else %}
+✅ 庫存狀態正常
+{% endif %}
+
+## 💡 今日洞察 (AI 分析)
+
+{{ gemini_insight | default('(本日洞察生成失敗,請查 logger)') }}
+
+## ✅ 48h 優先事項
+
+{% if priority_actions %}
+{% for action in priority_actions %}
+{{ loop.index }}. {{ action }}
+{% endfor %}
+{% else %}
+(暫無自動產生的行動建議)
+{% endif %}
+
+═══════════════════════════════════════
+🤖 Operation Ollama-First v5.0 / daily_report_v2 (Hermes 模板模式)
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..bd86e3b
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,15 @@
+import os
+
+import pytest
+
+
+@pytest.fixture
+def host():
+ """Provide a default SMTP/IMAP host for non-parametric email probe tests."""
+ return os.getenv("MOOD_TEST_MAIL_HOST", "ms1.pchome.tw")
+
+
+@pytest.fixture
+def port():
+ """Provide a default SMTP/IMAP port used by probe tests."""
+ return int(os.getenv("MOOD_TEST_MAIL_PORT", "587"))
diff --git a/tests/test_nemotron_qwen3_compat.py b/tests/test_nemotron_qwen3_compat.py
new file mode 100644
index 0000000..9c7dad3
--- /dev/null
+++ b/tests/test_nemotron_qwen3_compat.py
@@ -0,0 +1,456 @@
+"""
+test_nemotron_qwen3_compat.py
+─────────────────────────────────────────────────────────────────
+Operation Ollama-First v5.0 / Phase 3 / A9 — Nemotron qwen3 切換相容性測試
+
+驗證面:
+ T1. qwen3 chat 回應 OpenAI tool_calls 結構 → _parse_tool_calls_struct 正確
+ T2. qwen3 沒回 tool_calls → _parse_content_fallback 正確(與 NIM 同等容錯)
+ T3. qwen3 同時回 tool_calls + content → 優先採用 tool_calls
+ T4. qwen3 連線失敗 → 不丟例外給上游,自動 fallback NIM 路徑
+ T5. qwen3 + NIM 都失敗 → ADR-004 走 Hermes 規則引擎降級(含「🟡 [規則引擎]」標記)
+ T6. NEMOTRON_OLLAMA_FIRST 預設 false → 完全不呼叫 qwen3(戰前行為)
+
+紀律:
+ - 所有 HTTP 互動 mock,不實際呼叫 GCP Ollama 或 NIM
+ - 與 test_nemotron_fallback 共存,使用同款 FakeThreat
+ - assert log_ai_call 路徑可被 monkeypatch 旁路(不污染 ai_calls 表)
+"""
+from dataclasses import dataclass
+from contextlib import contextmanager
+import pytest
+
+
+# ─────────────────────────────────────────────────────────────
+# Fixtures
+# ─────────────────────────────────────────────────────────────
+@dataclass
+class FakeThreat:
+ sku: str = "SKU-Q1"
+ name: str = "qwen3 測試品"
+ momo_price: float = 1200.0
+ pchome_price: float = 980.0
+ gap_pct: float = 22.4
+ sales_7d_delta_pct: float = -35.0
+ risk: str = "HIGH"
+ recommended_action: str = "建議跟進降價"
+ confidence: float = 0.85
+ sales_7d_curr_amount: float = 78000.0
+ sales_7d_prev_amount: float = 120000.0
+
+
+class _FakeResp:
+ def __init__(self, payload: dict, status: int = 200):
+ self._payload = payload
+ self.status_code = status
+
+ def raise_for_status(self):
+ if self.status_code >= 400:
+ import requests
+ raise requests.HTTPError(f"HTTP {self.status_code}", response=self)
+
+ def json(self):
+ return self._payload
+
+
+@contextmanager
+def _noop_log_ai_call(*args, **kwargs):
+ """Mock log_ai_call context manager — 不寫 ai_calls 表,回傳具備所需 setter 的 stub"""
+ class _Ctx:
+ def set_tokens(self, **_kw): pass
+ def set_error(self, *_a, **_kw): pass
+ def fallback_to_caller(self, *_a, **_kw): pass
+ def set_cache_hit(self, *_a, **_kw): pass
+ def add_meta(self, *_a, **_kw): pass
+ yield _Ctx()
+
+
+@pytest.fixture(autouse=True)
+def _reset_global_state():
+ """test 互相污染防線:每個 test 前後清 _ALERT_CACHE + ollama unhealthy marks。
+
+ 根因:dispatch() line 1328 _is_duplicate_alert 用 module-level _ALERT_CACHE,
+ 第一個 test 跑完留 "SKU-Q1" 在 cache,後續 test 命中去重 → dispatched=0 失敗。
+ """
+ import services.nemoton_dispatcher_service as _nem
+ import services.ollama_service as _oss
+ _nem._ALERT_CACHE.clear()
+ _oss._unhealthy_marks.clear()
+ _oss._resolved_host_cache['host'] = None
+ _oss._resolved_host_cache['ts'] = 0
+ yield
+ _nem._ALERT_CACHE.clear()
+ _oss._unhealthy_marks.clear()
+ _oss._resolved_host_cache['host'] = None
+ _oss._resolved_host_cache['ts'] = 0
+
+
+def _patch_execution_methods(monkeypatch, dispatcher):
+ """攔截實際 Telegram/DB 寫入,記錄被呼叫的 tool 名稱與 args(與 fallback test 共用 pattern)"""
+ calls = []
+
+ def record(kind):
+ def _inner(*args, **kwargs):
+ calls.append({"kind": kind, "args": args, "kwargs": kwargs})
+ return _inner
+
+ monkeypatch.setattr(dispatcher, "_exec_trigger_price_alert", record("price_alert"))
+ monkeypatch.setattr(dispatcher, "_exec_add_to_recommendation", record("recommendation"))
+ monkeypatch.setattr(dispatcher, "_exec_flag_for_human_review", record("human_review"))
+ return calls
+
+
+def _enable_qwen3_path(monkeypatch, module):
+ """打開 NEMOTRON_OLLAMA_FIRST + 旁路 mcp/log_ai_call/resolve_host 等副作用"""
+ monkeypatch.setattr(module, "NEMOTRON_OLLAMA_FIRST", True)
+ monkeypatch.setattr(module, "log_ai_call", _noop_log_ai_call)
+ monkeypatch.setattr(module, "build_mcp_context", lambda: "MCP-MOCK")
+ # 確保即使未被呼叫,import 路徑可解析
+ import services.ollama_service as ollama_module
+ monkeypatch.setattr(ollama_module, "resolve_ollama_host", lambda: "http://gcp-mock:11434")
+ monkeypatch.setattr(ollama_module, "mark_unhealthy", lambda *a, **kw: None)
+
+
+# ─────────────────────────────────────────────────────────────
+# T1. qwen3 OpenAI tool_calls 結構 → 正確解析
+# ─────────────────────────────────────────────────────────────
+def test_qwen3_tool_calls_struct_parsed_and_executed(monkeypatch):
+ """qwen3 回標準 OpenAI tool_calls 結構,dispatcher 應跳過 NIM 直接走工具執行"""
+ import services.nemoton_dispatcher_service as module
+
+ _enable_qwen3_path(monkeypatch, module)
+
+ # mock GCP Ollama /api/chat 回 OpenAI 兼容結構
+ fake_body = {
+ "message": {
+ "role": "assistant",
+ "content": "",
+ "tool_calls": [
+ {
+ "function": {
+ "name": "trigger_price_alert",
+ "arguments": {
+ "sku": "SKU-Q1",
+ "name": "qwen3 測試品",
+ "gap_pct": 22.4,
+ "sales_delta": -35.0,
+ "action": "跟進降價至 $980",
+ "confidence": 0.85,
+ },
+ }
+ }
+ ],
+ },
+ "prompt_eval_count": 320,
+ "eval_count": 64,
+ "done": True,
+ }
+ monkeypatch.setattr(
+ module.requests, "post", lambda *a, **kw: _FakeResp(fake_body)
+ )
+
+ dispatcher = module.NemotronDispatcher()
+ calls = _patch_execution_methods(monkeypatch, dispatcher)
+ # NIM 路徑必須完全沒被觸發(驗證 qwen3 確實是主路徑)
+ nim_called = {"v": False}
+
+ def _nim_should_not_be_called(*a, **kw):
+ nim_called["v"] = True
+ raise AssertionError("NIM 不應被呼叫,qwen3 已成功")
+
+ monkeypatch.setattr(dispatcher, "_call_nim", _nim_should_not_be_called)
+
+ threats = [FakeThreat()]
+ result = dispatcher.dispatch(threats, hermes_stats={"duration_sec": 1.0})
+
+ assert nim_called["v"] is False, "qwen3 成功時 NIM 不可被觸發"
+ assert result["dispatched"] == 1
+ assert result["nim_stats"].get("provider") == "gcp_ollama"
+ assert result["nim_stats"].get("model") == module.NEMOTRON_OLLAMA_MODEL
+ assert calls and calls[0]["kind"] == "price_alert"
+
+
+# ─────────────────────────────────────────────────────────────
+# T2. qwen3 沒回 tool_calls 但 content 含 JSON list → fallback 解析
+# ─────────────────────────────────────────────────────────────
+def test_qwen3_content_only_fallback_parsing(monkeypatch):
+ """qwen3 把工具呼叫塞在 content(list[dict])→ _parse_content_fallback 應接住"""
+ import services.nemoton_dispatcher_service as module
+
+ _enable_qwen3_path(monkeypatch, module)
+
+ content_payload = (
+ '[{"name": "flag_for_human_review", '
+ '"parameters": {"sku": "SKU-Q1", "name": "qwen3 測試品", '
+ '"concern": "信心不足", "confidence": 0.45}}]'
+ )
+ fake_body = {
+ "message": {"role": "assistant", "content": content_payload, "tool_calls": []},
+ "prompt_eval_count": 100,
+ "eval_count": 30,
+ }
+ monkeypatch.setattr(module.requests, "post", lambda *a, **kw: _FakeResp(fake_body))
+
+ dispatcher = module.NemotronDispatcher()
+ calls = _patch_execution_methods(monkeypatch, dispatcher)
+ monkeypatch.setattr(
+ dispatcher, "_call_nim",
+ lambda threats: (_ for _ in ()).throw(AssertionError("NIM 不應被呼叫")),
+ )
+
+ result = dispatcher.dispatch([FakeThreat(confidence=0.45)], hermes_stats={"duration_sec": 1.0})
+
+ assert result["dispatched"] == 1
+ assert calls and calls[0]["kind"] == "human_review"
+
+
+# ─────────────────────────────────────────────────────────────
+# T3. tool_calls + content 同時存在 → 優先 tool_calls
+# ─────────────────────────────────────────────────────────────
+def test_qwen3_tool_calls_takes_precedence_over_content(monkeypatch):
+ import services.nemoton_dispatcher_service as module
+
+ _enable_qwen3_path(monkeypatch, module)
+
+ fake_body = {
+ "message": {
+ "role": "assistant",
+ "content": '[{"name": "flag_for_human_review", "parameters": {"sku": "X"}}]',
+ "tool_calls": [
+ {
+ "function": {
+ "name": "trigger_price_alert",
+ "arguments": {
+ "sku": "SKU-Q1",
+ "name": "qwen3 測試品",
+ "gap_pct": 22.4,
+ "sales_delta": -35.0,
+ "action": "降價",
+ "confidence": 0.85,
+ },
+ }
+ }
+ ],
+ },
+ "prompt_eval_count": 200,
+ "eval_count": 40,
+ }
+ monkeypatch.setattr(module.requests, "post", lambda *a, **kw: _FakeResp(fake_body))
+
+ dispatcher = module.NemotronDispatcher()
+ calls = _patch_execution_methods(monkeypatch, dispatcher)
+ result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})
+
+ assert result["dispatched"] == 1
+ assert calls[0]["kind"] == "price_alert", "tool_calls 結構必須優先於 content fallback"
+
+
+# ─────────────────────────────────────────────────────────────
+# T4. qwen3 連線失敗 → 不爆,自動 fallback 到 NIM
+# ─────────────────────────────────────────────────────────────
+def test_qwen3_connection_error_falls_back_to_nim(monkeypatch):
+ """GCP Ollama 連不上時,dispatcher 應靜默改走 NIM,最終仍能 dispatch"""
+ import requests
+ import services.nemoton_dispatcher_service as module
+
+ _enable_qwen3_path(monkeypatch, module)
+
+ def _boom(*a, **kw):
+ raise requests.ConnectionError("GCP unreachable")
+
+ monkeypatch.setattr(module.requests, "post", _boom)
+
+ # NIM 路徑:給 valid key + quota,且 mock _call_nim 回傳 1 個 tool_call
+ monkeypatch.setattr(module, "NIM_API_KEY", "fake-key")
+ monkeypatch.setattr(module, "_check_nim_quota", lambda: True)
+
+ dispatcher = module.NemotronDispatcher()
+ calls = _patch_execution_methods(monkeypatch, dispatcher)
+ nim_invoked = {"v": False}
+
+ def _fake_nim(threats):
+ nim_invoked["v"] = True
+ return (
+ [{
+ "tool": "trigger_price_alert",
+ "args": {
+ "sku": "SKU-Q1", "name": "qwen3 測試品",
+ "gap_pct": 22.4, "sales_delta": -35.0,
+ "action": "降價", "confidence": 0.85,
+ },
+ }],
+ {"total_tokens": 256, "quota_used": 5},
+ )
+
+ monkeypatch.setattr(dispatcher, "_call_nim", _fake_nim)
+
+ result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})
+
+ assert nim_invoked["v"] is True, "qwen3 失敗後必須 fallback 至 NIM"
+ assert result["dispatched"] == 1
+ assert result["nim_stats"].get("total_tokens") == 256
+ assert calls[0]["kind"] == "price_alert"
+
+
+# ─────────────────────────────────────────────────────────────
+# T5. qwen3 + NIM 全失敗 → ADR-004 Hermes 規則引擎兜底
+# ─────────────────────────────────────────────────────────────
+def test_qwen3_and_nim_both_fail_falls_back_to_hermes_rules(monkeypatch):
+ """雙路全爆時必須走 Hermes 規則引擎,並保留 🟡 [規則引擎] 標記"""
+ import requests
+ import services.nemoton_dispatcher_service as module
+
+ _enable_qwen3_path(monkeypatch, module)
+ monkeypatch.setattr(module.requests, "post",
+ lambda *a, **kw: (_ for _ in ()).throw(requests.ConnectionError("qwen3 down")))
+
+ monkeypatch.setattr(module, "NIM_API_KEY", "fake-key")
+ monkeypatch.setattr(module, "_check_nim_quota", lambda: True)
+
+ dispatcher = module.NemotronDispatcher()
+
+ # 攔 _call_nim 也擲 timeout
+ monkeypatch.setattr(
+ dispatcher, "_call_nim",
+ lambda threats: (_ for _ in ()).throw(requests.Timeout("NIM timeout")),
+ )
+
+ # 攔住規則引擎內部呼叫的 _exec_*,記錄 concern / reason 文字驗證 🟡 標記
+ # 規則引擎部分 _exec_* 用 positional args(line 787-795 _exec_trigger_price_alert
+ # 簽名: sku, name, gap_pct, sales_delta, action, confidence, ...),
+ # record helper 必須把 positional 與 keyword 合併才能 .get('action')。
+ captured = []
+
+ def _merge_positional(name_order, args, kwargs):
+ merged = dict(kwargs)
+ for i, val in enumerate(args):
+ if i < len(name_order):
+ merged.setdefault(name_order[i], val)
+ return merged
+
+ def record_review(*args, **kwargs):
+ merged = _merge_positional(
+ ['sku', 'name', 'concern', 'confidence', 'footprint',
+ 'momo_price', 'comp_price', 'gap_pct', 'sales_delta',
+ 'revenue_loss_7d', 'recommended_price'],
+ args, kwargs)
+ captured.append(("human_review", merged))
+
+ def record_alert(*args, **kwargs):
+ merged = _merge_positional(
+ ['sku', 'name', 'gap_pct', 'sales_delta', 'action', 'confidence',
+ 'momo_price', 'comp_price', 'footprint',
+ 'revenue_loss_7d', 'recommended_price'],
+ args, kwargs)
+ captured.append(("price_alert", merged))
+
+ def record_reco(*args, **kwargs):
+ captured.append(("recommendation", kwargs))
+
+ monkeypatch.setattr(dispatcher, "_exec_flag_for_human_review", record_review)
+ monkeypatch.setattr(dispatcher, "_exec_trigger_price_alert", record_alert)
+ monkeypatch.setattr(dispatcher, "_exec_add_to_recommendation", record_reco)
+
+ # gap_pct=22.4 + risk=HIGH → 規則 2:trigger_price_alert,action 應有 🟡 [規則引擎] 前綴
+ result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})
+
+ assert result["nim_stats"].get("degraded") is True, "ADR-004 降級旗標必須存在"
+ assert captured, "規則引擎必須兜底執行至少一次"
+ kind, kwargs = captured[0]
+ assert kind == "price_alert"
+ assert "🟡 [規則引擎]" in kwargs.get("action", ""), \
+ "ADR-004 鐵律:Hermes 規則引擎兜底時必須帶『🟡 [規則引擎]』標記"
+ # footprint 也應帶 🟡 [降級模式 ADR-004] 標記(給 Telegram 告警頭顯示)
+ assert "🟡 [降級模式 ADR-004]" in kwargs.get("footprint", "")
+
+
+# ─────────────────────────────────────────────────────────────
+# T6. feature flag 預設 false → 戰前行為,qwen3 完全不被呼叫
+# ─────────────────────────────────────────────────────────────
+def test_flag_default_false_preserves_pre_war_behavior(monkeypatch):
+ """NEMOTRON_OLLAMA_FIRST 預設 false 時:dispatch 不應觸碰 GCP Ollama,
+ nim_stats 不可帶 provider='gcp_ollama'。"""
+ import services.nemoton_dispatcher_service as module
+
+ # 不打開 flag(預設值),但安全起見明確 set false
+ monkeypatch.setattr(module, "NEMOTRON_OLLAMA_FIRST", False)
+
+ # 任何呼叫 requests.post 都視為錯誤(戰前 NIM 路徑會被 _call_nim mock 接走)
+ qwen3_post_called = {"v": False}
+
+ def _maybe_post(*a, **kw):
+ qwen3_post_called["v"] = True
+ raise AssertionError("flag=false 時不可呼叫 GCP Ollama HTTP")
+
+ monkeypatch.setattr(module.requests, "post", _maybe_post)
+
+ monkeypatch.setattr(module, "NIM_API_KEY", "fake-key")
+ monkeypatch.setattr(module, "_check_nim_quota", lambda: True)
+
+ dispatcher = module.NemotronDispatcher()
+ calls = _patch_execution_methods(monkeypatch, dispatcher)
+ monkeypatch.setattr(
+ dispatcher, "_call_nim",
+ lambda threats: (
+ [{
+ "tool": "trigger_price_alert",
+ "args": {
+ "sku": "SKU-Q1", "name": "qwen3 測試品",
+ "gap_pct": 22.4, "sales_delta": -35.0,
+ "action": "降價", "confidence": 0.85,
+ },
+ }],
+ {"total_tokens": 100, "quota_used": 1},
+ ),
+ )
+
+ result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})
+
+ assert qwen3_post_called["v"] is False
+ assert result["dispatched"] == 1
+ assert result["nim_stats"].get("provider") in (None, "nim"), \
+ "flag=false 時 nim_stats 不應帶 provider='gcp_ollama'"
+
+
+# ─────────────────────────────────────────────────────────────
+# T7. 共用 helper 純單元測試(OpenAI tool_calls schema 邊界)
+# ─────────────────────────────────────────────────────────────
+def test_parse_tool_calls_struct_handles_string_arguments():
+ """NIM 回 arguments 是 JSON 字串、qwen3 回 dict — 兩者都得接住"""
+ from services.nemoton_dispatcher_service import _parse_tool_calls_struct
+
+ # NIM 風格(arguments 是 JSON 字串)
+ nim_style = [{"function": {"name": "foo", "arguments": '{"a": 1, "b": "x"}'}}]
+ out_nim = _parse_tool_calls_struct(nim_style)
+ assert out_nim == [{"tool": "foo", "args": {"a": 1, "b": "x"}}]
+
+ # qwen3/Ollama 風格(arguments 已是 dict)
+ qwen_style = [{"function": {"name": "bar", "arguments": {"a": 2}}}]
+ out_qwen = _parse_tool_calls_struct(qwen_style)
+ assert out_qwen == [{"tool": "bar", "args": {"a": 2}}]
+
+ # 邊界:空 / 壞 JSON / 缺 name → 不爆,回空或忽略
+ assert _parse_tool_calls_struct([]) == []
+ assert _parse_tool_calls_struct(None) == []
+ bad = [{"function": {"name": "baz", "arguments": "{not json"}}]
+ out_bad = _parse_tool_calls_struct(bad)
+ assert out_bad == [{"tool": "baz", "args": {}}]
+ no_name = [{"function": {"arguments": "{}"}}]
+ assert _parse_tool_calls_struct(no_name) == []
+
+
+def test_parse_content_fallback_handles_various_shapes():
+ from services.nemoton_dispatcher_service import _parse_content_fallback
+
+ # OpenAI 老風格 [{"name", "parameters"}]
+ out1 = _parse_content_fallback('[{"name": "foo", "parameters": {"a": 1}}]')
+ assert out1 == [{"tool": "foo", "args": {"a": 1}}]
+
+ # 帶 function 嵌套
+ out2 = _parse_content_fallback('[{"function": {"name": "bar"}, "arguments": "{\\"b\\": 2}"}]')
+ assert out2 == [{"tool": "bar", "args": {"b": 2}}]
+
+ # 非 list / 非 JSON / 空字串 → []
+ assert _parse_content_fallback("") == []
+ assert _parse_content_fallback("not json") == []
+ assert _parse_content_fallback('{"a":1}') == []
diff --git a/tests/test_openclaw_daily_template.py b/tests/test_openclaw_daily_template.py
new file mode 100644
index 0000000..b03fa5e
--- /dev/null
+++ b/tests/test_openclaw_daily_template.py
@@ -0,0 +1,212 @@
+"""
+tests/test_openclaw_daily_template.py
+─────────────────────────────────────────────────────────────────
+Operation Ollama-First v5.0 / Phase 3 / A8 — 日報模板路由測試
+
+驗證面:
+ T1. flag=false(預設)→ 走 _legacy_full_gemini_daily_report(regression)
+ T2. flag=true → 走 _generate_daily_report_hermes_template
+ T3. _compute_daily_kpi 各 KPI 函數可獨立 mock 測(DB 失敗回安全預設)
+ T4. _render_daily_template_v2 缺欄位優雅降級(_SafeUndefined 不 raise)
+ T5. _SafeUndefined 對 'X.Y.Z' 巢狀存取不爆
+
+紀律:
+ - 不打真實 DB / Gemini API
+ - 不寫 ai_insights
+ - 不發 Telegram
+"""
+
+import os
+from datetime import date, datetime
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# Fixtures
+# ═══════════════════════════════════════════════════════════════════════════
+
+@pytest.fixture(autouse=True)
+def _reset_flag(monkeypatch):
+ """每個 test 前清環境變數,避免互相污染"""
+ monkeypatch.delenv('OPENCLAW_DAILY_HERMES_TEMPLATE', raising=False)
+ yield
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# T1+T2 — Routing
+# ═══════════════════════════════════════════════════════════════════════════
+
+class TestRouting:
+
+ def test_flag_false_routes_to_legacy(self, monkeypatch):
+ """flag=false → _legacy_full_gemini_daily_report 被呼叫"""
+ monkeypatch.setenv('OPENCLAW_DAILY_HERMES_TEMPLATE', 'false')
+ import importlib
+ import services.openclaw_strategist_service as svc
+ importlib.reload(svc)
+
+ legacy_called = {'v': False}
+ hermes_called = {'v': False}
+
+ def mock_legacy():
+ legacy_called['v'] = True
+ return {'status': 'ok', 'mode': 'legacy'}
+
+ def mock_hermes():
+ hermes_called['v'] = True
+ return {'status': 'ok', 'mode': 'hermes_template'}
+
+ monkeypatch.setattr(svc, '_legacy_full_gemini_daily_report', mock_legacy)
+ monkeypatch.setattr(svc, '_generate_daily_report_hermes_template', mock_hermes)
+
+ svc.generate_daily_report()
+
+ assert legacy_called['v'] is True, "flag=false 必須走 legacy 路徑"
+ assert hermes_called['v'] is False, "flag=false 不可走 hermes 模板"
+
+ def test_flag_true_routes_to_hermes_template(self, monkeypatch):
+ """flag=true → _generate_daily_report_hermes_template 被呼叫"""
+ monkeypatch.setenv('OPENCLAW_DAILY_HERMES_TEMPLATE', 'true')
+ import importlib
+ import services.openclaw_strategist_service as svc
+ importlib.reload(svc)
+
+ legacy_called = {'v': False}
+ hermes_called = {'v': False}
+
+ monkeypatch.setattr(svc, '_legacy_full_gemini_daily_report',
+ lambda: legacy_called.update(v=True) or {'status': 'ok'})
+ monkeypatch.setattr(svc, '_generate_daily_report_hermes_template',
+ lambda: hermes_called.update(v=True) or {'status': 'ok'})
+
+ svc.generate_daily_report()
+
+ assert hermes_called['v'] is True, "flag=true 必須走 hermes 模板路徑"
+ assert legacy_called['v'] is False, "flag=true 不可走 legacy"
+
+ def test_flag_default_is_false(self, monkeypatch):
+ """無 env 設定時 → 預設 false(戰前行為)"""
+ # 不 set env
+ import importlib
+ import services.openclaw_strategist_service as svc
+ importlib.reload(svc)
+
+ assert svc._daily_hermes_template_enabled() is False
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# T3 — KPI 計算(DB 失敗安全降級)
+# ═══════════════════════════════════════════════════════════════════════════
+
+class TestKPIComputation:
+
+ def test_compute_daily_kpi_invalid_date_raises(self):
+ import services.openclaw_strategist_service as svc
+ with pytest.raises(TypeError):
+ svc._compute_daily_kpi("not-a-date")
+
+ def test_revenue_kpi_returns_safe_default_on_db_error(self, monkeypatch):
+ """DB 異常時 _query_revenue_kpi 回零(不拋 exception)"""
+ import services.openclaw_strategist_service as svc
+
+ class _BrokenSession:
+ def execute(self, *a, **kw):
+ raise RuntimeError('DB connection lost')
+ def close(self):
+ pass
+
+ monkeypatch.setattr(svc, 'get_session', lambda: _BrokenSession())
+
+ result = svc._query_revenue_kpi(date(2026, 5, 3))
+
+ assert result['today'] == 0.0
+ assert result['dod_pct'] == 0.0
+ assert result['wow_pct'] == 0.0
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# T4+T5 — Template 渲染與缺欄位優雅降級
+# ═══════════════════════════════════════════════════════════════════════════
+
+class TestTemplateRendering:
+
+ def test_render_with_full_context_succeeds(self):
+ import services.openclaw_strategist_service as svc
+
+ context = {
+ 'today': '2026年05月02日',
+ 'weekday': '週五',
+ 'revenue': {
+ 'today': 1234567.0,
+ 'yesterday': 1100000.0,
+ 'avg_7d': 1050000.0,
+ 'dod_pct': 12.2,
+ 'wow_pct': 17.6,
+ },
+ 'orders': {
+ 'today_rows': 234,
+ 'today_sku': 187,
+ 'avg_value_today': 5276.0,
+ },
+ 'top_skus': [
+ {'name': 'SKU-A', 'qty': 50, 'revenue': 100000},
+ {'name': 'SKU-B', 'qty': 32, 'revenue': 80000},
+ ],
+ 'price_gaps': [
+ {'sku_name': '商品X', 'momo_price': 1200, 'comp_price': 980,
+ 'gap_pct': 22.4, 'competitor': 'PChome'},
+ ],
+ 'inventory_alerts': [],
+ 'priority_actions': ['對 SKU-A 啟動 EA 流程', '觀察 PChome 補貼'],
+ 'gemini_insight': '今日營收強勁成長,建議加碼家電促銷檔期。',
+ }
+
+ rendered = svc._render_daily_template_v2(context)
+
+ assert '2026年05月02日' in rendered
+ assert '週五' in rendered
+ assert 'NT$1,234,567' in rendered
+ assert 'SKU-A' in rendered
+ assert '商品X' in rendered
+ assert 'PChome' in rendered
+ assert '今日營收強勁成長' in rendered
+
+ def test_render_with_missing_fields_does_not_raise(self):
+ """_SafeUndefined 對缺欄位回 — 不拋 UndefinedError"""
+ import services.openclaw_strategist_service as svc
+
+ context = {
+ 'today': '2026年05月02日',
+ 'weekday': '週五',
+ 'revenue': {'today': 0.0, 'dod_pct': 0.0, 'wow_pct': 0.0},
+ 'orders': {}, # 空 dict
+ 'top_skus': [],
+ 'price_gaps': [],
+ 'inventory_alerts': [],
+ 'priority_actions': [],
+ 'gemini_insight': '',
+ }
+
+ # 不 raise 即過
+ rendered = svc._render_daily_template_v2(context)
+
+ assert isinstance(rendered, str)
+ assert len(rendered) > 0
+ # 缺欄位該降級為 — 或預設值
+ assert '今日無熱銷資料' in rendered or '✅' in rendered
+
+ def test_safe_undefined_nested_access(self):
+ """_SafeUndefined 對 'X.Y.Z' 巢狀存取不爆"""
+ import services.openclaw_strategist_service as svc
+
+ # 完全無 'revenue' 也不該 raise
+ context = {
+ 'today': '2026年05月02日',
+ 'weekday': '週五',
+ # 故意省略 revenue / orders / top_skus 等
+ }
+ rendered = svc._render_daily_template_v2(context)
+ assert isinstance(rendered, str)
+ assert '2026年05月02日' in rendered
diff --git a/tests/test_openclaw_qa_golden_set.py b/tests/test_openclaw_qa_golden_set.py
new file mode 100644
index 0000000..b6e7962
--- /dev/null
+++ b/tests/test_openclaw_qa_golden_set.py
@@ -0,0 +1,286 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+tests/test_openclaw_qa_golden_set.py
+OpenClaw Q&A 黃金集 A/B 對照框架
+(Operation Ollama-First v5.0 — Phase 3, A7 fullstack-engineer)
+
+目的:
+ 在統帥盲測前,先建立 Ollama qwen3:14b vs Gemini 2.5 Flash 的「量化基線」。
+ 10 題典型 momo 商業 Q&A,雙模型各跑一次,比對:
+ - 簡體字污染數量(A2 黃燈警訊核心)
+ - 回應長度
+ - 結構性指標(行數、列點數)
+ - 拒答訊號
+ - 黃金關鍵字命中率(題目自帶 expect_keywords)
+
+執行:
+ RUN_GOLDEN_SET=1 pytest tests/test_openclaw_qa_golden_set.py -v -s
+ # GCP 還沒拉 qwen3:14b 之前,預設 SKIP(避免 CI 紅燈)
+
+紀律:
+ - PII 紀律:題目/答案無真實 chat_id / username / 身份證 / 手機,全部去識別化
+ - 不對「正確性」做 hard assert;本框架專做「品質量化基線」收集
+ - 報告印到 stdout(pytest -s 顯示),人工檢視,不卡 CI
+"""
+
+import json
+import os
+import sys
+import time
+from typing import Dict, List, Optional
+
+import pytest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 啟用條件:須三條件齊備才實跑
+# 1. RUN_GOLDEN_SET=1
+# 2. OPENCLAW_QA_OLLAMA_HOST 可達
+# 3. GEMINI_API_KEY 已設
+# 否則 SKIP。
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _ollama_reachable(host: str, timeout: float = 2.0) -> bool:
+ try:
+ import requests
+ r = requests.get(f"{host.rstrip('/')}/api/version", timeout=timeout)
+ return r.status_code == 200
+ except Exception:
+ return False
+
+
+def _ollama_has_model(host: str, model: str, timeout: float = 3.0) -> bool:
+ """檢查 Ollama 主機是否已 pull 指定模型。"""
+ try:
+ import requests
+ r = requests.get(f"{host.rstrip('/')}/api/tags", timeout=timeout)
+ if r.status_code != 200:
+ return False
+ tags = r.json().get('models', [])
+ return any(m.get('name', '').startswith(model.split(':')[0]) for m in tags)
+ except Exception:
+ return False
+
+
+_RUN_GOLDEN = os.getenv('RUN_GOLDEN_SET', '0') == '1'
+_HOST = os.getenv('OPENCLAW_QA_OLLAMA_HOST', os.getenv('OLLAMA_HOST_PRIMARY', 'http://34.143.170.20:11434'))
+_MODEL = os.getenv('OPENCLAW_QA_OLLAMA_MODEL', 'qwen3:14b')
+_HAS_GEMINI = bool(os.getenv('GEMINI_API_KEY'))
+
+pytestmark = pytest.mark.skipif(
+ not _RUN_GOLDEN,
+ reason="黃金集需要 RUN_GOLDEN_SET=1 + GCP qwen3:14b ready + GEMINI_API_KEY;統帥盲測前才跑",
+)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 黃金集(10 題;全部去 PII;情境取自 momo-pro 真實 Telegram 互動模式)
+# ─────────────────────────────────────────────────────────────────────────────
+
+GOLDEN_SET: List[Dict] = [
+ {
+ "id": "g01_weekly_trend",
+ "question": "本週 momo 業績趨勢如何?跟上週比?",
+ "expect_keywords": ["業績", "週", "成長"],
+ "category": "業績趨勢",
+ },
+ {
+ "id": "g02_competitor_threat",
+ "question": "PChome 最近在 3C 類有發動補貼戰嗎?對我們影響?",
+ "expect_keywords": ["PChome", "3C"],
+ "category": "競品威脅",
+ },
+ {
+ "id": "g03_pricing_strategy",
+ "question": "我有一支 SKU 比競品貴 8%,銷量持續下滑,該怎麼辦?",
+ "expect_keywords": ["定價", "競品"],
+ "category": "定價策略",
+ },
+ {
+ "id": "g04_seasonal",
+ "question": "母親節檔期快到了,建議哪些品類加碼?",
+ "expect_keywords": ["母親節", "品類"],
+ "category": "季節機會",
+ },
+ {
+ "id": "g05_command_routing",
+ "question": "我想看完整週報怎麼下指令?",
+ "expect_keywords": ["weekly", "週報"],
+ "category": "指令導引",
+ },
+ {
+ "id": "g06_top_threats",
+ "question": "目前 TOP 5 最緊急的競價威脅是哪些?",
+ "expect_keywords": ["威脅", "TOP"],
+ "category": "威脅清單",
+ },
+ {
+ "id": "g07_inventory_signal",
+ "question": "如何判斷某 SKU 該促銷出清?",
+ "expect_keywords": ["促銷", "出清"],
+ "category": "庫存決策",
+ },
+ {
+ "id": "g08_cross_category",
+ "question": "家電 vs 生活雜貨,哪個品類本月成長動能比較強?",
+ "expect_keywords": ["家電", "成長"],
+ "category": "品類比較",
+ },
+ {
+ "id": "g09_data_unavailable",
+ "question": "幫我看 2030 年的銷售預測。",
+ "expect_keywords": ["資料", "無法"], # 期待模型誠實回應「資料不足」而非編造
+ "category": "資料邊界",
+ },
+ {
+ "id": "g10_action_item",
+ "question": "綜合本週數據,給我 3 個 48 小時內必做行動。",
+ "expect_keywords": ["行動", "建議"],
+ "category": "行動清單",
+ },
+]
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Scoring helpers
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _count_simplified(text: str) -> int:
+ """重用 strategist service 的簡體字 hint 集合計數。"""
+ from services.openclaw_strategist_service import _SIMPLIFIED_HINT_CHARS
+ return sum(1 for c in (text or '') if c in _SIMPLIFIED_HINT_CHARS)
+
+
+def _count_keyword_hits(text: str, keywords: List[str]) -> int:
+ if not text:
+ return 0
+ return sum(1 for kw in keywords if kw in text)
+
+
+def _is_refusal(text: str) -> bool:
+ from services.openclaw_strategist_service import _REFUSAL_PATTERNS
+ return any(p in (text or '') for p in _REFUSAL_PATTERNS)
+
+
+def _structure_score(text: str) -> Dict[str, int]:
+ """結構性量化指標。"""
+ if not text:
+ return {"lines": 0, "bullets": 0, "tables": 0}
+ return {
+ "lines": text.count('\n') + 1,
+ # 條列符號粗略偵測(含中文「、」「,」開頭的列點)
+ "bullets": sum(text.count(s) for s in ('- ', '• ', '* ', '1.', '2.', '3.')),
+ "tables": text.count('|'),
+ }
+
+
+def _score_response(qid: str, question: str, response: str, expect_kw: List[str]) -> Dict:
+ structure = _structure_score(response)
+ return {
+ "qid": qid,
+ "length": len(response or ''),
+ "simplified_count": _count_simplified(response),
+ "keyword_hits": _count_keyword_hits(response, expect_kw),
+ "is_refusal": _is_refusal(response),
+ "lines": structure["lines"],
+ "bullets": structure["bullets"],
+ "tables": structure["tables"],
+ "preview": (response or '')[:120].replace('\n', ' / '),
+ }
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Caller wrappers (使用 service 的真實函式)
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _call_ollama(question: str) -> Optional[str]:
+ from services.openclaw_strategist_service import _call_qwen3_qa
+ return _call_qwen3_qa(question, None, f"golden-{int(time.time())}")
+
+
+def _call_gemini_baseline(question: str) -> Optional[str]:
+ from services.openclaw_strategist_service import _call_gemini
+ system_prompt = (
+ "你是 MOMO Pro 電商情報策略師「OpenClaw」。以繁體中文(台灣用語)回覆使用者。"
+ "嚴禁簡體字。回覆長度控制在 500 字內,可用 Markdown 條列。"
+ )
+ return _call_gemini(system_prompt, question, temperature=0.5, caller="openclaw_qa_golden")
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Tests
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_environment_ready():
+ """sanity check:跑黃金集前確認 GCP host + model + Gemini key 都 ready。"""
+ assert _ollama_reachable(_HOST), f"Ollama 主機不可達:{_HOST}"
+ assert _ollama_has_model(_HOST, _MODEL), (
+ f"GCP Ollama 尚未拉 {_MODEL}(請於 Phase 8 由 A1 完成 ollama pull)"
+ )
+ assert _HAS_GEMINI, "GEMINI_API_KEY 未設"
+
+
+def test_golden_set_ab_comparison(capsys):
+ """跑 10 題雙模型 A/B 對照,量化指標印到 stdout。
+
+ 本測試不對「正確性」做 hard assert;目的是給統帥盲測前的「品質量化基線」。
+ 僅 hard assert:
+ - 雙模型至少都有回應(非全 None)
+ - Gemini baseline 簡體字數量 == 0(baseline 不該污染)
+ """
+ # 啟用 flag 讓 _call_qwen3_qa 走真實邏輯
+ os.environ['OPENCLAW_QA_OLLAMA_FIRST'] = 'true'
+
+ rows = []
+ for item in GOLDEN_SET:
+ qid = item['id']
+ question = item['question']
+ kws = item['expect_keywords']
+
+ ollama_resp = _call_ollama(question)
+ gemini_resp = _call_gemini_baseline(question)
+
+ rows.append({
+ 'qid': qid,
+ 'category': item['category'],
+ 'question': question,
+ 'ollama': _score_response(qid, question, ollama_resp or '', kws),
+ 'gemini': _score_response(qid, question, gemini_resp or '', kws),
+ })
+
+ # 列印量化基線(pytest -s 才看得到)
+ print("\n" + "=" * 100)
+ print("OpenClaw QA 黃金集 A/B 量化基線(Ollama qwen3:14b vs Gemini 2.5 Flash)")
+ print("=" * 100)
+ for r in rows:
+ print(f"\n[{r['qid']}] ({r['category']}) {r['question']}")
+ for side in ('ollama', 'gemini'):
+ s = r[side]
+ print(
+ f" {side:>7}: len={s['length']:>4} simp={s['simplified_count']:>2} "
+ f"kw={s['keyword_hits']}/{len(GOLDEN_SET[0]['expect_keywords'])} "
+ f"lines={s['lines']:>2} refusal={s['is_refusal']}"
+ )
+ print(f" preview: {s['preview']}")
+
+ # 匯出 JSON 給後續分析
+ out_path = os.path.join(os.path.dirname(__file__), 'logs', 'qa_golden_baseline.json')
+ os.makedirs(os.path.dirname(out_path), exist_ok=True)
+ with open(out_path, 'w', encoding='utf-8') as f:
+ json.dump(rows, f, ensure_ascii=False, indent=2)
+ print(f"\n基線已存:{out_path}")
+
+ # Hard assertions(最少安全網)
+ ollama_responded = sum(1 for r in rows if r['ollama']['length'] > 0)
+ gemini_responded = sum(1 for r in rows if r['gemini']['length'] > 0)
+ assert ollama_responded >= 8, f"Ollama 回應率過低:{ollama_responded}/10"
+ assert gemini_responded >= 9, f"Gemini 回應率過低:{gemini_responded}/10"
+
+ # Gemini baseline 不該有簡體污染(用以驗證測量本身正確)
+ for r in rows:
+ assert r['gemini']['simplified_count'] == 0, (
+ f"Gemini baseline 簡體污染(指標可能誤判):{r['qid']} {r['gemini']['preview']}"
+ )
diff --git a/tests/test_openclaw_qa_routing.py b/tests/test_openclaw_qa_routing.py
new file mode 100644
index 0000000..70368cd
--- /dev/null
+++ b/tests/test_openclaw_qa_routing.py
@@ -0,0 +1,358 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+tests/test_openclaw_qa_routing.py
+OpenClaw Q&A 路由 + 品質守門 unit tests
+(Operation Ollama-First v5.0 — Phase 3, A7 fullstack-engineer)
+
+涵蓋:
+ - feature flag OPENCLAW_QA_OLLAMA_FIRST=false → 走 Gemini-first(regression test)
+ - flag=true + 高品質 Ollama 回應 → 直接回 Ollama 結果,不走 Gemini
+ - flag=true + 低品質 Ollama 回應 → 升級至 Gemini,並標 fallback_to=openclaw_qa_gemini_fallback
+ - flag=true + Ollama 呼叫失敗 → 升級至 Gemini
+ - _is_low_quality_response 各規則:空字串 / 長度過短 / 簡體污染 / 拒答 / 流水帳
+
+執行:
+ pytest tests/test_openclaw_qa_routing.py -v
+"""
+
+import os
+import sys
+import time
+from typing import Any, Dict, Optional
+
+import pytest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import services.openclaw_strategist_service as svc
+import services.ai_call_logger as logger_mod
+from services.ai_call_logger import _reset_kill_switch
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Fixtures
+# ─────────────────────────────────────────────────────────────────────────────
+
+@pytest.fixture(autouse=True)
+def reset_state(monkeypatch):
+ """每個測試重置 logger kill-switch + stub DB 寫入收集 ai_calls 紀錄。"""
+ _reset_kill_switch()
+ captured = []
+
+ def fake_write(state):
+ captured.append({
+ 'caller': state.caller,
+ 'provider': state.provider,
+ 'model': state.model,
+ 'status': state.status,
+ 'fallback_to': state.fallback_to,
+ 'error': state.error,
+ 'meta': dict(state.meta),
+ 'request_id': state.request_id,
+ })
+
+ monkeypatch.setattr(logger_mod, '_write_to_db', fake_write)
+ monkeypatch.setenv('AI_CALL_LOGGING_ENABLED', 'true')
+ # 預設 flag=false(戰前行為)
+ monkeypatch.delenv('OPENCLAW_QA_OLLAMA_FIRST', raising=False)
+ yield captured
+
+
+def _wait_async(captured, n=1, timeout=2.0):
+ deadline = time.time() + timeout
+ while time.time() < deadline:
+ if len(captured) >= n:
+ return True
+ time.sleep(0.01)
+ return False
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 1. _is_low_quality_response 純函式規則
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestLowQualityRules:
+
+ def test_empty_string_is_low_quality(self):
+ assert svc._is_low_quality_response("") is True
+ assert svc._is_low_quality_response(None) is True
+ assert svc._is_low_quality_response(" \n ") is True
+
+ def test_too_short_is_low_quality(self):
+ # 長度 < 50 字元 → 低品質
+ assert svc._is_low_quality_response("你好,我是 OpenClaw") is True
+
+ def test_acceptable_response_passes(self):
+ good = (
+ "本週 momo 業績較上週成長 12%,主要受惠於家電與生活雜貨。\n"
+ "建議:持續關注 PChome 競價動態,必要時調整定價策略。\n"
+ "預估下週 momo 仍有 5-8% 成長空間。"
+ )
+ assert svc._is_low_quality_response(good) is False
+
+ def test_simplified_pollution_detected(self):
+ # 句中含 >= 3 個簡體字 hint → 低品質(Qwen 繁中短板核心檢查)
+ polluted = (
+ "本周业绩比上周增长,您可以关注这个产品的价格变动趋势,"
+ "我们建议处理掉滞销库存以提高资产效率"
+ )
+ assert svc._is_low_quality_response(polluted) is True
+
+ def test_two_simplified_chars_still_acceptable(self):
+ # 只有 2 個簡體 hint 字(边界以下)+ 結構良好 → 仍可接受
+ # (避免過度敏感誤殺正常繁體回覆中混入零星簡體字的情境)
+ text = (
+ "本週 momo 业绩成長明顯,建議持續關注競品動向。\n"
+ "重點品類:家電、3C、生活雜貨。\n"
+ "下週可加碼促銷檔期。"
+ )
+ assert svc._is_low_quality_response(text) is False
+
+ def test_refusal_pattern_detected(self):
+ for refusal in ['無法回答', '我不知道', '抱歉,我無法協助']:
+ text = f"關於這個問題,{refusal},請改問其他內容以便我協助您。"
+ assert svc._is_low_quality_response(text) is True, f"應被判定為拒答:{refusal}"
+
+ def test_flowing_text_no_breaks_is_low_quality(self):
+ # 200+ 字無換行 → 流水帳
+ text = "本週業績整體呈現上升趨勢。" * 20 # ~200+ 字
+ assert "\n" not in text
+ assert len(text) > 200
+ assert svc._is_low_quality_response(text) is True
+
+ def test_long_text_with_breaks_is_acceptable(self):
+ # 200+ 字但有適度斷行 → 結構良好
+ text = (
+ "本週業績整體呈現上升趨勢,主要驅動類別為家電與生活雜貨大類別。\n"
+ "競品動向:PChome 在 3C 類發動大規模補貼戰,預估壓縮我方 3 至 5 個百分點毛利率。\n"
+ "蝦皮也在母嬰用品加碼免運券促銷,需密切觀察跟降節奏,避免市占下滑。\n"
+ "建議行動:(1) 加碼家電促銷檔期,重點操作大尺寸電視與廚房家電,"
+ "(2) 觀察 PChome 補貼是否延續至下週,準備二段反擊方案,"
+ "(3) 對價差大於 5% 的 SKU 主動啟動 EA 流程,避免毛利持續流失。"
+ )
+ assert len(text) > 200
+ assert svc._is_low_quality_response(text) is False
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 2. Routing:feature flag = false 時維持 Gemini-first 路徑(regression)
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestFlagOff:
+
+ def test_flag_false_routes_to_legacy(self, monkeypatch, reset_state):
+ """flag=false(預設)→ 不應該呼叫 _call_qwen3_qa,直接走 _legacy_gemini_first_qa。"""
+ monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'false')
+ legacy_called = {'count': 0}
+ ollama_called = {'count': 0}
+
+ def fake_legacy(q, ctx, request_id=None):
+ legacy_called['count'] += 1
+ return "[legacy gemini reply]"
+
+ def fake_ollama(q, ctx, rid):
+ ollama_called['count'] += 1
+ return "[should not be called]"
+
+ monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
+ monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)
+
+ result = svc.generate_strategy_response("本週業績如何?")
+ assert result == "[legacy gemini reply]"
+ assert legacy_called['count'] == 1
+ assert ollama_called['count'] == 0
+
+ def test_flag_unset_defaults_to_off(self, monkeypatch, reset_state):
+ """環境變數完全未設 → 預設 false → 走 legacy。"""
+ monkeypatch.delenv('OPENCLAW_QA_OLLAMA_FIRST', raising=False)
+ legacy_called = {'count': 0}
+
+ def fake_legacy(q, ctx, request_id=None):
+ legacy_called['count'] += 1
+ return "[legacy reply]"
+
+ monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
+ # 不 stub _call_qwen3_qa;如果意外被呼叫會打到真網路 → fail
+ result = svc.generate_strategy_response("競品分析")
+ assert legacy_called['count'] == 1
+ assert result == "[legacy reply]"
+
+ def test_empty_query_short_circuits(self, monkeypatch, reset_state):
+ """空 query 不應觸發任何 LLM 呼叫。"""
+ monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
+ legacy_called = {'count': 0}
+ ollama_called = {'count': 0}
+ monkeypatch.setattr(svc, '_legacy_gemini_first_qa',
+ lambda q, ctx, request_id=None: legacy_called.update({'count': legacy_called['count']+1}) or "")
+ monkeypatch.setattr(svc, '_call_qwen3_qa',
+ lambda q, ctx, rid: ollama_called.update({'count': ollama_called['count']+1}) or "")
+
+ out = svc.generate_strategy_response("")
+ assert "請輸入您的問題" in out
+ assert legacy_called['count'] == 0
+ assert ollama_called['count'] == 0
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 3. Routing:feature flag = true + Ollama 高/低品質
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestFlagOn:
+
+ def test_flag_true_high_quality_returns_ollama(self, monkeypatch, reset_state):
+ """flag=true + Ollama 回高品質 → 直接回 Ollama,不走 Gemini。"""
+ monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
+ legacy_called = {'count': 0}
+ good_reply = (
+ "本週 momo 業績成長 12%,主要驅動類別為家電。\n"
+ "建議:持續關注 PChome 競價並加碼家電促銷檔期。\n"
+ "下週預估仍有 5-8% 成長空間。"
+ )
+ monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: good_reply)
+
+ def fake_legacy(q, ctx, request_id=None):
+ legacy_called['count'] += 1
+ return "[gemini fallback]"
+
+ monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
+
+ out = svc.generate_strategy_response("本週業績如何?")
+ assert out == good_reply
+ assert legacy_called['count'] == 0 # Gemini 沒被呼叫
+
+ def test_flag_true_low_quality_falls_back_to_gemini(self, monkeypatch, reset_state):
+ """flag=true + Ollama 回低品質(簡體污染)→ fallback Gemini。"""
+ monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
+ bad_reply = "本周业绩增长,您可以关注这个产品的价格变动,我们建议处理库存"
+ legacy_called = {'count': 0}
+
+ monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: bad_reply)
+
+ def fake_legacy(q, ctx, request_id=None):
+ legacy_called['count'] += 1
+ return "[gemini high quality reply]"
+
+ monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
+
+ out = svc.generate_strategy_response("本週業績如何?")
+ assert out == "[gemini high quality reply]"
+ assert legacy_called['count'] == 1
+
+ def test_flag_true_ollama_returns_none_falls_back(self, monkeypatch, reset_state):
+ """flag=true + Ollama 呼叫失敗(回 None)→ fallback Gemini。"""
+ monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
+ legacy_called = {'count': 0}
+ monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: None)
+
+ def fake_legacy(q, ctx, request_id=None):
+ legacy_called['count'] += 1
+ return "[gemini reply after ollama down]"
+
+ monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
+
+ out = svc.generate_strategy_response("test")
+ assert out == "[gemini reply after ollama down]"
+ assert legacy_called['count'] == 1
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 4. _call_qwen3_qa: ai_call_logger 整合 + fallback_to 標記
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestCallQwen3Telemetry:
+
+ def test_qwen3_logs_ok_status_on_success(self, monkeypatch, reset_state):
+ """高品質回應 → ai_calls 應記 status=ok, caller=openclaw_qa, provider=gcp_ollama"""
+ captured = reset_state
+
+ class FakeResp:
+ status_code = 200
+ def raise_for_status(self): pass
+ def json(self):
+ return {
+ 'response': '本週 momo 業績成長 12%,建議加碼家電促銷。',
+ 'prompt_eval_count': 150,
+ 'eval_count': 60,
+ }
+
+ monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: FakeResp())
+
+ result = svc._call_qwen3_qa("本週業績?", None, "qa-test123")
+ assert result is not None
+ assert "業績成長" in result
+
+ assert _wait_async(captured, 1)
+ assert len(captured) == 1
+ rec = captured[0]
+ assert rec['caller'] == 'openclaw_qa'
+ assert rec['provider'] == 'gcp_ollama'
+ assert rec['model'] == svc.OPENCLAW_QA_OLLAMA_MODEL
+ assert rec['status'] == 'ok'
+ assert rec['fallback_to'] is None
+ assert rec['meta'].get('flag') == 'OPENCLAW_QA_OLLAMA_FIRST'
+ assert rec['request_id'] == "qa-test123"
+
+ def test_qwen3_logs_fallback_on_exception(self, monkeypatch, reset_state):
+ """Ollama 連線失敗 → ai_calls 應記 fallback_to=openclaw_qa_gemini_fallback + status=fallback"""
+ captured = reset_state
+
+ def boom(*a, **kw):
+ raise svc.requests.ConnectionError("connection refused")
+
+ monkeypatch.setattr(svc.requests, 'post', boom)
+
+ result = svc._call_qwen3_qa("test", None, "qa-fail123")
+ assert result is None
+
+ assert _wait_async(captured, 1)
+ rec = captured[0]
+ assert rec['status'] == 'fallback'
+ assert rec['fallback_to'] == 'openclaw_qa_gemini_fallback'
+ assert rec['error'] is not None
+ assert 'ConnectionError' in rec['error']
+
+ def test_qwen3_logs_fallback_on_empty_response(self, monkeypatch, reset_state):
+ """Ollama 回空 response → 視為 empty_response,標 fallback。"""
+ captured = reset_state
+
+ class FakeResp:
+ status_code = 200
+ def raise_for_status(self): pass
+ def json(self):
+ return {'response': '', 'prompt_eval_count': 100, 'eval_count': 0}
+
+ monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: FakeResp())
+
+ result = svc._call_qwen3_qa("test", None, "qa-empty")
+ assert result is None
+
+ assert _wait_async(captured, 1)
+ rec = captured[0]
+ assert rec['status'] == 'fallback'
+ assert rec['fallback_to'] == 'openclaw_qa_gemini_fallback'
+ assert rec['error'] == 'empty_response'
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 5. 環境變數讀取即時性(runtime toggle)
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestRuntimeToggle:
+
+ def test_flag_helper_reads_env_each_call(self, monkeypatch):
+ """_qa_ollama_first_enabled() 應每次重讀 env,允許 runtime 灰度切換。"""
+ monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'false')
+ assert svc._qa_ollama_first_enabled() is False
+
+ monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
+ assert svc._qa_ollama_first_enabled() is True
+
+ # 各種真值字串
+ for v in ('TRUE', 'True', '1', 'yes', 'on'):
+ monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', v)
+ assert svc._qa_ollama_first_enabled() is True, f"應視為 true: {v!r}"
+
+ for v in ('false', '0', 'no', 'off', '', 'foo'):
+ monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', v)
+ assert svc._qa_ollama_first_enabled() is False, f"應視為 false: {v!r}"