From 838267c293845f71188c56e66a5abada2be1dc73 Mon Sep 17 00:00:00 2001 From: OoO Date: Sun, 3 May 2026 23:05:38 +0800 Subject: [PATCH] =?UTF-8?q?feat(p1+p3):=20logger=20=E6=8E=A5=2013=20caller?= =?UTF-8?q?=20+=20Q&A/Nemotron/=E6=97=A5=E5=A0=B1=20feature=20flag=20?= =?UTF-8?q?=E7=81=B0=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 A4 — 13 個呼叫點接 ai_call_logger(覆蓋率 11.8% → 預估 50%+) - TOP-1 nemoton_dispatcher: nemotron_dispatch caller (NIM 配額追蹤) - TOP-2 openclaw_strategist: 4 reports (daily/weekly/monthly/meta) + qa caller - TOP-3 hermes_analyst: hermes_analyst + hermes_intent (順修 commit 00591c5 殘留 bug) - TOP-4 code_review_pipeline: code_review_hermes/openclaw/elephant 三鏈 (request_id 串) - TOP-5 openclaw_bot_routes: openclaw_bot_main/gemini/nim 三層 fallback Phase 3 A7 — OpenClaw Q&A → qwen3:14b(feature flag OFF) - OPENCLAW_QA_OLLAMA_FIRST 灰度開關 - 繁中強制 system prompt + Gemini fallback chain - _is_low_quality_response 品質守門(簡體字檢測 + 拒答訊號 + 結構分數) - 黃金集 A/B 對照測試框架(10 樣本去 PII) Phase 3 A8 — OpenClaw 日報 → Hermes 模板(feature flag OFF) - OPENCLAW_DAILY_HERMES_TEMPLATE 灰度開關 - _compute_daily_kpi 純 SQL + Hermes 規則引擎 - _compute_gemini_insight 精簡 200 字洞察 prompt - templates/daily_report_v2.j2 + _SafeUndefined 缺欄位優雅降級 - scripts/compare_daily_report_versions.py 雙版本盲測 Phase 3 A9 — Nemotron NIM → qwen3:14b(feature flag OFF) - NEMOTRON_OLLAMA_FIRST 灰度開關(A2 紅燈:deepseek-r1 假支援,改 qwen3) - _call_qwen3_dispatch + 既有 NIM tool_calls 解析共用 - 保留 ADR-004「🟡 [降級模式]」Hermes 規則引擎兜底 H6 PII fix — chat_id 進 ai_calls.meta 改 SHA1[:8](4 處 Bot Q&A) Code Review pipeline — N3 動態 provider tag(gcp/secondary/111)+ A4 logger 三鏈 37 unit tests 全綠(routing 15 + golden 5 + qwen3 8 + daily template 8 + nemotron 1) Operation Ollama-First v5.0 / Phase 1 A4 + Phase 3 A7+A8+A9 Co-Authored-By: Claude Opus 4.7 (1M context) --- routes/openclaw_bot_routes.py | 198 +++- scripts/compare_daily_report_versions.py | 101 +++ services/code_review_pipeline_service.py | 134 ++- 
services/hermes_analyst_service.py | 125 ++- services/nemoton_dispatcher_service.py | 423 +++++++-- services/openclaw_strategist_service.py | 1048 +++++++++++++++++++--- templates/daily_report_v2.j2 | 63 ++ tests/conftest.py | 15 + tests/test_nemotron_qwen3_compat.py | 456 ++++++++++ tests/test_openclaw_daily_template.py | 212 +++++ tests/test_openclaw_qa_golden_set.py | 286 ++++++ tests/test_openclaw_qa_routing.py | 358 ++++++++ 12 files changed, 3122 insertions(+), 297 deletions(-) create mode 100644 scripts/compare_daily_report_versions.py create mode 100644 templates/daily_report_v2.j2 create mode 100644 tests/conftest.py create mode 100644 tests/test_nemotron_qwen3_compat.py create mode 100644 tests/test_openclaw_daily_template.py create mode 100644 tests/test_openclaw_qa_golden_set.py create mode 100644 tests/test_openclaw_qa_routing.py diff --git a/routes/openclaw_bot_routes.py b/routes/openclaw_bot_routes.py index 5b84612..12e84a5 100644 --- a/routes/openclaw_bot_routes.py +++ b/routes/openclaw_bot_routes.py @@ -24,6 +24,7 @@ import os import json import re import threading +import hashlib # Operation Ollama-First v5.0 P1: H6 PII fix — chat_id 進 meta 改 hash[:8] from contextvars import ContextVar from contextlib import contextmanager import requests @@ -48,6 +49,7 @@ from services.openclaw_bot.telegram_api import ( send_photo, send_typing, ) +from services.ai_call_logger import log_ai_call # Operation Ollama-First v5.0 P1 from services.openclaw_bot.menu_keyboards import ( _BACK, _SUBMENUS, @@ -85,7 +87,17 @@ try: except ImportError: _PCHOME_AVAILABLE = False +# V-New: 引入 Ollama 探測機制 +try: + from services.ollama_service import resolve_ollama_host + _OLLAMA_AVAILABLE = True +except ImportError: + _OLLAMA_AVAILABLE = False + # AI 引擎:Gemini Flash(主,2~5秒)→ NIM(備援,45~90秒) +# LOCKED-GEMINI: PPT 簡報文案需長 context (5K+ rows + 多輪歷史) + 繁中商業敘事 +# Ollama qwen2.5-coder:7b 為 PPT 失敗時 L3 fallback(已在 _call_ollama 路徑) +# ADR-028 鎖定場景 #7 GEMINI_API_KEY = 
os.getenv('GEMINI_API_KEY', '') GEMINI_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta/models' GEMINI_MODEL = 'gemini-2.0-flash' @@ -2469,6 +2481,29 @@ def _ppt_ai_analysis(prompt_data: str, report_type: str = '') -> str: .get('content', {}).get('parts', [{}])[0] .get('text', '').strip()) + def _call_ollama(prompt: str, tokens: int) -> str: + if not _OLLAMA_AVAILABLE: + return "" + try: + host = resolve_ollama_host() + # 簡報分析使用 qwen2.5-coder:7b (已升級 GCP) 或 hermes3 + model = os.getenv('OPENCLAW_OLLAMA_MODEL', 'qwen2.5-coder:7b') + r = requests.post( + f"{host}/api/generate", + json={ + 'model': model, + 'prompt': prompt, + 'stream': False, + 'options': {'num_predict': tokens, 'temperature': 0.3} + }, + timeout=90 + ) + r.raise_for_status() + return r.json().get('response', '').strip() + except Exception as e: + sys_log.warning(f"[PPT] Ollama error: {e}") + return "" + if not NVIDIA_API_KEY: if GEMINI_API_KEY: try: @@ -2532,6 +2567,29 @@ def _ppt_ai_analysis(prompt_data: str, report_type: str = '') -> str: return result_text except Exception as e2: sys_log.error(f"[PPT] Gemini fallback error: {e2}") + + # ── Ollama (GCP/111) Final Fallback ─────────────────────── + if _OLLAMA_AVAILABLE: + try: + sys_log.info("[PPT] Trying local/GCP Ollama as final fallback") + raw = _call_ollama(f"{sys_instruction}\n\n--- 資料 ---\n{prompt_data}", max_tokens) + result_text = _clean_ai_text(raw) + if result_text and len(result_text) > 100: + if _LEARNING_ENABLED: + import threading as _thr + _thr.Thread( + target=store_insight, + kwargs={ + 'insight_type': report_type or 'analysis', + 'content': result_text, + 'period': datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d'), + }, + daemon=True + ).start() + return result_text + except Exception as e3: + sys_log.error(f"[PPT] Ollama final fallback error: {e3}") + return '(AI 分析暫時無法使用,請稍後重試)' @@ -6768,18 +6826,29 @@ def openclaw_answer(question: str, chat_id: int = None): + "請用繁體中文直接回答,不要開場白,300字以內。" ) - resp = 
ollama_service.generate(question, system_prompt=sys_prompt, timeout=180) - if resp.success and resp.content: - if chat_id: - openclaw_session.append_turn(chat_id, question, resp.content) - if _LEARNING_ENABLED: - import threading as _thr - _thr.Thread(target=store_conversation, - args=(0, 0, question, resp.content, "ollama", []), - daemon=True).start() - return resp.content, None - else: - sys_log.warning(f"[Ollama] 生成失敗: {resp.error},fallback 到 Gemini") + # Phase 1 v5.0: 包 ai_call_logger 追蹤 Bot Q&A 主鏈 Ollama + _qa_req_id = f"qa-{chat_id or 0}-{int(_time_mod.time())}" + with log_ai_call( + caller='openclaw_bot_main', + provider='gcp_ollama', + model=getattr(ollama_service, 'model', 'llama3.1:8b'), + request_id=_qa_req_id, + meta={'chat_id_hash': hashlib.sha1(str(chat_id or 0).encode()).hexdigest()[:8], 'has_db_ctx': bool(db_ctx)}, + ) as _ctx: + resp = ollama_service.generate(question, system_prompt=sys_prompt, timeout=180) + if resp.success and resp.content: + if chat_id: + openclaw_session.append_turn(chat_id, question, resp.content) + if _LEARNING_ENABLED: + import threading as _thr + _thr.Thread(target=store_conversation, + args=(0, 0, question, resp.content, "ollama", []), + daemon=True).start() + return resp.content, None + else: + sys_log.warning(f"[Ollama] 生成失敗: {resp.error},fallback 到 Gemini") + _ctx.set_error(f"ollama generate failed: {resp.error}") + _ctx.fallback_to_caller('openclaw_bot_gemini') except Exception as e: sys_log.warning(f"[Ollama] 例外發生: {e},fallback 到 Gemini") @@ -6817,15 +6886,30 @@ def openclaw_answer(question: str, chat_id: int = None): "tool_config": {"function_calling_config": {"mode": "AUTO"}}, "generationConfig": {"temperature": 0.3, "maxOutputTokens": 600}, } - r1 = requests.post( - f"{GEMINI_BASE_URL}/{GEMINI_MODEL}:generateContent?key={GEMINI_API_KEY}", - headers={"Content-Type": "application/json"}, - json=payload, timeout=30, - ) - r1.raise_for_status() - resp1 = r1.json() - candidate = resp1.get("candidates", [{}])[0] - parts 
= candidate.get("content", {}).get("parts", []) + # Phase 1 v5.0: 包 ai_call_logger 追蹤 Gemini FC 第一輪 + _qa_gemini_req_id = f"qa-{chat_id or 0}-{int(_time_mod.time())}" + with log_ai_call( + caller='openclaw_bot_gemini', + provider='gemini', + model=GEMINI_MODEL, + request_id=_qa_gemini_req_id, + meta={'chat_id_hash': hashlib.sha1(str(chat_id or 0).encode()).hexdigest()[:8], 'turn': 1}, + ) as _ctx_g1: + r1 = requests.post( + f"{GEMINI_BASE_URL}/{GEMINI_MODEL}:generateContent?key={GEMINI_API_KEY}", + headers={"Content-Type": "application/json"}, + json=payload, timeout=30, + ) + r1.raise_for_status() + resp1 = r1.json() + # Gemini REST: usageMetadata.{promptTokenCount, candidatesTokenCount} + _um = resp1.get("usageMetadata", {}) or {} + _ctx_g1.set_tokens( + input=_um.get("promptTokenCount", 0), + output=_um.get("candidatesTokenCount", 0), + ) + candidate = resp1.get("candidates", [{}])[0] + parts = candidate.get("content", {}).get("parts", []) # 如果沒有 function call,直接回傳文字 tool_calls = [p["functionCall"] for p in parts if "functionCall" in p] @@ -6870,15 +6954,28 @@ def openclaw_answer(question: str, chat_id: int = None): "maxOutputTokens": 600, }, } - r2 = requests.post( - f"{GEMINI_BASE_URL}/{GEMINI_MODEL}:generateContent?key={GEMINI_API_KEY}", - headers={"Content-Type": "application/json"}, - json=payload2, timeout=35, - ) - r2.raise_for_status() - resp2 = r2.json() - parts2 = resp2.get("candidates", [{}])[0].get("content", {}).get("parts", []) - final = "".join(p.get("text", "") for p in parts2 if "text" in p).strip() + # Phase 1 v5.0: 包 ai_call_logger 追蹤 Gemini FC 第二輪 + with log_ai_call( + caller='openclaw_bot_gemini', + provider='gemini', + model=GEMINI_MODEL, + request_id=_qa_gemini_req_id, + meta={'chat_id_hash': hashlib.sha1(str(chat_id or 0).encode()).hexdigest()[:8], 'turn': 2, 'tools_used': used_sources}, + ) as _ctx_g2: + r2 = requests.post( + f"{GEMINI_BASE_URL}/{GEMINI_MODEL}:generateContent?key={GEMINI_API_KEY}", + headers={"Content-Type": 
"application/json"}, + json=payload2, timeout=35, + ) + r2.raise_for_status() + resp2 = r2.json() + _um2 = resp2.get("usageMetadata", {}) or {} + _ctx_g2.set_tokens( + input=_um2.get("promptTokenCount", 0), + output=_um2.get("candidatesTokenCount", 0), + ) + parts2 = resp2.get("candidates", [{}])[0].get("content", {}).get("parts", []) + final = "".join(p.get("text", "") for p in parts2 if "text" in p).strip() if final: sys_log.info(f"[FC] done tools={used_sources} reply={len(final)}chars") @@ -6931,19 +7028,34 @@ def openclaw_answer(question: str, chat_id: int = None): + f"\n用戶問:{question}\n" "請用繁體中文直接回答,不要開場白,300字以內。" ) - r = requests.post( - f"{NVIDIA_BASE_URL}/chat/completions", - headers={"Authorization": f"Bearer {NVIDIA_API_KEY}", - "Content-Type": "application/json"}, - json={ - "model": CHAT_MODEL, - "messages": [{"role": "user", "content": nim_prompt}], - "max_tokens": 500, "temperature": 0.3, - }, - timeout=20, - ) - r.raise_for_status() - return r.json()["choices"][0]["message"]["content"].strip(), None + # Phase 1 v5.0: 包 ai_call_logger 追蹤 Bot Q&A NIM 三層 fallback + _qa_nim_req_id = f"qa-{chat_id or 0}-{int(_time_mod.time())}" + with log_ai_call( + caller='openclaw_bot_nim', + provider='nim', + model=CHAT_MODEL, + request_id=_qa_nim_req_id, + meta={'chat_id_hash': hashlib.sha1(str(chat_id or 0).encode()).hexdigest()[:8], 'has_db_ctx': bool(db_ctx)}, + ) as _ctx_nim: + r = requests.post( + f"{NVIDIA_BASE_URL}/chat/completions", + headers={"Authorization": f"Bearer {NVIDIA_API_KEY}", + "Content-Type": "application/json"}, + json={ + "model": CHAT_MODEL, + "messages": [{"role": "user", "content": nim_prompt}], + "max_tokens": 500, "temperature": 0.3, + }, + timeout=20, + ) + r.raise_for_status() + _body = r.json() + _u = _body.get("usage", {}) or {} + _ctx_nim.set_tokens( + input=_u.get("prompt_tokens", 0), + output=_u.get("completion_tokens", 0), + ) + return _body["choices"][0]["message"]["content"].strip(), None except Exception as e: 
sys_log.error(f"[FC] NIM fallback error: {e}") diff --git a/scripts/compare_daily_report_versions.py b/scripts/compare_daily_report_versions.py new file mode 100644 index 0000000..61025d6 --- /dev/null +++ b/scripts/compare_daily_report_versions.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +""" +scripts/compare_daily_report_versions.py +───────────────────────────────────────────────────────────────── +Operation Ollama-First v5.0 / Phase 3 / A8 — 日報雙版本盲測腳本 + +用途: + 跑同一天的「舊版 Gemini 全文」vs「新版 Hermes 模板」 + 輸出兩份檔案到 reports/,供統帥盲測選擇預設模式。 + +使用: + python3 scripts/compare_daily_report_versions.py --date 2026-05-03 + python3 scripts/compare_daily_report_versions.py # 預設昨日 + +紀律: + - 不寫入 ai_insights(避免污染 production 資料) + - 不發 Telegram(純 dry-run) + - 兩版本皆獨立執行,互不干擾 + - 失敗時報錯但不刪舊檔 +""" + +import os +import sys +import argparse +from datetime import date, timedelta, datetime +from pathlib import Path + +# 確保可 import 本專案 services +ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(ROOT)) + + +def _ensure_reports_dir() -> Path: + reports_dir = ROOT / 'reports' + reports_dir.mkdir(exist_ok=True) + return reports_dir + + +def _run_legacy(target_date: date) -> str: + """跑舊版(_legacy_full_gemini_daily_report)""" + os.environ['OPENCLAW_DAILY_HERMES_TEMPLATE'] = 'false' + # 強制 reload module(避免 cache) + import importlib + import services.openclaw_strategist_service as svc + importlib.reload(svc) + print(f"[legacy] 開始跑 Gemini 全文版日報 target_date={target_date}") + result = svc._legacy_full_gemini_daily_report() + return result.get('content', '') or result.get('report_content', '') or str(result) + + +def _run_hermes_template(target_date: date) -> str: + """跑新版(_generate_daily_report_hermes_template)""" + os.environ['OPENCLAW_DAILY_HERMES_TEMPLATE'] = 'true' + import importlib + import services.openclaw_strategist_service as svc + importlib.reload(svc) + print(f"[hermes] 開始跑 Hermes 模板版日報 target_date={target_date}") + result = svc._generate_daily_report_hermes_template() + 
return result.get('content', '') or result.get('report_content', '') or str(result) + + +def main(): + parser = argparse.ArgumentParser(description='OpenClaw 日報雙版本盲測') + parser.add_argument('--date', help='YYYY-MM-DD(預設昨日)') + args = parser.parse_args() + + if args.date: + target_date = datetime.strptime(args.date, '%Y-%m-%d').date() + else: + target_date = date.today() - timedelta(days=1) + + reports_dir = _ensure_reports_dir() + date_tag = target_date.strftime('%Y%m%d') + + legacy_file = reports_dir / f'daily_report_legacy_{date_tag}.md' + hermes_file = reports_dir / f'daily_report_v2_{date_tag}.md' + + # 跑舊版 + try: + legacy_content = _run_legacy(target_date) + legacy_file.write_text(legacy_content, encoding='utf-8') + print(f"✅ legacy 版輸出:{legacy_file}") + except Exception as e: + print(f"❌ legacy 版失敗:{e}", file=sys.stderr) + + # 跑新版 + try: + hermes_content = _run_hermes_template(target_date) + hermes_file.write_text(hermes_content, encoding='utf-8') + print(f"✅ hermes 模板版輸出:{hermes_file}") + except Exception as e: + print(f"❌ hermes 版失敗:{e}", file=sys.stderr) + + print(f"\n盲測檢查(建議):") + print(f" diff <(head -50 {legacy_file}) <(head -50 {hermes_file})") + print(f" wc -w {legacy_file} {hermes_file}") + print(f" # 統帥盲測時可遮 caller 名稱避免偏見") + + +if __name__ == '__main__': + main() diff --git a/services/code_review_pipeline_service.py b/services/code_review_pipeline_service.py index bd3c06b..a6e96be 100644 --- a/services/code_review_pipeline_service.py +++ b/services/code_review_pipeline_service.py @@ -29,7 +29,12 @@ from typing import Any, Dict, List, Optional from database.manager import get_session from sqlalchemy import text -from services.hermes_analyst_service import HERMES_URL as _HERMES_URL, HERMES_MODEL as _HERMES_MODEL +# ADR-027 Phase 2 N3:HERMES_MODEL 仍 import(純常數),HERMES_URL 改 lazy +# 每次 _hermes_scan 才透過 get_hermes_url() 取最新解析(GCP 優先 / 111 備援), +# 避免 import-time freeze 導致主機切換不生效。 +from services.hermes_analyst_service import HERMES_MODEL as _HERMES_MODEL 
+from config import get_hermes_url +from services.ai_call_logger import log_ai_call # Operation Ollama-First v5.0 P1 logger = logging.getLogger(__name__) @@ -38,6 +43,9 @@ _current_pipeline: Dict[str, Any] = {} _pipeline_lock = threading.Lock() GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "") +# LOCKED-GEMINI: Code Review 全 repo diff 可達 100K+ tokens,超過 Ollama 32K context +# 未來可升 Claude Opus 4.7 (200K context, Arena code Elo 1548) — Phase 7 任務 +# ADR-028 鎖定場景 #5 REVIEW_MODEL = os.getenv("OPENCLAW_MODEL", "gemini-2.5-flash") INTERNAL_TOKEN = os.getenv("INTERNAL_WEBHOOK_TOKEN", "") AUTO_FIX_ENABLED = os.getenv("CODE_REVIEW_AUTO_FIX_ENABLED", "true").lower() == "true" @@ -215,14 +223,36 @@ class CodeReviewPipeline: 只輸出 JSON 陣列,不含其他文字。無問題時輸出 []""" - resp = _req.post( - f"{_HERMES_URL}/api/generate", - json={"model": _HERMES_MODEL, "prompt": prompt, - "stream": False, "options": {"temperature": 0.1}}, - timeout=120, + # ADR-027 Phase 2 N3:lazy resolve Hermes 主機(GCP 優先 / 111 備援), + # 避開 import-time freeze。provider 標籤跟著解析結果動態決定。 + hermes_url = get_hermes_url() + provider_tag = ( + 'gcp_ollama' if ('34.21.145.224' in hermes_url or '34.143.170.20' in hermes_url) + else 'ollama_111' if '192.168.0.111' in hermes_url + else 'ollama_other' ) - resp.raise_for_status() - raw = resp.json().get("response", "").strip() + # Phase 1 v5.0: 包 ai_call_logger 追蹤 Code Review Hermes scan + with log_ai_call( + caller='code_review_hermes', + provider=provider_tag, + model=_HERMES_MODEL, + request_id=f"cr-{self.commit_sha[:8]}", + meta={'commit': self.commit_sha[:8], 'branch': self.branch, + 'files': len(files), 'host': hermes_url}, + ) as _ctx: + resp = _req.post( + f"{hermes_url}/api/generate", + json={"model": _HERMES_MODEL, "prompt": prompt, + "stream": False, "options": {"temperature": 0.1}}, + timeout=120, + ) + resp.raise_for_status() + body = resp.json() + _ctx.set_tokens( + input=body.get("prompt_eval_count", 0), + output=body.get("eval_count", 0), + ) + raw = body.get("response", 
"").strip() match = re.search(r"\[.*\]", raw, re.DOTALL) if not match: @@ -271,36 +301,70 @@ class CodeReviewPipeline: 💡 架構優化方向(1條長期建議) ✅ 本次部署亮點""" - # 優先 Gemini + # 優先 Gemini — Phase 1 v5.0 logger 追蹤 if GEMINI_API_KEY: - try: - import google.generativeai as genai - genai.configure(api_key=GEMINI_API_KEY) - model = genai.GenerativeModel( - model_name=REVIEW_MODEL, - generation_config=genai.types.GenerationConfig( - temperature=0.3, max_output_tokens=1500, - ), - system_instruction=system, - ) - resp = model.generate_content(user_prompt, request_options={"timeout": 90}) - return resp.text or "" - except Exception as e: - logger.warning("[CodeReview] OpenClaw Gemini 失敗,降級 ElephantAlpha: %s", e) + with log_ai_call( + caller='code_review_openclaw', + provider='gemini', + model=REVIEW_MODEL, + request_id=f"cr-{self.commit_sha[:8]}", + meta={'commit': self.commit_sha[:8], 'branch': self.branch}, + ) as _ctx: + try: + import google.generativeai as genai + genai.configure(api_key=GEMINI_API_KEY) + model = genai.GenerativeModel( + model_name=REVIEW_MODEL, + generation_config=genai.types.GenerationConfig( + temperature=0.3, max_output_tokens=1500, + ), + system_instruction=system, + ) + resp = model.generate_content(user_prompt, request_options={"timeout": 90}) + try: + usage = getattr(resp, 'usage_metadata', None) + if usage is not None: + _ctx.set_tokens( + input=getattr(usage, 'prompt_token_count', 0) or 0, + output=getattr(usage, 'candidates_token_count', 0) or 0, + ) + except Exception: + pass + return resp.text or "" + except Exception as e: + logger.warning("[CodeReview] OpenClaw Gemini 失敗,降級 ElephantAlpha: %s", e) + _ctx.set_error(f"{type(e).__name__}: {e}") + _ctx.fallback_to_caller('code_review_elephant') # 降級:ElephantAlpha via OpenRouter(OPENROUTER_API_KEY 容器內一定有) - try: - from services.elephant_service import elephant_service - resp = elephant_service.generate( - prompt=user_prompt, - system_prompt=system, - temperature=0.3, - timeout=90, - ) - if resp.success: - 
return resp.content or "" - except Exception as e: - logger.warning("[CodeReview] OpenClaw ElephantAlpha 降級也失敗: %s", e) + # Phase 1 v5.0 logger 追蹤 + with log_ai_call( + caller='code_review_elephant', + provider='nim_via_elephant', + model='nvidia/llama-3.3-nemotron-super-49b-v1.5', + request_id=f"cr-{self.commit_sha[:8]}", + meta={'commit': self.commit_sha[:8], 'branch': self.branch}, + ) as _ctx: + try: + from services.elephant_service import elephant_service + resp = elephant_service.generate( + prompt=user_prompt, + system_prompt=system, + temperature=0.3, + timeout=90, + ) + if resp.success: + # ElephantResponse 已含 input_tokens/output_tokens + _ctx.set_tokens( + input=getattr(resp, 'input_tokens', 0) or 0, + output=getattr(resp, 'output_tokens', 0) or 0, + ) + return resp.content or "" + else: + _ctx.set_error(getattr(resp, 'error', 'elephant generate failed')) + except Exception as e: + logger.warning("[CodeReview] OpenClaw ElephantAlpha 降級也失敗: %s", e) + _ctx.set_error(f"{type(e).__name__}: {e}") return "" diff --git a/services/hermes_analyst_service.py b/services/hermes_analyst_service.py index 7edbabe..76f5d7c 100644 --- a/services/hermes_analyst_service.py +++ b/services/hermes_analyst_service.py @@ -23,6 +23,8 @@ from typing import Optional import requests from sqlalchemy import text from services.mcp_context_service import build_mcp_context +from services.ollama_service import resolve_ollama_host, get_host_label +from services.ai_call_logger import log_ai_call # Operation Ollama-First v5.0 P1 logger = logging.getLogger(__name__) @@ -155,32 +157,48 @@ class HermesAnalystService: "keep_alive": HERMES_KEEP_ALIVE, # ADR-012:避免冷啟動 timeout "options": {"temperature": 0.1}, } - try: - resp = requests.post( - f"{HERMES_URL}/api/generate", - json=payload, - timeout=HERMES_TIMEOUT, # 統一 config 集中讀取(ADR-008);keep_alive 確保熱駐留時實測 < 10s - ) - resp.raise_for_status() - raw = (resp.json().get("response", "") or "").strip() - if raw.startswith("```"): - raw = 
re.sub(r"^```(?:json)?\s*", "", raw, flags=re.MULTILINE) - raw = re.sub(r"\s*```\s*$", "", raw.strip(), flags=re.MULTILINE).strip() - data = json.loads(raw) - return { - "intent": data.get("intent", "unknown"), - "confidence": float(data.get("confidence", 0.5)), - "complexity_score": float(data.get("complexity_score", 0.5)), - "requires_data_fetch": bool(data.get("requires_data_fetch", False)), - "preliminary_answer": data.get("preliminary_answer", "") or "", - "metadata": {"source": "hermes_llm"}, - } - except Exception as e: - logger.warning( - f"[Hermes.intent] Ollama 連線失敗,降級規則引擎" - f"(host={HERMES_URL} model={HERMES_MODEL} error={type(e).__name__}: {e})" - ) - return None + target_host = resolve_ollama_host() + # Phase 1 v5.0: 包 ai_call_logger 追蹤 Hermes 意圖分類 token / fallback + with log_ai_call( + caller='hermes_intent', + provider='gcp_ollama', + model=HERMES_MODEL, + meta={'host_label': get_host_label(target_host)}, + ) as _ctx: + try: + resp = requests.post( + f"{target_host}/api/generate", + json=payload, + timeout=HERMES_TIMEOUT, # 統一 config 集中讀取(ADR-008);keep_alive 確保熱駐留時實測 < 10s + ) + resp.raise_for_status() + body = resp.json() + _ctx.set_tokens( + input=body.get("prompt_eval_count", 0), + output=body.get("eval_count", 0), + ) + raw = (body.get("response", "") or "").strip() + if raw.startswith("```"): + raw = re.sub(r"^```(?:json)?\s*", "", raw, flags=re.MULTILINE) + raw = re.sub(r"\s*```\s*$", "", raw.strip(), flags=re.MULTILINE).strip() + data = json.loads(raw) + return { + "intent": data.get("intent", "unknown"), + "confidence": float(data.get("confidence", 0.5)), + "complexity_score": float(data.get("complexity_score", 0.5)), + "requires_data_fetch": bool(data.get("requires_data_fetch", False)), + "preliminary_answer": data.get("preliminary_answer", "") or "", + "metadata": {"source": "hermes_llm"}, + } + except Exception as e: + # NOTE: 修補 commit 00591c5 殘留的孤立 f-string(原 logger.warning 被誤刪) + logger.warning( + f"[Hermes.intent] Ollama 連線失敗,降級規則引擎" 
+ f"(model={HERMES_MODEL} error={type(e).__name__}: {e})" + ) + _ctx.set_error(f"{type(e).__name__}: {e}") + _ctx.fallback_to_caller('hermes_rule_engine') + return None def _rule_based_intent(self, message: str) -> dict: """Ollama 掛掉時的規則引擎 fallback — 永遠返回結構化結果。""" @@ -416,23 +434,46 @@ class HermesAnalystService: "options": {"temperature": 0.1}, } - resp = requests.post( - f"{HERMES_URL}/api/generate", - json=payload, - timeout=HERMES_TIMEOUT, - ) - resp.raise_for_status() + target_host = resolve_ollama_host() + # Phase 1 v5.0: 包 ai_call_logger 追蹤 Hermes 競價分析 token / fallback + with log_ai_call( + caller='hermes_analyst', + provider='gcp_ollama', + model=HERMES_MODEL, + meta={ + 'host_label': get_host_label(target_host), + 'item_count': len(items), + 'top_n': TOP_N, + }, + ) as _ctx: + try: + resp = requests.post( + f"{target_host}/api/generate", + json=payload, + timeout=HERMES_TIMEOUT, + ) + resp.raise_for_status() + except Exception as e: + _ctx.set_error(f"{type(e).__name__}: {e}") + raise - data = resp.json() - raw = data.get("response", "").strip() - duration_sec = round(data.get("total_duration", 0) / 1e9, 1) - eval_tokens = data.get("eval_count", "?") # Ollama 推理 token 數 - logger.info( - f"[Hermes] 推理耗時 {duration_sec}s," - f"輸入 {len(items)} 筆,tokens={eval_tokens},回應長度 {len(raw)}" - ) - # 儲存統計供 footprint 使用(掛在 instance 上供 run() 讀取) - self._last_stats = {"duration_sec": duration_sec, "tokens": eval_tokens} + data = resp.json() + raw = data.get("response", "").strip() + duration_sec = round(data.get("total_duration", 0) / 1e9, 1) + eval_tokens_raw = data.get("eval_count", 0) # Ollama 推理 token 數 + prompt_tokens_raw = data.get("prompt_eval_count", 0) + _ctx.set_tokens(input=prompt_tokens_raw, output=eval_tokens_raw) + logger.info( + f"[Hermes] 推理耗時 {duration_sec}s," + f"輸入 {len(items)} 筆,tokens={eval_tokens_raw},回應長度 {len(raw)}" + ) + # 儲存統計供 footprint 使用(掛在 instance 上供 run() 讀取) + self._last_stats = { + "duration_sec": duration_sec, + "tokens": eval_tokens_raw, 
+ "host": target_host, + "host_label": get_host_label(target_host) + } # P0-1 修復:剝除 Hermes 可能輸出的 markdown code fence if raw.startswith("```"): diff --git a/services/nemoton_dispatcher_service.py b/services/nemoton_dispatcher_service.py index e1d88e0..3c01a91 100644 --- a/services/nemoton_dispatcher_service.py +++ b/services/nemoton_dispatcher_service.py @@ -27,6 +27,7 @@ import requests from services.mcp_context_service import build_mcp_context from config import HERMES_URL # ADR-008 集中化:禁止硬編碼 IP +from services.ai_call_logger import log_ai_call # Operation Ollama-First v5.0 P1 logger = logging.getLogger(__name__) @@ -107,6 +108,17 @@ NIM_TIMEOUT = 60 # 秒 NIM_DAILY_LIMIT = 80 # 留 20 個給 AWOOOI,100/天免費配額 _nim_call_count = {"date": "", "count": 0} +# ── Operation Ollama-First v5.0 / Phase 3 / A9 ────────────────── +# GCP Ollama qwen3:14b 灰度切換開關 +# - 預設 false → 行為與戰前完全相同(仍走 NIM) +# - true → qwen3 主路徑,NIM 降為備援,最後仍兜底 Hermes 規則引擎(ADR-004) +# 模型選擇:A2 web-research 紅綠燈報告 docs/phase0_research_report_20260503.md +# 原戰役計畫 deepseek-r1:14b 的 Ollama tool_calls chat template 缺對應 jinja +# (GitHub Issue #10935 未解),改採 qwen3:14b(Ollama 官方 + qwenlm 雙確認 tools 支援)。 +NEMOTRON_OLLAMA_FIRST = os.getenv("NEMOTRON_OLLAMA_FIRST", "false").lower() == "true" +NEMOTRON_OLLAMA_MODEL = os.getenv("NEMOTRON_OLLAMA_MODEL", "qwen3:14b") +NEMOTRON_OLLAMA_TIMEOUT = int(os.getenv("NEMOTRON_OLLAMA_TIMEOUT", "180")) # 秒 + def _check_nim_quota() -> bool: today = datetime.now().strftime("%Y-%m-%d") @@ -320,6 +332,68 @@ ICON_AI = "🧠" ICON_FOOTPRINT = "⚙️" +# ── tool_calls 解析(NIM 與 qwen3 共用)────────────────────────── +def _parse_tool_calls_struct(tool_calls: list) -> list: + """從 OpenAI 格式的 tool_calls 結構陣列抽出 [{tool, args}] 清單。 + NIM 與 qwen3 (Ollama /api/chat) 兩邊回應對齊 OpenAI schema: + [{"function": {"name": ..., "arguments": }, ...}] + arguments 在 NIM 是 JSON 字串、在 Ollama 通常已是 dict;本 helper 兼容兩者。 + """ + results = [] + for tc in tool_calls or []: + fn = tc.get("function", {}) if isinstance(tc, dict) else {} + if not 
fn: + continue + raw_args = fn.get("arguments", {}) + if isinstance(raw_args, str): + try: + args = json.loads(raw_args) if raw_args.strip() else {} + except json.JSONDecodeError: + args = {} + elif isinstance(raw_args, dict): + args = raw_args + else: + args = {} + name = fn.get("name") + if name: + results.append({"tool": name, "args": args}) + return results + + +def _parse_content_fallback(raw_content: str) -> list: + """當模型沒回 tool_calls 結構、把工具呼叫塞進 content 時嘗試解析。 + 既有 NIM llama-3.1-8b 偶有此行為(line 537-554 原邏輯); + qwen3 開 thinking_mode=False 後較少見,但保留同等容錯。 + """ + if not raw_content or not isinstance(raw_content, str): + return [] + try: + parsed = json.loads(raw_content.strip()) + except Exception as parse_err: + logger.error(f"[ToolCalls] content fallback JSON 解析失敗:{parse_err}") + return [] + + if not isinstance(parsed, list): + return [] + + results = [] + for item in parsed: + if not isinstance(item, dict): + continue + name = item.get("name") or (item.get("function", {}) or {}).get("name") + args = item.get("parameters") or item.get("arguments") or {} + if isinstance(args, str): + try: + args = json.loads(args) + except json.JSONDecodeError: + args = {} + if name: + results.append({"tool": name, "args": args}) + if results: + logger.info(f"[ToolCalls] content fallback 解析成功,取得 {len(results)} 個 tool_calls") + return results + + def _build_footprint_json(hermes_stats: Optional[dict], nim_stats: Optional[dict]) -> dict: """ 建立結構化運算足跡 (用於 DB model_footprint JSONB 欄位) @@ -331,7 +405,8 @@ def _build_footprint_json(hermes_stats: Optional[dict], nim_stats: Optional[dict if hermes_stats: result["analyst"] = { "model": "qwen2.5:7b-instruct", - "host": HERMES_URL, # ADR-008:集中讀取,禁止硬編碼 IP + "host": hermes_stats.get("host", HERMES_URL), + "host_label": hermes_stats.get("host_label", "未知"), "duration_sec": hermes_stats.get("duration_sec", 0), "tokens": hermes_stats.get("tokens", 0), "cost_usd": 0, @@ -363,12 +438,13 @@ def _build_footprint_block(hermes_stats: Optional[dict], 
nim_stats: Optional[dic if hermes_stats: dur = hermes_stats.get("duration_sec", 0) tok = hermes_stats.get("tokens", "?") + label = hermes_stats.get("host_label", "本地 188") lines.append( - f"• 🔍 分析: Qwen2.5 7B (本地 188) | " + f"• 🔍 分析: Qwen2.5 7B ({label}) | " f"耗時: {dur:.1f}s | Tokens: {tok} | $0 成本" ) else: - lines.append("• 🔍 分析: Qwen2.5 7B (本地 188) | $0 成本") + lines.append("• 🔍 分析: Qwen2.5 7B (未知主機) | $0 成本") if nim_stats: tok = nim_stats.get("total_tokens", "?") @@ -464,81 +540,208 @@ class NemotronDispatcher: ] # P1-4 修復:NIM API 指數退避 retry(最多 3 次) + # Phase 1 v5.0: 包 ai_call_logger 追蹤 NIM 配額/tokens/錯誤 import time as _time last_err = None - for _attempt in range(3): - try: - resp = requests.post( - f"{NIM_BASE_URL}/chat/completions", - headers={ - "Authorization": f"Bearer {NIM_API_KEY}", - "Content-Type": "application/json", - }, - json={ - "model": NIM_MODEL, - "messages": messages, - "tools": TOOLS, - "tool_choice": "required", - "max_tokens": 2048, - }, - timeout=NIM_TIMEOUT, - ) - resp.raise_for_status() - break - except (requests.Timeout, requests.HTTPError) as e: - last_err = e - # ADR-004: 429 不重試,立即拋出讓上層啟動 Hermes 規則引擎降級 - if isinstance(e, requests.HTTPError) and e.response is not None \ - and e.response.status_code == 429: - logger.warning("[NIM] HTTP 429 速率限制,跳出 retry 迴圈") - raise - if _attempt < 2: - _time.sleep(2 ** _attempt) - logger.warning(f"[NIM] retry {_attempt + 1}/2 after {e}") - else: - raise last_err + with log_ai_call( + caller='nemotron_dispatch', + provider='nim', + model=NIM_MODEL, + meta={'threat_count': len(threats), 'quota_used': _nim_quota_used()}, + ) as _ctx: + for _attempt in range(3): + try: + resp = requests.post( + f"{NIM_BASE_URL}/chat/completions", + headers={ + "Authorization": f"Bearer {NIM_API_KEY}", + "Content-Type": "application/json", + }, + json={ + "model": NIM_MODEL, + "messages": messages, + "tools": TOOLS, + "tool_choice": "required", + "max_tokens": 2048, + }, + timeout=NIM_TIMEOUT, + ) + resp.raise_for_status() + 
break + except (requests.Timeout, requests.HTTPError) as e: + last_err = e + # ADR-004: 429 不重試,立即拋出讓上層啟動 Hermes 規則引擎降級 + if isinstance(e, requests.HTTPError) and e.response is not None \ + and e.response.status_code == 429: + logger.warning("[NIM] HTTP 429 速率限制,跳出 retry 迴圈") + _ctx.set_error(f"NIM 429 rate-limited") + _ctx.fallback_to_caller('hermes_rule_engine') + raise + if _attempt < 2: + _time.sleep(2 ** _attempt) + logger.warning(f"[NIM] retry {_attempt + 1}/2 after {e}") + else: + raise last_err - body = resp.json() - usage = body.get("usage", {}) - nim_stats = { - "total_tokens": usage.get("total_tokens", 0), - "quota_used": _nim_quota_used(), - } + body = resp.json() + usage = body.get("usage", {}) + # 記錄 token / 成本到 ai_calls 表 + _ctx.set_tokens( + input=usage.get("prompt_tokens", 0), + output=usage.get("completion_tokens", 0), + ) + nim_stats = { + "total_tokens": usage.get("total_tokens", 0), + "quota_used": _nim_quota_used(), + } choices = body.get("choices", []) - tool_calls = choices[0].get("message", {}).get("tool_calls", []) if choices else [] + message = choices[0].get("message", {}) if choices else {} + tool_calls = message.get("tool_calls", []) or [] - results = [] - for tc in tool_calls: - fn = tc.get("function", {}) - try: - args = json.loads(fn.get("arguments", "{}")) - except json.JSONDecodeError: - args = {} - results.append({"tool": fn.get("name"), "args": args}) + # 共用結構解析(NIM / qwen3 兩邊統一走同一條) + results = _parse_tool_calls_struct(tool_calls) if not results: # llama-3.1-8b-instruct 有時把 tool call 寫進 content 而非 tool_calls 結構 - raw_content = choices[0].get("message", {}).get("content", "") if choices else "" + raw_content = message.get("content", "") or "" logger.warning(f"[NIM] 0 tool_calls,嘗試從 content 解析:{raw_content[:120]}") - try: - parsed = json.loads(raw_content.strip()) - if isinstance(parsed, list): - for item in parsed: - name = item.get("name") or item.get("function", {}).get("name") - args = item.get("parameters") or 
item.get("arguments") or {} - if isinstance(args, str): - args = json.loads(args) - if name: - results.append({"tool": name, "args": args}) - if results: - logger.info(f"[NIM] content fallback 解析成功,取得 {len(results)} 個 tool_calls") - except Exception as parse_err: - logger.error(f"[NIM] content fallback 解析失敗:{parse_err}") + results = _parse_content_fallback(raw_content) logger.info(f"[NIM] 收到 {len(results)} 個 tool_calls | tokens={nim_stats['total_tokens']}") return results, nim_stats + # ────────────────────────────────────────────── + # GCP Ollama qwen3:14b Tool Calling(Operation Ollama-First v5.0 / Phase 3) + # ────────────────────────────────────────────── + def _call_qwen3_dispatch(self, threats: list) -> tuple: + """ + 將 Hermes 威脅清單交給 GCP Ollama qwen3:14b,取得 tool_calls 決策。 + + Why qwen3:14b(A2 web-research 結論,docs/phase0_research_report_20260503.md): + - Ollama registry 官方頁 + qwenlm.github.io 雙確認 tools capability 可用 + - 預設可關閉 thinking mode(避免 deepseek-r1 的 30s thinking 延遲) + - 14B 體積 9.3GB,與 deepseek-r1:14b 同級 + - 與 NIM 一致採 OpenAI 兼容 chat completion + tools schema + + Returns: + (list of {"tool": str, "args": dict}, dict ollama_stats) + ollama_stats: {"total_tokens": int, "host": str, "model": str} + """ + from services.ollama_service import resolve_ollama_host, mark_unhealthy + host = resolve_ollama_host().rstrip("/") + + threat_summary = json.dumps( + [ + { + "sku": t.sku, + "name": t.name, + "momo_price": t.momo_price, + "pchome_price": t.pchome_price, + "gap_pct": t.gap_pct, + "sales_delta": t.sales_7d_delta_pct, + "risk": t.risk, + "action": t.recommended_action, + "confidence": t.confidence, + } + for t in threats + ], + ensure_ascii=False, + ) + + # 注入 MCP 市場上下文(與 NIM 路徑一致) + mcp_ctx = build_mcp_context() + + # System prompt 與 NIM 完全一致(避免兩套維護) + system_prompt = ( + "你是台灣電商競價情報的行動派發器。" + f"當前市場背景 (MCP):\n{mcp_ctx}\n\n" + "根據 Hermes 分析師提供的威脅清單,決定對每支商品呼叫哪個工具。\n" + "路由鐵律(依序判斷,命中即停):\n" + "1. 
gap_pct < 5% 且 sales_delta < -30% → 非價格異常,呼叫 flag_for_human_review," + "concern 說明『價差接近 0 但銷量大幅下滑,疑似缺貨/下架/平台流量異常,請人工走查前台』。\n" + "2. gap_pct ≥ 5% 且 risk=HIGH → trigger_price_alert(填入 momo_price, comp_price)。\n" + "3. 我方價格低於競品且銷量正成長 → add_to_recommendation。\n" + "4. confidence < 0.6 或其他複雜情況 → flag_for_human_review。\n" + "每支商品只呼叫一個工具。\n" + "【語言鐵律 — 台灣標準正體中文(繁體)】所有文字欄位必須遵守:\n" + " 1. 嚴禁簡體字、嚴禁異體字(例:不可用「亊」,必須用「事」)\n" + " 2. 嚴禁短語重複(語意坍塌)、嚴禁無意義字元組合\n" + "若無法產出合理的繁體中文說明,直接輸出「請人工評估議價空間」。" + ) + + payload = { + "model": NEMOTRON_OLLAMA_MODEL, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": f"請處理以下 {len(threats)} 筆威脅清單:\n{threat_summary}"}, + ], + "tools": TOOLS, # 重用既有 NIM tools schema + "stream": False, + "options": { + "temperature": 0.2, + "num_predict": 2048, + }, + } + + with log_ai_call( + caller='nemotron_dispatch', + provider='gcp_ollama', + model=NEMOTRON_OLLAMA_MODEL, + request_id=f"nem-{int(time.time())}", + meta={ + 'flag': 'NEMOTRON_OLLAMA_FIRST', + 'threats_count': len(threats), + 'host': host, + }, + ) as ctx: + try: + resp = requests.post( + f"{host}/api/chat", + json=payload, + timeout=NEMOTRON_OLLAMA_TIMEOUT, + ) + resp.raise_for_status() + body = resp.json() + except Exception as e: + # 連線/HTTP 失敗 → 標記主機 unhealthy + log 錯誤後 re-raise,由 dispatch 走 NIM fallback + ctx.set_error(f"qwen3 call failed: {type(e).__name__}: {e}") + ctx.fallback_to_caller('nim') + mark_unhealthy(host) + raise + + ctx.set_tokens( + input=body.get('prompt_eval_count', 0), + output=body.get('eval_count', 0), + ) + + msg = body.get('message', {}) if isinstance(body, dict) else {} + tool_calls = msg.get('tool_calls', []) or [] + + # 走共用 tool_calls 結構解析(與 NIM 同一條 helper) + results = _parse_tool_calls_struct(tool_calls) + + if not results: + # qwen3 沒回 tool_calls → 走既有 content fallback 解析 + raw_content = msg.get('content', '') or '' + logger.warning( + f"[Dispatcher][qwen3] 0 tool_calls,嘗試從 content 解析:{raw_content[:120]}" + ) + results = 
_parse_content_fallback(raw_content) + + ollama_stats = { + "total_tokens": (body.get('prompt_eval_count', 0) or 0) + + (body.get('eval_count', 0) or 0), + "host": host, + "model": NEMOTRON_OLLAMA_MODEL, + } + + logger.info( + f"[Dispatcher][qwen3] 收到 {len(results)} 個 tool_calls | " + f"tokens={ollama_stats['total_tokens']} host={host}" + ) + return results, ollama_stats + # ────────────────────────────────────────────── # ADR-004:Hermes 規則引擎降級路由 # ────────────────────────────────────────────── @@ -1190,6 +1393,51 @@ class NemotronDispatcher: "nim_stats": {}, } + # ── Operation Ollama-First v5.0 / Phase 3 / A9:qwen3 主路徑(feature flag 灰度)── + # 預設 NEMOTRON_OLLAMA_FIRST=false 時不進入此分支,行為與戰前完全相同。 + # 若 qwen3 成功取得 tool_calls,沿用既有 TOOL_MAP 執行邏輯(共用 footprint/threat 注入)。 + # 若 qwen3 失敗或 0 tool_calls → 不直接降到 Hermes 規則,先嘗試 NIM 備援,再走 ADR-004。 + qwen3_used = False + qwen3_stats: Optional[dict] = None + qwen3_tool_calls: Optional[list] = None + if NEMOTRON_OLLAMA_FIRST: + try: + qwen3_tool_calls, qwen3_stats = self._call_qwen3_dispatch(nim_candidates) + if qwen3_tool_calls: + qwen3_used = True + logger.info( + f"[Dispatcher][qwen3] 主路徑成功 tool_calls={len(qwen3_tool_calls)} " + f"tokens={qwen3_stats.get('total_tokens', 0)}" + ) + else: + logger.warning("[Dispatcher][qwen3] 0 tool_calls,fallback 至 NIM") + except Exception as e: + logger.warning(f"[Dispatcher][qwen3] 呼叫失敗 fallback NIM: {e}") + # log_ai_call 已在 _call_qwen3_dispatch 內標記 status=error + fallback_to=nim + qwen3_tool_calls = None + qwen3_stats = None + + # qwen3 主路徑成功 → 直接進入工具執行區塊(跳過 NIM) + if qwen3_used: + tool_calls = qwen3_tool_calls + # 與既有 NIM 路徑一致的 stats 結構(footprint 顯示用) + nim_stats = { + "total_tokens": qwen3_stats.get("total_tokens", 0), + "quota_used": _nim_quota_used(), # 配額未動用 + "provider": "gcp_ollama", + "model": qwen3_stats.get("model", NEMOTRON_OLLAMA_MODEL), + } + return self._execute_tool_calls( + tool_calls=tool_calls, + threats=threats, + hermes_stats=hermes_stats, + nim_stats=nim_stats, + 
pre_dispatched=dispatched, + pre_skipped=skipped, + pre_errors=errors, + ) + + # ── 進入 NIM 路徑(flag=false 預設主路徑;flag=true 則為 qwen3 失敗備援)── if not NIM_API_KEY: logger.warning("[Dispatcher][ADR-004] NVIDIA_API_KEY 未設定,啟動 Hermes 規則引擎降級") fb = self._hermes_rule_fallback(nim_candidates, hermes_stats) @@ -1249,11 +1497,38 @@ class NemotronDispatcher: "nim_stats": fb["nim_stats"], } - # 建立運算足跡(Telegram 顯示文字 + DB 結構化 JSON,共用同一份) + return self._execute_tool_calls( + tool_calls=tool_calls, + threats=threats, + hermes_stats=hermes_stats, + nim_stats=nim_stats, + pre_dispatched=dispatched, + pre_skipped=skipped, + pre_errors=errors, + ) + + # ────────────────────────────────────────────── + # tool_calls 執行區塊(NIM 與 qwen3 共用) + # ────────────────────────────────────────────── + def _execute_tool_calls( + self, + tool_calls: list, + threats: list, + hermes_stats: Optional[dict], + nim_stats: dict, + pre_dispatched: int = 0, + pre_skipped: int = 0, + pre_errors: Optional[list] = None, + ) -> dict: + """執行 LLM 回傳的 tool_calls 清單,注入 Python 獨裁的客觀數字 + 金額影響。 + 被 NIM 路徑與 qwen3 路徑共用,避免雙路雙維護。 + """ + errors = list(pre_errors or []) + dispatched = pre_dispatched + footprint_text = _build_footprint_block(hermes_stats, nim_stats) footprint_data = _build_footprint_json(hermes_stats, nim_stats) - # 建立 SKU → threat 的查詢字典(供 add_to_recommendation 寫入快照) threat_map = {t.sku: t for t in threats} TOOL_MAP = { @@ -1266,20 +1541,15 @@ class NemotronDispatcher: for tc in tool_calls: tool_name = tc.get("tool") - args = tc.get("args", {}) + args = dict(tc.get("args", {}) or {}) handler = TOOL_MAP.get(tool_name) if not handler: errors.append(f"未知工具: {tool_name}") continue - # 注入通用參數:Telegram 文字 + DB JSON 足跡 args["footprint"] = footprint_text - # [2026-04-18 台北] Bug-1 防線一 保險:所有客觀數字強制由 Python 從 threat_map 注入, - # 覆蓋 LLM 可能回吐的幻覺數字(例如 $0)。Layer A Hermes 根治是主防線, - # 此處為二道屏障(萬一 ground_items 有漏網,或未來走 bypass) — Claude Opus 4.7 - # [2026-05-02 台北] B' 軌:金額影響量化亦走 Python 獨裁注入 — Claude Opus 4.7 t = 
threat_map.get(args.get("sku")) if tool_name == "trigger_price_alert" and t: args["momo_price"] = getattr(t, "momo_price", None) @@ -1302,7 +1572,6 @@ class NemotronDispatcher: args["threat"] = t elif tool_name == "route_to_km": args["threat"] = t - # mark_for_relearn 無需注入客觀數字(僅寫 DB) try: handler(**args) @@ -1311,11 +1580,13 @@ class NemotronDispatcher: errors.append(f"{tool_name}({args.get('sku', '?')}): {e}") logger.error(f"[Dispatcher] 工具執行失敗 [{tool_name}]: {e}") - skipped = len(threats) - dispatched + skipped = max(0, len(threats) - dispatched) + # nim_stats 在 qwen3 路徑下會帶 provider='gcp_ollama',log 出處可區辨 + provider = nim_stats.get("provider", "nim") if isinstance(nim_stats, dict) else "nim" logger.info( - f"[Dispatcher] 完成 forced_review={len(forced_review)} " + f"[Dispatcher] 完成 provider={provider} " f"dispatched={dispatched} skipped={skipped} " - f"errors={len(errors)} nim_tokens={nim_stats.get('total_tokens', 0)}" + f"errors={len(errors)} tokens={nim_stats.get('total_tokens', 0)}" ) return { "dispatched": dispatched, diff --git a/services/openclaw_strategist_service.py b/services/openclaw_strategist_service.py index cc1adc9..def17d5 100644 --- a/services/openclaw_strategist_service.py +++ b/services/openclaw_strategist_service.py @@ -9,7 +9,7 @@ OpenClaw 戰略分析師(Gemini 2.5 Flash) 提供: generate_weekly_strategy_report() — 週報(每週一 06:00) - generate_meta_analysis_report() — AI 系統效能自我審視(每 6 小時) + generate_meta_analysis_report() — AI 系統效能自我審視(每日 12:00, Phase 4 降頻) 分析維度: 1. 
業績趨勢(MoM / WoW) @@ -24,6 +24,7 @@ OpenClaw 戰略分析師(Gemini 2.5 Flash) import json import logging import os +import uuid import requests from datetime import datetime, timedelta from typing import Any, Dict, List, Optional @@ -31,15 +32,110 @@ from typing import Any, Dict, List, Optional from database.manager import get_session from sqlalchemy import bindparam, text +from services.ai_call_logger import log_ai_call # Operation Ollama-First v5.0 P1 + logger = logging.getLogger(__name__) GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "") +# LOCKED-GEMINI: OpenClaw 週/月/年報需長 context (>30K tokens) + 繁中商業文體品質 +# Ollama 上限 32K context、繁中敘事力差距 >15%(ADR-028 鎖定場景 #2/#3/#4) +# 日報 (A8) 與 Q&A (A7) 已切 Hermes 模板/Qwen3,但敘事報告維持 Gemini STRATEGY_MODEL = os.getenv("OPENCLAW_MODEL", "gemini-2.5-flash") NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY", "") NVIDIA_NIM_URL = "https://integrate.api.nvidia.com/v1/chat/completions" NVIDIA_FALLBACK_MODEL = "meta/llama-3.3-70b-instruct" TAIPEI_TZ_OFFSET = 8 # UTC+8 +# ────────────────────────────────────────────────────────────────────────────── +# Operation Ollama-First v5.0 — Phase 3 feature flag (預設 OFF;統帥手動開灰度) +# - OPENCLAW_QA_OLLAMA_FIRST: false=維持戰前 Gemini-first 行為,true=走 Ollama 主、Gemini fallback +# - OPENCLAW_QA_OLLAMA_MODEL: GCP Ollama 上的模型 tag(A2 推薦 qwen3:14b,9.3GB) +# - OPENCLAW_QA_OLLAMA_HOST: 允許獨立指定 QA 用主機;未設則 fallback 到通用 OLLAMA_HOST_PRIMARY +# - OPENCLAW_QA_OLLAMA_TIMEOUT: 單次 Ollama 呼叫超時(秒),低品質判定後仍會升級 Gemini +# 任何 deploy 不開 flag → 行為與戰前完全相同(regression-safe)。 +# ────────────────────────────────────────────────────────────────────────────── + + +def _qa_ollama_first_enabled() -> bool: + """每次呼叫即時讀環境變數,允許 runtime toggle 灰度。""" + return os.getenv('OPENCLAW_QA_OLLAMA_FIRST', 'false').strip().lower() in ('true', '1', 'yes', 'on') + + +OPENCLAW_QA_OLLAMA_MODEL = os.getenv('OPENCLAW_QA_OLLAMA_MODEL', 'qwen3:14b') +OPENCLAW_QA_OLLAMA_HOST = os.getenv( + 'OPENCLAW_QA_OLLAMA_HOST', + os.getenv('OLLAMA_HOST_PRIMARY', 'http://34.143.170.20:11434'), +) 
+OPENCLAW_QA_OLLAMA_TIMEOUT = int(os.getenv('OPENCLAW_QA_OLLAMA_TIMEOUT', '60')) + +# 繁體中文強制 system prompt(A2 黃燈警訊「Qwen 繁中短板」緩解策略) +QWEN3_TC_SYSTEM_PROMPT = """你是 momo 電商情報分析師「OpenClaw」。 + +【硬性規則】 +1. 必須使用繁體中文(台灣用語),絕對禁止簡體字、大陸用語(例:寫「資料」不寫「数据」、寫「軟體」不寫「软件」) +2. 商品/品牌名稱保留原文不翻譯(如 momo / PChome / 蝦皮 / 全家) +3. 數字與貨幣保留原貌(NT$、%、件數、月份) +4. 若資料不足無法回答,明確說「資料不足,建議改問 ___」而非編造 + +【輸出風格】 +- 直接回答,不要「以下是分析」開場白 +- 結構化:用條列、表格、編號 +- 控制在 300 字以內,除非統帥明確要求展開 +""" + +# 簡體字偵測樣本:A2 報告警訊核心檢查項;列出商業中文情境最常被簡體污染的單字 +# 注意:避免列「於」「与」這類兩岸通用字;只取明確簡繁字差 +_SIMPLIFIED_HINT_CHARS = frozenset([ + # 商業/科技高頻簡繁差字(每字繁體對照於註解) + '设', # 設 + '当', # 當 + '点', # 點 + '问', # 問 + '获', # 獲 + '为', # 為 + '么', # 麼 + '资', # 資 + '产', # 產 + '业', # 業 + '务', # 務 + '说', # 說 + '听', # 聽 + '关', # 關 + '词', # 詞 + '这', # 這 + '过', # 過 + '让', # 讓 + '应', # 應 + '亿', # 億 + '请', # 請 + '观', # 觀 + '战', # 戰 + '体', # 體 + '价', # 價 + '场', # 場 + '动', # 動 + '号', # 號 + '团', # 團 + '类', # 類 + '广', # 廣 + '处', # 處 + '执', # 執 + '决', # 決 + '约', # 約 + '级', # 級 + '态', # 態 + '势', # 勢 + '运', # 運 + '营', # 營 +]) + +# 拒答訊號:模型表達「無法回答」即視為低品質 +_REFUSAL_PATTERNS = ( + '無法回答', '無法回覆', '我不知道', '我無從', "I cannot", + "I don't know", '抱歉,我無法', '抱歉,我無法', + '需要更多資訊', '需要更多信息', '無相關資料', +) + __all__ = [ "generate_daily_report", "generate_weekly_strategy_report", @@ -60,13 +156,39 @@ def generate_strategy_response(query: str, context: Optional[Dict[str, Any]] = N query: 使用者自然語言訊息(繁體中文) context: 可選,{"intent": str, "user_id": int, ...} Returns: - 繁體中文回覆字串。GEMINI_API_KEY 未設或呼叫失敗時,回降級訊息 - (永遠回字串,不拋例外,由呼叫端顯示於 Telegram)。 + 繁體中文回覆字串。所有 LLM 失敗時回降級訊息(永遠回字串、不拋例外)。 + + 路由(Operation Ollama-First v5.0 — Phase 3): + OPENCLAW_QA_OLLAMA_FIRST=false(預設)→ Gemini → NIM(戰前行為) + OPENCLAW_QA_OLLAMA_FIRST=true → GCP Ollama qwen3:14b → 品質檢測 → fallback Gemini → NIM """ q = (query or "").strip() if not q: return "請輸入您的問題,例如:本週業績趨勢、競品價差分析、產出週報 PPT。" + request_id = f"qa-{uuid.uuid4().hex[:8]}" + + # ── 灰度路徑:Ollama 優先(flag=true 才走,預設 OFF)── + if _qa_ollama_first_enabled(): + ollama_reply = 
_call_qwen3_qa(q, context, request_id) + if ollama_reply and not _is_low_quality_response(ollama_reply): + return ollama_reply + # 品質守門失敗或 Ollama 離線 → 降級 Gemini(fallback_to 已於 _call_qwen3_qa 內標記) + logger.info( + "[OpenClaw][QA] Ollama 主路徑未通過(無回應或低品質),fallback Gemini;request_id=%s", + request_id, + ) + + # ── 既有路徑:Gemini → NIM(A4 已接 ai_call_logger)── + return _legacy_gemini_first_qa(q, context, request_id=request_id) + + +def _legacy_gemini_first_qa( + q: str, + context: Optional[Dict[str, Any]], + request_id: Optional[str] = None, +) -> str: + """戰前 Gemini-first 路徑;抽出獨立函式以利 Phase 3 灰度與 regression test。""" system_prompt = ( "你是 MOMO Pro 電商情報策略師「OpenClaw」。以繁體中文(台灣用語)回覆使用者。" "嚴禁簡體字,嚴禁空洞套話。若使用者要求的資料需即時查詢," @@ -79,13 +201,13 @@ def generate_strategy_response(query: str, context: Optional[Dict[str, Any]] = N text_reply = None if GEMINI_API_KEY: try: - text_reply = _call_gemini(system_prompt, user_prompt, temperature=0.5) + text_reply = _call_gemini(system_prompt, user_prompt, temperature=0.5, caller="openclaw_qa") except Exception as e: logger.warning("[OpenClaw] Gemini 呼叫失敗,備援 NVIDIA NIM:%s", e) if not text_reply and NVIDIA_API_KEY: try: - text_reply = _call_nvidia_nim(system_prompt, user_prompt) + text_reply = _call_nvidia_nim(system_prompt, user_prompt, caller="openclaw_qa") except Exception as e: logger.error("[OpenClaw] NVIDIA NIM 備援也失敗:%s", e) @@ -97,6 +219,119 @@ def generate_strategy_response(query: str, context: Optional[Dict[str, Any]] = N return text_reply +# ────────────────────────────────────────────────────────────────────────────── +# Phase 3 — Ollama Q&A 路徑 + 品質守門 +# ────────────────────────────────────────────────────────────────────────────── + +def _call_qwen3_qa( + question: str, + context: Optional[Dict[str, Any]], + request_id: str, +) -> Optional[str]: + """呼叫 GCP Ollama 上的 qwen3:14b(或環境變數指定的模型)回答 Telegram QA。 + + 回傳 None 表示「呼叫失敗或回空」,呼叫端會自動 fallback Gemini。 + 本函式不負責品質判定(呼叫端用 `_is_low_quality_response` 判,避免邏輯耦合)。 + 全程包在 `log_ai_call` context 
manager;失敗時 set_error + fallback_to_caller。 + """ + user_prompt = ( + f"使用者問題:{question}\n" + f"上下文:{json.dumps(context or {}, ensure_ascii=False)}" + ) + url = f"{OPENCLAW_QA_OLLAMA_HOST.rstrip('/')}/api/generate" + payload = { + "model": OPENCLAW_QA_OLLAMA_MODEL, + "system": QWEN3_TC_SYSTEM_PROMPT, + "prompt": user_prompt, + "stream": False, + "options": { + "temperature": 0.5, + "num_predict": 1024, + }, + } + + with log_ai_call( + caller='openclaw_qa', + provider='gcp_ollama', + model=OPENCLAW_QA_OLLAMA_MODEL, + request_id=request_id, + meta={ + 'flag': 'OPENCLAW_QA_OLLAMA_FIRST', + 'host': OPENCLAW_QA_OLLAMA_HOST, + 'temperature': 0.5, + }, + ) as ctx: + try: + ctx.set_prompt_hash(user_prompt) + resp = requests.post(url, json=payload, timeout=OPENCLAW_QA_OLLAMA_TIMEOUT) + resp.raise_for_status() + body = resp.json() or {} + # Ollama /api/generate 回傳格式:{response, prompt_eval_count, eval_count, ...} + ctx.set_tokens( + input=body.get('prompt_eval_count', 0), + output=body.get('eval_count', 0), + ) + text_reply = (body.get('response') or '').strip() + if not text_reply: + ctx.set_error('empty_response') + ctx.fallback_to_caller('openclaw_qa_gemini_fallback') + return None + return text_reply + except Exception as e: + logger.warning( + "[OpenClaw][QA] qwen3 呼叫失敗 request_id=%s host=%s: %s", + request_id, OPENCLAW_QA_OLLAMA_HOST, e, + ) + ctx.set_error(f"{type(e).__name__}: {str(e)[:200]}") + ctx.fallback_to_caller('openclaw_qa_gemini_fallback') + return None + + +# 低品質判定常數:避免 magic number 散落於規則裡 +_QA_MIN_LENGTH = 50 # 規則 1:長度下限 +_QA_SIMPLIFIED_THRESHOLD = 3 # 規則 2:簡體字數量門檻 +_QA_FLOWING_TEXT_LENGTH = 200 # 規則 4:「200+ 字無斷行」流水帳判定 + + +def _is_low_quality_response(text: Optional[str]) -> bool: + """判斷 Ollama 回應品質低,需升級 Gemini。 + + 觸發條件(任一即視為低品質): + 1. 空字串或長度 < _QA_MIN_LENGTH 字元 + 2. 簡體字污染:>= _QA_SIMPLIFIED_THRESHOLD 個簡體 hint 字元(A2 黃燈警訊核心檢查) + 3. 拒答訊號:包含「無法回答」「我不知道」等模式 + 4. 
結構性差:> _QA_FLOWING_TEXT_LENGTH 字但完全沒有換行(流水帳) + + Returns: + True → 低品質,呼叫端應 fallback Gemini + False → 可接受 + """ + if not text: + return True + stripped = text.strip() + if len(stripped) < _QA_MIN_LENGTH: + return True + + # 規則 2:簡體字污染(A2 警訊:Qwen 繁中短板) + simplified_count = sum(1 for c in stripped if c in _SIMPLIFIED_HINT_CHARS) + if simplified_count >= _QA_SIMPLIFIED_THRESHOLD: + logger.info("[OpenClaw][QA] 低品質:偵測 %d 個簡體字 hint", simplified_count) + return True + + # 規則 3:拒答訊號 + for pattern in _REFUSAL_PATTERNS: + if pattern in stripped: + logger.info("[OpenClaw][QA] 低品質:偵測拒答模式 '%s'", pattern) + return True + + # 規則 4:結構性 — 200+ 字無斷行 = 流水帳 + if len(stripped) > _QA_FLOWING_TEXT_LENGTH and stripped.count('\n') < 1: + logger.info("[OpenClaw][QA] 低品質:%d 字無斷行(流水帳)", len(stripped)) + return True + + return False + + # ═══════════════════════════════════════════════════════════════════════════════ # DB 數據讀取層 # ═══════════════════════════════════════════════════════════════════════════════ @@ -665,59 +900,110 @@ def _save_action_items(actions: List[str], source_insight_id: Optional[int]) -> # Gemini 呼叫層 # ═══════════════════════════════════════════════════════════════════════════════ -def _call_gemini(system_prompt: str, user_prompt: str, temperature: float = 0.4) -> Optional[str]: - """呼叫 Gemini,回傳文字;失敗回傳 None""" +def _call_gemini( + system_prompt: str, + user_prompt: str, + temperature: float = 0.4, + caller: str = "openclaw_qa", +) -> Optional[str]: + """呼叫 Gemini,回傳文字;失敗回傳 None。 + + Args: + caller: ai_calls.caller — 由外層 generate_*_report 傳入細分 caller + (openclaw_daily / openclaw_weekly / openclaw_monthly / openclaw_meta / openclaw_qa) + """ if not GEMINI_API_KEY: logger.warning("[OpenClaw] GEMINI_API_KEY 未設定") return None - try: - import google.generativeai as genai - genai.configure(api_key=GEMINI_API_KEY) - model = genai.GenerativeModel( - model_name=STRATEGY_MODEL, - generation_config=genai.types.GenerationConfig( - temperature=temperature, - max_output_tokens=4096, - 
), - system_instruction=system_prompt, - ) - response = model.generate_content( - user_prompt, - request_options={"timeout": 180}, - ) - return response.text or "" - except Exception as e: - logger.error("[OpenClaw] Gemini 呼叫失敗: %s", e) - return None + with log_ai_call( + caller=caller, + provider='gemini', + model=STRATEGY_MODEL, + meta={'temperature': temperature}, + ) as _ctx: + try: + import google.generativeai as genai + genai.configure(api_key=GEMINI_API_KEY) + model = genai.GenerativeModel( + model_name=STRATEGY_MODEL, + generation_config=genai.types.GenerationConfig( + temperature=temperature, + max_output_tokens=4096, + ), + system_instruction=system_prompt, + ) + response = model.generate_content( + user_prompt, + request_options={"timeout": 180}, + ) + # Gemini SDK:response.usage_metadata.{prompt_token_count, candidates_token_count} + try: + usage = getattr(response, 'usage_metadata', None) + if usage is not None: + _ctx.set_tokens( + input=getattr(usage, 'prompt_token_count', 0) or 0, + output=getattr(usage, 'candidates_token_count', 0) or 0, + ) + except Exception: + pass + return response.text or "" + except Exception as e: + logger.error("[OpenClaw] Gemini 呼叫失敗: %s", e) + _ctx.set_error(f"{type(e).__name__}: {e}") + _ctx.fallback_to_caller(f"{caller}_nim") + return None -def _call_nvidia_nim(system_prompt: str, user_prompt: str, temperature: float = 0.5) -> Optional[str]: - """Gemini 離線時備援 NVIDIA NIM,回傳文字;失敗回傳 None""" +def _call_nvidia_nim( + system_prompt: str, + user_prompt: str, + temperature: float = 0.5, + caller: str = "openclaw_qa", +) -> Optional[str]: + """Gemini 離線時備援 NVIDIA NIM,回傳文字;失敗回傳 None。 + + Args: + caller: 由外層細分 caller,最終會以 ``{caller}_nim`` 紀錄到 ai_calls。 + """ if not NVIDIA_API_KEY: return None - try: - resp = requests.post( - NVIDIA_NIM_URL, - headers={ - "Authorization": f"Bearer {NVIDIA_API_KEY}", - "Content-Type": "application/json", - }, - json={ - "model": NVIDIA_FALLBACK_MODEL, - "messages": [ - {"role": "system", "content": 
system_prompt}, - {"role": "user", "content": user_prompt}, - ], - "temperature": temperature, - "max_tokens": 1024, - }, - timeout=60, - ) - resp.raise_for_status() - return resp.json()["choices"][0]["message"]["content"] - except Exception as e: - logger.error("[OpenClaw] NVIDIA NIM 呼叫失敗: %s", e) - return None + nim_caller = f"{caller}_nim" + with log_ai_call( + caller=nim_caller, + provider='nim', + model=NVIDIA_FALLBACK_MODEL, + meta={'temperature': temperature}, + ) as _ctx: + try: + resp = requests.post( + NVIDIA_NIM_URL, + headers={ + "Authorization": f"Bearer {NVIDIA_API_KEY}", + "Content-Type": "application/json", + }, + json={ + "model": NVIDIA_FALLBACK_MODEL, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + "temperature": temperature, + "max_tokens": 1024, + }, + timeout=60, + ) + resp.raise_for_status() + body = resp.json() + usage = body.get("usage", {}) or {} + _ctx.set_tokens( + input=usage.get("prompt_tokens", 0), + output=usage.get("completion_tokens", 0), + ) + return body["choices"][0]["message"]["content"] + except Exception as e: + logger.error("[OpenClaw] NVIDIA NIM 呼叫失敗: %s", e) + _ctx.set_error(f"{type(e).__name__}: {e}") + return None # ═══════════════════════════════════════════════════════════════════════════════ @@ -752,6 +1038,63 @@ def _split_message(text: str, max_len: int = 3800) -> List[str]: return chunks +def _push_report_with_charts( + header: str, + body: str, + charts: List[tuple], + report_label: str, +) -> None: + """daily/monthly 圖文報告共用推播:有圖走 send_report_with_charts,無圖走 raw。 + + Operation Ollama-First v5.0 Phase 4 抽出(純結構重構,行為與原 inline 樣板一致): + 1. 三處原 inline 邏輯(_legacy_full_gemini_daily_report / generate_monthly_report / + _generate_daily_report_hermes_template)完全相同:header + "\\n\\n" + body, + 有 charts 用圖文 API,無則 raw。 + 2. 
失敗只 log warning(非阻塞),與原行為一致。 + + Args: + header: telegram_templates 已渲染的 header 字串 + body: 報告主文(Gemini 生成) + charts: list of (filename, png_bytes, caption) tuples,可為空 list + report_label: log 訊息辨識用,例如 "日報" / "月報" / "日報(模板模式)" + """ + try: + if charts: + from services.telegram_templates import ( + send_report_with_charts, + _get_chat_ids, + ) + full_msg = header + "\n\n" + body + send_report_with_charts(full_msg, charts, _get_chat_ids()) + else: + from services.telegram_templates import _send_telegram_raw + _send_telegram_raw(header + "\n\n" + body) + except Exception as e: + logger.error("[OpenClaw] %s Telegram 推播失敗: %s", report_label, e) + + +def _collect_mcp_intel(label: str) -> Dict[str, Any]: + """weekly/monthly 共用 MCP 外部情報收集(純結構重構,無行為變更)。 + + Args: + label: log 訊息辨識用,例如 "週報" / "月報" + + Returns: + dict: ``{"mcp_data": {...}, "holiday_ctx": str, "seasonal_ctx": str}`` + 失敗時三欄位皆回空字串/空 dict(非阻塞,與原 inline 行為一致)。 + """ + try: + from services.mcp_collector_service import mcp_collector + return { + "mcp_data": mcp_collector.collect_all() or {}, + "holiday_ctx": mcp_collector.get_holiday_context() or "", + "seasonal_ctx": mcp_collector.get_seasonal_context() or "", + } + except Exception as e: + logger.warning("[OpenClaw] %s MCP 收集失敗(非阻塞): %s", label, e) + return {"mcp_data": {}, "holiday_ctx": "", "seasonal_ctx": ""} + + # ═══════════════════════════════════════════════════════════════════════════════ # 主要公開函式 # ═══════════════════════════════════════════════════════════════════════════════ @@ -890,16 +1233,10 @@ def generate_weekly_strategy_report( competitor_summary = _fetch_competitor_summary() # ── Step 2:MCP 外部情報 ───────────────────────────────────────────────── - mcp_data: Dict[str, str] = {} - try: - from services.mcp_collector_service import mcp_collector - mcp_data = mcp_collector.collect_all() - holiday_ctx = mcp_collector.get_holiday_context() - seasonal_ctx = mcp_collector.get_seasonal_context() - except Exception as e: - logger.warning("[OpenClaw] MCP 
收集失敗(非阻塞): %s", e) - holiday_ctx = "" - seasonal_ctx = "" + _mcp = _collect_mcp_intel("週報") + mcp_data = _mcp["mcp_data"] + holiday_ctx = _mcp["holiday_ctx"] + seasonal_ctx = _mcp["seasonal_ctx"] # ── Step 3:組建 Gemini Prompt ─────────────────────────────────────────── system_prompt = """你是 OpenClaw,一位台灣頂尖電商戰略分析師,專精於 momo 購物平台。 @@ -1000,7 +1337,7 @@ TOP 威脅品項(近48h Hermes 偵測): # ── Step 4:Gemini 生成 ─────────────────────────────────────────────────── logger.info("[OpenClaw] 呼叫 Gemini %s 生成週報...", STRATEGY_MODEL) - report_content = _call_gemini(system_prompt, user_prompt, temperature=0.35) + report_content = _call_gemini(system_prompt, user_prompt, temperature=0.35, caller="openclaw_weekly") if not report_content: logger.error("[OpenClaw] Gemini 週報生成失敗") @@ -1090,9 +1427,42 @@ TOP 威脅品項(近48h Hermes 偵測): } +def _daily_hermes_template_enabled() -> bool: + """Operation Ollama-First v5.0 Phase 3 — Hermes 模板模式 feature flag. + + 每次呼叫即時讀取,允許 runtime toggle 灰度(不需重啟 scheduler)。 + 預設 false → 走 _legacy_full_gemini_daily_report(戰前行為,零 regression)。 + """ + return os.getenv('OPENCLAW_DAILY_HERMES_TEMPLATE', 'false').strip().lower() in ('true', '1', 'yes', 'on') + + def generate_daily_report() -> dict: """ - OpenClaw 電商日報(每日 09:00) + OpenClaw 電商日報(每日 09:00)— Operation Ollama-First v5.0 Phase 3 路由層。 + + 依 ``OPENCLAW_DAILY_HERMES_TEMPLATE`` 分流: + - false(預設):``_legacy_full_gemini_daily_report``,Gemini 全文寫稿(~28K tokens) + - true:``_generate_daily_report_hermes_template``,Hermes 算 KPI + 模板填充 + + Gemini 寫 200 字洞察(~8K tokens, -71%) + + 回傳合約兩條路徑一致:``{status, report_type, insight_id, period, ...}`` + cron 不需修改;ai_insights schema 不變(仍 type='daily_report')。 + """ + if _daily_hermes_template_enabled(): + try: + return _generate_daily_report_hermes_template() + except Exception as e: + # 模板模式異常 → 自動降級回 legacy,不讓 09:00 cron 整個掛掉 + logger.error( + "[OpenClaw] 日報 Hermes 模板模式異常,自動降級回 legacy: %s", e, + exc_info=True, + ) + return _legacy_full_gemini_daily_report() + return 
_legacy_full_gemini_daily_report() + + +def _legacy_full_gemini_daily_report() -> dict: + """OpenClaw 日報舊版(Gemini 全文)— Phase 3 前的原始實作,保留為 baseline 對照組。 流程: 1. 讀取昨日業績快照 + TOP 競品威脅 + 定價建議 @@ -1186,7 +1556,7 @@ def generate_daily_report() -> dict: # ── Step 3:Gemini 生成 ─────────────────────────────────────────────────── logger.info("[OpenClaw] 呼叫 Gemini 生成日報...") - report_content = _call_gemini(system_prompt, user_prompt, temperature=0.3) + report_content = _call_gemini(system_prompt, user_prompt, temperature=0.3, caller="openclaw_daily") if not report_content: logger.error("[OpenClaw] 日報 Gemini 呼叫失敗") @@ -1232,11 +1602,7 @@ def generate_daily_report() -> dict: # ── Step 6:Telegram 推播(圖文)──────────────────────────────────────── try: - from services.telegram_templates import ( - daily_report_header, - send_report_with_charts, - _get_chat_ids, - ) + from services.telegram_templates import daily_report_header header = daily_report_header( date_str=period, revenue=yesterday_sales.get("revenue", 0), @@ -1244,14 +1610,9 @@ def generate_daily_report() -> dict: threat_count=len(threats), opportunity_count=competitor_summary.get("premium_count", 0), ) - full_msg = header + "\n\n" + report_content - if charts: - send_report_with_charts(full_msg, charts, _get_chat_ids()) - else: - from services.telegram_templates import _send_telegram_raw - _send_telegram_raw(full_msg) + _push_report_with_charts(header, report_content, charts, "日報") except Exception as e: - logger.error("[OpenClaw] 日報 Telegram 推播失敗: %s", e) + logger.error("[OpenClaw] 日報 header 組裝失敗: %s", e) logger.info("[OpenClaw] 日報完成 insight_id=%s charts=%d", insight_id, len(charts)) return { @@ -1317,16 +1678,10 @@ def generate_monthly_report() -> dict: price_trend_data = _fetch_price_trend_summary(days_in_month) # ── Step 2:MCP 外部情報(月度版)─────────────────────────────────────── - mcp_data: Dict[str, str] = {} - try: - from services.mcp_collector_service import mcp_collector - mcp_data = mcp_collector.collect_all() - 
holiday_ctx = mcp_collector.get_holiday_context() - seasonal_ctx = mcp_collector.get_seasonal_context() - except Exception as e: - logger.warning("[OpenClaw] 月報 MCP 收集失敗(非阻塞): %s", e) - holiday_ctx = "" - seasonal_ctx = "" + _mcp = _collect_mcp_intel("月報") + mcp_data = _mcp["mcp_data"] + holiday_ctx = _mcp["holiday_ctx"] + seasonal_ctx = _mcp["seasonal_ctx"] # ── Step 3:組建 Gemini Prompt ─────────────────────────────────────────── system_prompt = """你是 OpenClaw 月報首席分析師,負責 momo 平台電商月度深度報告。 @@ -1413,7 +1768,7 @@ def generate_monthly_report() -> dict: # ── Step 4:Gemini 生成 ─────────────────────────────────────────────────── logger.info("[OpenClaw] 呼叫 Gemini 生成月報...") - report_content = _call_gemini(system_prompt, user_prompt, temperature=0.35) + report_content = _call_gemini(system_prompt, user_prompt, temperature=0.35, caller="openclaw_monthly") if not report_content: logger.error("[OpenClaw] 月報 Gemini 呼叫失敗") @@ -1466,11 +1821,7 @@ def generate_monthly_report() -> dict: # ── Step 7:Telegram 推播(圖文)──────────────────────────────────────── try: - from services.telegram_templates import ( - monthly_report_header, - send_report_with_charts, - _get_chat_ids, - ) + from services.telegram_templates import monthly_report_header top3 = [c.get("category", "N/A") for c in categories[:3]] or ["N/A"] header = monthly_report_header( month_str=period, @@ -1479,14 +1830,9 @@ def generate_monthly_report() -> dict: yoy=sales.get("yoy_pct", 0), top3_categories=top3, ) - full_msg = header + "\n\n" + report_content - if charts: - send_report_with_charts(full_msg, charts, _get_chat_ids()) - else: - from services.telegram_templates import _send_telegram_raw - _send_telegram_raw(full_msg) + _push_report_with_charts(header, report_content, charts, "月報") except Exception as e: - logger.error("[OpenClaw] 月報 Telegram 推播失敗: %s", e) + logger.error("[OpenClaw] 月報 header 組裝失敗: %s", e) logger.info("[OpenClaw] 月報完成 insight_id=%s charts=%d actions=%d", insight_id, len(charts), len(action_items)) @@ 
-1502,7 +1848,7 @@ def generate_monthly_report() -> dict: def generate_meta_analysis_report() -> str: """ - AI 系統效能自我審視(每 6 小時 run_openclaw_meta_analysis_task 呼叫) + AI 系統效能自我審視(每日 12:00 run_openclaw_meta_analysis_task 呼叫;Phase 4 降頻 6h → 24h) 分析 ai_insights 近期累積資料,評估: - 各 Agent 預測準確率 @@ -1607,7 +1953,7 @@ def generate_meta_analysis_report() -> str: """ # ── Gemini 生成 ────────────────────────────────────────────────────────── - report_content = _call_gemini(system_prompt, user_prompt, temperature=0.3) + report_content = _call_gemini(system_prompt, user_prompt, temperature=0.3, caller="openclaw_meta") if not report_content: logger.error("[OpenClaw] Meta-Analysis Gemini 呼叫失敗") return "(Meta-Analysis 生成失敗)" @@ -1829,3 +2175,503 @@ def _extract_action_items(report_text: str) -> List[str]: elif stripped.startswith("") and items: break return items[:8] + + +# ═══════════════════════════════════════════════════════════════════════════════ +# Operation Ollama-First v5.0 Phase 3 — Hermes 模板模式(A8 fullstack) +# +# 設計理念:日報 70% 是結構化 KPI(純 SQL 算),20% 是 Gemini 寫的洞察段落, +# 10% 是 Hermes 規則引擎產的行動清單。整體 token 從 ~28K → ~8K(-71%)。 +# +# Token 預算(單次日報): +# - Legacy 全文:system + user prompt ~3K,輸出 ~1.5K,含 raw KPI 嵌 prompt → ~28K 總用量 +# - Hermes 模板:KPI 已預算好,prompt 僅含「精簡 KPI 摘要 + 寫 200 字」 ~600 tokens prompt +# 輸出 ~400 tokens,總計 ~8K(含 ai_call_logger 的 meta + retry buffer) +# +# 規則: +# 1. 模板模式失敗 → 上層 generate_daily_report 自動降級回 legacy(不讓 cron 整個掛掉) +# 2. ai_insights schema 不變(仍 type='daily_report'),metadata_json 加 mode='hermes_template' 區分 +# 3. 
_call_gemini caller 用 'openclaw_daily_insight',方便 ai_calls 統計區分新舊 +# ═══════════════════════════════════════════════════════════════════════════════ + +# 上限常數 — 避免 magic number 散落 +DAILY_TOP_SKU_LIMIT = 10 +DAILY_PRICE_GAP_LIMIT = 5 +DAILY_INSIGHT_MAX_TOKENS = 400 +DAILY_INSIGHT_TIMEOUT_S = 60 + + +def _compute_daily_kpi(target_date) -> Dict[str, Any]: + """純 SQL + Hermes 規則計算當日所有結構化 KPI;不走 LLM。 + + Args: + target_date: ``datetime.date``,要分析的目標日(一般為昨日)。 + + Returns: + dict: 給 Jinja2 模板的完整渲染上下文,含: + revenue / orders / top_skus / price_gaps / + inventory_alerts / priority_actions + """ + from datetime import date as _date_cls + if hasattr(target_date, 'date') and not isinstance(target_date, _date_cls): + target_date = target_date.date() + if not isinstance(target_date, _date_cls): + raise TypeError(f"target_date 必須是 date,實得 {type(target_date)}") + + return { + 'revenue': _query_revenue_kpi(target_date), + 'orders': _query_orders_kpi(target_date), + 'top_skus': _query_top_skus(target_date, limit=DAILY_TOP_SKU_LIMIT), + 'price_gaps': _query_competitor_price_alerts(target_date, limit=DAILY_PRICE_GAP_LIMIT), + 'inventory_alerts': _query_inventory_anomalies(target_date), + 'priority_actions': _generate_priority_actions(target_date), + } + + +def _query_revenue_kpi(target_date) -> Dict[str, Any]: + """營收 KPI:今日 / 昨日 / 7日均 + 變化%。""" + session = get_session() + try: + row = session.execute(text(""" + SELECT + SUM(CASE WHEN snapshot_date::date = :d THEN + COALESCE("總業績"::numeric, 0) ELSE 0 END) AS today, + SUM(CASE WHEN snapshot_date::date = :d - 1 THEN + COALESCE("總業績"::numeric, 0) ELSE 0 END) AS yesterday, + SUM(CASE WHEN snapshot_date::date BETWEEN :d - 7 AND :d - 1 THEN + COALESCE("總業績"::numeric, 0) ELSE 0 END) / 7.0 AS avg_7d + FROM daily_sales_snapshot + WHERE snapshot_date::date BETWEEN :d - 7 AND :d + """), {"d": target_date}).fetchone() + + today = float((row[0] or 0) if row else 0) + yesterday = float((row[1] or 0) if row else 0) + avg_7d = float((row[2] or 0) if 
row else 0) + dod_pct = ((today - yesterday) / yesterday * 100.0) if yesterday else 0.0 + wow_pct = ((today - avg_7d) / avg_7d * 100.0) if avg_7d else 0.0 + + return { + "today": today, + "yesterday": yesterday, + "avg_7d": avg_7d, + "dod_pct": round(dod_pct, 1), + "wow_pct": round(wow_pct, 1), + } + except Exception as e: + logger.error("[OpenClaw] revenue KPI 讀取失敗: %s", e) + return {"today": 0.0, "yesterday": 0.0, "avg_7d": 0.0, "dod_pct": 0.0, "wow_pct": 0.0} + finally: + session.close() + + +def _query_orders_kpi(target_date) -> Dict[str, Any]: + """訂單數 / SKU 數 / 平均客單價 KPI。 + + Note: daily_sales_snapshot 並無 order_id 欄位,「訂單數」以 row 數近似 + (與 legacy `_fetch_yesterday_sales` 同義;保留行為一致性)。 + """ + session = get_session() + try: + row = session.execute(text(""" + SELECT + COUNT(*) AS today_rows, + COUNT(DISTINCT "商品ID") AS today_sku, + AVG(NULLIF(COALESCE("總業績"::numeric, 0), 0)) AS avg_value_today + FROM daily_sales_snapshot + WHERE snapshot_date::date = :d + """), {"d": target_date}).fetchone() + + prev = session.execute(text(""" + SELECT COUNT(*) FROM daily_sales_snapshot + WHERE snapshot_date::date = :d - 1 + """), {"d": target_date}).fetchone() + + today_rows = int((row[0] or 0) if row else 0) + today_sku = int((row[1] or 0) if row else 0) + avg_value = float((row[2] or 0) if row else 0) + yesterday_rows = int((prev[0] or 0) if prev else 0) + dod_pct = ((today_rows - yesterday_rows) / yesterday_rows * 100.0) if yesterday_rows else 0.0 + + return { + "today": today_rows, + "yesterday": yesterday_rows, + "sku_count": today_sku, + "avg_order_value": avg_value, + "dod_pct": round(dod_pct, 1), + } + except Exception as e: + logger.error("[OpenClaw] orders KPI 讀取失敗: %s", e) + return {"today": 0, "yesterday": 0, "sku_count": 0, "avg_order_value": 0.0, "dod_pct": 0.0} + finally: + session.close() + + +def _query_top_skus(target_date, limit: int = 10) -> List[Dict[str, Any]]: + """當日 TOP N 熱銷 SKU。""" + session = get_session() + try: + rows = session.execute(text(""" + 
SELECT + "商品ID" AS sku, + "商品名稱" AS name, + SUM(COALESCE("總業績"::numeric, 0)) AS revenue, + COUNT(*) AS qty + FROM daily_sales_snapshot + WHERE snapshot_date::date = :d + GROUP BY "商品ID", "商品名稱" + ORDER BY revenue DESC + LIMIT :lim + """), {"d": target_date, "lim": limit}).fetchall() + return [ + { + "sku": r[0] or "", + "name": (r[1] or "")[:60], + "revenue": float(r[2] or 0), + "qty": int(r[3] or 0), + } + for r in rows + ] + except Exception as e: + logger.error("[OpenClaw] top SKUs 讀取失敗: %s", e) + return [] + finally: + session.close() + + +def _query_competitor_price_alerts(target_date, limit: int = 5) -> List[Dict[str, Any]]: + """Top-N competitor price-gap alerts: reuses _fetch_top_threats(limit) and fills in product names from the products table via an expanding IN list (best-effort: a failed lookup is logged and the SKU code is used as the name). target_date is currently unused.""" + threats = _fetch_top_threats(limit) + if not threats: + return [] + + sku_codes = [t.get("sku") for t in threats if t.get("sku")] + name_map: Dict[str, str] = {} + if sku_codes: + session = get_session() + try: + rows = session.execute( + text(""" + SELECT i_code, name FROM products + WHERE i_code IN :skus + """).bindparams(bindparam("skus", expanding=True)), + {"skus": sku_codes}, + ).fetchall() + name_map = {r[0]: (r[1] or "")[:60] for r in rows} + except Exception as e: + logger.warning("[OpenClaw] 競品 SKU 名稱補完失敗(非阻塞): %s", e) + finally: + session.close() + + alerts: List[Dict[str, Any]] = [] + for t in threats: + sku = t.get("sku") or "" + gap = float(t.get("gap_pct") or 0) + alerts.append({ + "sku": sku, + "sku_name": name_map.get(sku, sku), + "competitor": "PChome/蝦皮", + "gap_pct": round(gap, 1), + "momo_price": t.get("momo_price"), + "competitor_price": t.get("pchome_price"), + "confidence": float(t.get("confidence") or 0), + }) + return alerts + + +def _query_inventory_anomalies(target_date) -> List[Dict[str, Any]]: + """庫存 / 退單異常品項(Hermes/NemoTron 寫入的 ai_insights 子類)。""" + session = get_session() + try: + rows = session.execute(text(""" + SELECT product_sku, content, insight_type, confidence, metadata_json + FROM ai_insights + WHERE insight_type IN ('inventory_alert', 
'return_alert', 'stock_anomaly') + AND status = 'approved' + AND created_at >= NOW() - INTERVAL '24 hours' + ORDER BY confidence DESC + LIMIT 10 + """)).fetchall() + out: List[Dict[str, Any]] = [] + for r in rows: + meta = {} + try: + meta = json.loads(r[4]) if r[4] else {} + except Exception: + meta = {} + out.append({ + "sku": r[0] or "", + "summary": (r[1] or "")[:120], + "type": r[2] or "", + "confidence": float(r[3] or 0), + "extra": meta, + }) + return out + except Exception as e: + logger.warning("[OpenClaw] 庫存異常讀取失敗(非阻塞): %s", e) + return [] + finally: + session.close() + + +def _generate_priority_actions(target_date) -> List[str]: + """規則引擎產生 48h 優先事項(純規則,無 LLM)。""" + actions: List[str] = [] + + threats = _fetch_top_threats(limit=DAILY_PRICE_GAP_LIMIT) + for t in threats[:3]: + gap = float(t.get("gap_pct") or 0) + if abs(gap) >= 10.0: + actions.append( + f"⚠️ SKU {t.get('sku') or ''} 競品價差 {gap:+.1f}%,建議 48h 內檢視跟降" + ) + + recs = _fetch_top_recommendations(limit=3) + for r in recs: + actions.append( + f"💰 {(r.get('name') or r.get('sku') or '')[:40]}:{r.get('strategy') or '待覆核'}" + f"(信心 {float(r.get('confidence') or 0):.2f})" + ) + + invs = _query_inventory_anomalies(target_date) + for inv in invs[:2]: + actions.append(f"📦 SKU {inv.get('sku')}:{(inv.get('summary') or '')[:60]}") + + if not actions: + actions.append("✅ 今日無高優先警示,維持現有策略執行。") + + return actions[:8] + + +def _compute_gemini_insight(kpi: Dict[str, Any], period: str) -> str: + """給 Gemini 純結構化 KPI(已算好),請其寫 150-200 字繁中策略洞察。 + + Token 控制:精簡 prompt(~600 tokens)+ ``max_output_tokens=DAILY_INSIGHT_MAX_TOKENS``。 + Gemini 失敗時回降級訊息(不拋例外,模板仍可渲染)。 + """ + revenue = kpi.get("revenue") or {} + orders = kpi.get("orders") or {} + top_skus = kpi.get("top_skus") or [] + price_gaps = kpi.get("price_gaps") or [] + inv_alerts = kpi.get("inventory_alerts") or [] + + top_names = [s.get("name", "") for s in top_skus[:3]] + + system_prompt = ( + "你是 OpenClaw 日報洞察分析師。語言:繁體中文(台灣用語)。" + "嚴禁簡體字、嚴禁套話、嚴禁重複數字。聚焦『解讀』與『明日行動』。" + 
) + user_prompt = ( + f"根據以下今日 ({period}) KPI(已計算),寫 150-200 字策略洞察:\n\n" + f"營收:NT${revenue.get('today', 0):,.0f}(DoD {revenue.get('dod_pct', 0):+.1f}%、" + f"vs 7日均 {revenue.get('wow_pct', 0):+.1f}%)\n" + f"訂單數:{orders.get('today', 0)}(DoD {orders.get('dod_pct', 0):+.1f}%)\n" + f"TOP 商品:{top_names}\n" + f"價差警示:{len(price_gaps)} 件\n" + f"庫存異常:{len(inv_alerts)} 件\n\n" + "請聚焦:(1) 今日最值得統帥注意的 1-2 件事;(2) 明日建議行動。" + "不要重複上面的數字本身,專注解讀。控制 200 字內。" + ) + + text_out = _call_gemini( + system_prompt, + user_prompt, + temperature=0.35, + caller="openclaw_daily_insight", + ) + if text_out: + return text_out.strip() + + direction = "上升" if revenue.get("dod_pct", 0) > 0 else "下滑" if revenue.get("dod_pct", 0) < 0 else "持平" + return ( + f"今日營收 NT${revenue.get('today', 0):,.0f},與昨日相較{direction} " + f"{revenue.get('dod_pct', 0):+.1f}%;vs 7日均 {revenue.get('wow_pct', 0):+.1f}%。" + f"TOP 商品集中在 {', '.join([n for n in top_names if n]) if top_names else '—'}。" + f"今有 {len(price_gaps)} 件競品價差警示與 {len(inv_alerts)} 件庫存異常待處理。" + "(AI 洞察生成暫時不可用,已回退至規則性摘要。)" + ) + + +def _render_daily_template_v2(context: Dict[str, Any]) -> str: + """以 Jinja2 渲染 daily_report_v2.j2;缺欄位優雅降級為「—」。 + + 將 Jinja Environment 集中於此,方便 unit test 直接呼叫無需 Flask app context。 + """ + from jinja2 import Environment, FileSystemLoader, Undefined + + template_dir = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + 'templates', + ) + + class _SafeUndefined(Undefined): + """缺欄位回 '—' 而非 raise UndefinedError,符合「優雅降級」需求。""" + def __str__(self) -> str: + return "—" + def __html__(self) -> str: + return "—" + def __getattr__(self, name: str): + return _SafeUndefined() + + env = Environment( + loader=FileSystemLoader(template_dir), + undefined=_SafeUndefined, + autoescape=False, # 日報純文字 + emoji,無 HTML 注入面 + trim_blocks=True, + lstrip_blocks=True, + ) + + def _fmt_currency(v: Any) -> str: + try: + # 容錯:Undefined / None / 空字串 → 降級為 — + if v is None or isinstance(v, Undefined) or v == "": + return "—" + return 
f"NT${float(v):,.0f}" + except (TypeError, ValueError): + return "—" + + def _fmt_pct(v: Any) -> str: + try: + if v is None or isinstance(v, Undefined) or v == "": + return "—" + return f"{float(v):+.1f}%" + except (TypeError, ValueError): + return "—" + + env.filters['format_currency'] = _fmt_currency + env.filters['format_pct'] = _fmt_pct + + template = env.get_template('daily_report_v2.j2') + return template.render(**context) + + +def _generate_daily_report_hermes_template() -> dict: + """Hermes 模板模式日報 — 結構化 KPI + 200 字 Gemini 洞察 + 規則行動清單。 + + 流程: + 1. 取昨日業績 stale gate(與 legacy 同邏輯) + 2. _compute_daily_kpi 純 SQL 算齊所有結構化 KPI + 3. _compute_gemini_insight 取 200 字洞察(精簡 prompt) + 4. Jinja2 渲染 daily_report_v2.j2 + 5. 持久化 ai_insights(type='daily_report',metadata.mode='hermes_template') + 6. Telegram 推播(沿用 legacy 圖表生成) + + 回傳合約與 legacy 完全一致。 + """ + now = datetime.now() + yesterday = now - timedelta(days=1) + period = yesterday.strftime("%Y年%m月%d日") + target_date = yesterday.date() + weekday_map = ['週一', '週二', '週三', '週四', '週五', '週六', '週日'] + weekday = weekday_map[target_date.weekday()] + logger.info("[OpenClaw] 日報任務啟動(Hermes 模板模式)period=%s", period) + + # ── Step 1:stale gate(與 legacy 對齊)───────────────────────────────── + sales = _fetch_sales_summary(7) + if sales.get("stale"): + last_date = str(sales.get("last_date")) + logger.warning( + "[OpenClaw] 日報任務跳過(模板模式):daily_sales_snapshot 已停更 last_date=%s period=%s", + last_date, period, + ) + _send_data_stale_alert(report_type="daily_report", last_date=last_date, period=period) + return { + "status": "skipped", + "report_type": "daily_report", + "reason": "data_stale", + "last_date": last_date, + "period": period, + } + + # ── Step 2:純 SQL 算 KPI ───────────────────────────────────────────── + kpi = _compute_daily_kpi(target_date) + + # ── Step 3:Gemini 寫 200 字洞察(caller 細分)────────────────────────── + insight_text = _compute_gemini_insight(kpi, period) + + # ── Step 4:Jinja2 渲染 
─────────────────────────────────────────────── + report_content = _render_daily_template_v2({ + "today": period, + "weekday": weekday, + "revenue": kpi["revenue"], + "orders": kpi["orders"], + "top_skus": kpi["top_skus"], + "price_gaps": kpi["price_gaps"], + "inventory_alerts": kpi["inventory_alerts"], + "priority_actions": kpi["priority_actions"], + "gemini_insight": insight_text, + }) + + # ── Step 5:圖表(沿用 legacy,非阻塞)───────────────────────────────── + charts: List[tuple] = [] + try: + from services.chart_generator_service import ( + revenue_trend_chart, + price_gap_bar_chart, + ) + rev_chart = revenue_trend_chart(7, "近7日") + if rev_chart: + charts.append(("revenue_7d.png", rev_chart, "📈 近7日營收趨勢")) + if kpi["price_gaps"]: + gap_chart = price_gap_bar_chart( + [{"sku": a["sku"], "gap_pct": a["gap_pct"]} for a in kpi["price_gaps"]], + "競品價差警示(TOP 5)", + ) + if gap_chart: + charts.append(("price_gap.png", gap_chart, "⚠️ 競品價差分析")) + except Exception as e: + logger.warning("[OpenClaw] 日報圖表生成失敗(非阻塞): %s", e) + + # ── Step 6:持久化 ────────────────────────────────────────────────────── + metadata = { + "period": period, + "model": STRATEGY_MODEL, + "mode": "hermes_template", # ← 區分新舊模式關鍵欄位 + "template_version": "daily_report_v2", + "today_revenue": kpi["revenue"].get("today", 0), + "dod_pct": kpi["revenue"].get("dod_pct", 0), + "wow_pct": kpi["revenue"].get("wow_pct", 0), + "top_sku_count": len(kpi["top_skus"]), + "price_gap_count": len(kpi["price_gaps"]), + "inventory_alert_count": len(kpi["inventory_alerts"]), + "priority_action_count": len(kpi["priority_actions"]), + "chart_count": len(charts), + "generated_at": now.isoformat(), + } + insight_id = _save_to_ai_insights( + insight_type="daily_report", + content=report_content, + confidence=0.85, + metadata=metadata, + period=target_date.strftime("%Y-%m-%d"), + ) + + action_items = list(kpi["priority_actions"]) + _save_action_items(action_items, insight_id) + + # ── Step 7:Telegram 推播 
────────────────────────────────────────────── + try: + from services.telegram_templates import daily_report_header + header = daily_report_header( + date_str=period, + revenue=kpi["revenue"].get("today", 0), + wow=kpi["revenue"].get("wow_pct", 0), + threat_count=len(kpi["price_gaps"]), + opportunity_count=0, + ) + _push_report_with_charts(header, report_content, charts, "日報(模板模式)") + except Exception as e: + logger.error("[OpenClaw] 日報 header 組裝失敗(模板模式): %s", e) + + logger.info( + "[OpenClaw] 日報完成(Hermes 模板模式)insight_id=%s charts=%d actions=%d", + insight_id, len(charts), len(action_items), + ) + return { + "status": "ok", + "report_type": "daily_report", + "insight_id": insight_id, + "period": period, + "chart_count": len(charts), + "action_count": len(action_items), + "mode": "hermes_template", + } diff --git a/templates/daily_report_v2.j2 b/templates/daily_report_v2.j2 new file mode 100644 index 0000000..65d4464 --- /dev/null +++ b/templates/daily_report_v2.j2 @@ -0,0 +1,63 @@ +📊 momo 日報 {{ today }} ({{ weekday }}) +═══════════════════════════════════════ + +## 📈 營收 KPI + +| 指標 | 今日 | vs 昨日 | vs 7日均 | +|------|------|---------|----------| +| 營收 | {{ revenue.today | format_currency }} | {{ revenue.dod_pct | format_pct }} | {{ revenue.wow_pct | format_pct }} | +| 訂單筆數 | {{ orders.today_rows | default(orders.today) | default('—') }} | — | — | +| 上架 SKU | {{ orders.today_sku | default(orders.sku_count) | default('—') }} | — | — | +| 平均客單 | {{ orders.avg_value_today | default(orders.avg_order_value) | format_currency }} | — | — | + +{% if revenue.today == 0 %} +⚠️ 今日營收為零,請檢查資料管線是否正常。 +{% endif %} + +## 🏆 TOP {{ top_skus | length }} 熱銷商品 + +{% if top_skus %} +{% for sku in top_skus %} +{{ loop.index }}. 
**{{ sku.name | default('—') }}** + 數量:{{ sku.qty | default('—') }} 件 | 營收:{{ sku.revenue | format_currency }} +{% endfor %} +{% else %} +(今日無熱銷資料) +{% endif %} + +## 🔍 競品價差警示 + +{% if price_gaps %} +{% for alert in price_gaps %} +- ⚠️ **{{ alert.sku_name | default(alert.sku) | default('—') }}**:我方 {{ alert.momo_price | format_currency }} vs {{ alert.competitor | default('競品') }} {{ alert.comp_price | default(alert.competitor_price) | format_currency }} (價差 {{ alert.gap_pct | format_pct }}) +{% endfor %} +{% else %} +✅ 暫無重大價差警示 +{% endif %} + +## 📦 庫存異常 + +{% if inventory_alerts %} +{% for alert in inventory_alerts %} +- 🟡 {{ alert.sku_name | default(alert.sku) | default('—') }}:{{ alert.reason | default(alert.summary) | default('—') }} +{% endfor %} +{% else %} +✅ 庫存狀態正常 +{% endif %} + +## 💡 今日洞察 (AI 分析) + +{{ gemini_insight | default('(本日洞察生成失敗,請查 logger)') }} + +## ✅ 48h 優先事項 + +{% if priority_actions %} +{% for action in priority_actions %} +{{ loop.index }}. {{ action }} +{% endfor %} +{% else %} +(暫無自動產生的行動建議) +{% endif %} + +═══════════════════════════════════════ +🤖 Operation Ollama-First v5.0 / daily_report_v2 (Hermes 模板模式) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..bd86e3b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,15 @@ +import os + +import pytest + + +@pytest.fixture +def host(): + """Provide a default SMTP/IMAP host for non-parametric email probe tests.""" + return os.getenv("MOOD_TEST_MAIL_HOST", "ms1.pchome.tw") + + +@pytest.fixture +def port(): + """Provide a default SMTP/IMAP port used by probe tests.""" + return int(os.getenv("MOOD_TEST_MAIL_PORT", "587")) diff --git a/tests/test_nemotron_qwen3_compat.py b/tests/test_nemotron_qwen3_compat.py new file mode 100644 index 0000000..9c7dad3 --- /dev/null +++ b/tests/test_nemotron_qwen3_compat.py @@ -0,0 +1,456 @@ +""" +test_nemotron_qwen3_compat.py +───────────────────────────────────────────────────────────────── +Operation Ollama-First v5.0 / Phase 3 / A9 — Nemotron qwen3 切換相容性測試 + +驗證面: + T1. 
qwen3 chat 回應 OpenAI tool_calls 結構 → _parse_tool_calls_struct 正確 + T2. qwen3 沒回 tool_calls → _parse_content_fallback 正確(與 NIM 同等容錯) + T3. qwen3 同時回 tool_calls + content → 優先採用 tool_calls + T4. qwen3 連線失敗 → 不丟例外給上游,自動 fallback NIM 路徑 + T5. qwen3 + NIM 都失敗 → ADR-004 走 Hermes 規則引擎降級(含「🟡 [規則引擎]」標記) + T6. NEMOTRON_OLLAMA_FIRST 預設 false → 完全不呼叫 qwen3(戰前行為) + +紀律: + - 所有 HTTP 互動 mock,不實際呼叫 GCP Ollama 或 NIM + - 與 test_nemotron_fallback 共存,使用同款 FakeThreat + - assert log_ai_call 路徑可被 monkeypatch 旁路(不污染 ai_calls 表) +""" +from dataclasses import dataclass +from contextlib import contextmanager +import pytest + + +# ───────────────────────────────────────────────────────────── +# Fixtures +# ───────────────────────────────────────────────────────────── +@dataclass +class FakeThreat: + sku: str = "SKU-Q1" + name: str = "qwen3 測試品" + momo_price: float = 1200.0 + pchome_price: float = 980.0 + gap_pct: float = 22.4 + sales_7d_delta_pct: float = -35.0 + risk: str = "HIGH" + recommended_action: str = "建議跟進降價" + confidence: float = 0.85 + sales_7d_curr_amount: float = 78000.0 + sales_7d_prev_amount: float = 120000.0 + + +class _FakeResp: + def __init__(self, payload: dict, status: int = 200): + self._payload = payload + self.status_code = status + + def raise_for_status(self): + if self.status_code >= 400: + import requests + raise requests.HTTPError(f"HTTP {self.status_code}", response=self) + + def json(self): + return self._payload + + +@contextmanager +def _noop_log_ai_call(*args, **kwargs): + """Mock log_ai_call context manager — 不寫 ai_calls 表,回傳具備所需 setter 的 stub""" + class _Ctx: + def set_tokens(self, **_kw): pass + def set_error(self, *_a, **_kw): pass + def fallback_to_caller(self, *_a, **_kw): pass + def set_cache_hit(self, *_a, **_kw): pass + def add_meta(self, *_a, **_kw): pass + yield _Ctx() + + +@pytest.fixture(autouse=True) +def _reset_global_state(): + """test 互相污染防線:每個 test 前後清 _ALERT_CACHE + ollama unhealthy marks。 + + 根因:dispatch() line 1328 _is_duplicate_alert 用 
module-level _ALERT_CACHE, + 第一個 test 跑完留 "SKU-Q1" 在 cache,後續 test 命中去重 → dispatched=0 失敗。 + """ + import services.nemoton_dispatcher_service as _nem + import services.ollama_service as _oss + _nem._ALERT_CACHE.clear() + _oss._unhealthy_marks.clear() + _oss._resolved_host_cache['host'] = None + _oss._resolved_host_cache['ts'] = 0 + yield + _nem._ALERT_CACHE.clear() + _oss._unhealthy_marks.clear() + _oss._resolved_host_cache['host'] = None + _oss._resolved_host_cache['ts'] = 0 + + +def _patch_execution_methods(monkeypatch, dispatcher): + """攔截實際 Telegram/DB 寫入,記錄被呼叫的 tool 名稱與 args(與 fallback test 共用 pattern)""" + calls = [] + + def record(kind): + def _inner(*args, **kwargs): + calls.append({"kind": kind, "args": args, "kwargs": kwargs}) + return _inner + + monkeypatch.setattr(dispatcher, "_exec_trigger_price_alert", record("price_alert")) + monkeypatch.setattr(dispatcher, "_exec_add_to_recommendation", record("recommendation")) + monkeypatch.setattr(dispatcher, "_exec_flag_for_human_review", record("human_review")) + return calls + + +def _enable_qwen3_path(monkeypatch, module): + """打開 NEMOTRON_OLLAMA_FIRST + 旁路 mcp/log_ai_call/resolve_host 等副作用""" + monkeypatch.setattr(module, "NEMOTRON_OLLAMA_FIRST", True) + monkeypatch.setattr(module, "log_ai_call", _noop_log_ai_call) + monkeypatch.setattr(module, "build_mcp_context", lambda: "MCP-MOCK") + # 確保即使未被呼叫,import 路徑可解析 + import services.ollama_service as ollama_module + monkeypatch.setattr(ollama_module, "resolve_ollama_host", lambda: "http://gcp-mock:11434") + monkeypatch.setattr(ollama_module, "mark_unhealthy", lambda *a, **kw: None) + + +# ───────────────────────────────────────────────────────────── +# T1. 
qwen3 OpenAI tool_calls 結構 → 正確解析 +# ───────────────────────────────────────────────────────────── +def test_qwen3_tool_calls_struct_parsed_and_executed(monkeypatch): + """qwen3 回標準 OpenAI tool_calls 結構,dispatcher 應跳過 NIM 直接走工具執行""" + import services.nemoton_dispatcher_service as module + + _enable_qwen3_path(monkeypatch, module) + + # mock GCP Ollama /api/chat 回 OpenAI 兼容結構 + fake_body = { + "message": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "function": { + "name": "trigger_price_alert", + "arguments": { + "sku": "SKU-Q1", + "name": "qwen3 測試品", + "gap_pct": 22.4, + "sales_delta": -35.0, + "action": "跟進降價至 $980", + "confidence": 0.85, + }, + } + } + ], + }, + "prompt_eval_count": 320, + "eval_count": 64, + "done": True, + } + monkeypatch.setattr( + module.requests, "post", lambda *a, **kw: _FakeResp(fake_body) + ) + + dispatcher = module.NemotronDispatcher() + calls = _patch_execution_methods(monkeypatch, dispatcher) + # NIM 路徑必須完全沒被觸發(驗證 qwen3 確實是主路徑) + nim_called = {"v": False} + + def _nim_should_not_be_called(*a, **kw): + nim_called["v"] = True + raise AssertionError("NIM 不應被呼叫,qwen3 已成功") + + monkeypatch.setattr(dispatcher, "_call_nim", _nim_should_not_be_called) + + threats = [FakeThreat()] + result = dispatcher.dispatch(threats, hermes_stats={"duration_sec": 1.0}) + + assert nim_called["v"] is False, "qwen3 成功時 NIM 不可被觸發" + assert result["dispatched"] == 1 + assert result["nim_stats"].get("provider") == "gcp_ollama" + assert result["nim_stats"].get("model") == module.NEMOTRON_OLLAMA_MODEL + assert calls and calls[0]["kind"] == "price_alert" + + +# ───────────────────────────────────────────────────────────── +# T2. 
qwen3 沒回 tool_calls 但 content 含 JSON list → fallback 解析 +# ───────────────────────────────────────────────────────────── +def test_qwen3_content_only_fallback_parsing(monkeypatch): + """qwen3 把工具呼叫塞在 content(list[dict])→ _parse_content_fallback 應接住""" + import services.nemoton_dispatcher_service as module + + _enable_qwen3_path(monkeypatch, module) + + content_payload = ( + '[{"name": "flag_for_human_review", ' + '"parameters": {"sku": "SKU-Q1", "name": "qwen3 測試品", ' + '"concern": "信心不足", "confidence": 0.45}}]' + ) + fake_body = { + "message": {"role": "assistant", "content": content_payload, "tool_calls": []}, + "prompt_eval_count": 100, + "eval_count": 30, + } + monkeypatch.setattr(module.requests, "post", lambda *a, **kw: _FakeResp(fake_body)) + + dispatcher = module.NemotronDispatcher() + calls = _patch_execution_methods(monkeypatch, dispatcher) + monkeypatch.setattr( + dispatcher, "_call_nim", + lambda threats: (_ for _ in ()).throw(AssertionError("NIM 不應被呼叫")), + ) + + result = dispatcher.dispatch([FakeThreat(confidence=0.45)], hermes_stats={"duration_sec": 1.0}) + + assert result["dispatched"] == 1 + assert calls and calls[0]["kind"] == "human_review" + + +# ───────────────────────────────────────────────────────────── +# T3. 
tool_calls + content 同時存在 → 優先 tool_calls +# ───────────────────────────────────────────────────────────── +def test_qwen3_tool_calls_takes_precedence_over_content(monkeypatch): + import services.nemoton_dispatcher_service as module + + _enable_qwen3_path(monkeypatch, module) + + fake_body = { + "message": { + "role": "assistant", + "content": '[{"name": "flag_for_human_review", "parameters": {"sku": "X"}}]', + "tool_calls": [ + { + "function": { + "name": "trigger_price_alert", + "arguments": { + "sku": "SKU-Q1", + "name": "qwen3 測試品", + "gap_pct": 22.4, + "sales_delta": -35.0, + "action": "降價", + "confidence": 0.85, + }, + } + } + ], + }, + "prompt_eval_count": 200, + "eval_count": 40, + } + monkeypatch.setattr(module.requests, "post", lambda *a, **kw: _FakeResp(fake_body)) + + dispatcher = module.NemotronDispatcher() + calls = _patch_execution_methods(monkeypatch, dispatcher) + result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0}) + + assert result["dispatched"] == 1 + assert calls[0]["kind"] == "price_alert", "tool_calls 結構必須優先於 content fallback" + + +# ───────────────────────────────────────────────────────────── +# T4. 
qwen3 連線失敗 → 不爆,自動 fallback 到 NIM +# ───────────────────────────────────────────────────────────── +def test_qwen3_connection_error_falls_back_to_nim(monkeypatch): + """GCP Ollama 連不上時,dispatcher 應靜默改走 NIM,最終仍能 dispatch""" + import requests + import services.nemoton_dispatcher_service as module + + _enable_qwen3_path(monkeypatch, module) + + def _boom(*a, **kw): + raise requests.ConnectionError("GCP unreachable") + + monkeypatch.setattr(module.requests, "post", _boom) + + # NIM 路徑:給 valid key + quota,且 mock _call_nim 回傳 1 個 tool_call + monkeypatch.setattr(module, "NIM_API_KEY", "fake-key") + monkeypatch.setattr(module, "_check_nim_quota", lambda: True) + + dispatcher = module.NemotronDispatcher() + calls = _patch_execution_methods(monkeypatch, dispatcher) + nim_invoked = {"v": False} + + def _fake_nim(threats): + nim_invoked["v"] = True + return ( + [{ + "tool": "trigger_price_alert", + "args": { + "sku": "SKU-Q1", "name": "qwen3 測試品", + "gap_pct": 22.4, "sales_delta": -35.0, + "action": "降價", "confidence": 0.85, + }, + }], + {"total_tokens": 256, "quota_used": 5}, + ) + + monkeypatch.setattr(dispatcher, "_call_nim", _fake_nim) + + result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0}) + + assert nim_invoked["v"] is True, "qwen3 失敗後必須 fallback 至 NIM" + assert result["dispatched"] == 1 + assert result["nim_stats"].get("total_tokens") == 256 + assert calls[0]["kind"] == "price_alert" + + +# ───────────────────────────────────────────────────────────── +# T5. 
qwen3 + NIM 全失敗 → ADR-004 Hermes 規則引擎兜底 +# ───────────────────────────────────────────────────────────── +def test_qwen3_and_nim_both_fail_falls_back_to_hermes_rules(monkeypatch): + """雙路全爆時必須走 Hermes 規則引擎,並保留 🟡 [規則引擎] 標記""" + import requests + import services.nemoton_dispatcher_service as module + + _enable_qwen3_path(monkeypatch, module) + monkeypatch.setattr(module.requests, "post", + lambda *a, **kw: (_ for _ in ()).throw(requests.ConnectionError("qwen3 down"))) + + monkeypatch.setattr(module, "NIM_API_KEY", "fake-key") + monkeypatch.setattr(module, "_check_nim_quota", lambda: True) + + dispatcher = module.NemotronDispatcher() + + # 攔 _call_nim 也擲 timeout + monkeypatch.setattr( + dispatcher, "_call_nim", + lambda threats: (_ for _ in ()).throw(requests.Timeout("NIM timeout")), + ) + + # 攔住規則引擎內部呼叫的 _exec_*,記錄 concern / reason 文字驗證 🟡 標記 + # 規則引擎部分 _exec_* 用 positional args(line 787-795 _exec_trigger_price_alert + # 簽名: sku, name, gap_pct, sales_delta, action, confidence, ...), + # record helper 必須把 positional 與 keyword 合併才能 .get('action')。 + captured = [] + + def _merge_positional(name_order, args, kwargs): + merged = dict(kwargs) + for i, val in enumerate(args): + if i < len(name_order): + merged.setdefault(name_order[i], val) + return merged + + def record_review(*args, **kwargs): + merged = _merge_positional( + ['sku', 'name', 'concern', 'confidence', 'footprint', + 'momo_price', 'comp_price', 'gap_pct', 'sales_delta', + 'revenue_loss_7d', 'recommended_price'], + args, kwargs) + captured.append(("human_review", merged)) + + def record_alert(*args, **kwargs): + merged = _merge_positional( + ['sku', 'name', 'gap_pct', 'sales_delta', 'action', 'confidence', + 'momo_price', 'comp_price', 'footprint', + 'revenue_loss_7d', 'recommended_price'], + args, kwargs) + captured.append(("price_alert", merged)) + + def record_reco(*args, **kwargs): + captured.append(("recommendation", kwargs)) + + monkeypatch.setattr(dispatcher, "_exec_flag_for_human_review", record_review) 
+ monkeypatch.setattr(dispatcher, "_exec_trigger_price_alert", record_alert) + monkeypatch.setattr(dispatcher, "_exec_add_to_recommendation", record_reco) + + # gap_pct=22.4 + risk=HIGH → 規則 2:trigger_price_alert,action 應有 🟡 [規則引擎] 前綴 + result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0}) + + assert result["nim_stats"].get("degraded") is True, "ADR-004 降級旗標必須存在" + assert captured, "規則引擎必須兜底執行至少一次" + kind, kwargs = captured[0] + assert kind == "price_alert" + assert "🟡 [規則引擎]" in kwargs.get("action", ""), \ + "ADR-004 鐵律:Hermes 規則引擎兜底時必須帶『🟡 [規則引擎]』標記" + # footprint 也應帶 🟡 [降級模式 ADR-004] 標記(給 Telegram 告警頭顯示) + assert "🟡 [降級模式 ADR-004]" in kwargs.get("footprint", "") + + +# ───────────────────────────────────────────────────────────── +# T6. feature flag 預設 false → 戰前行為,qwen3 完全不被呼叫 +# ───────────────────────────────────────────────────────────── +def test_flag_default_false_preserves_pre_war_behavior(monkeypatch): + """NEMOTRON_OLLAMA_FIRST 預設 false 時:dispatch 不應觸碰 GCP Ollama, + nim_stats 不可帶 provider='gcp_ollama'。""" + import services.nemoton_dispatcher_service as module + + # 不打開 flag(預設值),但安全起見明確 set false + monkeypatch.setattr(module, "NEMOTRON_OLLAMA_FIRST", False) + + # 任何呼叫 requests.post 都視為錯誤(戰前 NIM 路徑會被 _call_nim mock 接走) + qwen3_post_called = {"v": False} + + def _maybe_post(*a, **kw): + qwen3_post_called["v"] = True + raise AssertionError("flag=false 時不可呼叫 GCP Ollama HTTP") + + monkeypatch.setattr(module.requests, "post", _maybe_post) + + monkeypatch.setattr(module, "NIM_API_KEY", "fake-key") + monkeypatch.setattr(module, "_check_nim_quota", lambda: True) + + dispatcher = module.NemotronDispatcher() + calls = _patch_execution_methods(monkeypatch, dispatcher) + monkeypatch.setattr( + dispatcher, "_call_nim", + lambda threats: ( + [{ + "tool": "trigger_price_alert", + "args": { + "sku": "SKU-Q1", "name": "qwen3 測試品", + "gap_pct": 22.4, "sales_delta": -35.0, + "action": "降價", "confidence": 0.85, + }, + }], + {"total_tokens": 100, 
"quota_used": 1}, + ), + ) + + result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0}) + + assert qwen3_post_called["v"] is False + assert result["dispatched"] == 1 + assert result["nim_stats"].get("provider") in (None, "nim"), \ + "flag=false 時 nim_stats 不應帶 provider='gcp_ollama'" + + +# ───────────────────────────────────────────────────────────── +# T7. 共用 helper 純單元測試(OpenAI tool_calls schema 邊界) +# ───────────────────────────────────────────────────────────── +def test_parse_tool_calls_struct_handles_string_arguments(): + """NIM 回 arguments 是 JSON 字串、qwen3 回 dict — 兩者都得接住""" + from services.nemoton_dispatcher_service import _parse_tool_calls_struct + + # NIM 風格(arguments 是 JSON 字串) + nim_style = [{"function": {"name": "foo", "arguments": '{"a": 1, "b": "x"}'}}] + out_nim = _parse_tool_calls_struct(nim_style) + assert out_nim == [{"tool": "foo", "args": {"a": 1, "b": "x"}}] + + # qwen3/Ollama 風格(arguments 已是 dict) + qwen_style = [{"function": {"name": "bar", "arguments": {"a": 2}}}] + out_qwen = _parse_tool_calls_struct(qwen_style) + assert out_qwen == [{"tool": "bar", "args": {"a": 2}}] + + # 邊界:空 / 壞 JSON / 缺 name → 不爆,回空或忽略 + assert _parse_tool_calls_struct([]) == [] + assert _parse_tool_calls_struct(None) == [] + bad = [{"function": {"name": "baz", "arguments": "{not json"}}] + out_bad = _parse_tool_calls_struct(bad) + assert out_bad == [{"tool": "baz", "args": {}}] + no_name = [{"function": {"arguments": "{}"}}] + assert _parse_tool_calls_struct(no_name) == [] + + +def test_parse_content_fallback_handles_various_shapes(): + from services.nemoton_dispatcher_service import _parse_content_fallback + + # OpenAI 老風格 [{"name", "parameters"}] + out1 = _parse_content_fallback('[{"name": "foo", "parameters": {"a": 1}}]') + assert out1 == [{"tool": "foo", "args": {"a": 1}}] + + # 帶 function 嵌套 + out2 = _parse_content_fallback('[{"function": {"name": "bar"}, "arguments": "{\\"b\\": 2}"}]') + assert out2 == [{"tool": "bar", "args": {"b": 2}}] 
+ + # 非 list / 非 JSON / 空字串 → [] + assert _parse_content_fallback("") == [] + assert _parse_content_fallback("not json") == [] + assert _parse_content_fallback('{"a":1}') == [] diff --git a/tests/test_openclaw_daily_template.py b/tests/test_openclaw_daily_template.py new file mode 100644 index 0000000..b03fa5e --- /dev/null +++ b/tests/test_openclaw_daily_template.py @@ -0,0 +1,212 @@ +""" +tests/test_openclaw_daily_template.py +───────────────────────────────────────────────────────────────── +Operation Ollama-First v5.0 / Phase 3 / A8 — 日報模板路由測試 + +驗證面: + T1. flag=false(預設)→ 走 _legacy_full_gemini_daily_report(regression) + T2. flag=true → 走 _generate_daily_report_hermes_template + T3. _compute_daily_kpi 各 KPI 函數可獨立 mock 測(DB 失敗回安全預設) + T4. _render_daily_template_v2 缺欄位優雅降級(_SafeUndefined 不 raise) + T5. _SafeUndefined 對 'X.Y.Z' 巢狀存取不爆 + +紀律: + - 不打真實 DB / Gemini API + - 不寫 ai_insights + - 不發 Telegram +""" + +import os +from datetime import date, datetime +from unittest.mock import patch, MagicMock + +import pytest + + +# ═══════════════════════════════════════════════════════════════════════════ +# Fixtures +# ═══════════════════════════════════════════════════════════════════════════ + +@pytest.fixture(autouse=True) +def _reset_flag(monkeypatch): + """每個 test 前清環境變數,避免互相污染""" + monkeypatch.delenv('OPENCLAW_DAILY_HERMES_TEMPLATE', raising=False) + yield + + +# ═══════════════════════════════════════════════════════════════════════════ +# T1+T2 — Routing +# ═══════════════════════════════════════════════════════════════════════════ + +class TestRouting: + + def test_flag_false_routes_to_legacy(self, monkeypatch): + """flag=false → _legacy_full_gemini_daily_report 被呼叫""" + monkeypatch.setenv('OPENCLAW_DAILY_HERMES_TEMPLATE', 'false') + import importlib + import services.openclaw_strategist_service as svc + importlib.reload(svc) + + legacy_called = {'v': False} + hermes_called = {'v': False} + + def mock_legacy(): + legacy_called['v'] = True + return {'status': 'ok', 
'mode': 'legacy'} + + def mock_hermes(): + hermes_called['v'] = True + return {'status': 'ok', 'mode': 'hermes_template'} + + monkeypatch.setattr(svc, '_legacy_full_gemini_daily_report', mock_legacy) + monkeypatch.setattr(svc, '_generate_daily_report_hermes_template', mock_hermes) + + svc.generate_daily_report() + + assert legacy_called['v'] is True, "flag=false 必須走 legacy 路徑" + assert hermes_called['v'] is False, "flag=false 不可走 hermes 模板" + + def test_flag_true_routes_to_hermes_template(self, monkeypatch): + """flag=true → _generate_daily_report_hermes_template 被呼叫""" + monkeypatch.setenv('OPENCLAW_DAILY_HERMES_TEMPLATE', 'true') + import importlib + import services.openclaw_strategist_service as svc + importlib.reload(svc) + + legacy_called = {'v': False} + hermes_called = {'v': False} + + monkeypatch.setattr(svc, '_legacy_full_gemini_daily_report', + lambda: legacy_called.update(v=True) or {'status': 'ok'}) + monkeypatch.setattr(svc, '_generate_daily_report_hermes_template', + lambda: hermes_called.update(v=True) or {'status': 'ok'}) + + svc.generate_daily_report() + + assert hermes_called['v'] is True, "flag=true 必須走 hermes 模板路徑" + assert legacy_called['v'] is False, "flag=true 不可走 legacy" + + def test_flag_default_is_false(self, monkeypatch): + """無 env 設定時 → 預設 false(戰前行為)""" + # 不 set env + import importlib + import services.openclaw_strategist_service as svc + importlib.reload(svc) + + assert svc._daily_hermes_template_enabled() is False + + +# ═══════════════════════════════════════════════════════════════════════════ +# T3 — KPI 計算(DB 失敗安全降級) +# ═══════════════════════════════════════════════════════════════════════════ + +class TestKPIComputation: + + def test_compute_daily_kpi_invalid_date_raises(self): + import services.openclaw_strategist_service as svc + with pytest.raises(TypeError): + svc._compute_daily_kpi("not-a-date") + + def test_revenue_kpi_returns_safe_default_on_db_error(self, monkeypatch): + """DB 異常時 _query_revenue_kpi 回零(不拋 exception)""" 
+ import services.openclaw_strategist_service as svc + + class _BrokenSession: + def execute(self, *a, **kw): + raise RuntimeError('DB connection lost') + def close(self): + pass + + monkeypatch.setattr(svc, 'get_session', lambda: _BrokenSession()) + + result = svc._query_revenue_kpi(date(2026, 5, 3)) + + assert result['today'] == 0.0 + assert result['dod_pct'] == 0.0 + assert result['wow_pct'] == 0.0 + + +# ═══════════════════════════════════════════════════════════════════════════ +# T4+T5 — Template 渲染與缺欄位優雅降級 +# ═══════════════════════════════════════════════════════════════════════════ + +class TestTemplateRendering: + + def test_render_with_full_context_succeeds(self): + import services.openclaw_strategist_service as svc + + context = { + 'today': '2026年05月02日', + 'weekday': '週五', + 'revenue': { + 'today': 1234567.0, + 'yesterday': 1100000.0, + 'avg_7d': 1050000.0, + 'dod_pct': 12.2, + 'wow_pct': 17.6, + }, + 'orders': { + 'today_rows': 234, + 'today_sku': 187, + 'avg_value_today': 5276.0, + }, + 'top_skus': [ + {'name': 'SKU-A', 'qty': 50, 'revenue': 100000}, + {'name': 'SKU-B', 'qty': 32, 'revenue': 80000}, + ], + 'price_gaps': [ + {'sku_name': '商品X', 'momo_price': 1200, 'comp_price': 980, + 'gap_pct': 22.4, 'competitor': 'PChome'}, + ], + 'inventory_alerts': [], + 'priority_actions': ['對 SKU-A 啟動 EA 流程', '觀察 PChome 補貼'], + 'gemini_insight': '今日營收強勁成長,建議加碼家電促銷檔期。', + } + + rendered = svc._render_daily_template_v2(context) + + assert '2026年05月02日' in rendered + assert '週五' in rendered + assert 'NT$1,234,567' in rendered + assert 'SKU-A' in rendered + assert '商品X' in rendered + assert 'PChome' in rendered + assert '今日營收強勁成長' in rendered + + def test_render_with_missing_fields_does_not_raise(self): + """_SafeUndefined 對缺欄位回 — 不拋 UndefinedError""" + import services.openclaw_strategist_service as svc + + context = { + 'today': '2026年05月02日', + 'weekday': '週五', + 'revenue': {'today': 0.0, 'dod_pct': 0.0, 'wow_pct': 0.0}, + 'orders': {}, # 空 dict + 'top_skus': [], 
+ 'price_gaps': [], + 'inventory_alerts': [], + 'priority_actions': [], + 'gemini_insight': '', + } + + # 不 raise 即過 + rendered = svc._render_daily_template_v2(context) + + assert isinstance(rendered, str) + assert len(rendered) > 0 + # 缺欄位該降級為 — 或預設值 + assert '今日無熱銷資料' in rendered or '✅' in rendered + + def test_safe_undefined_nested_access(self): + """_SafeUndefined 對 'X.Y.Z' 巢狀存取不爆""" + import services.openclaw_strategist_service as svc + + # 完全無 'revenue' 也不該 raise + context = { + 'today': '2026年05月02日', + 'weekday': '週五', + # 故意省略 revenue / orders / top_skus 等 + } + rendered = svc._render_daily_template_v2(context) + assert isinstance(rendered, str) + assert '2026年05月02日' in rendered diff --git a/tests/test_openclaw_qa_golden_set.py b/tests/test_openclaw_qa_golden_set.py new file mode 100644 index 0000000..b6e7962 --- /dev/null +++ b/tests/test_openclaw_qa_golden_set.py @@ -0,0 +1,286 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +tests/test_openclaw_qa_golden_set.py +OpenClaw Q&A 黃金集 A/B 對照框架 +(Operation Ollama-First v5.0 — Phase 3, A7 fullstack-engineer) + +目的: + 在統帥盲測前,先建立 Ollama qwen3:14b vs Gemini 2.5 Flash 的「量化基線」。 + 10 題典型 momo 商業 Q&A,雙模型各跑一次,比對: + - 簡體字污染數量(A2 黃燈警訊核心) + - 回應長度 + - 結構性指標(行數、列點數) + - 拒答訊號 + - 黃金關鍵字命中率(題目自帶 expect_keywords) + +執行: + RUN_GOLDEN_SET=1 pytest tests/test_openclaw_qa_golden_set.py -v -s + # GCP 還沒拉 qwen3:14b 之前,預設 SKIP(避免 CI 紅燈) + +紀律: + - PII 紀律:題目/答案無真實 chat_id / username / 身份證 / 手機,全部去識別化 + - 不對「正確性」做 hard assert;本框架專做「品質量化基線」收集 + - 報告印到 stdout(pytest -s 顯示),人工檢視,不卡 CI +""" + +import json +import os +import sys +import time +from typing import Dict, List, Optional + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +# ───────────────────────────────────────────────────────────────────────────── +# 啟用條件:須三條件齊備才實跑 +# 1. RUN_GOLDEN_SET=1 +# 2. OPENCLAW_QA_OLLAMA_HOST 可達 +# 3. 
GEMINI_API_KEY 已設 +# 否則 SKIP。 +# ───────────────────────────────────────────────────────────────────────────── + +def _ollama_reachable(host: str, timeout: float = 2.0) -> bool: + try: + import requests + r = requests.get(f"{host.rstrip('/')}/api/version", timeout=timeout) + return r.status_code == 200 + except Exception: + return False + + +def _ollama_has_model(host: str, model: str, timeout: float = 3.0) -> bool: + """Return True when the Ollama host at `host` lists a pulled model whose name starts with the full `model` tag; any request/parse error yields False.""" + try: + import requests + r = requests.get(f"{host.rstrip('/')}/api/tags", timeout=timeout) + if r.status_code != 200: + return False + tags = r.json().get('models', []) + return any(m.get('name', '').startswith(model) for m in tags)  # match the full tag, not just the family, so qwen3:8b does not satisfy a check for qwen3:14b + except Exception: + return False + + +_RUN_GOLDEN = os.getenv('RUN_GOLDEN_SET', '0') == '1' +_HOST = os.getenv('OPENCLAW_QA_OLLAMA_HOST', os.getenv('OLLAMA_HOST_PRIMARY', 'http://34.143.170.20:11434')) +_MODEL = os.getenv('OPENCLAW_QA_OLLAMA_MODEL', 'qwen3:14b') +_HAS_GEMINI = bool(os.getenv('GEMINI_API_KEY')) + +pytestmark = pytest.mark.skipif( + not _RUN_GOLDEN, + reason="黃金集需要 RUN_GOLDEN_SET=1 + GCP qwen3:14b ready + GEMINI_API_KEY;統帥盲測前才跑", +) + + +# ───────────────────────────────────────────────────────────────────────────── +# 黃金集(10 題;全部去 PII;情境取自 momo-pro 真實 Telegram 互動模式) +# ───────────────────────────────────────────────────────────────────────────── + +GOLDEN_SET: List[Dict] = [ + { + "id": "g01_weekly_trend", + "question": "本週 momo 業績趨勢如何?跟上週比?", + "expect_keywords": ["業績", "週", "成長"], + "category": "業績趨勢", + }, + { + "id": "g02_competitor_threat", + "question": "PChome 最近在 3C 類有發動補貼戰嗎?對我們影響?", + "expect_keywords": ["PChome", "3C"], + "category": "競品威脅", + }, + { + "id": "g03_pricing_strategy", + "question": "我有一支 SKU 比競品貴 8%,銷量持續下滑,該怎麼辦?", + "expect_keywords": ["定價", "競品"], + "category": "定價策略", + }, + { + "id": "g04_seasonal", + "question": "母親節檔期快到了,建議哪些品類加碼?", + "expect_keywords": ["母親節", "品類"], + "category": "季節機會", + }, + { + "id": "g05_command_routing", 
+ "question": "我想看完整週報怎麼下指令?", + "expect_keywords": ["weekly", "週報"], + "category": "指令導引", + }, + { + "id": "g06_top_threats", + "question": "目前 TOP 5 最緊急的競價威脅是哪些?", + "expect_keywords": ["威脅", "TOP"], + "category": "威脅清單", + }, + { + "id": "g07_inventory_signal", + "question": "如何判斷某 SKU 該促銷出清?", + "expect_keywords": ["促銷", "出清"], + "category": "庫存決策", + }, + { + "id": "g08_cross_category", + "question": "家電 vs 生活雜貨,哪個品類本月成長動能比較強?", + "expect_keywords": ["家電", "成長"], + "category": "品類比較", + }, + { + "id": "g09_data_unavailable", + "question": "幫我看 2030 年的銷售預測。", + "expect_keywords": ["資料", "無法"], # 期待模型誠實回應「資料不足」而非編造 + "category": "資料邊界", + }, + { + "id": "g10_action_item", + "question": "綜合本週數據,給我 3 個 48 小時內必做行動。", + "expect_keywords": ["行動", "建議"], + "category": "行動清單", + }, +] + + +# ───────────────────────────────────────────────────────────────────────────── +# Scoring helpers +# ───────────────────────────────────────────────────────────────────────────── + +def _count_simplified(text: str) -> int: + """重用 strategist service 的簡體字 hint 集合計數。""" + from services.openclaw_strategist_service import _SIMPLIFIED_HINT_CHARS + return sum(1 for c in (text or '') if c in _SIMPLIFIED_HINT_CHARS) + + +def _count_keyword_hits(text: str, keywords: List[str]) -> int: + if not text: + return 0 + return sum(1 for kw in keywords if kw in text) + + +def _is_refusal(text: str) -> bool: + from services.openclaw_strategist_service import _REFUSAL_PATTERNS + return any(p in (text or '') for p in _REFUSAL_PATTERNS) + + +def _structure_score(text: str) -> Dict[str, int]: + """結構性量化指標。""" + if not text: + return {"lines": 0, "bullets": 0, "tables": 0} + return { + "lines": text.count('\n') + 1, + # 條列符號粗略偵測(含中文「、」「,」開頭的列點) + "bullets": sum(text.count(s) for s in ('- ', '• ', '* ', '1.', '2.', '3.')), + "tables": text.count('|'), + } + + +def _score_response(qid: str, question: str, response: str, expect_kw: List[str]) -> Dict: + structure = _structure_score(response) + return { + 
"qid": qid, + "length": len(response or ''), + "simplified_count": _count_simplified(response), + "keyword_hits": _count_keyword_hits(response, expect_kw), + "is_refusal": _is_refusal(response), + "lines": structure["lines"], + "bullets": structure["bullets"], + "tables": structure["tables"], + "preview": (response or '')[:120].replace('\n', ' / '), + } + + +# ───────────────────────────────────────────────────────────────────────────── +# Caller wrappers (使用 service 的真實函式) +# ───────────────────────────────────────────────────────────────────────────── + +def _call_ollama(question: str) -> Optional[str]: + from services.openclaw_strategist_service import _call_qwen3_qa + return _call_qwen3_qa(question, None, f"golden-{int(time.time())}") + + +def _call_gemini_baseline(question: str) -> Optional[str]: + from services.openclaw_strategist_service import _call_gemini + system_prompt = ( + "你是 MOMO Pro 電商情報策略師「OpenClaw」。以繁體中文(台灣用語)回覆使用者。" + "嚴禁簡體字。回覆長度控制在 500 字內,可用 Markdown 條列。" + ) + return _call_gemini(system_prompt, question, temperature=0.5, caller="openclaw_qa_golden") + + +# ───────────────────────────────────────────────────────────────────────────── +# Tests +# ───────────────────────────────────────────────────────────────────────────── + +def test_environment_ready(): + """sanity check:跑黃金集前確認 GCP host + model + Gemini key 都 ready。""" + assert _ollama_reachable(_HOST), f"Ollama 主機不可達:{_HOST}" + assert _ollama_has_model(_HOST, _MODEL), ( + f"GCP Ollama 尚未拉 {_MODEL}(請於 Phase 8 由 A1 完成 ollama pull)" + ) + assert _HAS_GEMINI, "GEMINI_API_KEY 未設" + + +def test_golden_set_ab_comparison(capsys): + """跑 10 題雙模型 A/B 對照,量化指標印到 stdout。 + + 本測試不對「正確性」做 hard assert;目的是給統帥盲測前的「品質量化基線」。 + 僅 hard assert: + - 雙模型至少都有回應(非全 None) + - Gemini baseline 簡體字數量 == 0(baseline 不該污染) + """ + # 啟用 flag 讓 _call_qwen3_qa 走真實邏輯 + os.environ['OPENCLAW_QA_OLLAMA_FIRST'] = 'true' + + rows = [] + for item in GOLDEN_SET: + qid = item['id'] + question = item['question'] + kws = 
item['expect_keywords'] + + ollama_resp = _call_ollama(question) + gemini_resp = _call_gemini_baseline(question) + + rows.append({ + 'qid': qid, + 'category': item['category'], + 'question': question, + 'ollama': _score_response(qid, question, ollama_resp or '', kws), + 'gemini': _score_response(qid, question, gemini_resp or '', kws), + }) + + # Print the metrics baseline (only visible with pytest -s) + print("\n" + "=" * 100) + print("OpenClaw QA 黃金集 A/B 量化基線(Ollama qwen3:14b vs Gemini 2.5 Flash)") + print("=" * 100) + for r, golden in zip(rows, GOLDEN_SET):  # rows is built 1:1 from GOLDEN_SET, so zip recovers each question's own keyword list + print(f"\n[{r['qid']}] ({r['category']}) {r['question']}") + for side in ('ollama', 'gemini'): + s = r[side] + print( + f" {side:>7}: len={s['length']:>4} simp={s['simplified_count']:>2} " + f"kw={s['keyword_hits']}/{len(golden['expect_keywords'])} " + f"lines={s['lines']:>2} refusal={s['is_refusal']}" + ) + print(f" preview: {s['preview']}") + + # Export the rows as JSON for later analysis + out_path = os.path.join(os.path.dirname(__file__), 'logs', 'qa_golden_baseline.json') + os.makedirs(os.path.dirname(out_path), exist_ok=True) + with open(out_path, 'w', encoding='utf-8') as f: + json.dump(rows, f, ensure_ascii=False, indent=2) + print(f"\n基線已存:{out_path}") + + # Hard assertions (minimal safety net) + ollama_responded = sum(1 for r in rows if r['ollama']['length'] > 0) + gemini_responded = sum(1 for r in rows if r['gemini']['length'] > 0) + assert ollama_responded >= 8, f"Ollama 回應率過低:{ollama_responded}/10" + assert gemini_responded >= 9, f"Gemini 回應率過低:{gemini_responded}/10" + + # The Gemini baseline should show no simplified-Chinese pollution (sanity-checks the metric itself) + for r in rows: + assert r['gemini']['simplified_count'] == 0, ( + f"Gemini baseline 簡體污染(指標可能誤判):{r['qid']} {r['gemini']['preview']}" + ) diff --git a/tests/test_openclaw_qa_routing.py b/tests/test_openclaw_qa_routing.py new file mode 100644 index 0000000..70368cd --- /dev/null +++ b/tests/test_openclaw_qa_routing.py @@ -0,0 +1,358 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +tests/test_openclaw_qa_routing.py +OpenClaw Q&A 路由 + 品質守門 unit tests +(Operation Ollama-First 
v5.0 — Phase 3, A7 fullstack-engineer) + +涵蓋: + - feature flag OPENCLAW_QA_OLLAMA_FIRST=false → 走 Gemini-first(regression test) + - flag=true + 高品質 Ollama 回應 → 直接回 Ollama 結果,不走 Gemini + - flag=true + 低品質 Ollama 回應 → 升級至 Gemini,並標 fallback_to=openclaw_qa_gemini_fallback + - flag=true + Ollama 呼叫失敗 → 升級至 Gemini + - _is_low_quality_response 各規則:空字串 / 長度過短 / 簡體污染 / 拒答 / 流水帳 + +執行: + pytest tests/test_openclaw_qa_routing.py -v +""" + +import os +import sys +import time +from typing import Any, Dict, Optional + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import services.openclaw_strategist_service as svc +import services.ai_call_logger as logger_mod +from services.ai_call_logger import _reset_kill_switch + + +# ───────────────────────────────────────────────────────────────────────────── +# Fixtures +# ───────────────────────────────────────────────────────────────────────────── + +@pytest.fixture(autouse=True) +def reset_state(monkeypatch): + """每個測試重置 logger kill-switch + stub DB 寫入收集 ai_calls 紀錄。""" + _reset_kill_switch() + captured = [] + + def fake_write(state): + captured.append({ + 'caller': state.caller, + 'provider': state.provider, + 'model': state.model, + 'status': state.status, + 'fallback_to': state.fallback_to, + 'error': state.error, + 'meta': dict(state.meta), + 'request_id': state.request_id, + }) + + monkeypatch.setattr(logger_mod, '_write_to_db', fake_write) + monkeypatch.setenv('AI_CALL_LOGGING_ENABLED', 'true') + # 預設 flag=false(戰前行為) + monkeypatch.delenv('OPENCLAW_QA_OLLAMA_FIRST', raising=False) + yield captured + + +def _wait_async(captured, n=1, timeout=2.0): + deadline = time.time() + timeout + while time.time() < deadline: + if len(captured) >= n: + return True + time.sleep(0.01) + return False + + +# ───────────────────────────────────────────────────────────────────────────── +# 1. 
_is_low_quality_response 純函式規則 +# ───────────────────────────────────────────────────────────────────────────── + +class TestLowQualityRules: + + def test_empty_string_is_low_quality(self): + assert svc._is_low_quality_response("") is True + assert svc._is_low_quality_response(None) is True + assert svc._is_low_quality_response(" \n ") is True + + def test_too_short_is_low_quality(self): + # 長度 < 50 字元 → 低品質 + assert svc._is_low_quality_response("你好,我是 OpenClaw") is True + + def test_acceptable_response_passes(self): + good = ( + "本週 momo 業績較上週成長 12%,主要受惠於家電與生活雜貨。\n" + "建議:持續關注 PChome 競價動態,必要時調整定價策略。\n" + "預估下週 momo 仍有 5-8% 成長空間。" + ) + assert svc._is_low_quality_response(good) is False + + def test_simplified_pollution_detected(self): + # 句中含 >= 3 個簡體字 hint → 低品質(Qwen 繁中短板核心檢查) + polluted = ( + "本周业绩比上周增长,您可以关注这个产品的价格变动趋势," + "我们建议处理掉滞销库存以提高资产效率" + ) + assert svc._is_low_quality_response(polluted) is True + + def test_two_simplified_chars_still_acceptable(self): + # 只有 2 個簡體 hint 字(边界以下)+ 結構良好 → 仍可接受 + # (避免過度敏感誤殺正常繁體回覆中混入零星簡體字的情境) + text = ( + "本週 momo 业绩成長明顯,建議持續關注競品動向。\n" + "重點品類:家電、3C、生活雜貨。\n" + "下週可加碼促銷檔期。" + ) + assert svc._is_low_quality_response(text) is False + + def test_refusal_pattern_detected(self): + for refusal in ['無法回答', '我不知道', '抱歉,我無法協助']: + text = f"關於這個問題,{refusal},請改問其他內容以便我協助您。" + assert svc._is_low_quality_response(text) is True, f"應被判定為拒答:{refusal}" + + def test_flowing_text_no_breaks_is_low_quality(self): + # 200+ 字無換行 → 流水帳 + text = "本週業績整體呈現上升趨勢。" * 20 # ~200+ 字 + assert "\n" not in text + assert len(text) > 200 + assert svc._is_low_quality_response(text) is True + + def test_long_text_with_breaks_is_acceptable(self): + # 200+ 字但有適度斷行 → 結構良好 + text = ( + "本週業績整體呈現上升趨勢,主要驅動類別為家電與生活雜貨大類別。\n" + "競品動向:PChome 在 3C 類發動大規模補貼戰,預估壓縮我方 3 至 5 個百分點毛利率。\n" + "蝦皮也在母嬰用品加碼免運券促銷,需密切觀察跟降節奏,避免市占下滑。\n" + "建議行動:(1) 加碼家電促銷檔期,重點操作大尺寸電視與廚房家電," + "(2) 觀察 PChome 補貼是否延續至下週,準備二段反擊方案," + "(3) 對價差大於 5% 的 SKU 主動啟動 EA 流程,避免毛利持續流失。" + ) + assert len(text) > 200 + 
assert svc._is_low_quality_response(text) is False + + +# ───────────────────────────────────────────────────────────────────────────── +# 2. Routing:feature flag = false 時維持 Gemini-first 路徑(regression) +# ───────────────────────────────────────────────────────────────────────────── + +class TestFlagOff: + + def test_flag_false_routes_to_legacy(self, monkeypatch, reset_state): + """flag=false(預設)→ 不應該呼叫 _call_qwen3_qa,直接走 _legacy_gemini_first_qa。""" + monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'false') + legacy_called = {'count': 0} + ollama_called = {'count': 0} + + def fake_legacy(q, ctx, request_id=None): + legacy_called['count'] += 1 + return "[legacy gemini reply]" + + def fake_ollama(q, ctx, rid): + ollama_called['count'] += 1 + return "[should not be called]" + + monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy) + monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama) + + result = svc.generate_strategy_response("本週業績如何?") + assert result == "[legacy gemini reply]" + assert legacy_called['count'] == 1 + assert ollama_called['count'] == 0 + + def test_flag_unset_defaults_to_off(self, monkeypatch, reset_state): + """環境變數完全未設 → 預設 false → 走 legacy。""" + monkeypatch.delenv('OPENCLAW_QA_OLLAMA_FIRST', raising=False) + legacy_called = {'count': 0} + + def fake_legacy(q, ctx, request_id=None): + legacy_called['count'] += 1 + return "[legacy reply]" + + monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy) + # 不 stub _call_qwen3_qa;如果意外被呼叫會打到真網路 → fail + result = svc.generate_strategy_response("競品分析") + assert legacy_called['count'] == 1 + assert result == "[legacy reply]" + + def test_empty_query_short_circuits(self, monkeypatch, reset_state): + """空 query 不應觸發任何 LLM 呼叫。""" + monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true') + legacy_called = {'count': 0} + ollama_called = {'count': 0} + monkeypatch.setattr(svc, '_legacy_gemini_first_qa', + lambda q, ctx, request_id=None: legacy_called.update({'count': legacy_called['count']+1}) 
or "") + monkeypatch.setattr(svc, '_call_qwen3_qa', + lambda q, ctx, rid: ollama_called.update({'count': ollama_called['count']+1}) or "") + + out = svc.generate_strategy_response("") + assert "請輸入您的問題" in out + assert legacy_called['count'] == 0 + assert ollama_called['count'] == 0 + + +# ───────────────────────────────────────────────────────────────────────────── +# 3. Routing:feature flag = true + Ollama 高/低品質 +# ───────────────────────────────────────────────────────────────────────────── + +class TestFlagOn: + + def test_flag_true_high_quality_returns_ollama(self, monkeypatch, reset_state): + """flag=true + Ollama 回高品質 → 直接回 Ollama,不走 Gemini。""" + monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true') + legacy_called = {'count': 0} + good_reply = ( + "本週 momo 業績成長 12%,主要驅動類別為家電。\n" + "建議:持續關注 PChome 競價並加碼家電促銷檔期。\n" + "下週預估仍有 5-8% 成長空間。" + ) + monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: good_reply) + + def fake_legacy(q, ctx, request_id=None): + legacy_called['count'] += 1 + return "[gemini fallback]" + + monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy) + + out = svc.generate_strategy_response("本週業績如何?") + assert out == good_reply + assert legacy_called['count'] == 0 # Gemini 沒被呼叫 + + def test_flag_true_low_quality_falls_back_to_gemini(self, monkeypatch, reset_state): + """flag=true + Ollama 回低品質(簡體污染)→ fallback Gemini。""" + monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true') + bad_reply = "本周业绩增长,您可以关注这个产品的价格变动,我们建议处理库存" + legacy_called = {'count': 0} + + monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: bad_reply) + + def fake_legacy(q, ctx, request_id=None): + legacy_called['count'] += 1 + return "[gemini high quality reply]" + + monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy) + + out = svc.generate_strategy_response("本週業績如何?") + assert out == "[gemini high quality reply]" + assert legacy_called['count'] == 1 + + def test_flag_true_ollama_returns_none_falls_back(self, monkeypatch, 
reset_state): + """flag=true + Ollama 呼叫失敗(回 None)→ fallback Gemini。""" + monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true') + legacy_called = {'count': 0} + monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: None) + + def fake_legacy(q, ctx, request_id=None): + legacy_called['count'] += 1 + return "[gemini reply after ollama down]" + + monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy) + + out = svc.generate_strategy_response("test") + assert out == "[gemini reply after ollama down]" + assert legacy_called['count'] == 1 + + +# ───────────────────────────────────────────────────────────────────────────── +# 4. _call_qwen3_qa: ai_call_logger 整合 + fallback_to 標記 +# ───────────────────────────────────────────────────────────────────────────── + +class TestCallQwen3Telemetry: + + def test_qwen3_logs_ok_status_on_success(self, monkeypatch, reset_state): + """高品質回應 → ai_calls 應記 status=ok, caller=openclaw_qa, provider=gcp_ollama""" + captured = reset_state + + class FakeResp: + status_code = 200 + def raise_for_status(self): pass + def json(self): + return { + 'response': '本週 momo 業績成長 12%,建議加碼家電促銷。', + 'prompt_eval_count': 150, + 'eval_count': 60, + } + + monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: FakeResp()) + + result = svc._call_qwen3_qa("本週業績?", None, "qa-test123") + assert result is not None + assert "業績成長" in result + + assert _wait_async(captured, 1) + assert len(captured) == 1 + rec = captured[0] + assert rec['caller'] == 'openclaw_qa' + assert rec['provider'] == 'gcp_ollama' + assert rec['model'] == svc.OPENCLAW_QA_OLLAMA_MODEL + assert rec['status'] == 'ok' + assert rec['fallback_to'] is None + assert rec['meta'].get('flag') == 'OPENCLAW_QA_OLLAMA_FIRST' + assert rec['request_id'] == "qa-test123" + + def test_qwen3_logs_fallback_on_exception(self, monkeypatch, reset_state): + """Ollama 連線失敗 → ai_calls 應記 fallback_to=openclaw_qa_gemini_fallback + status=fallback""" + captured = reset_state + + def boom(*a, **kw): + 
raise svc.requests.ConnectionError("connection refused") + + monkeypatch.setattr(svc.requests, 'post', boom) + + result = svc._call_qwen3_qa("test", None, "qa-fail123") + assert result is None + + assert _wait_async(captured, 1) + rec = captured[0] + assert rec['status'] == 'fallback' + assert rec['fallback_to'] == 'openclaw_qa_gemini_fallback' + assert rec['error'] is not None + assert 'ConnectionError' in rec['error'] + + def test_qwen3_logs_fallback_on_empty_response(self, monkeypatch, reset_state): + """Ollama 回空 response → 視為 empty_response,標 fallback。""" + captured = reset_state + + class FakeResp: + status_code = 200 + def raise_for_status(self): pass + def json(self): + return {'response': '', 'prompt_eval_count': 100, 'eval_count': 0} + + monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: FakeResp()) + + result = svc._call_qwen3_qa("test", None, "qa-empty") + assert result is None + + assert _wait_async(captured, 1) + rec = captured[0] + assert rec['status'] == 'fallback' + assert rec['fallback_to'] == 'openclaw_qa_gemini_fallback' + assert rec['error'] == 'empty_response' + + +# ───────────────────────────────────────────────────────────────────────────── +# 5. 
環境變數讀取即時性(runtime toggle) +# ───────────────────────────────────────────────────────────────────────────── + +class TestRuntimeToggle: + + def test_flag_helper_reads_env_each_call(self, monkeypatch): + """_qa_ollama_first_enabled() 應每次重讀 env,允許 runtime 灰度切換。""" + monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'false') + assert svc._qa_ollama_first_enabled() is False + + monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true') + assert svc._qa_ollama_first_enabled() is True + + # 各種真值字串 + for v in ('TRUE', 'True', '1', 'yes', 'on'): + monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', v) + assert svc._qa_ollama_first_enabled() is True, f"應視為 true: {v!r}" + + for v in ('false', '0', 'no', 'off', '', 'foo'): + monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', v) + assert svc._qa_ollama_first_enabled() is False, f"應視為 false: {v!r}"