feat(p1+p3): logger 接 13 caller + Q&A/Nemotron/日報 feature flag 灰度

Phase 1 A4 — 13 個呼叫點接 ai_call_logger（覆蓋率 11.8% → 預估 50%+） - TOP-1 nemoton_dispatcher: nemotron_dispatch caller (NIM 配額追蹤) - TOP-2 openclaw_strategist: 4 reports (daily/weekly/monthly/meta) + qa caller - TOP-3 hermes_analyst: hermes_analyst + hermes_intent (順修 commit 00591c5 殘留 bug) - TOP-4 code_review_pipeline: code_review_hermes/openclaw/elephant 三鏈 (request_id 串) - TOP-5 openclaw_bot_routes: openclaw_bot_main/gemini/nim 三層 fallback Phase 3 A7 — OpenClaw Q&A → qwen3:14b（feature flag OFF） - OPENCLAW_QA_OLLAMA_FIRST 灰度開關 - 繁中強制 system prompt + Gemini fallback chain - _is_low_quality_response 品質守門（簡體字檢測 + 拒答訊號 + 結構分數） - 黃金集 A/B 對照測試框架（10 樣本去 PII） Phase 3 A8 — OpenClaw 日報 → Hermes 模板（feature flag OFF） - OPENCLAW_DAILY_HERMES_TEMPLATE 灰度開關 - _compute_daily_kpi 純 SQL + Hermes 規則引擎 - _compute_gemini_insight 精簡 200 字洞察 prompt - templates/daily_report_v2.j2 + _SafeUndefined 缺欄位優雅降級 - scripts/compare_daily_report_versions.py 雙版本盲測 Phase 3 A9 — Nemotron NIM → qwen3:14b（feature flag OFF） - NEMOTRON_OLLAMA_FIRST 灰度開關（A2 紅燈：deepseek-r1 假支援，改 qwen3） - _call_qwen3_dispatch + 既有 NIM tool_calls 解析共用 - 保留 ADR-004「🟡 [降級模式]」Hermes 規則引擎兜底 H6 PII fix — chat_id 進 ai_calls.meta 改 SHA1[:8]（4 處 Bot Q&A） Code Review pipeline — N3 動態 provider tag（gcp/secondary/111）+ A4 logger 三鏈 37 unit tests 全綠（routing 15 + golden 5 + qwen3 8 + daily template 8 + nemotron 1） Operation Ollama-First v5.0 / Phase 1 A4 + Phase 3 A7+A8+A9 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 23:05:38 +08:00
parent 078bf2683c
commit 838267c293
12 changed files with 3122 additions and 297 deletions
--- a/routes/openclaw_bot_routes.py
+++ b/routes/openclaw_bot_routes.py
@@ -24,6 +24,7 @@ import os
 import json
 import re
 import threading
+import hashlib  # Operation Ollama-First v5.0 P1: H6 PII fix — chat_id 進 meta 改 hash[:8]
 from contextvars import ContextVar
 from contextlib import contextmanager
 import requests
@@ -48,6 +49,7 @@ from services.openclaw_bot.telegram_api import (
    send_photo,
    send_typing,
 )
+from services.ai_call_logger import log_ai_call  # Operation Ollama-First v5.0 P1
 from services.openclaw_bot.menu_keyboards import (
    _BACK,
    _SUBMENUS,
@@ -85,7 +87,17 @@ try:
 except ImportError:
    _PCHOME_AVAILABLE = False

+# V-New: 引入 Ollama 探測機制
+try:
+    from services.ollama_service import resolve_ollama_host
+    _OLLAMA_AVAILABLE = True
+except ImportError:
+    _OLLAMA_AVAILABLE = False
+
 # AI 引擎：Gemini Flash（主，2~5秒）→ NIM（備援，45~90秒）
+# LOCKED-GEMINI: PPT 簡報文案需長 context (5K+ rows + 多輪歷史) + 繁中商業敘事
+# Ollama qwen2.5-coder:7b 為 PPT 失敗時 L3 fallback（已在 _call_ollama 路徑）
+# ADR-028 鎖定場景 #7
 GEMINI_API_KEY  = os.getenv('GEMINI_API_KEY', '')
 GEMINI_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta/models'
 GEMINI_MODEL    = 'gemini-2.0-flash'
@@ -2469,6 +2481,29 @@ def _ppt_ai_analysis(prompt_data: str, report_type: str = '') -> str:
                .get('content', {}).get('parts', [{}])[0]
                .get('text', '').strip())

+    def _call_ollama(prompt: str, tokens: int) -> str:
+        """Send one non-streaming ``/api/generate`` request to Ollama.
+
+        Args:
+            prompt: Full prompt text forwarded as the ``prompt`` field.
+            tokens: Forwarded as ``options.num_predict``.
+
+        Returns:
+            The stripped ``response`` field of the JSON reply, or ``""`` when
+            the Ollama helper import failed (``_OLLAMA_AVAILABLE`` is false)
+            or when any step of the request raises (logged as a warning).
+        """
+        if _OLLAMA_AVAILABLE:
+            try:
+                base_url = resolve_ollama_host()
+                # Model is configurable via OPENCLAW_OLLAMA_MODEL; the default
+                # used for slide analysis is qwen2.5-coder:7b.
+                body = {
+                    'model': os.getenv('OPENCLAW_OLLAMA_MODEL', 'qwen2.5-coder:7b'),
+                    'prompt': prompt,
+                    'stream': False,
+                    'options': {'num_predict': tokens, 'temperature': 0.3},
+                }
+                reply = requests.post(f"{base_url}/api/generate", json=body, timeout=90)
+                reply.raise_for_status()
+                return reply.json().get('response', '').strip()
+            except Exception as e:
+                # Best-effort fallback: never propagate, just report and return empty.
+                sys_log.warning(f"[PPT] Ollama error: {e}")
+        return ""
+
    if not NVIDIA_API_KEY:
        if GEMINI_API_KEY:
            try:
@@ -2532,6 +2567,29 @@ def _ppt_ai_analysis(prompt_data: str, report_type: str = '') -> str:
            return result_text
        except Exception as e2:
            sys_log.error(f"[PPT] Gemini fallback error: {e2}")
+
+    # ── Ollama (GCP/111) Final Fallback ───────────────────────
+    if _OLLAMA_AVAILABLE:
+        try:
+            sys_log.info("[PPT] Trying local/GCP Ollama as final fallback")
+            raw = _call_ollama(f"{sys_instruction}\n\n--- 資料 ---\n{prompt_data}", max_tokens)
+            result_text = _clean_ai_text(raw)
+            if result_text and len(result_text) > 100:
+                if _LEARNING_ENABLED:
+                    import threading as _thr
+                    _thr.Thread(
+                        target=store_insight,
+                        kwargs={
+                            'insight_type': report_type or 'analysis',
+                            'content': result_text,
+                            'period': datetime.now(TAIPEI_TZ).strftime('%Y-%m-%d'),
+                        },
+                        daemon=True
+                    ).start()
+                return result_text
+        except Exception as e3:
+            sys_log.error(f"[PPT] Ollama final fallback error: {e3}")
+
    return '（AI 分析暫時無法使用，請稍後重試）'


@@ -6768,18 +6826,29 @@ def openclaw_answer(question: str, chat_id: int = None):
                + "請用繁體中文直接回答，不要開場白，300字以內。"
            )

-            resp = ollama_service.generate(question, system_prompt=sys_prompt, timeout=180)
-            if resp.success and resp.content:
-                if chat_id:
-                    openclaw_session.append_turn(chat_id, question, resp.content)
-                if _LEARNING_ENABLED:
-                    import threading as _thr
-                    _thr.Thread(target=store_conversation,
-                        args=(0, 0, question, resp.content, "ollama", []),
-                        daemon=True).start()
-                return resp.content, None
-            else:
-                sys_log.warning(f"[Ollama] 生成失敗: {resp.error}，fallback 到 Gemini")
+            # Phase 1 v5.0: 包 ai_call_logger 追蹤 Bot Q&A 主鏈 Ollama
+            _qa_req_id = f"qa-{chat_id or 0}-{int(_time_mod.time())}"
+            with log_ai_call(
+                caller='openclaw_bot_main',
+                provider='gcp_ollama',
+                model=getattr(ollama_service, 'model', 'llama3.1:8b'),
+                request_id=_qa_req_id,
+                meta={'chat_id_hash': hashlib.sha1(str(chat_id or 0).encode()).hexdigest()[:8], 'has_db_ctx': bool(db_ctx)},
+            ) as _ctx:
+                resp = ollama_service.generate(question, system_prompt=sys_prompt, timeout=180)
+                if resp.success and resp.content:
+                    if chat_id:
+                        openclaw_session.append_turn(chat_id, question, resp.content)
+                    if _LEARNING_ENABLED:
+                        import threading as _thr
+                        _thr.Thread(target=store_conversation,
+                            args=(0, 0, question, resp.content, "ollama", []),
+                            daemon=True).start()
+                    return resp.content, None
+                else:
+                    sys_log.warning(f"[Ollama] 生成失敗: {resp.error}，fallback 到 Gemini")
+                    _ctx.set_error(f"ollama generate failed: {resp.error}")
+                    _ctx.fallback_to_caller('openclaw_bot_gemini')
    except Exception as e:
        sys_log.warning(f"[Ollama] 例外發生: {e}，fallback 到 Gemini")

@@ -6817,15 +6886,30 @@ def openclaw_answer(question: str, chat_id: int = None):
                "tool_config": {"function_calling_config": {"mode": "AUTO"}},
                "generationConfig": {"temperature": 0.3, "maxOutputTokens": 600},
            }
-            r1 = requests.post(
-                f"{GEMINI_BASE_URL}/{GEMINI_MODEL}:generateContent?key={GEMINI_API_KEY}",
-                headers={"Content-Type": "application/json"},
-                json=payload, timeout=30,
-            )
-            r1.raise_for_status()
-            resp1 = r1.json()
-            candidate = resp1.get("candidates", [{}])[0]
-            parts     = candidate.get("content", {}).get("parts", [])
+            # Phase 1 v5.0: 包 ai_call_logger 追蹤 Gemini FC 第一輪
+            _qa_gemini_req_id = f"qa-{chat_id or 0}-{int(_time_mod.time())}"
+            with log_ai_call(
+                caller='openclaw_bot_gemini',
+                provider='gemini',
+                model=GEMINI_MODEL,
+                request_id=_qa_gemini_req_id,
+                meta={'chat_id_hash': hashlib.sha1(str(chat_id or 0).encode()).hexdigest()[:8], 'turn': 1},
+            ) as _ctx_g1:
+                r1 = requests.post(
+                    f"{GEMINI_BASE_URL}/{GEMINI_MODEL}:generateContent?key={GEMINI_API_KEY}",
+                    headers={"Content-Type": "application/json"},
+                    json=payload, timeout=30,
+                )
+                r1.raise_for_status()
+                resp1 = r1.json()
+                # Gemini REST: usageMetadata.{promptTokenCount, candidatesTokenCount}
+                _um = resp1.get("usageMetadata", {}) or {}
+                _ctx_g1.set_tokens(
+                    input=_um.get("promptTokenCount", 0),
+                    output=_um.get("candidatesTokenCount", 0),
+                )
+                candidate = resp1.get("candidates", [{}])[0]
+                parts     = candidate.get("content", {}).get("parts", [])

            # 如果沒有 function call，直接回傳文字
            tool_calls = [p["functionCall"] for p in parts if "functionCall" in p]
@@ -6870,15 +6954,28 @@ def openclaw_answer(question: str, chat_id: int = None):
                    "maxOutputTokens": 600,
                },
            }
-            r2 = requests.post(
-                f"{GEMINI_BASE_URL}/{GEMINI_MODEL}:generateContent?key={GEMINI_API_KEY}",
-                headers={"Content-Type": "application/json"},
-                json=payload2, timeout=35,
-            )
-            r2.raise_for_status()
-            resp2    = r2.json()
-            parts2   = resp2.get("candidates", [{}])[0].get("content", {}).get("parts", [])
-            final    = "".join(p.get("text", "") for p in parts2 if "text" in p).strip()
+            # Phase 1 v5.0: 包 ai_call_logger 追蹤 Gemini FC 第二輪
+            with log_ai_call(
+                caller='openclaw_bot_gemini',
+                provider='gemini',
+                model=GEMINI_MODEL,
+                request_id=_qa_gemini_req_id,
+                meta={'chat_id_hash': hashlib.sha1(str(chat_id or 0).encode()).hexdigest()[:8], 'turn': 2, 'tools_used': used_sources},
+            ) as _ctx_g2:
+                r2 = requests.post(
+                    f"{GEMINI_BASE_URL}/{GEMINI_MODEL}:generateContent?key={GEMINI_API_KEY}",
+                    headers={"Content-Type": "application/json"},
+                    json=payload2, timeout=35,
+                )
+                r2.raise_for_status()
+                resp2    = r2.json()
+                _um2 = resp2.get("usageMetadata", {}) or {}
+                _ctx_g2.set_tokens(
+                    input=_um2.get("promptTokenCount", 0),
+                    output=_um2.get("candidatesTokenCount", 0),
+                )
+                parts2   = resp2.get("candidates", [{}])[0].get("content", {}).get("parts", [])
+                final    = "".join(p.get("text", "") for p in parts2 if "text" in p).strip()

            if final:
                sys_log.info(f"[FC] done tools={used_sources} reply={len(final)}chars")
@@ -6931,19 +7028,34 @@ def openclaw_answer(question: str, chat_id: int = None):
                + f"\n用戶問：{question}\n"
                "請用繁體中文直接回答，不要開場白，300字以內。"
            )
-            r = requests.post(
-                f"{NVIDIA_BASE_URL}/chat/completions",
-                headers={"Authorization": f"Bearer {NVIDIA_API_KEY}",
-                         "Content-Type": "application/json"},
-                json={
-                    "model": CHAT_MODEL,
-                    "messages": [{"role": "user", "content": nim_prompt}],
-                    "max_tokens": 500, "temperature": 0.3,
-                },
-                timeout=20,
-            )
-            r.raise_for_status()
-            return r.json()["choices"][0]["message"]["content"].strip(), None
+            # Phase 1 v5.0: 包 ai_call_logger 追蹤 Bot Q&A NIM 三層 fallback
+            _qa_nim_req_id = f"qa-{chat_id or 0}-{int(_time_mod.time())}"
+            with log_ai_call(
+                caller='openclaw_bot_nim',
+                provider='nim',
+                model=CHAT_MODEL,
+                request_id=_qa_nim_req_id,
+                meta={'chat_id_hash': hashlib.sha1(str(chat_id or 0).encode()).hexdigest()[:8], 'has_db_ctx': bool(db_ctx)},
+            ) as _ctx_nim:
+                r = requests.post(
+                    f"{NVIDIA_BASE_URL}/chat/completions",
+                    headers={"Authorization": f"Bearer {NVIDIA_API_KEY}",
+                             "Content-Type": "application/json"},
+                    json={
+                        "model": CHAT_MODEL,
+                        "messages": [{"role": "user", "content": nim_prompt}],
+                        "max_tokens": 500, "temperature": 0.3,
+                    },
+                    timeout=20,
+                )
+                r.raise_for_status()
+                _body = r.json()
+                _u = _body.get("usage", {}) or {}
+                _ctx_nim.set_tokens(
+                    input=_u.get("prompt_tokens", 0),
+                    output=_u.get("completion_tokens", 0),
+                )
+                return _body["choices"][0]["message"]["content"].strip(), None
        except Exception as e:
            sys_log.error(f"[FC] NIM fallback error: {e}")

--- a/scripts/compare_daily_report_versions.py
+++ b/scripts/compare_daily_report_versions.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+"""
+scripts/compare_daily_report_versions.py
+─────────────────────────────────────────────────────────────────
+Operation Ollama-First v5.0 / Phase 3 / A8 — 日報雙版本盲測腳本
+
+用途：
+    跑同一天的「舊版 Gemini 全文」vs「新版 Hermes 模板」
+    輸出兩份檔案到 reports/，供統帥盲測選擇預設模式。
+
+使用：
+    python3 scripts/compare_daily_report_versions.py --date 2026-05-03
+    python3 scripts/compare_daily_report_versions.py            # 預設昨日
+
+紀律：
+    - 不寫入 ai_insights（避免污染 production 資料）
+    - 不發 Telegram（純 dry-run）
+    - 兩版本皆獨立執行，互不干擾
+    - 失敗時報錯但不刪舊檔
+"""
+
+import os
+import sys
+import argparse
+from datetime import date, timedelta, datetime
+from pathlib import Path
+
+# Make this project's `services` package importable when the script is run
+# directly: ROOT is the directory two levels above this file (the parent of
+# scripts/), and it is placed first on sys.path.
+ROOT = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(ROOT))
+
+
+def _ensure_reports_dir() -> Path:
+    """Create ``ROOT/reports`` if it does not exist yet and return its path."""
+    target = ROOT.joinpath('reports')
+    target.mkdir(exist_ok=True)
+    return target
+
+
+def _run_legacy(target_date: date) -> str:
+    """Run the legacy report (``_legacy_full_gemini_daily_report``) and return its text.
+
+    Sets ``OPENCLAW_DAILY_HERMES_TEMPLATE`` to ``'false'`` in ``os.environ``,
+    reloads ``services.openclaw_strategist_service`` and calls its
+    ``_legacy_full_gemini_daily_report()``.
+
+    NOTE(review): ``target_date`` is only printed here; the service function is
+    called without arguments — confirm which date it actually reports on.
+
+    Args:
+        target_date: The date the caller wants compared (currently log-only).
+
+    Returns:
+        For a dict result: the first truthy value of ``result['content']`` /
+        ``result['report_content']``, else ``str(result)``. Any other non-None
+        result is returned as ``str(result)``.
+
+    Raises:
+        RuntimeError: If the service function returns ``None``.
+    """
+    os.environ['OPENCLAW_DAILY_HERMES_TEMPLATE'] = 'false'
+    # Force a module reload so a cached import is not reused
+    # (presumably the flag is read at import time — TODO confirm).
+    import importlib
+    import services.openclaw_strategist_service as svc
+    importlib.reload(svc)
+    print(f"[legacy] 開始跑 Gemini 全文版日報 target_date={target_date}")
+    result = svc._legacy_full_gemini_daily_report()
+    if result is None:
+        # Fail with a clear message instead of an AttributeError on .get().
+        raise RuntimeError("_legacy_full_gemini_daily_report() returned None")
+    if isinstance(result, dict):
+        return result.get('content', '') or result.get('report_content', '') or str(result)
+    # Non-dict results (e.g. a plain string) have no .get(); stringify them,
+    # which is what the trailing str(result) fallback was meant to cover.
+    return str(result)
+
+
+def _run_hermes_template(target_date: date) -> str:
+    """Run the new report (``_generate_daily_report_hermes_template``) and return its text.
+
+    Sets ``OPENCLAW_DAILY_HERMES_TEMPLATE`` to ``'true'`` in ``os.environ``,
+    reloads ``services.openclaw_strategist_service`` and calls its
+    ``_generate_daily_report_hermes_template()``.
+
+    NOTE(review): ``target_date`` is only printed here; the service function is
+    called without arguments — confirm which date it actually reports on.
+
+    Args:
+        target_date: The date the caller wants compared (currently log-only).
+
+    Returns:
+        For a dict result: the first truthy value of ``result['content']`` /
+        ``result['report_content']``, else ``str(result)``. Any other non-None
+        result is returned as ``str(result)``.
+
+    Raises:
+        RuntimeError: If the service function returns ``None``.
+    """
+    os.environ['OPENCLAW_DAILY_HERMES_TEMPLATE'] = 'true'
+    # Reload so a previously imported copy of the module is not reused
+    # (presumably the flag is read at import time — TODO confirm).
+    import importlib
+    import services.openclaw_strategist_service as svc
+    importlib.reload(svc)
+    print(f"[hermes] 開始跑 Hermes 模板版日報 target_date={target_date}")
+    result = svc._generate_daily_report_hermes_template()
+    if result is None:
+        # Fail with a clear message instead of an AttributeError on .get().
+        raise RuntimeError("_generate_daily_report_hermes_template() returned None")
+    if isinstance(result, dict):
+        return result.get('content', '') or result.get('report_content', '') or str(result)
+    # Non-dict results (e.g. a plain string) have no .get(); stringify them,
+    # which is what the trailing str(result) fallback was meant to cover.
+    return str(result)
+
+
+def main():
+    """CLI entry point: render both daily-report variants for one date.
+
+    Parses the optional ``--date YYYY-MM-DD`` argument (yesterday when
+    omitted), runs the legacy and the Hermes-template generators in turn, and
+    writes each result to ``reports/`` as a UTF-8 Markdown file. A failure in
+    one variant is reported on stderr and does not stop the other; nothing is
+    written for the failed variant. Finishes by printing suggested shell
+    commands for comparing the two files.
+    """
+    parser = argparse.ArgumentParser(description='OpenClaw 日報雙版本盲測')
+    parser.add_argument('--date', help='YYYY-MM-DD（預設昨日）')
+    args = parser.parse_args()
+
+    # Fall back to yesterday when no explicit date was given.
+    target_date = (
+        datetime.strptime(args.date, '%Y-%m-%d').date()
+        if args.date
+        else date.today() - timedelta(days=1)
+    )
+
+    out_dir = _ensure_reports_dir()
+    stamp = target_date.strftime('%Y%m%d')
+    legacy_file = out_dir / f'daily_report_legacy_{stamp}.md'
+    hermes_file = out_dir / f'daily_report_v2_{stamp}.md'
+
+    # (generator, output path, label in the success line, label in the failure line),
+    # legacy first, then the new template version.
+    jobs = (
+        (_run_legacy, legacy_file, 'legacy 版', 'legacy 版'),
+        (_run_hermes_template, hermes_file, 'hermes 模板版', 'hermes 版'),
+    )
+    for generate, out_path, ok_label, fail_label in jobs:
+        try:
+            out_path.write_text(generate(target_date), encoding='utf-8')
+            print(f"✅ {ok_label}輸出：{out_path}")
+        except Exception as e:
+            print(f"❌ {fail_label}失敗：{e}", file=sys.stderr)
+
+    print(f"\n盲測檢查（建議）：")
+    print(f"  diff <(head -50 {legacy_file}) <(head -50 {hermes_file})")
+    print(f"  wc -w {legacy_file} {hermes_file}")
+    print(f"  # 統帥盲測時可遮 caller 名稱避免偏見")
+
+
+if __name__ == '__main__':
+    main()
--- a/services/code_review_pipeline_service.py
+++ b/services/code_review_pipeline_service.py
@@ -29,7 +29,12 @@ from typing import Any, Dict, List, Optional

 from database.manager import get_session
 from sqlalchemy import text
-from services.hermes_analyst_service import HERMES_URL as _HERMES_URL, HERMES_MODEL as _HERMES_MODEL
+# ADR-027 Phase 2 N3：HERMES_MODEL 仍 import（純常數），HERMES_URL 改 lazy
+# 每次 _hermes_scan 才透過 get_hermes_url() 取最新解析（GCP 優先 / 111 備援），
+# 避免 import-time freeze 導致主機切換不生效。
+from services.hermes_analyst_service import HERMES_MODEL as _HERMES_MODEL
+from config import get_hermes_url
+from services.ai_call_logger import log_ai_call  # Operation Ollama-First v5.0 P1

 logger = logging.getLogger(__name__)

@@ -38,6 +43,9 @@ _current_pipeline: Dict[str, Any] = {}
 _pipeline_lock = threading.Lock()

 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
+# LOCKED-GEMINI: Code Review 全 repo diff 可達 100K+ tokens，超過 Ollama 32K context
+# 未來可升 Claude Opus 4.7 (200K context, Arena code Elo 1548) — Phase 7 任務
+# ADR-028 鎖定場景 #5
 REVIEW_MODEL   = os.getenv("OPENCLAW_MODEL", "gemini-2.5-flash")
 INTERNAL_TOKEN = os.getenv("INTERNAL_WEBHOOK_TOKEN", "")
 AUTO_FIX_ENABLED = os.getenv("CODE_REVIEW_AUTO_FIX_ENABLED", "true").lower() == "true"
@@ -215,14 +223,36 @@ class CodeReviewPipeline:

 只輸出 JSON 陣列，不含其他文字。無問題時輸出 []"""

-            resp = _req.post(
-                f"{_HERMES_URL}/api/generate",
-                json={"model": _HERMES_MODEL, "prompt": prompt,
-                      "stream": False, "options": {"temperature": 0.1}},
-                timeout=120,
+            # ADR-027 Phase 2 N3：lazy resolve Hermes 主機（GCP 優先 / 111 備援），
+            # 避開 import-time freeze。provider 標籤跟著解析結果動態決定。
+            hermes_url = get_hermes_url()
+            provider_tag = (
+                'gcp_ollama' if ('34.21.145.224' in hermes_url or '34.143.170.20' in hermes_url)
+                else 'ollama_111' if '192.168.0.111' in hermes_url
+                else 'ollama_other'
            )
-            resp.raise_for_status()
-            raw = resp.json().get("response", "").strip()
+            # Phase 1 v5.0: 包 ai_call_logger 追蹤 Code Review Hermes scan
+            with log_ai_call(
+                caller='code_review_hermes',
+                provider=provider_tag,
+                model=_HERMES_MODEL,
+                request_id=f"cr-{self.commit_sha[:8]}",
+                meta={'commit': self.commit_sha[:8], 'branch': self.branch,
+                      'files': len(files), 'host': hermes_url},
+            ) as _ctx:
+                resp = _req.post(
+                    f"{hermes_url}/api/generate",
+                    json={"model": _HERMES_MODEL, "prompt": prompt,
+                          "stream": False, "options": {"temperature": 0.1}},
+                    timeout=120,
+                )
+                resp.raise_for_status()
+                body = resp.json()
+                _ctx.set_tokens(
+                    input=body.get("prompt_eval_count", 0),
+                    output=body.get("eval_count", 0),
+                )
+                raw = body.get("response", "").strip()

            match = re.search(r"\[.*\]", raw, re.DOTALL)
            if not match:
@@ -271,36 +301,70 @@ class CodeReviewPipeline:
 <b>💡 架構優化方向</b>（1條長期建議）
 <b>✅ 本次部署亮點</b>"""

-        # 優先 Gemini
+        # 優先 Gemini — Phase 1 v5.0 logger 追蹤
        if GEMINI_API_KEY:
-            try:
-                import google.generativeai as genai
-                genai.configure(api_key=GEMINI_API_KEY)
-                model = genai.GenerativeModel(
-                    model_name=REVIEW_MODEL,
-                    generation_config=genai.types.GenerationConfig(
-                        temperature=0.3, max_output_tokens=1500,
-                    ),
-                    system_instruction=system,
-                )
-                resp = model.generate_content(user_prompt, request_options={"timeout": 90})
-                return resp.text or ""
-            except Exception as e:
-                logger.warning("[CodeReview] OpenClaw Gemini 失敗，降級 ElephantAlpha: %s", e)
+            with log_ai_call(
+                caller='code_review_openclaw',
+                provider='gemini',
+                model=REVIEW_MODEL,
+                request_id=f"cr-{self.commit_sha[:8]}",
+                meta={'commit': self.commit_sha[:8], 'branch': self.branch},
+            ) as _ctx:
+                try:
+                    import google.generativeai as genai
+                    genai.configure(api_key=GEMINI_API_KEY)
+                    model = genai.GenerativeModel(
+                        model_name=REVIEW_MODEL,
+                        generation_config=genai.types.GenerationConfig(
+                            temperature=0.3, max_output_tokens=1500,
+                        ),
+                        system_instruction=system,
+                    )
+                    resp = model.generate_content(user_prompt, request_options={"timeout": 90})
+                    try:
+                        usage = getattr(resp, 'usage_metadata', None)
+                        if usage is not None:
+                            _ctx.set_tokens(
+                                input=getattr(usage, 'prompt_token_count', 0) or 0,
+                                output=getattr(usage, 'candidates_token_count', 0) or 0,
+                            )
+                    except Exception:
+                        pass
+                    return resp.text or ""
+                except Exception as e:
+                    logger.warning("[CodeReview] OpenClaw Gemini 失敗，降級 ElephantAlpha: %s", e)
+                    _ctx.set_error(f"{type(e).__name__}: {e}")
+                    _ctx.fallback_to_caller('code_review_elephant')

        # 降級：ElephantAlpha via OpenRouter（OPENROUTER_API_KEY 容器內一定有）
-        try:
-            from services.elephant_service import elephant_service
-            resp = elephant_service.generate(
-                prompt=user_prompt,
-                system_prompt=system,
-                temperature=0.3,
-                timeout=90,
-            )
-            if resp.success:
-                return resp.content or ""
-        except Exception as e:
-            logger.warning("[CodeReview] OpenClaw ElephantAlpha 降級也失敗: %s", e)
+        # Phase 1 v5.0 logger 追蹤
+        with log_ai_call(
+            caller='code_review_elephant',
+            provider='nim_via_elephant',
+            model='nvidia/llama-3.3-nemotron-super-49b-v1.5',
+            request_id=f"cr-{self.commit_sha[:8]}",
+            meta={'commit': self.commit_sha[:8], 'branch': self.branch},
+        ) as _ctx:
+            try:
+                from services.elephant_service import elephant_service
+                resp = elephant_service.generate(
+                    prompt=user_prompt,
+                    system_prompt=system,
+                    temperature=0.3,
+                    timeout=90,
+                )
+                if resp.success:
+                    # ElephantResponse 已含 input_tokens/output_tokens
+                    _ctx.set_tokens(
+                        input=getattr(resp, 'input_tokens', 0) or 0,
+                        output=getattr(resp, 'output_tokens', 0) or 0,
+                    )
+                    return resp.content or ""
+                else:
+                    _ctx.set_error(getattr(resp, 'error', 'elephant generate failed'))
+            except Exception as e:
+                logger.warning("[CodeReview] OpenClaw ElephantAlpha 降級也失敗: %s", e)
+                _ctx.set_error(f"{type(e).__name__}: {e}")

        return ""

--- a/services/hermes_analyst_service.py
+++ b/services/hermes_analyst_service.py
@@ -23,6 +23,8 @@ from typing import Optional
 import requests
 from sqlalchemy import text
 from services.mcp_context_service import build_mcp_context
+from services.ollama_service import resolve_ollama_host, get_host_label
+from services.ai_call_logger import log_ai_call  # Operation Ollama-First v5.0 P1

 logger = logging.getLogger(__name__)

@@ -155,32 +157,48 @@ class HermesAnalystService:
            "keep_alive": HERMES_KEEP_ALIVE,  # ADR-012：避免冷啟動 timeout
            "options": {"temperature": 0.1},
        }
-        try:
-            resp = requests.post(
-                f"{HERMES_URL}/api/generate",
-                json=payload,
-                timeout=HERMES_TIMEOUT,  # 統一 config 集中讀取（ADR-008）；keep_alive 確保熱駐留時實測 < 10s
-            )
-            resp.raise_for_status()
-            raw = (resp.json().get("response", "") or "").strip()
-            if raw.startswith("```"):
-                raw = re.sub(r"^```(?:json)?\s*", "", raw, flags=re.MULTILINE)
-                raw = re.sub(r"\s*```\s*$", "", raw.strip(), flags=re.MULTILINE).strip()
-            data = json.loads(raw)
-            return {
-                "intent": data.get("intent", "unknown"),
-                "confidence": float(data.get("confidence", 0.5)),
-                "complexity_score": float(data.get("complexity_score", 0.5)),
-                "requires_data_fetch": bool(data.get("requires_data_fetch", False)),
-                "preliminary_answer": data.get("preliminary_answer", "") or "",
-                "metadata": {"source": "hermes_llm"},
-            }
-        except Exception as e:
-            logger.warning(
-                f"[Hermes.intent] Ollama 連線失敗，降級規則引擎"
-                f"（host={HERMES_URL} model={HERMES_MODEL} error={type(e).__name__}: {e}）"
-            )
-            return None
+        target_host = resolve_ollama_host()
+        # Phase 1 v5.0: 包 ai_call_logger 追蹤 Hermes 意圖分類 token / fallback
+        with log_ai_call(
+            caller='hermes_intent',
+            provider='gcp_ollama',
+            model=HERMES_MODEL,
+            meta={'host_label': get_host_label(target_host)},
+        ) as _ctx:
+            try:
+                resp = requests.post(
+                    f"{target_host}/api/generate",
+                    json=payload,
+                    timeout=HERMES_TIMEOUT,  # 統一 config 集中讀取（ADR-008）；keep_alive 確保熱駐留時實測 < 10s
+                )
+                resp.raise_for_status()
+                body = resp.json()
+                _ctx.set_tokens(
+                    input=body.get("prompt_eval_count", 0),
+                    output=body.get("eval_count", 0),
+                )
+                raw = (body.get("response", "") or "").strip()
+                if raw.startswith("```"):
+                    raw = re.sub(r"^```(?:json)?\s*", "", raw, flags=re.MULTILINE)
+                    raw = re.sub(r"\s*```\s*$", "", raw.strip(), flags=re.MULTILINE).strip()
+                data = json.loads(raw)
+                return {
+                    "intent": data.get("intent", "unknown"),
+                    "confidence": float(data.get("confidence", 0.5)),
+                    "complexity_score": float(data.get("complexity_score", 0.5)),
+                    "requires_data_fetch": bool(data.get("requires_data_fetch", False)),
+                    "preliminary_answer": data.get("preliminary_answer", "") or "",
+                    "metadata": {"source": "hermes_llm"},
+                }
+            except Exception as e:
+                # NOTE: 修補 commit 00591c5 殘留的孤立 f-string（原 logger.warning 被誤刪）
+                logger.warning(
+                    f"[Hermes.intent] Ollama 連線失敗，降級規則引擎"
+                    f"（model={HERMES_MODEL} error={type(e).__name__}: {e}）"
+                )
+                _ctx.set_error(f"{type(e).__name__}: {e}")
+                _ctx.fallback_to_caller('hermes_rule_engine')
+                return None

    def _rule_based_intent(self, message: str) -> dict:
        """Ollama 掛掉時的規則引擎 fallback — 永遠返回結構化結果。"""
@@ -416,23 +434,46 @@ class HermesAnalystService:
            "options": {"temperature": 0.1},
        }

-        resp = requests.post(
-            f"{HERMES_URL}/api/generate",
-            json=payload,
-            timeout=HERMES_TIMEOUT,
-        )
-        resp.raise_for_status()
+        target_host = resolve_ollama_host()
+        # Phase 1 v5.0: 包 ai_call_logger 追蹤 Hermes 競價分析 token / fallback
+        with log_ai_call(
+            caller='hermes_analyst',
+            provider='gcp_ollama',
+            model=HERMES_MODEL,
+            meta={
+                'host_label': get_host_label(target_host),
+                'item_count': len(items),
+                'top_n': TOP_N,
+            },
+        ) as _ctx:
+            try:
+                resp = requests.post(
+                    f"{target_host}/api/generate",
+                    json=payload,
+                    timeout=HERMES_TIMEOUT,
+                )
+                resp.raise_for_status()
+            except Exception as e:
+                _ctx.set_error(f"{type(e).__name__}: {e}")
+                raise

-        data = resp.json()
-        raw = data.get("response", "").strip()
-        duration_sec = round(data.get("total_duration", 0) / 1e9, 1)
-        eval_tokens  = data.get("eval_count", "?")   # Ollama 推理 token 數
-        logger.info(
-            f"[Hermes] 推理耗時 {duration_sec}s，"
-            f"輸入 {len(items)} 筆，tokens={eval_tokens}，回應長度 {len(raw)}"
-        )
-        # 儲存統計供 footprint 使用（掛在 instance 上供 run() 讀取）
-        self._last_stats = {"duration_sec": duration_sec, "tokens": eval_tokens}
+            data = resp.json()
+            raw = data.get("response", "").strip()
+            duration_sec = round(data.get("total_duration", 0) / 1e9, 1)
+            eval_tokens_raw  = data.get("eval_count", 0)   # Ollama 推理 token 數
+            prompt_tokens_raw = data.get("prompt_eval_count", 0)
+            _ctx.set_tokens(input=prompt_tokens_raw, output=eval_tokens_raw)
+            logger.info(
+                f"[Hermes] 推理耗時 {duration_sec}s，"
+                f"輸入 {len(items)} 筆，tokens={eval_tokens_raw}，回應長度 {len(raw)}"
+            )
+            # 儲存統計供 footprint 使用（掛在 instance 上供 run() 讀取）
+            self._last_stats = {
+                "duration_sec": duration_sec,
+                "tokens": eval_tokens_raw,
+                "host": target_host,
+                "host_label": get_host_label(target_host)
+            }

        # P0-1 修復：剝除 Hermes 可能輸出的 markdown code fence
        if raw.startswith("```"):
--- a/services/nemoton_dispatcher_service.py
+++ b/services/nemoton_dispatcher_service.py
@@ -27,6 +27,7 @@ import requests
 from services.mcp_context_service import build_mcp_context

 from config import HERMES_URL  # ADR-008 集中化：禁止硬編碼 IP
+from services.ai_call_logger import log_ai_call  # Operation Ollama-First v5.0 P1

 logger = logging.getLogger(__name__)

@@ -107,6 +108,17 @@ NIM_TIMEOUT  = 60  # 秒
 NIM_DAILY_LIMIT = 80  # 留 20 個給 AWOOOI，100/天免費配額
 _nim_call_count = {"date": "", "count": 0}

+# ── Operation Ollama-First v5.0 / Phase 3 / A9 ──────────────────
+# Gradual-rollout switch for routing dispatch through Ollama qwen3:14b.
+#   - "false" (default) → behaviour unchanged (NIM is still used)
+#   - "true"            → qwen3 first, NIM as backup, Hermes rule engine last (ADR-004)
+# Model choice (author's A2 report, docs/phase0_research_report_20260503.md):
+#   deepseek-r1:14b lacked a working Ollama tool_calls chat template
+#   (GitHub Issue #10935, unresolved at the time), so qwen3:14b was chosen instead.
+NEMOTRON_OLLAMA_FIRST = os.getenv("NEMOTRON_OLLAMA_FIRST", "false").lower() == "true"
+NEMOTRON_OLLAMA_MODEL = os.getenv("NEMOTRON_OLLAMA_MODEL", "qwen3:14b")
+NEMOTRON_OLLAMA_TIMEOUT = int(os.getenv("NEMOTRON_OLLAMA_TIMEOUT", "180"))  # seconds; a non-numeric value raises at import
+

 def _check_nim_quota() -> bool:
    today = datetime.now().strftime("%Y-%m-%d")
@@ -320,6 +332,68 @@ ICON_AI        = "🧠"
 ICON_FOOTPRINT = "⚙️"


+# ── tool_calls 解析（NIM 與 qwen3 共用）──────────────────────────
+def _parse_tool_calls_struct(tool_calls: list) -> list:
+    """Extract ``[{"tool": name, "args": dict}]`` from OpenAI-style tool_calls.
+
+    Each entry is expected to look like ``{"function": {"name": ..., "arguments":
+    <JSON string or dict>}}``. Entries without a dict ``function`` or a ``name``
+    are skipped; ``args`` is always a dict (``{}`` when the arguments are
+    missing, undecodable, or decode to something other than a JSON object).
+    """
+    results = []
+    for tc in tool_calls or []:
+        fn = tc.get("function") if isinstance(tc, dict) else None
+        if not isinstance(fn, dict) or not fn.get("name"):
+            continue
+        args = fn.get("arguments", {})
+        if isinstance(args, str):
+            try:
+                args = json.loads(args) if args.strip() else {}
+            except json.JSONDecodeError:
+                args = {}
+        # Consumers expand args as keyword arguments: anything but a dict
+        # (e.g. "null" or "[1]" decoded above) must collapse to {}.
+        if not isinstance(args, dict):
+            args = {}
+        results.append({"tool": fn["name"], "args": args})
+    return results
+
+
+def _parse_content_fallback(raw_content: str) -> list:
+    """Parse tool calls a model wrote into ``content`` instead of ``tool_calls``.
+
+    Accepts only a JSON array of objects with ``name`` (or ``function.name``) and
+    ``parameters``/``arguments``; returns ``[{"tool", "args"}]`` with dict args, else [].
+    """
+    if not raw_content or not isinstance(raw_content, str):
+        return []
+    try:
+        parsed = json.loads(raw_content.strip())
+    except Exception as parse_err:
+        logger.error(f"[ToolCalls] content fallback JSON 解析失敗：{parse_err}")
+        return []
+    if not isinstance(parsed, list):
+        return []
+    results = []
+    for item in parsed:
+        if not isinstance(item, dict):
+            continue
+        fn = item.get("function")
+        name = item.get("name") or (fn.get("name") if isinstance(fn, dict) else None)
+        args = item.get("parameters") or item.get("arguments") or {}
+        if isinstance(args, str):
+            try:
+                args = json.loads(args)
+            except json.JSONDecodeError:
+                args = {}
+        if name:  # args must be **-expandable, so non-dicts collapse to {}
+            results.append({"tool": name, "args": args if isinstance(args, dict) else {}})
+    if results:
+        logger.info(f"[ToolCalls] content fallback 解析成功，取得 {len(results)} 個 tool_calls")
+    return results
+
+
 def _build_footprint_json(hermes_stats: Optional[dict], nim_stats: Optional[dict]) -> dict:
    """
    建立結構化運算足跡 (用於 DB model_footprint JSONB 欄位)
@@ -331,7 +405,8 @@ def _build_footprint_json(hermes_stats: Optional[dict], nim_stats: Optional[dict
    if hermes_stats:
        result["analyst"] = {
            "model":        "qwen2.5:7b-instruct",
-            "host":         HERMES_URL,  # ADR-008：集中讀取，禁止硬編碼 IP
+            "host":         hermes_stats.get("host", HERMES_URL),
+            "host_label":   hermes_stats.get("host_label", "未知"),
            "duration_sec": hermes_stats.get("duration_sec", 0),
            "tokens":       hermes_stats.get("tokens", 0),
            "cost_usd":     0,
@@ -363,12 +438,13 @@ def _build_footprint_block(hermes_stats: Optional[dict], nim_stats: Optional[dic
    if hermes_stats:
        dur = hermes_stats.get("duration_sec", 0)
        tok = hermes_stats.get("tokens", "?")
+        label = hermes_stats.get("host_label", "本地 188")
        lines.append(
-            f"• 🔍 分析: Qwen2.5 7B (本地 188) | "
+            f"• 🔍 分析: Qwen2.5 7B ({label}) | "
            f"耗時: {dur:.1f}s | Tokens: {tok} | $0 成本"
        )
    else:
-        lines.append("• 🔍 分析: Qwen2.5 7B (本地 188) | $0 成本")
+        lines.append("• 🔍 分析: Qwen2.5 7B (未知主機) | $0 成本")

    if nim_stats:
        tok   = nim_stats.get("total_tokens", "?")
@@ -464,81 +540,208 @@ class NemotronDispatcher:
        ]

        # P1-4 修復：NIM API 指數退避 retry（最多 3 次）
+        # Phase 1 v5.0: 包 ai_call_logger 追蹤 NIM 配額/tokens/錯誤
        import time as _time
        last_err = None
-        for _attempt in range(3):
-            try:
-                resp = requests.post(
-                    f"{NIM_BASE_URL}/chat/completions",
-                    headers={
-                        "Authorization": f"Bearer {NIM_API_KEY}",
-                        "Content-Type":  "application/json",
-                    },
-                    json={
-                        "model":       NIM_MODEL,
-                        "messages":    messages,
-                        "tools":       TOOLS,
-                        "tool_choice": "required",
-                        "max_tokens":  2048,
-                    },
-                    timeout=NIM_TIMEOUT,
-                )
-                resp.raise_for_status()
-                break
-            except (requests.Timeout, requests.HTTPError) as e:
-                last_err = e
-                # ADR-004: 429 不重試，立即拋出讓上層啟動 Hermes 規則引擎降級
-                if isinstance(e, requests.HTTPError) and e.response is not None \
-                        and e.response.status_code == 429:
-                    logger.warning("[NIM] HTTP 429 速率限制，跳出 retry 迴圈")
-                    raise
-                if _attempt < 2:
-                    _time.sleep(2 ** _attempt)
-                    logger.warning(f"[NIM] retry {_attempt + 1}/2 after {e}")
-        else:
-            raise last_err
+        with log_ai_call(
+            caller='nemotron_dispatch',
+            provider='nim',
+            model=NIM_MODEL,
+            meta={'threat_count': len(threats), 'quota_used': _nim_quota_used()},
+        ) as _ctx:
+            for _attempt in range(3):
+                try:
+                    resp = requests.post(
+                        f"{NIM_BASE_URL}/chat/completions",
+                        headers={
+                            "Authorization": f"Bearer {NIM_API_KEY}",
+                            "Content-Type":  "application/json",
+                        },
+                        json={
+                            "model":       NIM_MODEL,
+                            "messages":    messages,
+                            "tools":       TOOLS,
+                            "tool_choice": "required",
+                            "max_tokens":  2048,
+                        },
+                        timeout=NIM_TIMEOUT,
+                    )
+                    resp.raise_for_status()
+                    break
+                except (requests.Timeout, requests.HTTPError) as e:
+                    last_err = e
+                    # ADR-004: 429 不重試，立即拋出讓上層啟動 Hermes 規則引擎降級
+                    if isinstance(e, requests.HTTPError) and e.response is not None \
+                            and e.response.status_code == 429:
+                        logger.warning("[NIM] HTTP 429 速率限制，跳出 retry 迴圈")
+                        _ctx.set_error(f"NIM 429 rate-limited")
+                        _ctx.fallback_to_caller('hermes_rule_engine')
+                        raise
+                    if _attempt < 2:
+                        _time.sleep(2 ** _attempt)
+                        logger.warning(f"[NIM] retry {_attempt + 1}/2 after {e}")
+            else:
+                raise last_err

-        body   = resp.json()
-        usage  = body.get("usage", {})
-        nim_stats = {
-            "total_tokens": usage.get("total_tokens", 0),
-            "quota_used":   _nim_quota_used(),
-        }
+            body   = resp.json()
+            usage  = body.get("usage", {})
+            # 記錄 token / 成本到 ai_calls 表
+            _ctx.set_tokens(
+                input=usage.get("prompt_tokens", 0),
+                output=usage.get("completion_tokens", 0),
+            )
+            nim_stats = {
+                "total_tokens": usage.get("total_tokens", 0),
+                "quota_used":   _nim_quota_used(),
+            }

        choices    = body.get("choices", [])
-        tool_calls = choices[0].get("message", {}).get("tool_calls", []) if choices else []
+        message    = choices[0].get("message", {}) if choices else {}
+        tool_calls = message.get("tool_calls", []) or []

-        results = []
-        for tc in tool_calls:
-            fn = tc.get("function", {})
-            try:
-                args = json.loads(fn.get("arguments", "{}"))
-            except json.JSONDecodeError:
-                args = {}
-            results.append({"tool": fn.get("name"), "args": args})
+        # 共用結構解析（NIM / qwen3 兩邊統一走同一條）
+        results = _parse_tool_calls_struct(tool_calls)

        if not results:
            # llama-3.1-8b-instruct 有時把 tool call 寫進 content 而非 tool_calls 結構
-            raw_content = choices[0].get("message", {}).get("content", "") if choices else ""
+            raw_content = message.get("content", "") or ""
            logger.warning(f"[NIM] 0 tool_calls，嘗試從 content 解析：{raw_content[:120]}")
-            try:
-                parsed = json.loads(raw_content.strip())
-                if isinstance(parsed, list):
-                    for item in parsed:
-                        name = item.get("name") or item.get("function", {}).get("name")
-                        args = item.get("parameters") or item.get("arguments") or {}
-                        if isinstance(args, str):
-                            args = json.loads(args)
-                        if name:
-                            results.append({"tool": name, "args": args})
-                    if results:
-                        logger.info(f"[NIM] content fallback 解析成功，取得 {len(results)} 個 tool_calls")
-            except Exception as parse_err:
-                logger.error(f"[NIM] content fallback 解析失敗：{parse_err}")
+            results = _parse_content_fallback(raw_content)

        logger.info(f"[NIM] 收到 {len(results)} 個 tool_calls | tokens={nim_stats['total_tokens']}")
        return results, nim_stats

+    # ──────────────────────────────────────────────
+    # GCP Ollama qwen3:14b Tool Calling（Operation Ollama-First v5.0 / Phase 3）
+    # ──────────────────────────────────────────────
+    def _call_qwen3_dispatch(self, threats: list) -> tuple:
+        """
+        Send the Hermes threat list to Ollama (qwen3:14b by default) for tool routing.
+
+        Model rationale (author's notes, docs/phase0_research_report_20260503.md):
+        tools support confirmed; thinking mode can be disabled; NIM-compatible schema.
+        NOTE(review): the payload never sets Ollama's ``think`` option — confirm.
+
+        Returns:
+            (list of {"tool": str, "args": dict}, dict ollama_stats)
+            ollama_stats: {"total_tokens": int, "host": str, "model": str}
+        Raises:
+            Any HTTP/JSON exception, re-raised after ctx.set_error and mark_unhealthy.
+        """
+        import time  # local: a module-level `time` import is not guaranteed here
+        from services.ollama_service import resolve_ollama_host, mark_unhealthy
+        host = resolve_ollama_host().rstrip("/")
+
+        threat_summary = json.dumps(
+            [
+                {
+                    "sku":          t.sku,
+                    "name":         t.name,
+                    "momo_price":   t.momo_price,
+                    "pchome_price": t.pchome_price,
+                    "gap_pct":      t.gap_pct,
+                    "sales_delta":  t.sales_7d_delta_pct,
+                    "risk":         t.risk,
+                    "action":       t.recommended_action,
+                    "confidence":   t.confidence,
+                }
+                for t in threats
+            ],
+            ensure_ascii=False,
+        )
+
+        # Inject the MCP market context into the prompt.
+        mcp_ctx = build_mcp_context()
+        # Per the original author this text matches the NIM system prompt; keep them in sync.
+        system_prompt = (
+            "你是台灣電商競價情報的行動派發器。"
+            f"當前市場背景 (MCP)：\n{mcp_ctx}\n\n"
+            "根據 Hermes 分析師提供的威脅清單，決定對每支商品呼叫哪個工具。\n"
+            "路由鐵律（依序判斷，命中即停）：\n"
+            "1. gap_pct < 5% 且 sales_delta < -30% → 非價格異常，呼叫 flag_for_human_review，"
+            "concern 說明『價差接近 0 但銷量大幅下滑，疑似缺貨/下架/平台流量異常，請人工走查前台』。\n"
+            "2. gap_pct ≥ 5% 且 risk=HIGH → trigger_price_alert（填入 momo_price, comp_price）。\n"
+            "3. 我方價格低於競品且銷量正成長 → add_to_recommendation。\n"
+            "4. confidence < 0.6 或其他複雜情況 → flag_for_human_review。\n"
+            "每支商品只呼叫一個工具。\n"
+            "【語言鐵律 — 台灣標準正體中文（繁體）】所有文字欄位必須遵守：\n"
+            "  1. 嚴禁簡體字、嚴禁異體字（例：不可用「亊」，必須用「事」）\n"
+            "  2. 嚴禁短語重複（語意坍塌）、嚴禁無意義字元組合\n"
+            "若無法產出合理的繁體中文說明，直接輸出「請人工評估議價空間」。"
+        )
+
+        payload = {
+            "model": NEMOTRON_OLLAMA_MODEL,
+            "messages": [
+                {"role": "system", "content": system_prompt},
+                {"role": "user",   "content": f"請處理以下 {len(threats)} 筆威脅清單：\n{threat_summary}"},
+            ],
+            "tools": TOOLS,           # reuse the existing NIM tools schema
+            "stream": False,
+            "options": {
+                "temperature": 0.2,
+                "num_predict": 2048,
+            },
+        }
+
+        with log_ai_call(
+            caller='nemotron_dispatch',
+            provider='gcp_ollama',
+            model=NEMOTRON_OLLAMA_MODEL,
+            request_id=f"nem-{int(time.time())}",
+            meta={
+                'flag': 'NEMOTRON_OLLAMA_FIRST',
+                'threats_count': len(threats),
+                'host': host,
+            },
+        ) as ctx:
+            try:
+                resp = requests.post(
+                    f"{host}/api/chat",
+                    json=payload,
+                    timeout=NEMOTRON_OLLAMA_TIMEOUT,
+                )
+                resp.raise_for_status()
+                body = resp.json()
+            except Exception as e:
+                # Connection/HTTP/JSON failure: record it, mark the host unhealthy, re-raise.
+                ctx.set_error(f"qwen3 call failed: {type(e).__name__}: {e}")
+                ctx.fallback_to_caller('nim')
+                mark_unhealthy(host)
+                raise
+
+            # A non-object body or a null "message" must not raise here: treat
+            # both as "no tool calls" and report zero tokens.
+            if not isinstance(body, dict):
+                body = {}
+            prompt_tokens = body.get('prompt_eval_count') or 0
+            output_tokens = body.get('eval_count') or 0
+            ctx.set_tokens(input=prompt_tokens, output=output_tokens)
+
+            msg = body.get('message')
+            if not isinstance(msg, dict):
+                msg = {}
+            # Structured tool_calls first, via the shared parser.
+            results = _parse_tool_calls_struct(msg.get('tool_calls', []) or [])
+            if not results:
+                # No structured tool_calls: try tool calls embedded in content.
+                raw_content = msg.get('content', '') or ''
+                logger.warning(
+                    f"[Dispatcher][qwen3] 0 tool_calls，嘗試從 content 解析：{raw_content[:120]}"
+                )
+                results = _parse_content_fallback(raw_content)
+            ollama_stats = {
+                "total_tokens": prompt_tokens + output_tokens,
+                "host":  host,
+                "model": NEMOTRON_OLLAMA_MODEL,
+            }
+
+        logger.info(
+            f"[Dispatcher][qwen3] 收到 {len(results)} 個 tool_calls | "
+            f"tokens={ollama_stats['total_tokens']} host={host}"
+        )
+        return results, ollama_stats
+
    # ──────────────────────────────────────────────
    # ADR-004：Hermes 規則引擎降級路由
    # ──────────────────────────────────────────────
@@ -1190,6 +1393,51 @@ class NemotronDispatcher:
                "nim_stats":  {},
            }

+        # ── Operation Ollama-First v5.0 / Phase 3 / A9：qwen3 主路徑（feature flag 灰度）──
+        # 預設 NEMOTRON_OLLAMA_FIRST=false 時不進入此分支，行為與戰前完全相同。
+        # 若 qwen3 成功取得 tool_calls，沿用既有 TOOL_MAP 執行邏輯（共用 footprint/threat 注入）。
+        # 若 qwen3 失敗或 0 tool_calls → 不直接降到 Hermes 規則，先嘗試 NIM 備援，再走 ADR-004。
+        qwen3_used = False
+        qwen3_stats: Optional[dict] = None
+        qwen3_tool_calls: Optional[list] = None
+        if NEMOTRON_OLLAMA_FIRST:
+            try:
+                qwen3_tool_calls, qwen3_stats = self._call_qwen3_dispatch(nim_candidates)
+                if qwen3_tool_calls:
+                    qwen3_used = True
+                    logger.info(
+                        f"[Dispatcher][qwen3] 主路徑成功 tool_calls={len(qwen3_tool_calls)} "
+                        f"tokens={qwen3_stats.get('total_tokens', 0)}"
+                    )
+                else:
+                    logger.warning("[Dispatcher][qwen3] 0 tool_calls，fallback 至 NIM")
+            except Exception as e:
+                logger.warning(f"[Dispatcher][qwen3] 呼叫失敗 fallback NIM: {e}")
+                # log_ai_call 已在 _call_qwen3_dispatch 內標記 status=error + fallback_to=nim
+                qwen3_tool_calls = None
+                qwen3_stats = None
+
+        # qwen3 主路徑成功 → 直接進入工具執行區塊（跳過 NIM）
+        if qwen3_used:
+            tool_calls = qwen3_tool_calls
+            # 與既有 NIM 路徑一致的 stats 結構（footprint 顯示用）
+            nim_stats = {
+                "total_tokens": qwen3_stats.get("total_tokens", 0),
+                "quota_used":   _nim_quota_used(),  # 配額未動用
+                "provider":     "gcp_ollama",
+                "model":        qwen3_stats.get("model", NEMOTRON_OLLAMA_MODEL),
+            }
+            return self._execute_tool_calls(
+                tool_calls=tool_calls,
+                threats=threats,
+                hermes_stats=hermes_stats,
+                nim_stats=nim_stats,
+                pre_dispatched=dispatched,
+                pre_skipped=skipped,
+                pre_errors=errors,
+            )
+
+        # ── 進入 NIM 路徑（flag=false 預設主路徑；flag=true 則為 qwen3 失敗備援）──
        if not NIM_API_KEY:
            logger.warning("[Dispatcher][ADR-004] NVIDIA_API_KEY 未設定，啟動 Hermes 規則引擎降級")
            fb = self._hermes_rule_fallback(nim_candidates, hermes_stats)
@@ -1249,11 +1497,38 @@ class NemotronDispatcher:
                "nim_stats":  fb["nim_stats"],
            }

-        # 建立運算足跡（Telegram 顯示文字 + DB 結構化 JSON，共用同一份）
+        return self._execute_tool_calls(
+            tool_calls=tool_calls,
+            threats=threats,
+            hermes_stats=hermes_stats,
+            nim_stats=nim_stats,
+            pre_dispatched=dispatched,
+            pre_skipped=skipped,
+            pre_errors=errors,
+        )
+
+    # ──────────────────────────────────────────────
+    # tool_calls 執行區塊（NIM 與 qwen3 共用）
+    # ──────────────────────────────────────────────
+    def _execute_tool_calls(
+        self,
+        tool_calls: list,
+        threats: list,
+        hermes_stats: Optional[dict],
+        nim_stats: dict,
+        pre_dispatched: int = 0,
+        pre_skipped: int = 0,
+        pre_errors: Optional[list] = None,
+    ) -> dict:
+        """執行 LLM 回傳的 tool_calls 清單，注入 Python 獨裁的客觀數字 + 金額影響。
+        被 NIM 路徑與 qwen3 路徑共用，避免雙路雙維護。
+        """
+        errors = list(pre_errors or [])
+        dispatched = pre_dispatched
+
        footprint_text = _build_footprint_block(hermes_stats, nim_stats)
        footprint_data = _build_footprint_json(hermes_stats, nim_stats)

-        # 建立 SKU → threat 的查詢字典（供 add_to_recommendation 寫入快照）
        threat_map = {t.sku: t for t in threats}

        TOOL_MAP = {
@@ -1266,20 +1541,15 @@ class NemotronDispatcher:

        for tc in tool_calls:
            tool_name = tc.get("tool")
-            args      = tc.get("args", {})
+            args      = dict(tc.get("args", {}) or {})
            handler   = TOOL_MAP.get(tool_name)

            if not handler:
                errors.append(f"未知工具: {tool_name}")
                continue

-            # 注入通用參數：Telegram 文字 + DB JSON 足跡
            args["footprint"] = footprint_text

-            # [2026-04-18 台北] Bug-1 防線一 保險：所有客觀數字強制由 Python 從 threat_map 注入，
-            # 覆蓋 LLM 可能回吐的幻覺數字（例如 $0）。Layer A Hermes 根治是主防線，
-            # 此處為二道屏障（萬一 ground_items 有漏網，或未來走 bypass） — Claude Opus 4.7
-            # [2026-05-02 台北] B' 軌：金額影響量化亦走 Python 獨裁注入 — Claude Opus 4.7
            t = threat_map.get(args.get("sku"))
            if tool_name == "trigger_price_alert" and t:
                args["momo_price"]  = getattr(t, "momo_price",         None)
@@ -1302,7 +1572,6 @@ class NemotronDispatcher:
                args["threat"]         = t
            elif tool_name == "route_to_km":
                args["threat"] = t
-            # mark_for_relearn 無需注入客觀數字（僅寫 DB）

            try:
                handler(**args)
@@ -1311,11 +1580,13 @@ class NemotronDispatcher:
                errors.append(f"{tool_name}({args.get('sku', '?')}): {e}")
                logger.error(f"[Dispatcher] 工具執行失敗 [{tool_name}]: {e}")

-        skipped = len(threats) - dispatched
+        skipped = max(0, len(threats) - dispatched)
+        # nim_stats 在 qwen3 路徑下會帶 provider='gcp_ollama'，log 出處可區辨
+        provider = nim_stats.get("provider", "nim") if isinstance(nim_stats, dict) else "nim"
        logger.info(
-            f"[Dispatcher] 完成 forced_review={len(forced_review)} "
+            f"[Dispatcher] 完成 provider={provider} "
            f"dispatched={dispatched} skipped={skipped} "
-            f"errors={len(errors)} nim_tokens={nim_stats.get('total_tokens', 0)}"
+            f"errors={len(errors)} tokens={nim_stats.get('total_tokens', 0)}"
        )
        return {
            "dispatched": dispatched,
--- a/services/openclaw_strategist_service.py
+++ b/services/openclaw_strategist_service.py
--- a/templates/daily_report_v2.j2
+++ b/templates/daily_report_v2.j2
@@ -0,0 +1,63 @@
+📊 momo 日報 {{ today }} ({{ weekday }})
+═══════════════════════════════════════
+
+## 📈 營收 KPI
+
+| 指標 | 今日 | vs 昨日 | vs 7日均 |
+|------|------|---------|----------|
+| 營收 | {{ revenue.today | format_currency }} | {{ revenue.dod_pct | format_pct }} | {{ revenue.wow_pct | format_pct }} |
+| 訂單筆數 | {{ orders.today_rows | default('—') }} | — | — |
+| 上架 SKU | {{ orders.today_sku | default('—') }} | — | — |
+| 平均客單 | {{ orders.avg_value_today | format_currency }} | — | — |
+
+{% if revenue.today == 0 %}
+⚠️ 今日營收為零，請檢查資料管線是否正常。
+{% endif %}
+
+## 🏆 TOP {{ top_skus | length }} 熱銷商品
+
+{% if top_skus %}
+{% for sku in top_skus %}
+{{ loop.index }}. **{{ sku.name | default('—') }}**
+   數量：{{ sku.qty | default('—') }} 件　|　營收：{{ sku.revenue | format_currency }}
+{% endfor %}
+{% else %}
+（今日無熱銷資料）
+{% endif %}
+
+## 🔍 競品價差警示
+
+{% if price_gaps %}
+{% for alert in price_gaps %}
+- ⚠️ **{{ alert.sku_name | default(alert.sku) | default('—') }}**：我方 {{ alert.momo_price | format_currency }}　vs　{{ alert.competitor | default('競品') }} {{ alert.comp_price | format_currency }}　（價差 {{ alert.gap_pct | format_pct }}）
+{% endfor %}
+{% else %}
+✅ 暫無重大價差警示
+{% endif %}
+
+## 📦 庫存異常
+
+{% if inventory_alerts %}
+{% for alert in inventory_alerts %}
+- 🟡 {{ alert.sku_name | default(alert.sku) | default('—') }}：{{ alert.reason | default('—') }}
+{% endfor %}
+{% else %}
+✅ 庫存狀態正常
+{% endif %}
+
+## 💡 今日洞察 (AI 分析)
+
+{{ gemini_insight | default('（本日洞察生成失敗，請查 logger）') }}
+
+## ✅ 48h 優先事項
+
+{% if priority_actions %}
+{% for action in priority_actions %}
+{{ loop.index }}. {{ action }}
+{% endfor %}
+{% else %}
+（暫無自動產生的行動建議）
+{% endif %}
+
+═══════════════════════════════════════
+🤖 Operation Ollama-First v5.0 / daily_report_v2 (Hermes 模板模式)
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -0,0 +1,15 @@
+import os
+
+import pytest
+
+
+@pytest.fixture
+def host():
+    """Mail host for probe tests: ``MOOD_TEST_MAIL_HOST`` if set, else ``ms1.pchome.tw``."""
+    return os.environ.get("MOOD_TEST_MAIL_HOST", "ms1.pchome.tw")
+
+
+@pytest.fixture
+def port():
+    """Mail port for probe tests: ``MOOD_TEST_MAIL_PORT`` parsed as int, else 587."""
+    return int(os.environ.get("MOOD_TEST_MAIL_PORT", "587"))
--- a/tests/test_nemotron_qwen3_compat.py
+++ b/tests/test_nemotron_qwen3_compat.py
@@ -0,0 +1,456 @@
+"""
+test_nemotron_qwen3_compat.py
+─────────────────────────────────────────────────────────────────
+Operation Ollama-First v5.0 / Phase 3 / A9 — Nemotron qwen3 切換相容性測試
+
+驗證面：
+  T1. qwen3 chat 回應 OpenAI tool_calls 結構 → _parse_tool_calls_struct 正確
+  T2. qwen3 沒回 tool_calls → _parse_content_fallback 正確（與 NIM 同等容錯）
+  T3. qwen3 同時回 tool_calls + content → 優先採用 tool_calls
+  T4. qwen3 連線失敗 → 不丟例外給上游，自動 fallback NIM 路徑
+  T5. qwen3 + NIM 都失敗 → ADR-004 走 Hermes 規則引擎降級（含「🟡 [規則引擎]」標記）
+  T6. NEMOTRON_OLLAMA_FIRST 預設 false → 完全不呼叫 qwen3（戰前行為）
+
+紀律：
+  - 所有 HTTP 互動 mock，不實際呼叫 GCP Ollama 或 NIM
+  - 與 test_nemotron_fallback 共存，使用同款 FakeThreat
+  - assert log_ai_call 路徑可被 monkeypatch 旁路（不污染 ai_calls 表）
+"""
+from dataclasses import dataclass
+from contextlib import contextmanager
+import pytest
+
+
+# ─────────────────────────────────────────────────────────────
+# Fixtures
+# ─────────────────────────────────────────────────────────────
+@dataclass
+class FakeThreat:  # stand-in threat record; the defaults describe one HIGH-risk SKU
+    sku: str = "SKU-Q1"
+    name: str = "qwen3 測試品"
+    momo_price: float = 1200.0
+    pchome_price: float = 980.0
+    gap_pct: float = 22.4
+    sales_7d_delta_pct: float = -35.0  # serialized as "sales_delta" in the qwen3 prompt
+    risk: str = "HIGH"
+    recommended_action: str = "建議跟進降價"  # serialized as "action" in the qwen3 prompt
+    confidence: float = 0.85
+    sales_7d_curr_amount: float = 78000.0  # presumably the current 7-day sales amount — confirm
+    sales_7d_prev_amount: float = 120000.0  # presumably the previous 7-day sales amount — confirm
+
+
+class _FakeResp:
+    """Minimal stand-in for a ``requests`` response: status code, JSON payload, raise_for_status."""
+    def __init__(self, payload: dict, status: int = 200):
+        self.status_code, self._payload = status, payload
+
+    def raise_for_status(self):
+        if self.status_code >= 400:
+            from requests import HTTPError
+            raise HTTPError(f"HTTP {self.status_code}", response=self)
+
+    def json(self):
+        return self._payload
+
+
+@contextmanager
+def _noop_log_ai_call(*args, **kwargs):
+    """Stand-in for log_ai_call: accepts any arguments and yields a stub whose setters do nothing."""
+    class _Ctx:
+        def set_tokens(self, **_kw): pass
+        def set_error(self, *_a, **_kw): pass
+        def fallback_to_caller(self, *_a, **_kw): pass
+        def set_cache_hit(self, *_a, **_kw): pass
+        def add_meta(self, *_a, **_kw): pass
+    yield _Ctx()
+
+
+@pytest.fixture(autouse=True)
+def _reset_global_state():
+    """Clear dispatcher/Ollama module-level state before and after every test.
+
+    Per the original note, dispatch() de-duplicates via the module-level _ALERT_CACHE,
+    so a SKU left by one test would make later tests report dispatched=0.
+    """
+    import services.nemoton_dispatcher_service as _nem
+    import services.ollama_service as _oss
+
+    def _wipe():
+        _nem._ALERT_CACHE.clear()
+        _oss._unhealthy_marks.clear()
+        _oss._resolved_host_cache['host'] = None
+        _oss._resolved_host_cache['ts'] = 0
+    _wipe()
+    yield
+    _wipe()
+
+
+def _patch_execution_methods(monkeypatch, dispatcher):
+    """Swap the three ``_exec_*`` handlers for call recorders; return the shared log list."""
+    calls = []
+    def _recorder(kind):
+        return lambda *args, **kwargs: calls.append(
+            {"kind": kind, "args": args, "kwargs": kwargs}
+        )
+
+    for attr, kind in (("_exec_trigger_price_alert", "price_alert"),
+                       ("_exec_add_to_recommendation", "recommendation"),
+                       ("_exec_flag_for_human_review", "human_review")):
+        monkeypatch.setattr(dispatcher, attr, _recorder(kind))
+    return calls
+
+
+def _enable_qwen3_path(monkeypatch, module):
+    """Turn on NEMOTRON_OLLAMA_FIRST and stub log_ai_call, build_mcp_context and the Ollama host helpers."""
+    monkeypatch.setattr(module, "NEMOTRON_OLLAMA_FIRST", True)
+    monkeypatch.setattr(module, "log_ai_call", _noop_log_ai_call)
+    monkeypatch.setattr(module, "build_mcp_context", lambda: "MCP-MOCK")
+    # Patch on the source module: _call_qwen3_dispatch imports these names at call time.
+    import services.ollama_service as ollama_module
+    monkeypatch.setattr(ollama_module, "resolve_ollama_host", lambda: "http://gcp-mock:11434")
+    monkeypatch.setattr(ollama_module, "mark_unhealthy", lambda *a, **kw: None)
+
+
+# ─────────────────────────────────────────────────────────────
+# T1. qwen3 OpenAI tool_calls 結構 → 正確解析
+# ─────────────────────────────────────────────────────────────
+def test_qwen3_tool_calls_struct_parsed_and_executed(monkeypatch):
+    """A standard tool_calls reply from qwen3 must be executed directly, without calling NIM."""
+    import services.nemoton_dispatcher_service as module
+
+    _enable_qwen3_path(monkeypatch, module)
+
+    # Fake Ollama /api/chat reply carrying an OpenAI-style tool_calls structure.
+    fake_body = {
+        "message": {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "function": {
+                        "name": "trigger_price_alert",
+                        "arguments": {
+                            "sku": "SKU-Q1",
+                            "name": "qwen3 測試品",
+                            "gap_pct": 22.4,
+                            "sales_delta": -35.0,
+                            "action": "跟進降價至 $980",
+                            "confidence": 0.85,
+                        },
+                    }
+                }
+            ],
+        },
+        "prompt_eval_count": 320,
+        "eval_count": 64,
+        "done": True,
+    }
+    monkeypatch.setattr(
+        module.requests, "post", lambda *a, **kw: _FakeResp(fake_body)
+    )
+
+    dispatcher = module.NemotronDispatcher()
+    calls = _patch_execution_methods(monkeypatch, dispatcher)
+    # The flag survives even if the stub's exception is caught somewhere inside dispatch().
+    nim_called = {"v": False}
+
+    def _nim_should_not_be_called(*a, **kw):
+        nim_called["v"] = True
+        raise AssertionError("NIM 不應被呼叫，qwen3 已成功")
+
+    monkeypatch.setattr(dispatcher, "_call_nim", _nim_should_not_be_called)
+
+    threats = [FakeThreat()]
+    result = dispatcher.dispatch(threats, hermes_stats={"duration_sec": 1.0})
+
+    assert nim_called["v"] is False, "qwen3 成功時 NIM 不可被觸發"
+    assert result["dispatched"] == 1
+    assert result["nim_stats"].get("provider") == "gcp_ollama"
+    assert result["nim_stats"].get("model") == module.NEMOTRON_OLLAMA_MODEL
+    assert calls and calls[0]["kind"] == "price_alert"
+
+
+# ─────────────────────────────────────────────────────────────
+# T2. qwen3 沒回 tool_calls 但 content 含 JSON list → fallback 解析
+# ─────────────────────────────────────────────────────────────
+def test_qwen3_content_only_fallback_parsing(monkeypatch):
+    """Tool calls embedded in ``content`` as a JSON list must be parsed on the qwen3 path."""
+    import services.nemoton_dispatcher_service as module
+    _enable_qwen3_path(monkeypatch, module)
+
+    content_payload = (
+        '[{"name": "flag_for_human_review", '
+        '"parameters": {"sku": "SKU-Q1", "name": "qwen3 測試品", '
+        '"concern": "信心不足", "confidence": 0.45}}]'
+    )
+    fake_body = {
+        "message": {"role": "assistant", "content": content_payload, "tool_calls": []},
+        "prompt_eval_count": 100,
+        "eval_count": 30,
+    }
+    monkeypatch.setattr(module.requests, "post", lambda *a, **kw: _FakeResp(fake_body))
+
+    dispatcher = module.NemotronDispatcher()
+    calls = _patch_execution_methods(monkeypatch, dispatcher)
+    monkeypatch.setattr(
+        dispatcher, "_call_nim",
+        lambda threats: (_ for _ in ()).throw(AssertionError("NIM 不應被呼叫")),
+    )
+    result = dispatcher.dispatch([FakeThreat(confidence=0.45)], hermes_stats={"duration_sec": 1.0})
+    # An error raised inside the _call_nim stub may be absorbed by dispatch()'s fallback
+    # handling, so pin the provider to prove the qwen3 result was the one executed.
+    assert result["nim_stats"].get("provider") == "gcp_ollama"
+    assert result["dispatched"] == 1
+    assert calls and calls[0]["kind"] == "human_review"
+
+
+# ─────────────────────────────────────────────────────────────
+# T3. tool_calls + content 同時存在 → 優先 tool_calls
+# ─────────────────────────────────────────────────────────────
+def test_qwen3_tool_calls_takes_precedence_over_content(monkeypatch):
+    """Structured ``tool_calls`` win over a JSON list left in ``content``.
+
+    The mocked reply carries a ``flag_for_human_review`` call in ``content`` and
+    a ``trigger_price_alert`` call in ``tool_calls``; the first executed call is
+    expected to be the price alert.
+    """
+    import services.nemoton_dispatcher_service as module
+
+    _enable_qwen3_path(monkeypatch, module)
+
+    alert_arguments = {
+        "sku": "SKU-Q1",
+        "name": "qwen3 測試品",
+        "gap_pct": 22.4,
+        "sales_delta": -35.0,
+        "action": "降價",
+        "confidence": 0.85,
+    }
+    structured_call = {
+        "function": {"name": "trigger_price_alert", "arguments": alert_arguments}
+    }
+    competing_content = '[{"name": "flag_for_human_review", "parameters": {"sku": "X"}}]'
+    ollama_reply = {
+        "message": {
+            "role": "assistant",
+            "content": competing_content,
+            "tool_calls": [structured_call],
+        },
+        "prompt_eval_count": 200,
+        "eval_count": 40,
+    }
+
+    def _fake_post(*args, **kwargs):
+        return _FakeResp(ollama_reply)
+
+    monkeypatch.setattr(module.requests, "post", _fake_post)
+
+    dispatcher = module.NemotronDispatcher()
+    calls = _patch_execution_methods(monkeypatch, dispatcher)
+    threats = [FakeThreat()]
+    result = dispatcher.dispatch(threats, hermes_stats={"duration_sec": 1.0})
+
+    assert result["dispatched"] == 1
+    assert calls[0]["kind"] == "price_alert", "tool_calls 結構必須優先於 content fallback"
+
+
+# ─────────────────────────────────────────────────────────────
+# T4. qwen3 連線失敗 → 不爆，自動 fallback 到 NIM
+# ─────────────────────────────────────────────────────────────
+def test_qwen3_connection_error_falls_back_to_nim(monkeypatch):
+    """A connection error on the qwen3 path falls through to NIM.
+
+    ``requests.post`` is stubbed to raise ``ConnectionError``; the test expects
+    ``_call_nim`` to be invoked, one dispatch to happen and the NIM stats to be
+    surfaced in the result.
+    """
+    import requests
+    import services.nemoton_dispatcher_service as module
+
+    _enable_qwen3_path(monkeypatch, module)
+
+    def _unreachable(*args, **kwargs):
+        raise requests.ConnectionError("GCP unreachable")
+
+    monkeypatch.setattr(module.requests, "post", _unreachable)
+
+    # NIM path: provide a key and quota, and stub _call_nim to return one call.
+    monkeypatch.setattr(module, "NIM_API_KEY", "fake-key")
+    monkeypatch.setattr(module, "_check_nim_quota", lambda: True)
+
+    dispatcher = module.NemotronDispatcher()
+    calls = _patch_execution_methods(monkeypatch, dispatcher)
+    nim_invoked = {"v": False}
+
+    def _fake_nim(threats):
+        nim_invoked["v"] = True
+        alert_args = {
+            "sku": "SKU-Q1", "name": "qwen3 測試品",
+            "gap_pct": 22.4, "sales_delta": -35.0,
+            "action": "降價", "confidence": 0.85,
+        }
+        tool_calls = [{"tool": "trigger_price_alert", "args": alert_args}]
+        nim_stats = {"total_tokens": 256, "quota_used": 5}
+        return tool_calls, nim_stats
+
+    monkeypatch.setattr(dispatcher, "_call_nim", _fake_nim)
+
+    threats = [FakeThreat()]
+    result = dispatcher.dispatch(threats, hermes_stats={"duration_sec": 1.0})
+
+    assert nim_invoked["v"] is True, "qwen3 失敗後必須 fallback 至 NIM"
+    assert result["dispatched"] == 1
+    assert result["nim_stats"].get("total_tokens") == 256
+    assert calls[0]["kind"] == "price_alert"
+
+
+# ─────────────────────────────────────────────────────────────
+# T5. qwen3 + NIM 全失敗 → ADR-004 Hermes 規則引擎兜底
+# ─────────────────────────────────────────────────────────────
+def test_qwen3_and_nim_both_fail_falls_back_to_hermes_rules(monkeypatch):
+    """When both qwen3 and NIM fail, the Hermes rule engine must take over.
+
+    The qwen3 HTTP call raises ``ConnectionError`` and ``_call_nim`` raises
+    ``Timeout``. The test then checks that the result is flagged ``degraded``
+    and that the first executed action is a price alert whose ``action`` and
+    ``footprint`` carry the 🟡 degradation markers.
+    """
+    import requests
+    import services.nemoton_dispatcher_service as module
+
+    _enable_qwen3_path(monkeypatch, module)
+    # Generator-throw idiom: lets a lambda raise the exception when called.
+    monkeypatch.setattr(module.requests, "post",
+                        lambda *a, **kw: (_ for _ in ()).throw(requests.ConnectionError("qwen3 down")))
+
+    monkeypatch.setattr(module, "NIM_API_KEY", "fake-key")
+    monkeypatch.setattr(module, "_check_nim_quota", lambda: True)
+
+    dispatcher = module.NemotronDispatcher()
+
+    # Make _call_nim fail as well, with a timeout.
+    monkeypatch.setattr(
+        dispatcher, "_call_nim",
+        lambda threats: (_ for _ in ()).throw(requests.Timeout("NIM timeout")),
+    )
+
+    # Intercept the _exec_* methods the rule engine calls and record their
+    # arguments so the 🟡 markers in the concern / reason text can be checked.
+    # Per the original author's note, the rule engine calls some _exec_* methods
+    # with positional args (_exec_trigger_price_alert signature: sku, name,
+    # gap_pct, sales_delta, action, confidence, ...), so the recorders must merge
+    # positional and keyword arguments before .get('action') can work.
+    # NOTE(review): the name-order lists below mirror signatures that are not
+    # visible in this file — confirm against the service if they change.
+    captured = []
+
+    def _merge_positional(name_order, args, kwargs):
+        # Keyword arguments win; positionals only fill names not already present.
+        merged = dict(kwargs)
+        for i, val in enumerate(args):
+            if i < len(name_order):
+                merged.setdefault(name_order[i], val)
+        return merged
+
+    def record_review(*args, **kwargs):
+        merged = _merge_positional(
+            ['sku', 'name', 'concern', 'confidence', 'footprint',
+             'momo_price', 'comp_price', 'gap_pct', 'sales_delta',
+             'revenue_loss_7d', 'recommended_price'],
+            args, kwargs)
+        captured.append(("human_review", merged))
+
+    def record_alert(*args, **kwargs):
+        merged = _merge_positional(
+            ['sku', 'name', 'gap_pct', 'sales_delta', 'action', 'confidence',
+             'momo_price', 'comp_price', 'footprint',
+             'revenue_loss_7d', 'recommended_price'],
+            args, kwargs)
+        captured.append(("price_alert", merged))
+
+    def record_reco(*args, **kwargs):
+        # NOTE(review): positional arguments are dropped here, unlike the two
+        # recorders above — fine as long as no assertion inspects this entry.
+        captured.append(("recommendation", kwargs))
+
+    monkeypatch.setattr(dispatcher, "_exec_flag_for_human_review", record_review)
+    monkeypatch.setattr(dispatcher, "_exec_trigger_price_alert", record_alert)
+    monkeypatch.setattr(dispatcher, "_exec_add_to_recommendation", record_reco)
+
+    # Per the original note: gap_pct=22.4 + risk=HIGH hits rule 2
+    # (trigger_price_alert), whose action should carry the 🟡 [規則引擎] prefix.
+    # Presumably these are FakeThreat's defaults — TODO confirm.
+    result = dispatcher.dispatch([FakeThreat()], hermes_stats={"duration_sec": 1.0})
+
+    assert result["nim_stats"].get("degraded") is True, "ADR-004 降級旗標必須存在"
+    assert captured, "規則引擎必須兜底執行至少一次"
+    kind, kwargs = captured[0]
+    assert kind == "price_alert"
+    assert "🟡 [規則引擎]" in kwargs.get("action", ""), \
+        "ADR-004 鐵律：Hermes 規則引擎兜底時必須帶『🟡 [規則引擎]』標記"
+    # The footprint should also carry the 🟡 [降級模式 ADR-004] marker
+    # (per the original note, shown in the Telegram alert header).
+    assert "🟡 [降級模式 ADR-004]" in kwargs.get("footprint", "")
+
+
+# ─────────────────────────────────────────────────────────────
+# T6. feature flag 預設 false → 戰前行為，qwen3 完全不被呼叫
+# ─────────────────────────────────────────────────────────────
+def test_flag_default_false_preserves_pre_war_behavior(monkeypatch):
+    """With NEMOTRON_OLLAMA_FIRST false, dispatch never touches the Ollama HTTP path.
+
+    ``requests.post`` is replaced by a stub that records the call and raises;
+    NIM is stubbed to return one price alert. The test expects no HTTP call,
+    one dispatch, and ``nim_stats['provider']`` to be absent or ``'nim'``.
+    """
+    import services.nemoton_dispatcher_service as module
+
+    # The flag is expected to default to false; set it explicitly to be safe.
+    monkeypatch.setattr(module, "NEMOTRON_OLLAMA_FIRST", False)
+
+    # Any requests.post call counts as a failure: the NIM path is served by the
+    # _call_nim stub below and needs no HTTP.
+    qwen3_post_called = {"v": False}
+
+    def _forbidden_post(*args, **kwargs):
+        qwen3_post_called["v"] = True
+        raise AssertionError("flag=false 時不可呼叫 GCP Ollama HTTP")
+
+    monkeypatch.setattr(module.requests, "post", _forbidden_post)
+
+    monkeypatch.setattr(module, "NIM_API_KEY", "fake-key")
+    monkeypatch.setattr(module, "_check_nim_quota", lambda: True)
+
+    dispatcher = module.NemotronDispatcher()
+    calls = _patch_execution_methods(monkeypatch, dispatcher)
+
+    def _nim_stub(threats):
+        alert_args = {
+            "sku": "SKU-Q1", "name": "qwen3 測試品",
+            "gap_pct": 22.4, "sales_delta": -35.0,
+            "action": "降價", "confidence": 0.85,
+        }
+        return (
+            [{"tool": "trigger_price_alert", "args": alert_args}],
+            {"total_tokens": 100, "quota_used": 1},
+        )
+
+    monkeypatch.setattr(dispatcher, "_call_nim", _nim_stub)
+
+    threats = [FakeThreat()]
+    result = dispatcher.dispatch(threats, hermes_stats={"duration_sec": 1.0})
+
+    assert qwen3_post_called["v"] is False
+    assert result["dispatched"] == 1
+    assert result["nim_stats"].get("provider") in (None, "nim"), \
+        "flag=false 時 nim_stats 不應帶 provider='gcp_ollama'"
+
+
+# ─────────────────────────────────────────────────────────────
+# T7. 共用 helper 純單元測試（OpenAI tool_calls schema 邊界）
+# ─────────────────────────────────────────────────────────────
+def test_parse_tool_calls_struct_handles_string_arguments():
+    """``_parse_tool_calls_struct`` accepts JSON-string and dict ``arguments``.
+
+    Also pins the edge cases: empty list / ``None`` give ``[]``, malformed JSON
+    arguments degrade to ``{}``, and an entry without a name is dropped.
+    """
+    from services.nemoton_dispatcher_service import _parse_tool_calls_struct as parse
+
+    def _single(function_payload):
+        # Wrap one function payload in the tool_calls list shape.
+        return [{"function": function_payload}]
+
+    # NIM style: arguments arrive as a JSON string.
+    parsed_nim = parse(_single({"name": "foo", "arguments": '{"a": 1, "b": "x"}'}))
+    assert parsed_nim == [{"tool": "foo", "args": {"a": 1, "b": "x"}}]
+
+    # qwen3 / Ollama style: arguments are already a dict.
+    parsed_qwen = parse(_single({"name": "bar", "arguments": {"a": 2}}))
+    assert parsed_qwen == [{"tool": "bar", "args": {"a": 2}}]
+
+    # Edge cases: empty input, broken JSON, missing name — no exception.
+    assert parse([]) == []
+    assert parse(None) == []
+    parsed_bad = parse(_single({"name": "baz", "arguments": "{not json"}))
+    assert parsed_bad == [{"tool": "baz", "args": {}}]
+    assert parse(_single({"arguments": "{}"})) == []
+
+
+def test_parse_content_fallback_handles_various_shapes():
+    """``_parse_content_fallback`` handles both entry shapes and rejects junk."""
+    from services.nemoton_dispatcher_service import _parse_content_fallback as parse
+
+    # Flat entries: [{"name", "parameters"}].
+    flat_entry = '[{"name": "foo", "parameters": {"a": 1}}]'
+    assert parse(flat_entry) == [{"tool": "foo", "args": {"a": 1}}]
+
+    # Name nested under "function", arguments given as a JSON string.
+    nested_entry = '[{"function": {"name": "bar"}, "arguments": "{\\"b\\": 2}"}]'
+    assert parse(nested_entry) == [{"tool": "bar", "args": {"b": 2}}]
+
+    # Empty string, non-JSON text and a non-list JSON value all yield [].
+    assert parse("") == []
+    assert parse("not json") == []
+    assert parse('{"a":1}') == []
--- a/tests/test_openclaw_daily_template.py
+++ b/tests/test_openclaw_daily_template.py
@@ -0,0 +1,212 @@
+"""
+tests/test_openclaw_daily_template.py
+─────────────────────────────────────────────────────────────────
+Operation Ollama-First v5.0 / Phase 3 / A8 — 日報模板路由測試
+
+驗證面：
+  T1. flag=false（預設）→ 走 _legacy_full_gemini_daily_report（regression）
+  T2. flag=true → 走 _generate_daily_report_hermes_template
+  T3. _compute_daily_kpi 各 KPI 函數可獨立 mock 測（DB 失敗回安全預設）
+  T4. _render_daily_template_v2 缺欄位優雅降級（_SafeUndefined 不 raise）
+  T5. _SafeUndefined 對 'X.Y.Z' 巢狀存取不爆
+
+紀律：
+  - 不打真實 DB / Gemini API
+  - 不寫 ai_insights
+  - 不發 Telegram
+"""
+
+import os
+from datetime import date, datetime
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# Fixtures
+# ═══════════════════════════════════════════════════════════════════════════
+
+@pytest.fixture(autouse=True)
+def _reset_flag(monkeypatch):
+    """Remove the daily-template flag from the environment before each test."""
+    flag_name = 'OPENCLAW_DAILY_HERMES_TEMPLATE'
+    # raising=False: a missing variable is not an error.
+    monkeypatch.delenv(flag_name, raising=False)
+    yield
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# T1+T2 — Routing
+# ═══════════════════════════════════════════════════════════════════════════
+
+class TestRouting:
+    """Routing of ``generate_daily_report`` on OPENCLAW_DAILY_HERMES_TEMPLATE."""
+
+    def test_flag_false_routes_to_legacy(self, monkeypatch):
+        """With the flag set to 'false', only the legacy generator is called."""
+        monkeypatch.setenv('OPENCLAW_DAILY_HERMES_TEMPLATE', 'false')
+        import importlib
+        import services.openclaw_strategist_service as svc
+        importlib.reload(svc)
+
+        visited = []
+
+        def _legacy_stub():
+            visited.append('legacy')
+            return {'status': 'ok', 'mode': 'legacy'}
+
+        def _hermes_stub():
+            visited.append('hermes')
+            return {'status': 'ok', 'mode': 'hermes_template'}
+
+        monkeypatch.setattr(svc, '_legacy_full_gemini_daily_report', _legacy_stub)
+        monkeypatch.setattr(svc, '_generate_daily_report_hermes_template', _hermes_stub)
+
+        svc.generate_daily_report()
+
+        assert 'legacy' in visited, "flag=false 必須走 legacy 路徑"
+        assert 'hermes' not in visited, "flag=false 不可走 hermes 模板"
+
+    def test_flag_true_routes_to_hermes_template(self, monkeypatch):
+        """With the flag set to 'true', only the Hermes template generator is called."""
+        monkeypatch.setenv('OPENCLAW_DAILY_HERMES_TEMPLATE', 'true')
+        import importlib
+        import services.openclaw_strategist_service as svc
+        importlib.reload(svc)
+
+        visited = []
+
+        def _legacy_stub():
+            visited.append('legacy')
+            return {'status': 'ok'}
+
+        def _hermes_stub():
+            visited.append('hermes')
+            return {'status': 'ok'}
+
+        monkeypatch.setattr(svc, '_legacy_full_gemini_daily_report', _legacy_stub)
+        monkeypatch.setattr(svc, '_generate_daily_report_hermes_template', _hermes_stub)
+
+        svc.generate_daily_report()
+
+        assert 'hermes' in visited, "flag=true 必須走 hermes 模板路徑"
+        assert 'legacy' not in visited, "flag=true 不可走 legacy"
+
+    def test_flag_default_is_false(self, monkeypatch):
+        """With the variable unset, the flag helper reports False."""
+        # The autouse fixture has already removed the variable; nothing is set here.
+        import importlib
+        import services.openclaw_strategist_service as svc
+        importlib.reload(svc)
+
+        enabled = svc._daily_hermes_template_enabled()
+        assert enabled is False
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# T3 — KPI 計算（DB 失敗安全降級）
+# ═══════════════════════════════════════════════════════════════════════════
+
+class TestKPIComputation:
+    """KPI helpers: input validation and safe defaults when the DB fails."""
+
+    def test_compute_daily_kpi_invalid_date_raises(self):
+        """A non-date argument makes ``_compute_daily_kpi`` raise TypeError."""
+        import services.openclaw_strategist_service as svc
+        not_a_date = "not-a-date"
+        with pytest.raises(TypeError):
+            svc._compute_daily_kpi(not_a_date)
+
+    def test_revenue_kpi_returns_safe_default_on_db_error(self, monkeypatch):
+        """A failing session makes ``_query_revenue_kpi`` return zeros, not raise."""
+        import services.openclaw_strategist_service as svc
+
+        class _BrokenSession:
+            """Session stand-in whose every query fails."""
+
+            def execute(self, *args, **kwargs):
+                raise RuntimeError('DB connection lost')
+
+            def close(self):
+                return None
+
+        # The class itself serves as the zero-argument session factory.
+        monkeypatch.setattr(svc, 'get_session', _BrokenSession)
+
+        report_day = date(2026, 5, 3)
+        result = svc._query_revenue_kpi(report_day)
+
+        for key in ('today', 'dod_pct', 'wow_pct'):
+            assert result[key] == 0.0
+
+
+# ═══════════════════════════════════════════════════════════════════════════
+# T4+T5 — Template 渲染與缺欄位優雅降級
+# ═══════════════════════════════════════════════════════════════════════════
+
+class TestTemplateRendering:
+    """Rendering of the v2 daily template with full and partial contexts."""
+
+    def test_render_with_full_context_succeeds(self):
+        """A fully populated context renders and surfaces its key values."""
+        import services.openclaw_strategist_service as svc
+
+        revenue = {
+            'today': 1234567.0,
+            'yesterday': 1100000.0,
+            'avg_7d': 1050000.0,
+            'dod_pct': 12.2,
+            'wow_pct': 17.6,
+        }
+        orders = {
+            'today_rows': 234,
+            'today_sku': 187,
+            'avg_value_today': 5276.0,
+        }
+        top_skus = [
+            {'name': 'SKU-A', 'qty': 50, 'revenue': 100000},
+            {'name': 'SKU-B', 'qty': 32, 'revenue': 80000},
+        ]
+        price_gaps = [
+            {'sku_name': '商品X', 'momo_price': 1200, 'comp_price': 980,
+             'gap_pct': 22.4, 'competitor': 'PChome'},
+        ]
+        context = {
+            'today': '2026年05月02日',
+            'weekday': '週五',
+            'revenue': revenue,
+            'orders': orders,
+            'top_skus': top_skus,
+            'price_gaps': price_gaps,
+            'inventory_alerts': [],
+            'priority_actions': ['對 SKU-A 啟動 EA 流程', '觀察 PChome 補貼'],
+            'gemini_insight': '今日營收強勁成長，建議加碼家電促銷檔期。',
+        }
+
+        rendered = svc._render_daily_template_v2(context)
+
+        expected_fragments = (
+            '2026年05月02日',
+            '週五',
+            'NT$1,234,567',
+            'SKU-A',
+            '商品X',
+            'PChome',
+            '今日營收強勁成長',
+        )
+        for fragment in expected_fragments:
+            assert fragment in rendered
+
+    def test_render_with_missing_fields_does_not_raise(self):
+        """Empty or partial sections render to a non-empty string without raising."""
+        import services.openclaw_strategist_service as svc
+
+        context = {
+            'today': '2026年05月02日',
+            'weekday': '週五',
+            'revenue': {'today': 0.0, 'dod_pct': 0.0, 'wow_pct': 0.0},
+            'orders': {},  # deliberately empty dict
+            'top_skus': [],
+            'price_gaps': [],
+            'inventory_alerts': [],
+            'priority_actions': [],
+            'gemini_insight': '',
+        }
+
+        # Passing means the call below does not raise.
+        rendered = svc._render_daily_template_v2(context)
+
+        assert isinstance(rendered, str)
+        assert len(rendered) > 0
+        # Missing sections should fall back to a placeholder or default text.
+        has_empty_notice = '今日無熱銷資料' in rendered
+        assert has_empty_notice or '✅' in rendered
+
+    def test_safe_undefined_nested_access(self):
+        """A context lacking whole sections still renders without raising."""
+        import services.openclaw_strategist_service as svc
+
+        # revenue / orders / top_skus and the rest are omitted on purpose.
+        context = {
+            'today': '2026年05月02日',
+            'weekday': '週五',
+        }
+        rendered = svc._render_daily_template_v2(context)
+        assert isinstance(rendered, str)
+        assert '2026年05月02日' in rendered
--- a/tests/test_openclaw_qa_golden_set.py
+++ b/tests/test_openclaw_qa_golden_set.py
@@ -0,0 +1,286 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+tests/test_openclaw_qa_golden_set.py
+OpenClaw Q&A 黃金集 A/B 對照框架
+(Operation Ollama-First v5.0 — Phase 3, A7 fullstack-engineer)
+
+目的:
+  在統帥盲測前，先建立 Ollama qwen3:14b vs Gemini 2.5 Flash 的「量化基線」。
+  10 題典型 momo 商業 Q&A，雙模型各跑一次，比對：
+    - 簡體字污染數量（A2 黃燈警訊核心）
+    - 回應長度
+    - 結構性指標（行數、列點數）
+    - 拒答訊號
+    - 黃金關鍵字命中率（題目自帶 expect_keywords）
+
+執行:
+  RUN_GOLDEN_SET=1 pytest tests/test_openclaw_qa_golden_set.py -v -s
+  # GCP 還沒拉 qwen3:14b 之前，預設 SKIP（避免 CI 紅燈）
+
+紀律:
+  - PII 紀律：題目/答案無真實 chat_id / username / 身份證 / 手機，全部去識別化
+  - 不對「正確性」做 hard assert；本框架專做「品質量化基線」收集
+  - 報告印到 stdout（pytest -s 顯示），人工檢視，不卡 CI
+"""
+
+import json
+import os
+import sys
+import time
+from typing import Dict, List, Optional
+
+import pytest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+# ─────────────────────────────────────────────────────────────────────────────
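+# Run conditions: all three must hold for a real run
+#   1. RUN_GOLDEN_SET=1 (the only condition that gates the module-level SKIP)
+#   2. OPENCLAW_QA_OLLAMA_HOST reachable, with the model pulled
+#   3. GEMINI_API_KEY set
+# Conditions 2 and 3 are asserted by test_environment_ready (FAIL, not SKIP).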
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _ollama_reachable(host: str, timeout: float = 2.0) -> bool:
+    """Return True when ``GET {host}/api/version`` answers with HTTP 200.
+
+    Any exception (import failure, bad host, network error) yields False.
+    """
+    try:
+        import requests
+        version_url = host.rstrip('/') + '/api/version'
+        return requests.get(version_url, timeout=timeout).status_code == 200
+    except Exception:
+        return False
+
+
+def _ollama_has_model(host: str, model: str, timeout: float = 3.0) -> bool:
+    """Return True when the Ollama host lists *model* among its pulled models.
+
+    Queries ``GET {host}/api/tags`` and compares against each entry's ``name``.
+    When *model* includes a tag (e.g. ``qwen3:14b``) the name must match
+    exactly, so a different size or a differently named family no longer counts
+    as "ready". When *model* has no tag, any tag of that exact base name
+    matches.
+
+    Any exception (import failure, network error, unexpected payload) yields
+    False.
+    """
+    try:
+        import requests
+        r = requests.get(f"{host.rstrip('/')}/api/tags", timeout=timeout)
+        if r.status_code != 200:
+            return False
+        names = [m.get('name', '') for m in r.json().get('models', [])]
+        if ':' in model:
+            # Tagged request: only the exact model:tag satisfies the check.
+            return model in names
+        # Untagged request: compare the base name only, but exactly
+        # (the old prefix test let "qwen3" match "qwen3-coder").
+        return any(name.split(':', 1)[0] == model for name in names)
+    except Exception:
+        return False
+
+
+# Opt-in switch: the whole module is skipped unless RUN_GOLDEN_SET=1.
+_RUN_GOLDEN = os.environ.get('RUN_GOLDEN_SET', '0') == '1'
+# Ollama endpoint: the QA-specific variable wins, then OLLAMA_HOST_PRIMARY,
+# then the built-in default address.
+_HOST = os.environ.get(
+    'OPENCLAW_QA_OLLAMA_HOST',
+    os.environ.get('OLLAMA_HOST_PRIMARY', 'http://34.143.170.20:11434'),
+)
+_MODEL = os.environ.get('OPENCLAW_QA_OLLAMA_MODEL', 'qwen3:14b')
+_HAS_GEMINI = bool(os.environ.get('GEMINI_API_KEY'))
+
+# Only _RUN_GOLDEN gates the skip; host/model/key readiness is asserted in
+# test_environment_ready.
+pytestmark = pytest.mark.skipif(
+    not _RUN_GOLDEN,
+    reason="黃金集需要 RUN_GOLDEN_SET=1 + GCP qwen3:14b ready + GEMINI_API_KEY；統帥盲測前才跑",
+)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 黃金集（10 題；全部去 PII；情境取自 momo-pro 真實 Telegram 互動模式）
+# ─────────────────────────────────────────────────────────────────────────────
+
+# Each entry has four keys:
+#   id              - stable identifier used in the report and the JSON export
+#   question        - prompt sent verbatim to both models
+#   expect_keywords - substrings counted as keyword hits in a response
+#   category        - label printed next to the question in the report
+GOLDEN_SET: List[Dict] = [
+    {
+        "id": "g01_weekly_trend",
+        "question": "本週 momo 業績趨勢如何？跟上週比？",
+        "expect_keywords": ["業績", "週", "成長"],
+        "category": "業績趨勢",
+    },
+    {
+        "id": "g02_competitor_threat",
+        "question": "PChome 最近在 3C 類有發動補貼戰嗎？對我們影響？",
+        "expect_keywords": ["PChome", "3C"],
+        "category": "競品威脅",
+    },
+    {
+        "id": "g03_pricing_strategy",
+        "question": "我有一支 SKU 比競品貴 8%，銷量持續下滑，該怎麼辦？",
+        "expect_keywords": ["定價", "競品"],
+        "category": "定價策略",
+    },
+    {
+        "id": "g04_seasonal",
+        "question": "母親節檔期快到了，建議哪些品類加碼？",
+        "expect_keywords": ["母親節", "品類"],
+        "category": "季節機會",
+    },
+    {
+        "id": "g05_command_routing",
+        "question": "我想看完整週報怎麼下指令？",
+        "expect_keywords": ["weekly", "週報"],
+        "category": "指令導引",
+    },
+    {
+        "id": "g06_top_threats",
+        "question": "目前 TOP 5 最緊急的競價威脅是哪些？",
+        "expect_keywords": ["威脅", "TOP"],
+        "category": "威脅清單",
+    },
+    {
+        "id": "g07_inventory_signal",
+        "question": "如何判斷某 SKU 該促銷出清？",
+        "expect_keywords": ["促銷", "出清"],
+        "category": "庫存決策",
+    },
+    {
+        "id": "g08_cross_category",
+        "question": "家電 vs 生活雜貨，哪個品類本月成長動能比較強？",
+        "expect_keywords": ["家電", "成長"],
+        "category": "品類比較",
+    },
+    {
+        "id": "g09_data_unavailable",
+        "question": "幫我看 2030 年的銷售預測。",
+        "expect_keywords": ["資料", "無法"],  # expect an honest "insufficient data" answer rather than a fabrication
+        "category": "資料邊界",
+    },
+    {
+        "id": "g10_action_item",
+        "question": "綜合本週數據，給我 3 個 48 小時內必做行動。",
+        "expect_keywords": ["行動", "建議"],
+        "category": "行動清單",
+    },
+]
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Scoring helpers
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _count_simplified(text: str) -> int:
+    """Count characters of *text* that appear in the service's simplified-hint set.
+
+    A falsy *text* counts as the empty string.
+    """
+    from services.openclaw_strategist_service import _SIMPLIFIED_HINT_CHARS
+    hits = 0
+    for ch in (text or ''):
+        if ch in _SIMPLIFIED_HINT_CHARS:
+            hits += 1
+    return hits
+
+
+def _count_keyword_hits(text: str, keywords: List[str]) -> int:
+    """Return how many entries of *keywords* occur as substrings of *text*.
+
+    A falsy *text* yields 0 without looking at *keywords*.
+    """
+    return len([kw for kw in keywords if kw in text]) if text else 0
+
+
+def _is_refusal(text: str) -> bool:
+    """Return True when *text* contains any of the service's refusal patterns.
+
+    A falsy *text* is treated as the empty string.
+    """
+    from services.openclaw_strategist_service import _REFUSAL_PATTERNS
+    haystack = text or ''
+    for pattern in _REFUSAL_PATTERNS:
+        if pattern in haystack:
+            return True
+    return False
+
+
+def _structure_score(text: str) -> Dict[str, int]:
+    """Return rough structural counts for *text*.
+
+    ``lines`` is the newline count plus one, ``bullets`` is the total number of
+    occurrences of the markers ``'- '``, ``'• '``, ``'* '``, ``'1.'``, ``'2.'``
+    and ``'3.'`` anywhere in the text (a coarse substring count, not a
+    line-start match), and ``tables`` is the number of ``|`` characters.
+    A falsy *text* yields all zeros.
+    """
+    score = {"lines": 0, "bullets": 0, "tables": 0}
+    if text:
+        bullet_markers = ('- ', '• ', '* ', '1.', '2.', '3.')
+        score["lines"] = text.count('\n') + 1
+        score["bullets"] = sum(map(text.count, bullet_markers))
+        score["tables"] = text.count('|')
+    return score
+
+
+def _score_response(qid: str, question: str, response: str, expect_kw: List[str]) -> Dict:
+    """Build the metrics row for one model response.
+
+    Combines length, simplified-character count, keyword hits, refusal flag,
+    structure counts and a 120-character single-line preview. *question* is
+    accepted but not used in the row.
+    """
+    body = response or ''
+    structure = _structure_score(response)
+    row = {
+        "qid": qid,
+        "length": len(body),
+        "simplified_count": _count_simplified(response),
+        "keyword_hits": _count_keyword_hits(response, expect_kw),
+        "is_refusal": _is_refusal(response),
+    }
+    for key in ("lines", "bullets", "tables"):
+        row[key] = structure[key]
+    # Newlines are flattened so the preview prints on one report line.
+    row["preview"] = body[:120].replace('\n', ' / ')
+    return row
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Caller wrappers (使用 service 的真實函式)
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _call_ollama(question: str) -> Optional[str]:
+    """Send *question* through the service's ``_call_qwen3_qa``.
+
+    The second argument is passed as ``None`` and the third is a
+    ``golden-<unix seconds>`` identifier.
+    """
+    from services.openclaw_strategist_service import _call_qwen3_qa
+    run_id = f"golden-{int(time.time())}"
+    return _call_qwen3_qa(question, None, run_id)
+
+
+def _call_gemini_baseline(question: str) -> Optional[str]:
+    """Send *question* to the service's ``_call_gemini`` with a fixed baseline prompt.
+
+    The system prompt asks for Traditional Chinese, forbids simplified
+    characters and caps the reply at 500 characters; temperature is 0.5 and the
+    call is tagged ``openclaw_qa_golden``.
+    """
+    from services.openclaw_strategist_service import _call_gemini
+    baseline_prompt = (
+        "你是 MOMO Pro 電商情報策略師「OpenClaw」。以繁體中文（台灣用語）回覆使用者。"
+        "嚴禁簡體字。回覆長度控制在 500 字內，可用 Markdown 條列。"
+    )
+    answer = _call_gemini(
+        baseline_prompt, question, temperature=0.5, caller="openclaw_qa_golden"
+    )
+    return answer
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Tests
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_environment_ready():
+    """Sanity check before the golden run: host reachable, model pulled, key set."""
+    host_ok = _ollama_reachable(_HOST)
+    assert host_ok, f"Ollama 主機不可達：{_HOST}"
+
+    model_ok = _ollama_has_model(_HOST, _MODEL)
+    assert model_ok, (
+        f"GCP Ollama 尚未拉 {_MODEL}（請於 Phase 8 由 A1 完成 ollama pull）"
+    )
+
+    assert _HAS_GEMINI, "GEMINI_API_KEY 未設"
+
+
+def test_golden_set_ab_comparison(capsys):
+    """Run every golden question through both models and print the metrics.
+
+    This test makes no hard assertion about answer correctness; it collects a
+    quantitative quality baseline. The rows are printed to stdout (visible
+    with ``pytest -s``) and exported to ``logs/qa_golden_baseline.json`` next to
+    this file.
+
+    Hard assertions (minimal safety net):
+      - Ollama answered at least 8 questions and Gemini at least 9
+      - the Gemini baseline contains no simplified-character hits
+
+    ``capsys`` is requested but not used by the body.
+    """
+    flag_name = 'OPENCLAW_QA_OLLAMA_FIRST'
+    previous_flag = os.environ.get(flag_name)
+    # Enable the flag so _call_qwen3_qa takes its real path (per the original
+    # author's note). The previous value is restored afterwards so the setting
+    # cannot leak into tests that run later in the same process.
+    os.environ[flag_name] = 'true'
+
+    rows = []
+    try:
+        for item in GOLDEN_SET:
+            qid = item['id']
+            question = item['question']
+            kws = item['expect_keywords']
+
+            ollama_resp = _call_ollama(question)
+            gemini_resp = _call_gemini_baseline(question)
+
+            rows.append({
+                'qid': qid,
+                'category': item['category'],
+                'question': question,
+                'ollama': _score_response(qid, question, ollama_resp or '', kws),
+                'gemini': _score_response(qid, question, gemini_resp or '', kws),
+            })
+    finally:
+        if previous_flag is None:
+            os.environ.pop(flag_name, None)
+        else:
+            os.environ[flag_name] = previous_flag
+
+    # Print the quantitative baseline (only visible with pytest -s).
+    print("\n" + "=" * 100)
+    print("OpenClaw QA 黃金集 A/B 量化基線（Ollama qwen3:14b vs Gemini 2.5 Flash）")
+    print("=" * 100)
+    # rows is built 1:1 and in order from GOLDEN_SET, so zip pairs each row
+    # with its own question and the kw denominator is that question's count.
+    for item, r in zip(GOLDEN_SET, rows):
+        kw_total = len(item['expect_keywords'])
+        print(f"\n[{r['qid']}] ({r['category']}) {r['question']}")
+        for side in ('ollama', 'gemini'):
+            s = r[side]
+            print(
+                f"  {side:>7}: len={s['length']:>4} simp={s['simplified_count']:>2} "
+                f"kw={s['keyword_hits']}/{kw_total} "
+                f"lines={s['lines']:>2} refusal={s['is_refusal']}"
+            )
+            print(f"           preview: {s['preview']}")
+
+    # Export the rows as JSON for later analysis.
+    out_path = os.path.join(os.path.dirname(__file__), 'logs', 'qa_golden_baseline.json')
+    os.makedirs(os.path.dirname(out_path), exist_ok=True)
+    with open(out_path, 'w', encoding='utf-8') as f:
+        json.dump(rows, f, ensure_ascii=False, indent=2)
+    print(f"\n基線已存：{out_path}")
+
+    # Hard assertions (minimal safety net).
+    ollama_responded = sum(1 for r in rows if r['ollama']['length'] > 0)
+    gemini_responded = sum(1 for r in rows if r['gemini']['length'] > 0)
+    assert ollama_responded >= 8, f"Ollama 回應率過低：{ollama_responded}/10"
+    assert gemini_responded >= 9, f"Gemini 回應率過低：{gemini_responded}/10"
+
+    # The Gemini baseline should be free of simplified characters; a hit here
+    # suggests the metric itself may be misjudging.
+    for r in rows:
+        assert r['gemini']['simplified_count'] == 0, (
+            f"Gemini baseline 簡體污染（指標可能誤判）：{r['qid']} {r['gemini']['preview']}"
+        )
--- a/tests/test_openclaw_qa_routing.py
+++ b/tests/test_openclaw_qa_routing.py
@@ -0,0 +1,358 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+tests/test_openclaw_qa_routing.py
+OpenClaw Q&A 路由 + 品質守門 unit tests
+(Operation Ollama-First v5.0 — Phase 3, A7 fullstack-engineer)
+
+涵蓋:
+  - feature flag OPENCLAW_QA_OLLAMA_FIRST=false → 走 Gemini-first（regression test）
+  - flag=true + 高品質 Ollama 回應 → 直接回 Ollama 結果，不走 Gemini
+  - flag=true + 低品質 Ollama 回應 → 升級至 Gemini，並標 fallback_to=openclaw_qa_gemini_fallback
+  - flag=true + Ollama 呼叫失敗 → 升級至 Gemini
+  - _is_low_quality_response 各規則：空字串 / 長度過短 / 簡體污染 / 拒答 / 流水帳
+
+執行:
+  pytest tests/test_openclaw_qa_routing.py -v
+"""
+
+import os
+import sys
+import time
+from typing import Any, Dict, Optional
+
+import pytest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import services.openclaw_strategist_service as svc
+import services.ai_call_logger as logger_mod
+from services.ai_call_logger import _reset_kill_switch
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Fixtures
+# ─────────────────────────────────────────────────────────────────────────────
+
+@pytest.fixture(autouse=True)
+def reset_state(monkeypatch):
+    """Reset the logger kill-switch and record ai_calls rows instead of writing them.
+
+    Applied automatically to every test. ``logger_mod._write_to_db`` is replaced
+    by a recorder, ``AI_CALL_LOGGING_ENABLED`` is set to ``true`` and
+    ``OPENCLAW_QA_OLLAMA_FIRST`` is removed from the environment.
+
+    Yields:
+        list: one dict per intercepted ``_write_to_db`` call, holding the
+        fields listed in ``copied_fields``.
+    """
+    _reset_kill_switch()
+    records = []
+    copied_fields = (
+        'caller', 'provider', 'model', 'status',
+        'fallback_to', 'error', 'meta', 'request_id',
+    )
+
+    def record_call(state):
+        # Snapshot only the attributes the assertions look at; meta is
+        # shallow-copied so the stored row is a separate dict object.
+        row = {name: getattr(state, name) for name in copied_fields}
+        row['meta'] = dict(row['meta'])
+        records.append(row)
+
+    monkeypatch.setattr(logger_mod, '_write_to_db', record_call)
+    monkeypatch.setenv('AI_CALL_LOGGING_ENABLED', 'true')
+    # Default every test to the pre-rollout state: flag not set.
+    monkeypatch.delenv('OPENCLAW_QA_OLLAMA_FIRST', raising=False)
+    yield records
+
+
+def _wait_async(captured, n=1, timeout=2.0):
+    """Poll until ``captured`` holds at least ``n`` items or ``timeout`` expires.
+
+    Args:
+        captured: sized collection that another piece of code appends to
+            (here, the list yielded by the ``reset_state`` fixture).
+        n: minimum number of items to wait for.
+        timeout: maximum time to wait, in seconds.
+
+    Returns:
+        True as soon as ``len(captured) >= n``; False if the deadline passes first.
+    """
+    # Monotonic clock: a wall-clock adjustment during the test run can neither
+    # shorten nor extend the wait.
+    deadline = time.monotonic() + timeout
+    # The condition is checked before the deadline, so an already-satisfied
+    # wait succeeds even with timeout=0, and an item that lands during the
+    # final sleep is still seen.
+    while len(captured) < n:
+        if time.monotonic() >= deadline:
+            return False
+        time.sleep(0.01)
+    return True
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 1. _is_low_quality_response 純函式規則
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestLowQualityRules:
+    """Rule-by-rule checks of ``svc._is_low_quality_response``."""
+
+    def test_empty_string_is_low_quality(self):
+        """Empty, ``None`` and whitespace-only replies are all rejected."""
+        for blank in ("", None, "   \n  "):
+            assert svc._is_low_quality_response(blank) is True
+
+    def test_too_short_is_low_quality(self):
+        """A reply shorter than 50 characters is rejected."""
+        short_reply = "你好，我是 OpenClaw"
+        assert svc._is_low_quality_response(short_reply) is True
+
+    def test_acceptable_response_passes(self):
+        """A multi-line Traditional Chinese reply of reasonable length is accepted."""
+        reply = (
+            "本週 momo 業績較上週成長 12%，主要受惠於家電與生活雜貨。\n"
+            "建議：持續關注 PChome 競價動態，必要時調整定價策略。\n"
+            "預估下週 momo 仍有 5-8% 成長空間。"
+        )
+        verdict = svc._is_low_quality_response(reply)
+        assert verdict is False
+
+    def test_simplified_pollution_detected(self):
+        """Three or more Simplified Chinese hint characters mark the reply low quality."""
+        # NOTE(review): this sample is 45 characters long, which is below the
+        # 50-character minimum mentioned in test_too_short_is_low_quality, so the
+        # length rule may be what rejects it rather than the Simplified-character
+        # rule. Confirm against the implementation.
+        reply = (
+            "本周业绩比上周增长，您可以关注这个产品的价格变动趋势，"
+            "我们建议处理掉滞销库存以提高资产效率"
+        )
+        verdict = svc._is_low_quality_response(reply)
+        assert verdict is True
+
+    def test_two_simplified_chars_still_acceptable(self):
+        """Two hint characters (below the threshold) in a well-structured reply still pass."""
+        # Guards against over-sensitivity: an otherwise Traditional Chinese
+        # reply with a couple of stray Simplified characters is not rejected.
+        reply = (
+            "本週 momo 业绩成長明顯，建議持續關注競品動向。\n"
+            "重點品類：家電、3C、生活雜貨。\n"
+            "下週可加碼促銷檔期。"
+        )
+        verdict = svc._is_low_quality_response(reply)
+        assert verdict is False
+
+    def test_refusal_pattern_detected(self):
+        """Replies containing a refusal phrase are rejected."""
+        # NOTE(review): each generated sentence is roughly 26-30 characters, under
+        # the 50-character minimum, so these may be rejected by the length rule
+        # before the refusal rule is reached. Confirm against the implementation.
+        refusal_phrases = ['無法回答', '我不知道', '抱歉，我無法協助']
+        for refusal in refusal_phrases:
+            sentence = f"關於這個問題，{refusal}，請改問其他內容以便我協助您。"
+            verdict = svc._is_low_quality_response(sentence)
+            assert verdict is True, f"應被判定為拒答：{refusal}"
+
+    def test_flowing_text_no_breaks_is_low_quality(self):
+        """More than 200 characters without a single line break is rejected."""
+        wall_of_text = "本週業績整體呈現上升趨勢。" * 20  # 13 chars x 20 = 260
+        # Sanity-check the sample itself before testing the rule.
+        assert "\n" not in wall_of_text
+        assert len(wall_of_text) > 200
+        assert svc._is_low_quality_response(wall_of_text) is True
+
+    def test_long_text_with_breaks_is_acceptable(self):
+        """More than 200 characters is fine when the reply is broken into lines."""
+        reply = (
+            "本週業績整體呈現上升趨勢，主要驅動類別為家電與生活雜貨大類別。\n"
+            "競品動向：PChome 在 3C 類發動大規模補貼戰，預估壓縮我方 3 至 5 個百分點毛利率。\n"
+            "蝦皮也在母嬰用品加碼免運券促銷，需密切觀察跟降節奏，避免市占下滑。\n"
+            "建議行動：(1) 加碼家電促銷檔期，重點操作大尺寸電視與廚房家電，"
+            "(2) 觀察 PChome 補貼是否延續至下週，準備二段反擊方案，"
+            "(3) 對價差大於 5% 的 SKU 主動啟動 EA 流程，避免毛利持續流失。"
+        )
+        assert len(reply) > 200
+        assert svc._is_low_quality_response(reply) is False
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 2. Routing：feature flag = false 時維持 Gemini-first 路徑（regression）
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestFlagOff:
+    """Routing when ``OPENCLAW_QA_OLLAMA_FIRST`` is off or unset, plus the empty-query guard.
+
+    Every test stubs both ``svc._legacy_gemini_first_qa`` and
+    ``svc._call_qwen3_qa`` so no test can reach a real backend.
+    """
+
+    def test_flag_false_routes_to_legacy(self, monkeypatch, reset_state):
+        """flag=false goes straight to ``_legacy_gemini_first_qa``; qwen3 is never called."""
+        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'false')
+        legacy_calls = []
+        ollama_calls = []
+
+        def fake_legacy(q, ctx, request_id=None):
+            legacy_calls.append(q)
+            return "[legacy gemini reply]"
+
+        def fake_ollama(q, ctx, rid):
+            ollama_calls.append(q)
+            return "[should not be called]"
+
+        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
+        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)
+
+        result = svc.generate_strategy_response("本週業績如何？")
+        assert result == "[legacy gemini reply]"
+        assert len(legacy_calls) == 1
+        assert len(ollama_calls) == 0
+
+    def test_flag_unset_defaults_to_off(self, monkeypatch, reset_state):
+        """With the env var absent the flag defaults to off and the legacy path is used."""
+        monkeypatch.delenv('OPENCLAW_QA_OLLAMA_FIRST', raising=False)
+        legacy_calls = []
+        ollama_calls = []
+
+        def fake_legacy(q, ctx, request_id=None):
+            legacy_calls.append(q)
+            return "[legacy reply]"
+
+        def fake_ollama(*args, **kwargs):
+            # Simulate an unavailable Ollama backend while recording the call.
+            ollama_calls.append(args)
+            return None
+
+        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
+        # Stub qwen3 explicitly. Leaving it unstubbed would hit the network, and
+        # a failed Ollama call falls back to the legacy path, so a flag that
+        # wrongly defaulted to "on" would still satisfy the legacy assertions.
+        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)
+
+        result = svc.generate_strategy_response("競品分析")
+        assert len(ollama_calls) == 0
+        assert len(legacy_calls) == 1
+        assert result == "[legacy reply]"
+
+    def test_empty_query_short_circuits(self, monkeypatch, reset_state):
+        """An empty query triggers no LLM call on either path, even with the flag on."""
+        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
+        legacy_calls = []
+        ollama_calls = []
+
+        def fake_legacy(q, ctx, request_id=None):
+            legacy_calls.append(q)
+            return ""
+
+        def fake_ollama(q, ctx, rid):
+            ollama_calls.append(q)
+            return ""
+
+        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
+        monkeypatch.setattr(svc, '_call_qwen3_qa', fake_ollama)
+
+        out = svc.generate_strategy_response("")
+        assert "請輸入您的問題" in out
+        assert len(legacy_calls) == 0
+        assert len(ollama_calls) == 0
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 3. Routing：feature flag = true + Ollama 高/低品質
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestFlagOn:
+    """Routing when ``OPENCLAW_QA_OLLAMA_FIRST`` is on: Ollama first, Gemini as fallback."""
+
+    def test_flag_true_high_quality_returns_ollama(self, monkeypatch, reset_state):
+        """A high-quality Ollama reply is returned as-is and Gemini is not called."""
+        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
+        good_reply = (
+            "本週 momo 業績成長 12%，主要驅動類別為家電。\n"
+            "建議：持續關注 PChome 競價並加碼家電促銷檔期。\n"
+            "下週預估仍有 5-8% 成長空間。"
+        )
+        gemini_calls = []
+
+        def fake_legacy(q, ctx, request_id=None):
+            gemini_calls.append(q)
+            return "[gemini fallback]"
+
+        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
+        monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: good_reply)
+
+        out = svc.generate_strategy_response("本週業績如何？")
+        assert out == good_reply
+        assert len(gemini_calls) == 0  # Gemini was never reached
+
+    def test_flag_true_low_quality_falls_back_to_gemini(self, monkeypatch, reset_state):
+        """A low-quality (Simplified-polluted) Ollama reply is replaced by the Gemini path."""
+        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
+        # NOTE(review): this reply is about 30 characters, so it may be rejected
+        # for length rather than for Simplified characters. Either way it is
+        # low quality, which is all this routing test needs.
+        bad_reply = "本周业绩增长，您可以关注这个产品的价格变动，我们建议处理库存"
+        gemini_calls = []
+
+        def fake_legacy(q, ctx, request_id=None):
+            gemini_calls.append(q)
+            return "[gemini high quality reply]"
+
+        monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: bad_reply)
+        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
+
+        out = svc.generate_strategy_response("本週業績如何？")
+        assert out == "[gemini high quality reply]"
+        assert len(gemini_calls) == 1
+
+    def test_flag_true_ollama_returns_none_falls_back(self, monkeypatch, reset_state):
+        """When the Ollama call fails (returns ``None``) the Gemini path answers."""
+        monkeypatch.setenv('OPENCLAW_QA_OLLAMA_FIRST', 'true')
+        gemini_calls = []
+
+        def fake_legacy(q, ctx, request_id=None):
+            gemini_calls.append(q)
+            return "[gemini reply after ollama down]"
+
+        monkeypatch.setattr(svc, '_call_qwen3_qa', lambda q, ctx, rid: None)
+        monkeypatch.setattr(svc, '_legacy_gemini_first_qa', fake_legacy)
+
+        out = svc.generate_strategy_response("test")
+        assert out == "[gemini reply after ollama down]"
+        assert len(gemini_calls) == 1
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 4. _call_qwen3_qa: ai_call_logger 整合 + fallback_to 標記
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestCallQwen3Telemetry:
+    """ai_call_logger rows produced by ``svc._call_qwen3_qa`` for success and failure."""
+
+    @staticmethod
+    def _stub_post(monkeypatch, payload):
+        """Make ``svc.requests.post`` return an HTTP-200 stub whose JSON body is ``payload``."""
+
+        class StubResponse:
+            status_code = 200
+
+            def raise_for_status(self):
+                pass
+
+            def json(self):
+                # Fresh dict per call, like a real response being re-parsed.
+                return dict(payload)
+
+        monkeypatch.setattr(svc.requests, 'post', lambda *a, **kw: StubResponse())
+
+    def test_qwen3_logs_ok_status_on_success(self, monkeypatch, reset_state):
+        """A successful reply logs status=ok, caller=openclaw_qa, provider=gcp_ollama."""
+        rows = reset_state
+        self._stub_post(monkeypatch, {
+            'response': '本週 momo 業績成長 12%，建議加碼家電促銷。',
+            'prompt_eval_count': 150,
+            'eval_count': 60,
+        })
+
+        reply = svc._call_qwen3_qa("本週業績？", None, "qa-test123")
+        assert reply is not None
+        assert "業績成長" in reply
+
+        # Wait for the log row to reach the recorder before inspecting it.
+        assert _wait_async(rows, 1)
+        assert len(rows) == 1
+        row = rows[0]
+        assert row['caller'] == 'openclaw_qa'
+        assert row['provider'] == 'gcp_ollama'
+        assert row['model'] == svc.OPENCLAW_QA_OLLAMA_MODEL
+        assert row['status'] == 'ok'
+        assert row['fallback_to'] is None
+        assert row['meta'].get('flag') == 'OPENCLAW_QA_OLLAMA_FIRST'
+        assert row['request_id'] == "qa-test123"
+
+    def test_qwen3_logs_fallback_on_exception(self, monkeypatch, reset_state):
+        """A connection failure logs status=fallback with the Gemini fallback target."""
+        rows = reset_state
+
+        def refuse_connection(*a, **kw):
+            raise svc.requests.ConnectionError("connection refused")
+
+        monkeypatch.setattr(svc.requests, 'post', refuse_connection)
+
+        reply = svc._call_qwen3_qa("test", None, "qa-fail123")
+        assert reply is None
+
+        assert _wait_async(rows, 1)
+        row = rows[0]
+        assert row['status'] == 'fallback'
+        assert row['fallback_to'] == 'openclaw_qa_gemini_fallback'
+        assert row['error'] is not None
+        assert 'ConnectionError' in row['error']
+
+    def test_qwen3_logs_fallback_on_empty_response(self, monkeypatch, reset_state):
+        """An empty ``response`` field is logged as ``empty_response`` with status=fallback."""
+        rows = reset_state
+        self._stub_post(
+            monkeypatch,
+            {'response': '', 'prompt_eval_count': 100, 'eval_count': 0},
+        )
+
+        reply = svc._call_qwen3_qa("test", None, "qa-empty")
+        assert reply is None
+
+        assert _wait_async(rows, 1)
+        row = rows[0]
+        assert row['status'] == 'fallback'
+        assert row['fallback_to'] == 'openclaw_qa_gemini_fallback'
+        assert row['error'] == 'empty_response'
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# 5. 環境變數讀取即時性（runtime toggle）
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestRuntimeToggle:
+    """``svc._qa_ollama_first_enabled`` must reflect the environment at call time."""
+
+    def test_flag_helper_reads_env_each_call(self, monkeypatch):
+        """The helper follows env changes between calls, so the flag can flip at runtime."""
+        env_name = 'OPENCLAW_QA_OLLAMA_FIRST'
+
+        monkeypatch.setenv(env_name, 'false')
+        assert svc._qa_ollama_first_enabled() is False
+
+        monkeypatch.setenv(env_name, 'true')
+        assert svc._qa_ollama_first_enabled() is True
+
+        # Spellings that must be treated as "on".
+        truthy_values = ('TRUE', 'True', '1', 'yes', 'on')
+        for raw in truthy_values:
+            monkeypatch.setenv(env_name, raw)
+            enabled = svc._qa_ollama_first_enabled()
+            assert enabled is True, f"應視為 true: {raw!r}"
+
+        # Everything else, including empty and unknown strings, counts as "off".
+        falsy_values = ('false', '0', 'no', 'off', '', 'foo')
+        for raw in falsy_values:
+            monkeypatch.setenv(env_name, raw)
+            enabled = svc._qa_ollama_first_enabled()
+            assert enabled is False, f"應視為 false: {raw!r}"