Some checks failed
CD Pipeline / deploy (push) Failing after 59s
- 建立 Gitea Actions CD pipeline (.gitea/workflows/cd.yaml) - 部署模式: rsync Python 檔案至 188 → docker restart (volume mount) - Dockerfile/requirements 變動時自動重建 Docker image - 部署通知: Telegram (開始/成功/失敗) - 健康檢查: https://mo.wooo.work/health (最多 5 次重試) - 同步最新 CLAUDE.md / ADR-008 / memory (2026-04-19) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
412 lines
18 KiB
Python
412 lines
18 KiB
Python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Hermes 3 競價情報分析服務 (Module 2)

角色:分析師 (Analyst)
模型:hermes3:latest @ 192.168.0.111:11434
輸入:SQL 漏斗篩選後的候選商品(~300筆)
輸出:Top N 威脅清單(結構化 JSON)→ 交給 NemoTron dispatcher

架構位置:
SQL漏斗 → [本服務] → NemotronDispatcher → Telegram 告警
"""
|
||
|
||
import json
|
||
import logging
|
||
import re
|
||
import time
|
||
from dataclasses import dataclass
|
||
from typing import Optional
|
||
|
||
import requests
|
||
from sqlalchemy import text
|
||
|
||
logger = logging.getLogger(__name__)

# Model tag sent in the request payload to the Hermes endpoint.
HERMES_MODEL = "hermes3:latest"
# Base URL of the inference server; "/api/generate" is appended per request.
HERMES_URL = "http://192.168.0.111:11434"
HERMES_TIMEOUT = 120  # seconds; a 300-item batch is estimated to take ~90s at most
TOP_N = 20  # number of top threats requested, to limit NemoTron quota use per run
|
||
|
||
|
||
@dataclass
class PriceThreat:
    """One product flagged as a competitive pricing threat.

    The numeric fields are filled from values computed in Python; ``risk``,
    ``recommended_action`` and ``confidence`` carry the LLM's classification.
    """

    sku: str
    name: str
    category: str
    momo_price: float
    pchome_price: float
    gap_pct: float  # positive means our (MOMO) price is the higher one
    sales_7d_delta_pct: float
    risk: str  # HIGH / MED / LOW
    recommended_action: str
    confidence: float
|
||
|
||
|
||
@dataclass
class AnalysisResult:
    """Outcome of one full analysis run.

    ``error`` is only set when ``success`` is False.
    """

    success: bool
    threats: list
    total_candidates: int
    analysis_duration_sec: float
    hermes_tokens: int = 0  # Ollama eval_count (shown in the footprint)
    error: Optional[str] = None
|
||
|
||
|
||
class HermesAnalystService:
    """Competitive pricing intelligence analyst.

    Pulls candidate products from the database, hands them to Hermes 3 for
    classification, and returns a structured list of threats.
    """

    SYSTEM_PROMPT = """你是一位台灣電商競價情報分析師。
規則:
1. 輸出「只能」是有效的 JSON 陣列,禁止任何前言或解釋文字
2. recommended_action 必須使用「台灣標準正體中文(繁體)」。
【語言鐵律 — 2026-04-18 台北強化】
a. 嚴禁簡體字(例:不可用「参给当为来国发会说时间过从实现这话动问题」,
必須用「參給當為來國發會說時間過從實現這話動問題」)
b. 嚴禁異體字(例:不可用「亊」,必須用「事」)
c. 嚴禁短語重複(例:不可輸出「當前事當前事」這種坍塌)
d. 嚴禁無意義字元組合或亂碼
e. 若無法產出合理的繁體中文說明,直接輸出「建議人工評估」
3. 風險等級判定:
- HIGH:價差 > 15% 且 7天銷量跌幅 > 20%
- MED:價差 > 10% 或 7天銷量跌幅 > 15%
- LOW:其他
4. confidence 根據數據確定性給分(0.0~1.0)
5. 【防幻覺鐵律】絕對禁止捏造輸入資料中未提供的數據(如折扣%、促銷活動、隱藏優惠)。
只能基於 gap_pct、sales_delta、competitor_tags 等已提供欄位做推論。
6. 【非價格異常路由】若 gap_pct 絕對值 < 5% 但 sales_delta < -30%:
- 判定為「非價格因素異常」(高機率:缺貨、下架、平台流量異常、頁面問題)
- risk 設為 MED,recommended_action 必須寫「價差接近零但業績異常下滑,建議立即人工走查前台頁面(確認是否缺貨/下架/頁面異常)」
- confidence 設為 0.5(因缺乏確切原因)"""

    # Risk labels accepted from the LLM; anything else is downgraded to LOW.
    _VALID_RISKS = ("HIGH", "MED", "LOW")

    def __init__(self, engine=None):
        """Store the (optionally injected) SQLAlchemy engine."""
        self.engine = engine  # SQLAlchemy engine, may be injected by the caller
        # Stats of the most recent Hermes call; read by run() for the footprint.
        self._last_stats = {}

    # ──────────────────────────────────────────────
    # Small pure helpers
    # ──────────────────────────────────────────────
    @staticmethod
    def _quote_ident(name: str) -> str:
        """Return ``name`` as a double-quoted SQL identifier with quotes escaped."""
        return '"' + str(name).replace('"', '""') + '"'

    @staticmethod
    def _coerce_confidence(value) -> float:
        """Convert the LLM confidence to a float in [0.0, 1.0]; 0.5 if unusable."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return 0.5
        return min(1.0, max(0.0, number))

    @classmethod
    def _coerce_risk(cls, value) -> str:
        """Normalise the LLM risk label to HIGH / MED / LOW (default LOW)."""
        label = str(value or "").strip().upper()
        if label == "MEDIUM":
            label = "MED"
        return label if label in cls._VALID_RISKS else "LOW"

    @staticmethod
    def _parse_threat_array(raw: str) -> list:
        """Parse the LLM reply into a list.

        Strips a surrounding markdown code fence, and if the text still is not
        valid JSON, retries on the outermost ``[...]`` span so that a stray
        preamble or trailing remark does not discard an otherwise valid reply.

        Raises:
            json.JSONDecodeError: no JSON could be parsed from the reply.
            ValueError: the reply is valid JSON but not an array.
        """
        cleaned = (raw or "").strip()
        if cleaned.startswith("```"):
            cleaned = re.sub(r"^```(?:json)?\s*", "", cleaned, flags=re.MULTILINE)
            cleaned = re.sub(r"\s*```\s*$", "", cleaned.strip(), flags=re.MULTILINE)
            cleaned = cleaned.strip()

        try:
            parsed = json.loads(cleaned)
        except json.JSONDecodeError:
            first, last = cleaned.find("["), cleaned.rfind("]")
            if first == -1 or last <= first:
                raise
            parsed = json.loads(cleaned[first:last + 1])

        if not isinstance(parsed, list):
            raise ValueError("Hermes 回傳內容不是 JSON 陣列")
        return parsed

    @staticmethod
    def _build_items(candidates: list, pchome_prices: dict = None) -> list:
        """Turn funnel rows into the compact, JSON-safe items sent to Hermes.

        Rows without a usable competitor price or MOMO price are skipped.
        All numbers are converted to ``float`` so that ``Decimal`` values
        coming from the database cannot break ``json.dumps``.
        """
        external = pchome_prices or {}
        items = []
        for c in candidates:
            # Prefer the competitor price cached in the DB; fall back to the
            # externally injected mapping.
            pchome_raw = c.get("pchome_price") or external.get(c["sku"])
            momo_raw = c.get("momo_price")
            if not pchome_raw or momo_raw is None:
                continue

            pchome_price = float(pchome_raw)
            if pchome_price <= 0:
                # Would divide by zero (or yield a meaningless gap) below.
                continue
            momo_price = float(momo_raw)
            sales_prev = float(c.get("sales_7d_prev") or 0)
            sales_curr = float(c.get("sales_7d_curr") or 0)
            delta_pct = (
                round((sales_curr - sales_prev) / sales_prev * 100, 1)
                if sales_prev else 0
            )
            gap_pct = round((momo_price - pchome_price) / pchome_price * 100, 1)

            # Competitor semantic tags: may arrive as a list or a JSON string.
            raw_tags = c.get("competitor_tags") or []
            if isinstance(raw_tags, str):
                try:
                    raw_tags = json.loads(raw_tags)
                except (ValueError, TypeError):
                    raw_tags = []

            item = {
                "sku": c["sku"],
                "name": str(c.get("name") or "")[:30],  # truncated to limit tokens
                "category": c.get("category", ""),
                "momo": momo_price,
                "pchome": pchome_price,
                "gap_pct": gap_pct,  # pre-computed in Python; Hermes only classifies
                "sales_delta": delta_pct,
            }
            if raw_tags:
                item["competitor_tags"] = raw_tags  # extra context for Hermes
            items.append(item)
        return items

    @classmethod
    def _merge_threats(cls, raw_threats: list, ground_items: list) -> list:
        """Combine LLM classifications with the Python-computed ground truth.

        Objective numbers (prices, gap, sales delta) always come from
        ``ground_items``; only risk / recommended_action / confidence are taken
        from the LLM. Entries that are not objects, reference an unknown SKU,
        or repeat a SKU already seen are skipped.
        """
        # Compare SKUs as strings so a numeric SKU emitted by the LLM still
        # matches a digit-only string SKU from the database.
        items_by_sku = {str(i["sku"]): i for i in ground_items}
        seen = set()
        threats = []
        for t in raw_threats:
            if not isinstance(t, dict):
                continue
            sku = t.get("sku")
            key = str(sku)
            ground = items_by_sku.get(key)
            if not ground:
                logger.warning(
                    "[Hermes] LLM 回吐未知 SKU=%s,不在 items 清單,防幻覺跳過", sku
                )
                continue
            if key in seen:
                continue
            seen.add(key)
            threats.append(PriceThreat(
                sku=ground["sku"],
                name=ground["name"],
                category=ground.get("category", ""),
                momo_price=float(ground["momo"]),
                pchome_price=float(ground["pchome"]),
                gap_pct=float(ground["gap_pct"]),
                sales_7d_delta_pct=float(ground["sales_delta"]),
                risk=cls._coerce_risk(t.get("risk")),
                recommended_action=str(t.get("recommended_action") or ""),
                confidence=cls._coerce_confidence(t.get("confidence")),
            ))
        return threats

    # ──────────────────────────────────────────────
    # Step 1: SQL funnel — narrow the sales data down to ~300 candidates
    # ──────────────────────────────────────────────
    def _validate_snapshot_columns(self) -> tuple:
        """Locate the product-name and sales-amount columns of daily_sales_snapshot.

        Returns:
            (name_column, sales_column)

        Raises:
            RuntimeError: no engine has been injected.
            ValueError: a required column cannot be found (raised instead of
                letting the SQL fail silently).
        """
        if self.engine is None:
            raise RuntimeError("需要注入 SQLAlchemy engine")

        with self.engine.connect() as conn:
            res = conn.execute(text("SELECT * FROM daily_sales_snapshot LIMIT 0"))
            cols = list(res.keys())

        def find_col(keywords):
            # Keyword order is the priority; first column containing it wins.
            return next(
                (col for kw in keywords for col in cols if kw in col), None
            )

        name_col = find_col(['商品名稱', '品名', 'Name', 'Product'])
        sales_col = find_col(['銷售金額', '業績', '金額', 'Amount', 'Sales', 'Total', '總業績'])

        if not name_col or not sales_col:
            raise ValueError(f"daily_sales_snapshot 缺少必要欄位!目前欄位: {cols}")
        return name_col, sales_col

    def fetch_candidates(self) -> list:
        """Run the threat-candidate funnel (v2, LEFT JOIN on competitor_prices).

        Selects active products whose sales in the last 7 days dropped by more
        than 10% versus the preceding 7 days and that have a MOMO price record.
        Unexpired PChome rows from competitor_prices (match_score >= 0.45) are
        joined in to supply pchome_price and competitor_tags. Products without
        competitor data are still returned with pchome_price NULL and are
        skipped later when the Hermes batch is built.

        Returns:
            list of dict, at most 300 rows, largest sales drop first.

        Raises:
            RuntimeError: no engine has been injected.
        """
        if self.engine is None:
            raise RuntimeError("需要注入 SQLAlchemy engine")

        name_col, sales_col = self._validate_snapshot_columns()
        name_ident = self._quote_ident(name_col)
        sales_ident = self._quote_ident(sales_col)

        # NOTE: products.i_code and the snapshot's product ID are different ID
        # systems and cannot be joined directly, so the product name is used
        # as the bridge between the two tables.
        sql_str = f"""
            WITH latest_momo_price AS (
                SELECT
                    p.i_code AS sku,
                    p.name,
                    p.category,
                    pr.price AS momo_price,
                    ROW_NUMBER() OVER (PARTITION BY p.id ORDER BY pr.timestamp DESC) AS rn
                FROM products p
                JOIN price_records pr ON pr.product_id = p.id
                WHERE p.status = 'ACTIVE'
            ),
            recent_sales AS (
                SELECT
                    {name_ident} AS product_name,
                    SUM(CASE WHEN snapshot_date::date >= CURRENT_DATE - 7
                        THEN COALESCE({sales_ident}::numeric, 0) ELSE 0 END) AS sales_7d_curr,
                    SUM(CASE WHEN snapshot_date::date >= CURRENT_DATE - 14
                        AND snapshot_date::date < CURRENT_DATE - 7
                        THEN COALESCE({sales_ident}::numeric, 0) ELSE 0 END) AS sales_7d_prev
                FROM daily_sales_snapshot
                GROUP BY {name_ident}
            )
            SELECT
                lmp.sku,
                lmp.name,
                lmp.category,
                lmp.momo_price,
                rs.sales_7d_curr,
                rs.sales_7d_prev,
                cp.price AS pchome_price,
                cp.tags AS competitor_tags,
                cp.match_score AS competitor_match_score
            FROM latest_momo_price lmp
            JOIN recent_sales rs ON rs.product_name = lmp.name
            LEFT JOIN competitor_prices cp
                ON cp.sku = lmp.sku
                AND cp.source = 'pchome'
                AND cp.expires_at > NOW()
                AND cp.match_score >= 0.45
            WHERE lmp.rn = 1
                AND rs.sales_7d_prev > 0
                AND (rs.sales_7d_curr - rs.sales_7d_prev) / rs.sales_7d_prev < -0.10
            ORDER BY (rs.sales_7d_curr - rs.sales_7d_prev) / rs.sales_7d_prev ASC
            LIMIT 300
        """

        with self.engine.connect() as conn:
            rows = conn.execute(text(sql_str)).fetchall()
        return [dict(row._mapping) for row in rows]

    # ──────────────────────────────────────────────
    # Step 2: send the batch to Hermes 3 for analysis
    # ──────────────────────────────────────────────
    def _batch_analyze(self, candidates: list, pchome_prices: dict = None) -> tuple:
        """Package candidates plus competitor prices and have Hermes classify them.

        Args:
            candidates: funnel rows (list of dict); since v2 they already carry
                pchome_price and competitor_tags.
            pchome_prices: legacy externally injected ``{sku: price}`` mapping,
                used only when a row has no price of its own.

        Returns:
            tuple ``(raw_threats, items)``:
            - raw_threats: the JSON array produced by the LLM.
            - items: the Python-computed figures, used by run() as the source
              of truth so that a field the LLM omits never turns into a 0.

        Raises:
            requests.RequestException: the HTTP call failed or timed out.
            json.JSONDecodeError: the reply contained no parseable JSON.
            ValueError: the reply was valid JSON but not an array.
        """
        # Reset first so an early return or a failure never reports the
        # token count of a previous call.
        self._last_stats = {"duration_sec": 0.0, "tokens": 0}

        items = self._build_items(candidates, pchome_prices)
        if not items:
            return [], []

        prompt = (
            f"分析以下 {len(items)} 支商品的競價威脅,回傳前 {TOP_N} 個最高風險商品。\n\n"
            f"資料:{json.dumps(items, ensure_ascii=False)}\n\n"
            f"輸出格式(JSON 陣列,每筆含):\n"
            f'[{{"sku": string, "name": string, "category": string, '
            f'"momo_price": number, "pchome_price": number, '
            f'"gap_pct": number, "sales_7d_delta_pct": number, '
            f'"risk": "HIGH|MED|LOW", "recommended_action": string, "confidence": number}}]'
        )
        payload = {
            "model": HERMES_MODEL,
            "system": self.SYSTEM_PROMPT,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0.1},
        }

        resp = requests.post(
            f"{HERMES_URL}/api/generate",
            json=payload,
            timeout=HERMES_TIMEOUT,
        )
        resp.raise_for_status()

        data = resp.json()
        raw = (data.get("response") or "").strip()
        duration_sec = round((data.get("total_duration") or 0) / 1e9, 1)
        eval_count = data.get("eval_count")  # inference token count from Ollama
        logger.info(
            "[Hermes] 推理耗時 %ss,輸入 %d 筆,tokens=%s,回應長度 %d",
            duration_sec,
            len(items),
            "?" if eval_count is None else eval_count,
            len(raw),
        )
        # Keep the stats on the instance for run(); tokens stays an int so it
        # fits AnalysisResult.hermes_tokens even when the count is missing.
        self._last_stats = {
            "duration_sec": duration_sec,
            "tokens": eval_count if isinstance(eval_count, int) else 0,
        }

        if raw.startswith("```"):
            logger.debug("[Hermes] 已剝除 markdown code fence")
        return self._parse_threat_array(raw), items

    # ──────────────────────────────────────────────
    # Public interface
    # ──────────────────────────────────────────────
    def run(self, pchome_prices: dict = None) -> "AnalysisResult":
        """Execute the full pricing-intelligence flow.

        Args:
            pchome_prices: optional legacy ``{sku: price}`` mapping. Since v2
                competitor prices come from the competitor_prices table; this
                argument only fills gaps in that cache.

        Returns:
            AnalysisResult. Failures are reported through ``success=False``
            and ``error`` rather than raised.
        """
        start = time.time()
        candidates = []

        def failure(message):
            return AnalysisResult(
                success=False, threats=[], total_candidates=len(candidates),
                analysis_duration_sec=time.time() - start,
                error=message,
            )

        try:
            candidates = self.fetch_candidates()
            logger.info("[漏斗] 候選商品 %d 筆", len(candidates))

            if not candidates:
                return AnalysisResult(
                    success=True, threats=[], total_candidates=0,
                    analysis_duration_sec=time.time() - start,
                )

            raw_threats, ground_items = self._batch_analyze(candidates, pchome_prices or {})
            threats = self._merge_threats(raw_threats, ground_items)

            hermes_stats = getattr(self, "_last_stats", {})
            return AnalysisResult(
                success=True,
                threats=threats,
                total_candidates=len(candidates),
                analysis_duration_sec=round(time.time() - start, 2),
                hermes_tokens=hermes_stats.get("tokens", 0),
            )
        except requests.Timeout:
            return failure(f"Hermes 推理超時(>{HERMES_TIMEOUT}s),候選數量可能過多")
        except json.JSONDecodeError as e:
            return failure(f"Hermes JSON 解析失敗:{e}")
        except Exception as e:
            logger.exception("[HermesAnalyst] 分析失敗")
            return failure(str(e))
|
||
|
||
|
||
# ─────────────────────────────────────────────
# CLI smoke test (does not need the database)
#   python3 services/hermes_analyst_service.py
# ─────────────────────────────────────────────
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

    service = HermesAnalystService()

    fake_candidates = [
        {"sku": "A001", "name": "玻尿酸面膜10片裝", "category": "美妝保養",
         "momo_price": 320, "sales_7d_curr": 58000, "sales_7d_prev": 100000},
        {"sku": "A003", "name": "舒特膚AD乳液200ml", "category": "美妝保養",
         "momo_price": 1200, "sales_7d_curr": 65000, "sales_7d_prev": 100000},
        {"sku": "A005", "name": "玻尿酸精華液30ml", "category": "美妝保養",
         "momo_price": 890, "sales_7d_curr": 72000, "sales_7d_prev": 100000},
        {"sku": "A007", "name": "眼霜15ml", "category": "美妝保養",
         "momo_price": 680, "sales_7d_curr": 82000, "sales_7d_prev": 100000},
        {"sku": "A009", "name": "美白化妝水150ml", "category": "美妝保養",
         "momo_price": 420, "sales_7d_curr": 78000, "sales_7d_prev": 100000},
    ]
    fake_pchome = {"A001": 280, "A003": 980, "A005": 760, "A007": 590, "A009": 350}

    print("=== Hermes 3 競價分析 CLI 測試 ===\n")
    # _batch_analyze returns (raw_threats, items); only the LLM output is
    # printed here, so the ground-truth items are discarded.
    raw_threats, _ground_items = service._batch_analyze(fake_candidates, fake_pchome)

    risk_icons = {"HIGH": "🔴", "MED": "🟡", "LOW": "🟢"}
    for t in raw_threats:
        if not isinstance(t, dict):
            continue
        icon = risk_icons.get(t.get("risk", ""), "⚪")
        # The LLM may omit fields; fall back to "?" instead of raising.
        gap = t.get("gap_pct")
        gap_text = f"{gap:.1f}" if isinstance(gap, (int, float)) else "?"
        print(f"{icon} [{t.get('sku', '?')}] {t.get('name', '?')}")
        print(
            f" MOMO ${t.get('momo_price', '?')} vs PChome ${t.get('pchome_price', '?')}"
            f" → 價差 {gap_text}%"
        )
        print(
            f" 銷量 {t.get('sales_7d_delta_pct', '?')}%"
            f" | 建議:{t.get('recommended_action', '')}\n"
        )
|