feat(competitor): persist match attempts
All checks were successful
CD Pipeline / deploy (push) Successful in 2m9s
All checks were successful
CD Pipeline / deploy (push) Successful in 2m9s
This commit is contained in:
4
app.py
4
app.py
@@ -95,8 +95,8 @@ except Exception as e:
|
||||
sys_log.error(f"無法檢測磁碟空間: {e}")
|
||||
|
||||
# 🚩 系統版本定義 (備份與顯示用)
|
||||
# 🚩 2026-05-01 V10.71: Persist full campaign crawl snapshots
|
||||
SYSTEM_VERSION = "V10.71"
|
||||
# 🚩 2026-05-01 V10.72: Persist competitor match attempts and schedule remaining V2 pages
|
||||
SYSTEM_VERSION = "V10.72"
|
||||
|
||||
# ==========================================
|
||||
# 🔒 SQL Injection 防護函數
|
||||
|
||||
@@ -254,7 +254,7 @@ YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY', '')
|
||||
# ==========================================
|
||||
# 系統版本與路徑
|
||||
# ==========================================
|
||||
SYSTEM_VERSION = "V10.71"
|
||||
SYSTEM_VERSION = "V10.72"
|
||||
LOG_FILE_PATH = os.path.join(BASE_DIR, 'logs/system.log')
|
||||
public_url = PUBLIC_URL # 用於模板顯示
|
||||
|
||||
|
||||
@@ -142,13 +142,23 @@
|
||||
|
||||
目標:讓常用營運頁進入新版 shell,但先不重寫全部細節。
|
||||
|
||||
建議順序:
|
||||
推進順序:
|
||||
|
||||
1. `web/templates/vendor_stockout/index.html`(已以 `templates/vendor_stockout_index_v2.html` feature flag 方式落地)
|
||||
2. `web/templates/vendor_stockout/list.html`(已以 `templates/vendor_stockout_list_v2.html` feature flag 方式落地)
|
||||
3. `templates/auto_import_index.html`
|
||||
4. `templates/ai_recommend.html`
|
||||
5. `templates/settings.html` / `templates/crawler_management.html`
|
||||
1. `templates/ai_intelligence.html` / `templates/ai_recommend.html` / `templates/ai_history.html`
|
||||
- 原因:AI 挑品、PChome 比對、建議信心度與待補證據是目前商品看板下一步核心工作流。
|
||||
- 目標:新版 shell、AI 挑品清單、比對嘗試狀態、信心度證據缺口、操作回饋。
|
||||
2. `templates/auto_import_index.html`
|
||||
- 原因:所有真實資料要完整入庫,匯入頁是營運資料進入系統的主要入口。
|
||||
- 目標:新版 shell、匯入批次狀態、錯誤列、最近匯入紀錄與資料表落點。
|
||||
3. `templates/sales_analysis.html` / `templates/daily_sales.html` / `templates/monthly_summary_analysis.html`
|
||||
- 原因:AI 挑品信心度需要銷售額、銷量、成本、毛利等證據支撐。
|
||||
- 目標:保留原報表功能,改版為可掃描的營運分析工作台。
|
||||
4. `web/templates/vendor_stockout/index.html`、`list.html`、`import.html`
|
||||
- 狀態:首頁、列表、匯入已先以 `templates/vendor_stockout_*_v2.html` 方式落地;後續補齊 history、vendor management、send email。
|
||||
5. 系統管理相關頁:`templates/settings.html`、`templates/system_settings.html`、`templates/user_management.html`、`templates/logs.html`
|
||||
- 目標:新版 shell、設定群組化、狀態/權限/日誌統一呈現。
|
||||
6. PChome / 趨勢 / 其他工具頁:`templates/pchome_crawler.html`、`templates/price_comparison.html`、`templates/trends.html`
|
||||
- 目標:接入新版 shell 與資料完整性狀態,不改核心爬蟲規則。
|
||||
|
||||
工作內容:
|
||||
|
||||
|
||||
52
migrations/023_competitor_match_attempts.sql
Normal file
52
migrations/023_competitor_match_attempts.sql
Normal file
@@ -0,0 +1,52 @@
|
||||
-- =============================================================================
-- Migration 023: competitor match attempt history table
-- MOMO PRO — PChome match observability
-- 2026-05-01 Taipei
-- =============================================================================
-- Notes:
--   competitor_prices / competitor_price_history only keep successful matches.
--   competitor_match_attempts is append-only and stores every PChome match
--   attempt, including matched, low_score, no_result, no_match and error.
--   This gives "pending match" products a traceable history that can be used
--   to tune search terms, thresholds and AI product-picking confidence.
-- =============================================================================

CREATE TABLE IF NOT EXISTS competitor_match_attempts (
    id                              BIGSERIAL PRIMARY KEY,

    -- MOMO-side product identity and a snapshot at attempt time
    sku                             VARCHAR(50) NOT NULL,
    source                          VARCHAR(30) NOT NULL DEFAULT 'pchome',
    momo_product_id                 INTEGER,
    momo_product_name               TEXT,
    momo_price                      NUMERIC(10,2),

    -- This match attempt
    search_terms                    JSONB DEFAULT '[]'::jsonb,
    candidate_count                 INTEGER DEFAULT 0,
    attempt_status                  VARCHAR(30) NOT NULL,

    -- Best candidate; stored even when the score is low, for later analysis
    best_competitor_product_id      VARCHAR(100),
    best_competitor_product_name    TEXT,
    best_competitor_price           NUMERIC(10,2),
    best_match_score                NUMERIC(4,3),

    -- Error or external API failure
    error_message                   TEXT,

    attempted_at                    TIMESTAMP NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_sku_source_time
    ON competitor_match_attempts (sku, source, attempted_at DESC);

CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_status_time
    ON competitor_match_attempts (attempt_status, attempted_at DESC);

DO $$
BEGIN
    -- Only grant when the application role exists, so the migration does not
    -- abort on databases (e.g. local/CI) that were provisioned without it.
    IF EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'momo') THEN
        GRANT ALL PRIVILEGES ON competitor_match_attempts TO momo;
        GRANT USAGE, SELECT ON SEQUENCE competitor_match_attempts_id_seq TO momo;
    ELSE
        RAISE NOTICE 'Role "momo" not found; skipping GRANTs on competitor_match_attempts';
    END IF;

    RAISE NOTICE '✅ Migration 023 完成 — competitor_match_attempts 比對嘗試歷史表已建立';
END $$;
|
||||
@@ -51,6 +51,7 @@ class FeederResult:
|
||||
errors: int
|
||||
duration_sec: float
|
||||
history_written: int = 0
|
||||
attempts_written: int = 0
|
||||
|
||||
|
||||
def _extract_tags(pchome_product) -> list:
|
||||
@@ -186,11 +187,11 @@ def _find_best_match(momo_name: str, pchome_products: list) -> Optional[tuple]:
|
||||
return (best, best_score) if best else None
|
||||
|
||||
|
||||
def _search_pchome_candidates(crawler, momo_name: str) -> list:
|
||||
def _search_pchome_candidates(crawler, momo_name: str, keywords: list = None) -> list:
|
||||
"""以多組搜尋詞擴大 PChome 候選池,找到可信候選後提早停止。"""
|
||||
candidates = []
|
||||
seen_ids = set()
|
||||
for keyword in _build_search_keywords(momo_name):
|
||||
for keyword in keywords or _build_search_keywords(momo_name):
|
||||
ok, _, products = crawler.search_products(keyword, limit=SEARCH_LIMIT)
|
||||
if not ok or not products:
|
||||
continue
|
||||
@@ -259,6 +260,7 @@ class CompetitorPriceFeeder:
|
||||
def __init__(self, engine=None):
|
||||
self.engine = engine
|
||||
self._history_table_ready = False
|
||||
self._attempt_table_ready = False
|
||||
|
||||
def _ensure_competitor_price_history_table(self, conn):
|
||||
"""確保競品價格歷史表存在;排程可自癒補表,不依賴手動 migration。"""
|
||||
@@ -321,6 +323,120 @@ class CompetitorPriceFeeder:
|
||||
|
||||
self._history_table_ready = True
|
||||
|
||||
def _ensure_competitor_match_attempts_table(self, conn):
    """Create the competitor_match_attempts table and its indexes if missing.

    The table is meant to keep a trace of every PChome match attempt:
    successes, low scores, empty results and errors. The DDL is issued at
    most once per feeder instance; once ``_attempt_table_ready`` is set,
    later calls return immediately.

    Args:
        conn: SQLAlchemy connection used to execute the DDL. Its
            ``dialect.name`` selects which CREATE TABLE variant is used.
    """
    if self._attempt_table_ready:
        return

    from sqlalchemy import text

    postgres_table_ddl = """
        CREATE TABLE IF NOT EXISTS competitor_match_attempts (
            id BIGSERIAL PRIMARY KEY,
            sku VARCHAR(50) NOT NULL,
            source VARCHAR(30) NOT NULL DEFAULT 'pchome',
            momo_product_id INTEGER,
            momo_product_name TEXT,
            momo_price NUMERIC(10,2),
            search_terms JSONB DEFAULT '[]'::jsonb,
            candidate_count INTEGER DEFAULT 0,
            attempt_status VARCHAR(30) NOT NULL,
            best_competitor_product_id VARCHAR(100),
            best_competitor_product_name TEXT,
            best_competitor_price NUMERIC(10,2),
            best_match_score NUMERIC(4,3),
            error_message TEXT,
            attempted_at TIMESTAMP NOT NULL DEFAULT NOW()
        )
    """
    # Fallback for every non-PostgreSQL dialect (AUTOINCREMENT syntax —
    # presumably SQLite; confirm against the engines actually in use).
    fallback_table_ddl = """
        CREATE TABLE IF NOT EXISTS competitor_match_attempts (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            sku VARCHAR(50) NOT NULL,
            source VARCHAR(30) NOT NULL DEFAULT 'pchome',
            momo_product_id INTEGER,
            momo_product_name TEXT,
            momo_price NUMERIC(10,2),
            search_terms TEXT DEFAULT '[]',
            candidate_count INTEGER DEFAULT 0,
            attempt_status VARCHAR(30) NOT NULL,
            best_competitor_product_id VARCHAR(100),
            best_competitor_product_name TEXT,
            best_competitor_price NUMERIC(10,2),
            best_match_score NUMERIC(4,3),
            error_message TEXT,
            attempted_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
        )
    """
    # The index definitions are the same for both dialect branches.
    sku_source_time_index_ddl = """
        CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_sku_source_time
        ON competitor_match_attempts (sku, source, attempted_at DESC)
    """
    status_time_index_ddl = """
        CREATE INDEX IF NOT EXISTS idx_comp_match_attempts_status_time
        ON competitor_match_attempts (attempt_status, attempted_at DESC)
    """

    is_postgres = conn.dialect.name == "postgresql"
    table_ddl = postgres_table_ddl if is_postgres else fallback_table_ddl

    # Table first, then the two indexes, in the original order.
    for statement in (table_ddl, sku_source_time_index_ddl, status_time_index_ddl):
        conn.execute(text(statement))

    self._attempt_table_ready = True
|
||||
|
||||
def _record_match_attempt(
    self,
    sku: str,
    momo_name: str,
    momo_product_id: int = None,
    momo_price: float = None,
    search_terms: list = None,
    candidate_count: int = 0,
    attempt_status: str = "unknown",
    best_product=None,
    best_score: float = None,
    error_message: str = None,
    source: str = "pchome",
) -> None:
    """Append one PChome match attempt row to competitor_match_attempts.

    Recording every attempt lets pending and low-confidence products be
    traced back later, not only the successful matches.

    Args:
        sku: MOMO-side SKU the attempt was made for.
        momo_name: MOMO product name, stored as ``momo_product_name``.
        momo_product_id: Optional MOMO product id.
        momo_price: Optional MOMO price at attempt time.
        search_terms: Search keywords used; stored as a JSON array
            (``[]`` when omitted).
        candidate_count: Number of candidates the search returned.
        attempt_status: Outcome label for the attempt.
        best_product: Best candidate, if any. Its ``product_id``, ``name``
            and ``price`` attributes are read when present.
        best_score: Match score of the best candidate.
        error_message: Error text; truncated to 1000 characters.
        source: Competitor source label.

    Raises:
        Exception: Any error raised while creating the table or inserting
            the row is propagated to the caller.
    """
    from sqlalchemy import text

    # Create the table in its OWN transaction. If the DDL shared the INSERT's
    # transaction, a failing INSERT would roll the CREATE TABLE back (DDL is
    # transactional on PostgreSQL) while the ready flag stayed True, and the
    # table would never be created again by this instance.
    if not self._attempt_table_ready:
        try:
            with self.engine.begin() as ddl_conn:
                self._ensure_competitor_match_attempts_table(ddl_conn)
        except Exception:
            # The DDL did not commit; make sure the next call retries it.
            self._attempt_table_ready = False
            raise

    params = {
        "sku": sku,
        "source": source,
        "momo_product_id": momo_product_id,
        "momo_product_name": momo_name,
        "momo_price": momo_price,
        "search_terms": json.dumps(search_terms or [], ensure_ascii=False),
        "candidate_count": candidate_count,
        "attempt_status": attempt_status,
        "best_id": getattr(best_product, "product_id", None),
        # Empty or missing names are stored as NULL rather than ''.
        "best_name": (getattr(best_product, "name", None) or "")[:300] or None,
        "best_price": getattr(best_product, "price", None),
        "best_score": best_score,
        "error_message": (error_message or "")[:1000] or None,
    }

    with self.engine.begin() as conn:
        # PostgreSQL stores search_terms as JSONB, so the bound string needs
        # an explicit cast; other dialects keep it as plain text.
        search_terms_expr = "CAST(:search_terms AS jsonb)" if conn.dialect.name == "postgresql" else ":search_terms"
        conn.execute(text(f"""
            INSERT INTO competitor_match_attempts
                (sku, source, momo_product_id, momo_product_name, momo_price,
                 search_terms, candidate_count, attempt_status,
                 best_competitor_product_id, best_competitor_product_name,
                 best_competitor_price, best_match_score, error_message,
                 attempted_at)
            VALUES
                (:sku, :source, :momo_product_id, :momo_product_name, :momo_price,
                 {search_terms_expr}, :candidate_count, :attempt_status,
                 :best_id, :best_name,
                 :best_price, :best_score, :error_message,
                 CURRENT_TIMESTAMP)
        """), params)
|
||||
|
||||
def _fetch_active_skus(self) -> list:
|
||||
"""
|
||||
從 products 表取得待監控的 ACTIVE 商品清單
|
||||
@@ -489,22 +605,46 @@ class CompetitorPriceFeeder:
|
||||
skipped_low = 0
|
||||
errors = 0
|
||||
history_written = 0
|
||||
attempts_written = 0
|
||||
|
||||
for item in skus:
|
||||
sku = item["sku"]
|
||||
momo_name = item["name"]
|
||||
momo_product_id = item.get("product_id")
|
||||
momo_price = item.get("momo_price")
|
||||
search_terms = _build_search_keywords(momo_name)
|
||||
|
||||
try:
|
||||
products = _search_pchome_candidates(crawler, momo_name)
|
||||
products = _search_pchome_candidates(crawler, momo_name, search_terms)
|
||||
if not products:
|
||||
logger.debug(f"[Feeder] {sku} 無搜尋結果,跳過")
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
momo_product_id=momo_product_id,
|
||||
momo_price=momo_price,
|
||||
search_terms=search_terms,
|
||||
candidate_count=0,
|
||||
attempt_status="no_result",
|
||||
source=source,
|
||||
)
|
||||
attempts_written += 1
|
||||
skipped_no += 1
|
||||
continue
|
||||
|
||||
result = _find_best_match(momo_name, products)
|
||||
if not result:
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
momo_product_id=momo_product_id,
|
||||
momo_price=momo_price,
|
||||
search_terms=search_terms,
|
||||
candidate_count=len(products),
|
||||
attempt_status="no_match",
|
||||
source=source,
|
||||
)
|
||||
attempts_written += 1
|
||||
skipped_no += 1
|
||||
continue
|
||||
|
||||
@@ -514,6 +654,19 @@ class CompetitorPriceFeeder:
|
||||
logger.debug(
|
||||
f"[Feeder] {sku} 比對分數過低 ({score:.3f} < {MIN_MATCH_SCORE}),跳過"
|
||||
)
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
momo_product_id=momo_product_id,
|
||||
momo_price=momo_price,
|
||||
search_terms=search_terms,
|
||||
candidate_count=len(products),
|
||||
attempt_status="low_score",
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
source=source,
|
||||
)
|
||||
attempts_written += 1
|
||||
skipped_low += 1
|
||||
continue
|
||||
|
||||
@@ -527,8 +680,21 @@ class CompetitorPriceFeeder:
|
||||
momo_price=momo_price,
|
||||
source=source,
|
||||
)
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
momo_product_id=momo_product_id,
|
||||
momo_price=momo_price,
|
||||
search_terms=search_terms,
|
||||
candidate_count=len(products),
|
||||
attempt_status="matched",
|
||||
best_product=best_product,
|
||||
best_score=score,
|
||||
source=source,
|
||||
)
|
||||
matched += 1
|
||||
history_written += 1
|
||||
attempts_written += 1
|
||||
logger.debug(
|
||||
f"[Feeder] {sku} → PChome ${best_product.price} "
|
||||
f"score={score:.3f} tags={tags}"
|
||||
@@ -536,13 +702,27 @@ class CompetitorPriceFeeder:
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[Feeder] {sku} 處理失敗: {e}")
|
||||
try:
|
||||
self._record_match_attempt(
|
||||
sku,
|
||||
momo_name,
|
||||
momo_product_id=momo_product_id,
|
||||
momo_price=momo_price,
|
||||
search_terms=search_terms,
|
||||
attempt_status="error",
|
||||
error_message=str(e),
|
||||
source=source,
|
||||
)
|
||||
attempts_written += 1
|
||||
except Exception as attempt_error:
|
||||
logger.warning(f"[Feeder] {sku} 比對嘗試紀錄寫入失敗: {attempt_error}")
|
||||
errors += 1
|
||||
|
||||
duration = round(time.time() - start, 2)
|
||||
logger.info(
|
||||
f"[Feeder] 完成 matched={matched} skipped_no={skipped_no} "
|
||||
f"skipped_low={skipped_low} errors={errors} "
|
||||
f"history_written={history_written} 耗時={duration}s"
|
||||
f"history_written={history_written} attempts_written={attempts_written} 耗時={duration}s"
|
||||
)
|
||||
return FeederResult(
|
||||
total_skus=len(skus),
|
||||
@@ -552,6 +732,7 @@ class CompetitorPriceFeeder:
|
||||
errors=errors,
|
||||
duration_sec=duration,
|
||||
history_written=history_written,
|
||||
attempts_written=attempts_written,
|
||||
)
|
||||
|
||||
def run(self, source: str = "pchome") -> FeederResult:
|
||||
|
||||
28
tests/test_competitor_match_attempts_persistence.py
Normal file
28
tests/test_competitor_match_attempts_persistence.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
|
||||
|
||||
def test_competitor_feeder_persists_all_match_attempt_outcomes():
    """Check that the feeder source and migration 023 contain the expected markers.

    This is a text-level check: it reads both files and asserts that the
    attempt-persistence identifiers, every attempt status literal and the
    migration's table/column/index names appear in them.
    """
    feeder_code = (ROOT / "services/competitor_price_feeder.py").read_text(encoding="utf-8")
    migration_sql = (ROOT / "migrations/023_competitor_match_attempts.sql").read_text(encoding="utf-8")

    feeder_markers = (
        "attempts_written",
        "_ensure_competitor_match_attempts_table",
        "_record_match_attempt",
        "INSERT INTO competitor_match_attempts",
        "CAST(:search_terms AS jsonb)",
        'attempt_status="matched"',
        'attempt_status="low_score"',
        'attempt_status="no_result"',
        'attempt_status="no_match"',
        'attempt_status="error"',
        "_search_pchome_candidates(crawler, momo_name, search_terms)",
    )
    migration_markers = (
        "CREATE TABLE IF NOT EXISTS competitor_match_attempts",
        "attempt_status",
        "search_terms",
        "best_match_score",
        "error_message",
        "idx_comp_match_attempts_sku_source_time",
    )

    for marker in feeder_markers:
        assert marker in feeder_code

    for marker in migration_markers:
        assert marker in migration_sql
|
||||
Reference in New Issue
Block a user