Files
ewoooc/services/competitor_match_review_service.py
2026-06-01 12:19:48 +08:00

457 lines
18 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""PChome / MOMO 比價人工覆核決策服務。"""
from __future__ import annotations
import json
import os
from datetime import datetime, timedelta, timezone
from typing import Any
from sqlalchemy import inspect, text
# Reviewer actions accepted by this module, keyed by action id. Each entry holds
# a display label, the attempt_status recorded for the action, and the message
# handed back to the caller once the action has been applied.
VALID_REVIEW_ACTIONS = {
    action: {"label": label, "attempt_status": attempt_status, "message": message}
    for action, label, attempt_status, message in (
        (
            "accept_identity",
            "採用同款",
            "manual_accepted",
            "已採用候選商品為正式 PChome 同款配對",
        ),
        (
            "reject_identity",
            "否決候選",
            "manual_rejected",
            "已否決候選商品,本輪覆核已關閉",
        ),
        (
            "unit_price_required",
            "標記單位價",
            "manual_unit_price_required",
            "已標記為需單位價比較,不寫入正式總價差",
        ),
        (
            "needs_research",
            "需補搜尋",
            "manual_needs_research",
            "已標記為需補搜尋詞或重新抓取",
        ),
    )
}
def _num(value: Any) -> float | None:
try:
if value is None:
return None
return float(value)
except (TypeError, ValueError):
return None
def _json_array_expr(conn, bind_name: str) -> str:
return f"CAST(:{bind_name} AS jsonb)" if conn.dialect.name == "postgresql" else f":{bind_name}"
def _json_object_expr(conn, bind_name: str) -> str:
return f"CAST(:{bind_name} AS jsonb)" if conn.dialect.name == "postgresql" else f":{bind_name}"
def _json_text_payload(value: Any) -> str | None:
if value is None:
return None
if isinstance(value, (dict, list)):
return json.dumps(value, ensure_ascii=False)
if isinstance(value, str):
stripped = value.strip()
if not stripped:
return None
try:
json.loads(stripped)
return stripped
except (TypeError, ValueError):
return json.dumps({"raw": stripped}, ensure_ascii=False)
return json.dumps(value, ensure_ascii=False)
def _has_table_column(conn, table_name: str, column_name: str) -> bool:
try:
columns = inspect(conn).get_columns(table_name)
return any(str(column.get("name")) == column_name for column in columns)
except Exception:
return False
def _candidate_diagnostic_text(attempt: dict[str, Any]) -> str:
    """Build the free-text diagnostic stored alongside a review.

    Joins the attempt's ``error_message`` and, when present, its
    ``match_diagnostic_json`` payload (prefixed with the field name) using
    ``"; "``, then truncates the result to 4000 characters.
    """
    error_message = str(attempt.get("error_message") or "").strip()
    diagnostic_payload = _json_text_payload(attempt.get("match_diagnostic_json"))

    segments = [error_message] if error_message else []
    if diagnostic_payload:
        segments.append(f"match_diagnostic_json={diagnostic_payload}")
    combined = "; ".join(segments)
    return combined[:4000]
def _ensure_competitor_match_reviews_table(conn) -> None:
    """Create the ``competitor_match_reviews`` table and its indexes if missing.

    PostgreSQL and other dialects get separate DDL; they differ in the primary
    key definition and in the default expression for ``reviewed_at``. The three
    index statements are shared by every dialect.
    """
    # SQL text is kept flush-left so the statements sent to the database are
    # exactly the ones this module has always issued.
    if conn.dialect.name == "postgresql":
        create_table_sql = """
CREATE TABLE IF NOT EXISTS competitor_match_reviews (
id BIGSERIAL PRIMARY KEY,
sku VARCHAR(50) NOT NULL,
source VARCHAR(30) NOT NULL DEFAULT 'pchome',
review_action VARCHAR(40) NOT NULL,
review_reason TEXT,
reviewer_identity VARCHAR(120),
momo_product_id INTEGER,
momo_product_name TEXT,
momo_price NUMERIC(10,2),
candidate_product_id VARCHAR(100),
candidate_product_name TEXT,
candidate_price NUMERIC(10,2),
candidate_match_score NUMERIC(4,3),
candidate_diagnostic TEXT,
resulting_attempt_status VARCHAR(40),
reviewed_at TIMESTAMP NOT NULL DEFAULT NOW()
)
"""
    else:
        create_table_sql = """
CREATE TABLE IF NOT EXISTS competitor_match_reviews (
id INTEGER PRIMARY KEY AUTOINCREMENT,
sku VARCHAR(50) NOT NULL,
source VARCHAR(30) NOT NULL DEFAULT 'pchome',
review_action VARCHAR(40) NOT NULL,
review_reason TEXT,
reviewer_identity VARCHAR(120),
momo_product_id INTEGER,
momo_product_name TEXT,
momo_price NUMERIC(10,2),
candidate_product_id VARCHAR(100),
candidate_product_name TEXT,
candidate_price NUMERIC(10,2),
candidate_match_score NUMERIC(4,3),
candidate_diagnostic TEXT,
resulting_attempt_status VARCHAR(40),
reviewed_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
)
"""
    index_statements = (
        """
CREATE INDEX IF NOT EXISTS idx_comp_match_reviews_sku_source_time
ON competitor_match_reviews (sku, source, reviewed_at DESC)
""",
        """
CREATE INDEX IF NOT EXISTS idx_comp_match_reviews_action_time
ON competitor_match_reviews (review_action, reviewed_at DESC)
""",
        """
CREATE INDEX IF NOT EXISTS idx_comp_match_reviews_candidate
ON competitor_match_reviews (candidate_product_id)
""",
    )

    conn.execute(text(create_table_sql))
    for index_sql in index_statements:
        conn.execute(text(index_sql))
def _ensure_competitor_price_history_table(conn) -> None:
    """Create the ``competitor_price_history`` table and its lookup index if missing.

    The primary-key definition and the type of the ``tags`` column depend on
    whether the connection's dialect is PostgreSQL.
    """
    if conn.dialect.name == "postgresql":
        tags_type = "JSONB"
        pk_type = "BIGSERIAL PRIMARY KEY"
    else:
        tags_type = "TEXT"
        pk_type = "INTEGER PRIMARY KEY AUTOINCREMENT"

    # SQL text is kept flush-left so the statements sent to the database are
    # exactly the ones this module has always issued.
    create_table = text(f"""
CREATE TABLE IF NOT EXISTS competitor_price_history (
id {pk_type},
sku VARCHAR(50) NOT NULL,
source VARCHAR(30) NOT NULL DEFAULT 'pchome',
momo_product_id INTEGER,
momo_price NUMERIC(10,2),
price NUMERIC(10,2) NOT NULL,
original_price NUMERIC(10,2),
discount_pct INTEGER,
competitor_product_id VARCHAR(100),
competitor_product_name TEXT,
match_score NUMERIC(4,3),
tags {tags_type} DEFAULT '[]',
crawled_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
)
""")
    conn.execute(create_table)

    create_index = text("""
CREATE INDEX IF NOT EXISTS idx_comp_price_history_sku_source_time
ON competitor_price_history (sku, source, crawled_at DESC)
""")
    conn.execute(create_index)
def _fetch_latest_attempt(conn, sku: str, source: str) -> dict[str, Any] | None:
    """Return the newest ``competitor_match_attempts`` row for *sku* and *source*.

    The result is a plain dict holding every attempt column plus
    ``current_momo_product_id``, ``current_momo_product_name`` and
    ``current_momo_price``, which come from the joined ``products`` row and its
    most recent ``price_records`` entry. Returns ``None`` when no attempt exists.
    """
    # SQL text is kept flush-left so the statement sent to the database is
    # exactly the one this module has always issued.
    if conn.dialect.name == "postgresql":
        # PostgreSQL: fetch the latest price through a LATERAL join.
        latest_price_sql = """
pr.price AS current_momo_price
FROM competitor_match_attempts cma
LEFT JOIN products p ON p.i_code = cma.sku
LEFT JOIN LATERAL (
SELECT price
FROM price_records
WHERE product_id = p.id
ORDER BY timestamp DESC, id DESC
LIMIT 1
) pr ON TRUE
"""
    else:
        # Other dialects: use a correlated scalar subquery instead.
        latest_price_sql = """
(
SELECT price
FROM price_records
WHERE product_id = p.id
ORDER BY timestamp DESC, id DESC
LIMIT 1
) AS current_momo_price
FROM competitor_match_attempts cma
LEFT JOIN products p ON p.i_code = cma.sku
"""

    query = text(f"""
SELECT
cma.*,
p.id AS current_momo_product_id,
p.name AS current_momo_product_name,
{latest_price_sql}
WHERE cma.sku = :sku
AND cma.source = :source
ORDER BY cma.attempted_at DESC, cma.id DESC
LIMIT 1
""")
    latest = conn.execute(query, {"sku": sku, "source": source}).mappings().first()
    if not latest:
        return None
    return dict(latest)
def _insert_manual_attempt(conn, attempt: dict[str, Any], action_meta: dict[str, str], source: str) -> None:
    """Append a ``competitor_match_attempts`` row that records a manual decision.

    The new row copies the product and candidate fields of *attempt*, takes its
    status and message from *action_meta*, and tags ``search_terms`` with
    ``manual_review:<status>``. The columns ``match_diagnostic_json``,
    ``comparison_mode``, ``hard_veto`` and ``diagnostic_codes`` are carried over
    only when the table actually has them.
    """
    columns: list[str] = []
    values: list[str] = []
    params: dict[str, Any] = {}

    def bind(column: str, bind_name: str, bound_value: Any, expr: str | None = None) -> None:
        # Register one column with its VALUES expression and bound value.
        columns.append(column)
        values.append(f":{bind_name}" if expr is None else expr)
        params[bind_name] = bound_value

    bind("sku", "sku", attempt.get("sku"))
    bind("source", "source", source)
    # Fall back to the current product data when the attempt itself has none.
    bind(
        "momo_product_id",
        "momo_product_id",
        attempt.get("momo_product_id") or attempt.get("current_momo_product_id"),
    )
    bind(
        "momo_product_name",
        "momo_product_name",
        attempt.get("momo_product_name") or attempt.get("current_momo_product_name"),
    )
    bind(
        "momo_price",
        "momo_price",
        attempt.get("momo_price") or attempt.get("current_momo_price"),
    )
    bind(
        "search_terms",
        "search_terms",
        json.dumps([f"manual_review:{action_meta['attempt_status']}"], ensure_ascii=False),
        _json_array_expr(conn, "search_terms"),
    )
    bind("candidate_count", "candidate_count", attempt.get("candidate_count") or 0)
    bind("attempt_status", "attempt_status", action_meta["attempt_status"])
    bind("best_competitor_product_id", "best_id", attempt.get("best_competitor_product_id"))
    bind("best_competitor_product_name", "best_name", attempt.get("best_competitor_product_name"))
    bind("best_competitor_price", "best_price", attempt.get("best_competitor_price"))
    bind("best_match_score", "best_score", attempt.get("best_match_score"))
    bind("error_message", "error_message", action_meta["message"])
    # attempted_at is filled by the database, so it has no bind parameter.
    columns.append("attempted_at")
    values.append("CURRENT_TIMESTAMP")

    # (column, SQL-expression builder or None, value encoder or None)
    optional_columns = (
        ("match_diagnostic_json", _json_object_expr, _json_text_payload),
        ("comparison_mode", None, None),
        ("hard_veto", None, None),
        ("diagnostic_codes", _json_array_expr, _json_text_payload),
    )
    for column, build_expr, encode in optional_columns:
        if not _has_table_column(conn, "competitor_match_attempts", column):
            continue
        raw_value = attempt.get(column)
        bind(
            column,
            column,
            raw_value if encode is None else encode(raw_value),
            None if build_expr is None else build_expr(conn, column),
        )

    # SQL text is kept flush-left so the statement sent to the database is
    # exactly the one this module has always issued.
    conn.execute(text(f"""
INSERT INTO competitor_match_attempts
({", ".join(columns)})
VALUES
({", ".join(values)})
"""), params)
def _promote_manual_match(conn, attempt: dict[str, Any], source: str) -> None:
    """Publish the reviewed candidate as the official competitor price.

    Upserts the ``competitor_prices`` row for ``(sku, source)`` and appends a
    matching row to ``competitor_price_history`` (creating that table first if
    needed). Both rows are tagged as a manual, exact-identity acceptance.

    Args:
        conn: Open database connection the statements are executed on.
        attempt: Latest match attempt; supplies the candidate id, name, price
            and score as well as the MOMO product id and price.
        source: Competitor source key stored on both rows.

    Raises:
        ValueError: If the attempt lacks a candidate id, a candidate name, or a
            finite price greater than zero.
    """
    candidate_id = str(attempt.get("best_competitor_product_id") or "").strip()
    candidate_name = str(attempt.get("best_competitor_product_name") or "").strip()
    candidate_price = _num(attempt.get("best_competitor_price"))
    # The chained comparison is False for NaN and +inf as well as for values
    # <= 0, so a non-finite price can never be published.
    price_is_valid = candidate_price is not None and 0 < candidate_price < float("inf")
    if not candidate_id or not candidate_name or not price_is_valid:
        raise ValueError("採用同款需要候選 PChome 商品 ID、名稱與有效價格")

    _ensure_competitor_price_history_table(conn)

    # A malformed TTL setting must not surface as ValueError here: that would
    # be indistinguishable from the validation error above. Use the default.
    default_ttl_hours = 48
    try:
        ttl_hours = int(os.getenv("PCHOME_FEEDER_TTL_HOURS", str(default_ttl_hours)))
    except ValueError:
        ttl_hours = default_ttl_hours

    # NOTE(review): expires_at is written as a naive UTC+8 wall-clock string,
    # while crawled_at uses the database's CURRENT_TIMESTAMP. Confirm readers
    # of expires_at expect UTC+8.
    taipei_tz = timezone(timedelta(hours=8))
    expires_at = (datetime.now(taipei_tz) + timedelta(hours=ttl_hours)).strftime("%Y-%m-%d %H:%M:%S")

    # A manually accepted match is stored with a score of at least 0.76
    # (presumably the downstream acceptance threshold; confirm). A missing or
    # non-finite score counts as 0 so NaN/inf never reaches the NUMERIC column.
    reviewed_score = _num(attempt.get("best_match_score"))
    if reviewed_score is None or not (float("-inf") < reviewed_score < float("inf")):
        reviewed_score = 0.0
    match_score = max(reviewed_score, 0.76)

    tags = [
        "identity_v2",
        "manual_review",
        "manual_accept",
        "comparison_exact_identity",
    ]
    tags_json = json.dumps(tags, ensure_ascii=False)
    tags_expr = _json_array_expr(conn, "tags")
    # Fall back to the current product data when the attempt itself has none.
    momo_product_id = attempt.get("momo_product_id") or attempt.get("current_momo_product_id")
    momo_price = attempt.get("momo_price") or attempt.get("current_momo_price")

    # Bind values common to both statements; each statement adds only its own.
    shared_params = {
        "sku": attempt.get("sku"),
        "source": source,
        "price": candidate_price,
        "candidate_id": candidate_id,
        "candidate_name": candidate_name[:200],
        "match_score": match_score,
        "tags": tags_json,
    }

    # SQL text is kept flush-left so the statements sent to the database are
    # exactly the ones this module has always issued.
    conn.execute(text(f"""
INSERT INTO competitor_prices
(sku, source, price, original_price, discount_pct,
competitor_product_id, competitor_product_name,
match_score, tags, crawled_at, expires_at)
VALUES
(:sku, :source, :price, NULL, NULL,
:candidate_id, :candidate_name,
:match_score, {tags_expr}, CURRENT_TIMESTAMP, :expires_at)
ON CONFLICT (sku, source) DO UPDATE
SET price = EXCLUDED.price,
original_price = EXCLUDED.original_price,
discount_pct = EXCLUDED.discount_pct,
competitor_product_id = EXCLUDED.competitor_product_id,
competitor_product_name = EXCLUDED.competitor_product_name,
match_score = EXCLUDED.match_score,
tags = EXCLUDED.tags,
crawled_at = CURRENT_TIMESTAMP,
expires_at = EXCLUDED.expires_at
"""), {**shared_params, "expires_at": expires_at})

    conn.execute(text(f"""
INSERT INTO competitor_price_history
(sku, source, momo_product_id, momo_price,
price, original_price, discount_pct,
competitor_product_id, competitor_product_name,
match_score, tags, crawled_at)
VALUES
(:sku, :source, :momo_product_id, :momo_price,
:price, NULL, NULL,
:candidate_id, :candidate_name,
:match_score, {tags_expr}, CURRENT_TIMESTAMP)
"""), {**shared_params, "momo_product_id": momo_product_id, "momo_price": momo_price})
def _expire_current_manual_candidate(conn, attempt: dict[str, Any], source: str) -> None:
"""Expire a current official match when the operator rejects its candidate."""
candidate_id = str(attempt.get("best_competitor_product_id") or "").strip()
sku = str(attempt.get("sku") or "").strip()
if not sku or not candidate_id:
return
conn.execute(text("""
UPDATE competitor_prices
SET expires_at = CURRENT_TIMESTAMP,
crawled_at = CURRENT_TIMESTAMP
WHERE sku = :sku
AND source = :source
AND competitor_product_id = :candidate_id
"""), {
"sku": sku,
"source": source,
"candidate_id": candidate_id,
})
def record_competitor_match_review(
    engine,
    sku: str,
    review_action: str,
    reviewer_identity: str = "dashboard",
    review_reason: str = "",
    source: str = "pchome",
) -> dict[str, Any]:
    """Apply and log a reviewer's decision on the latest match attempt for *sku*.

    Inside a single transaction this ensures the review table exists, loads the
    newest attempt, either promotes the candidate (``accept_identity``) or
    expires the published row for that candidate (every other action), appends
    a manual attempt row, and writes the review record.

    Args:
        engine: SQLAlchemy engine used to open the transaction.
        sku: MOMO product code under review.
        review_action: One of the keys of ``VALID_REVIEW_ACTIONS``.
        reviewer_identity: Who made the decision; blank falls back to
            ``"dashboard"``; truncated to 120 characters.
        review_reason: Optional free text; truncated to 1000 characters.
        source: Competitor source key; blank falls back to ``"pchome"``.

    Returns:
        A dict with ``success`` and ``message``; on success it also carries
        ``sku``, ``review_action`` and ``attempt_status``.

    Raises:
        ValueError: Propagated from the promotion step when ``accept_identity``
            is requested for an attempt without a usable candidate.
    """

    def failure(message: str) -> dict[str, Any]:
        return {"success": False, "message": message}

    # Normalise every input to a trimmed string before validating anything.
    sku = str(sku or "").strip()
    source = str(source or "pchome").strip() or "pchome"
    review_action = str(review_action or "").strip()
    review_reason = str(review_reason or "").strip()[:1000]
    reviewer_identity = str(reviewer_identity or "dashboard").strip()[:120] or "dashboard"

    if not sku:
        return failure("缺少 MOMO 商品 ID")
    action_meta = VALID_REVIEW_ACTIONS.get(review_action)
    if not action_meta:
        return failure("不支援的覆核動作")

    resulting_status = action_meta["attempt_status"]

    with engine.begin() as conn:
        _ensure_competitor_match_reviews_table(conn)
        attempt = _fetch_latest_attempt(conn, sku, source)
        if not attempt:
            return failure("找不到可覆核的 PChome 比對嘗試")

        if review_action != "accept_identity":
            _expire_current_manual_candidate(conn, attempt, source)
        else:
            _promote_manual_match(conn, attempt, source)
        _insert_manual_attempt(conn, attempt, action_meta, source)

        review_row = {
            "sku": sku,
            "source": source,
            "review_action": review_action,
            "review_reason": review_reason,
            "reviewer_identity": reviewer_identity,
            # Fall back to the current product data when the attempt has none.
            "momo_product_id": attempt.get("momo_product_id") or attempt.get("current_momo_product_id"),
            "momo_product_name": attempt.get("momo_product_name") or attempt.get("current_momo_product_name"),
            "momo_price": attempt.get("momo_price") or attempt.get("current_momo_price"),
            "candidate_id": attempt.get("best_competitor_product_id"),
            "candidate_name": attempt.get("best_competitor_product_name"),
            "candidate_price": attempt.get("best_competitor_price"),
            "candidate_score": attempt.get("best_match_score"),
            "candidate_diagnostic": _candidate_diagnostic_text(attempt),
            "resulting_attempt_status": resulting_status,
        }
        # SQL text is kept flush-left so the statement sent to the database is
        # exactly the one this module has always issued.
        conn.execute(text("""
INSERT INTO competitor_match_reviews
(sku, source, review_action, review_reason, reviewer_identity,
momo_product_id, momo_product_name, momo_price,
candidate_product_id, candidate_product_name, candidate_price,
candidate_match_score, candidate_diagnostic,
resulting_attempt_status, reviewed_at)
VALUES
(:sku, :source, :review_action, :review_reason, :reviewer_identity,
:momo_product_id, :momo_product_name, :momo_price,
:candidate_id, :candidate_name, :candidate_price,
:candidate_score, :candidate_diagnostic,
:resulting_attempt_status, CURRENT_TIMESTAMP)
"""), review_row)

    return {
        "success": True,
        "message": action_meta["message"],
        "sku": sku,
        "review_action": review_action,
        "attempt_status": resulting_status,
    }