Files
ewoooc/scripts/tools/sanitize_momo_urls.py
OoO 75de76ac12
Some checks failed
CD Pipeline / deploy (push) Has been cancelled
fix(momo): block EC404 auto-open with end-to-end URL guard
- normalize URLs at write time (scheduler crawlers, routes) to drop
  javascript:/EC404/placeholder i_code (momo_/manual_/pchome_)
- add global click+auxclick guard in base.html and ewoooc_base.html
  that intercepts blocked MOMO URLs and redirects to safe i_code URL
- per-page dashboards reuse the same isLikelyMomoIcode validation
- /api/track_momo_link records blocked events for diagnosis
- ship sanitize_momo_urls.py to clean existing polluted DB rows

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 12:00:34 +08:00

120 lines
3.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
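"""
Repair bad links (e.g. javascript:void(0), EC404, non-product pages) stored in
the URLs of MOMO products and promo products.

Each affected URL is handled in one of two ways:
1) it is rebuilt as the correct product URL from the record's i_code;
2) it is cleared when it cannot be repaired (to avoid linking to a wrong page).
"""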
import argparse
import logging
import os
import sys
# BASE_DIR is the absolute path of this file with three trailing path
# components removed (the file name and two directory levels).
# It is placed first on sys.path before the project imports that follow
# (presumably so `database` and `utils` resolve when this file is run
# directly as a script -- confirm against the repository layout).
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.insert(0, BASE_DIR)
from database.manager import DatabaseManager
from database.models import Product
from database.edm_models import PromoProduct
from utils.momo_url_utils import normalize_momo_product_url
# Configure logging at import time: INFO level, with each line formatted as
# "<timestamp> [<LEVEL>] <message>".
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
)
# Module-level logger used by the functions in this script.
logger = logging.getLogger(__name__)
def _sanitize_records(records, label, commit=False):
    """Normalize the ``url`` of every record and summarize the outcome.

    For each record, the current ``url`` and the record's ``i_code``
    (``None`` when the attribute is absent) are passed to
    ``normalize_momo_product_url``. The result puts the record in one of
    three buckets:

    * ``unchanged`` -- the URL is empty/missing, or normalization returned
      the very same value;
    * ``cleared`` -- normalization returned ``None``, so the URL is reset
      to ``None``;
    * ``updated`` -- normalization returned a different URL, which replaces
      the stored one.

    Records are mutated only when ``commit`` is true. Counters and log
    lines are produced in both modes, so a dry run previews exactly what a
    real run would change. This function never commits or rolls back; that
    is left to the caller.

    Args:
        records: Iterable of objects exposing a ``url`` attribute and,
            optionally, ``id`` and ``i_code`` attributes.
        label: Name of the record set, used only in log messages.
        commit: When true, write the new URL (or ``None``) onto the record.

    Returns:
        A dict with the integer counts under the keys ``"unchanged"``,
        ``"updated"`` and ``"cleared"``.
    """
    unchanged = 0
    updated = 0
    cleared = 0

    for record in records:
        old_url = getattr(record, "url", None)
        if not old_url:
            # Nothing stored, so there is nothing to repair.
            unchanged += 1
            continue

        normalized = normalize_momo_product_url(old_url, getattr(record, "i_code", None))
        if normalized == old_url:
            unchanged += 1
            continue

        if normalized is None:
            # The URL cannot be repaired: drop it rather than keep a bad link.
            if commit:
                record.url = None
            cleared += 1
            logger.info(
                "清空 %s 不可修正 URL | id=%s | i_code=%s | old=%s",
                label,
                getattr(record, "id", "n/a"),
                getattr(record, "i_code", ""),
                old_url,
            )
        else:
            if commit:
                record.url = normalized
            updated += 1
            logger.info(
                "修正 %s URL | id=%s | i_code=%s | old=%s | new=%s",
                label,
                getattr(record, "id", "n/a"),
                getattr(record, "i_code", ""),
                old_url,
                normalized,
            )

    logger.info(
        "%s 結果 | unchanged=%s, updated=%s, cleared=%s",
        label,
        unchanged,
        updated,
        cleared,
    )
    return {"unchanged": unchanged, "updated": updated, "cleared": cleared}
def main(commit=False):
    """Sanitize the URLs of all ``Product`` and ``PromoProduct`` rows.

    Both tables are loaded in full, each row set is passed through
    ``_sanitize_records``, and the session is then committed (``commit``
    true) or rolled back (dry run). A combined summary is logged afterwards.

    Args:
        commit: When true, persist the changes; otherwise run as a dry run
            that leaves the database untouched.

    Raises:
        Exception: Any error raised while querying, sanitizing or committing
            is logged and re-raised. The session is closed in every case.
    """
    manager = DatabaseManager()
    session = manager.get_session()
    try:
        # Load both tables first, then sanitize them in the same order.
        products = session.query(Product).all()
        promos = session.query(PromoProduct).all()

        products_summary = _sanitize_records(products, "products", commit=commit)
        promos_summary = _sanitize_records(promos, "promo_products", commit=commit)

        if not commit:
            session.rollback()
            logger.info("Dry-run 模式:未提交變更")
        else:
            session.commit()
            logger.info("變更已提交")

        logger.info(
            "整體結果 | products: %s | promo_products: %s",
            products_summary,
            promos_summary,
        )
    except Exception as exc:
        # Only a committing run can have pending writes to undo.
        if commit:
            session.rollback()
        logger.exception("清理網址失敗: %s", exc)
        raise
    finally:
        session.close()
if __name__ == "__main__":
    # Command-line entry point: the script is a dry run unless --commit is
    # given, in which case the changes are written back to the database.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--commit",
        action="store_true",
        # The closing parenthesis was missing from this help text.
        help="實際寫回資料庫(不加此參數則為 dry-run)",
    )
    args = parser.parse_args()
    main(commit=args.commit)