"""Utilities for MOMO product URL normalization and fallback."""

import re
from typing import Optional
from urllib.parse import parse_qs, urlparse, urlunparse

# Hostnames that is_valid_momo_product_url() accepts as MOMO sites.
MOMO_BASE_DOMAINS = {'www.momoshop.com.tw', 'm.momoshop.com.tw'}

# A URL whose (lower-cased) path contains this fragment is treated as invalid.
ERR404_PATH = '/ecm/js/err404/ec404.html'

# Minimum number of characters is_probable_momo_icode() requires.
MOMO_ICODE_FALLBACK_MIN_LEN = 4

# Allowed characters for a product code: ASCII letters, digits, "_" and "-".
MOMO_ICODE_RE = re.compile(r'^[A-Za-z0-9_-]+$')


def is_probable_momo_icode(i_code: Optional[object]) -> bool:
    """Return True when the value looks like a plausible MOMO product code.

    The value is converted to ``str`` and stripped. It is rejected when it is
    empty (or falsy), equals a null-like placeholder, starts with one of the
    ``momo_`` / ``manual_`` / ``pchome_`` prefixes (all compared
    case-insensitively), is shorter than ``MOMO_ICODE_FALLBACK_MIN_LEN``, or
    contains characters outside ``MOMO_ICODE_RE``.
    """
    text = str(i_code or '').strip()
    folded = text.lower()

    placeholder_values = {'nan', 'none', 'null', 'undefined'}
    rejected_prefixes = ('momo_', 'manual_', 'pchome_')
    if not text or folded in placeholder_values or folded.startswith(rejected_prefixes):
        return False

    long_enough = len(text) >= MOMO_ICODE_FALLBACK_MIN_LEN
    return long_enough and MOMO_ICODE_RE.fullmatch(text) is not None


def build_momo_product_url(i_code: Optional[object]) -> Optional[str]:
    """Build the fallback MOMO product detail URL for ``i_code``.

    Returns ``None`` when ``is_probable_momo_icode`` rejects the value;
    otherwise the stripped string form of ``i_code`` is placed in the
    ``i_code`` query parameter of the GoodsDetail.jsp URL.
    """
    if is_probable_momo_icode(i_code):
        code = str(i_code).strip()
        return f"https://www.momoshop.com.tw/goods/GoodsDetail.jsp?i_code={code}"
    return None


def extract_momo_i_code(url: Optional[object]) -> Optional[str]:
    """Extract the product code (``i_code``) from a URL-like value.

    Lookup order:

    1. For http/https URLs (scheme-relative ``//host/...`` is treated as
       https): the first ``i_code`` query parameter, then the path segment
       following ``/goodsDetail/`` (case-insensitive).
    2. Otherwise, or when step 1 finds nothing, a regex search for
       ``i_code=...`` in the raw text. This value is returned as written
       (it is not percent-decoded).

    Args:
        url: Any value; it is converted with ``str()`` and stripped.

    Returns:
        The stripped, non-empty code, or ``None`` when no code is found.
        Whitespace-only matches are treated as "not found".
    """
    if not url:
        return None

    raw = str(url).strip()
    if not raw:
        return None

    # Scheme-relative links need a scheme before urlparse can see the host.
    candidate = f'https:{raw}' if raw.startswith('//') else raw
    try:
        parsed = urlparse(candidate)
    except ValueError:
        # urlparse rejects malformed hosts (e.g. unbalanced "["); the regex
        # fallback below still gets a chance to find the code.
        parsed = None

    if parsed is not None and parsed.scheme in ('http', 'https'):
        query = parse_qs(parsed.query or '')
        # Strip before testing so a whitespace-only value is not returned as ''.
        code = (query.get('i_code') or [''])[0].strip()
        if code:
            return code

        match = re.search(r'/goodsdetail/([^/?#]+)', parsed.path or '', re.I)
        if match:
            code = match.group(1).strip()
            if code:
                return code

    # Fallback: plain-text match for inputs that are not absolute URLs.
    match = re.search(r'[?&]i_code=([^&#]+)', raw, re.I)
    if match:
        return match.group(1).strip() or None

    return None


def _normalize_quoted_url(url: str) -> str:
    """Turn scheme-relative and path-relative links into absolute URLs.

    ``//host/path`` gets an ``https:`` scheme, ``/path`` is resolved against
    ``https://www.momoshop.com.tw``, and anything else is returned stripped
    but otherwise unchanged. A falsy ``url`` yields an empty string.
    """
    text = (url or '').strip()
    if text[:2] == '//':
        prefix = 'https:'
    elif text[:1] == '/':
        prefix = 'https://www.momoshop.com.tw'
    else:
        prefix = ''
    return prefix + text


def is_valid_momo_product_url(url: str) -> bool:
    """Return whether ``url`` looks like a valid MOMO product page.

    A URL is accepted only when all of the following hold:

    * it parses, and its scheme is http or https;
    * its hostname (case-insensitive) is in ``MOMO_BASE_DOMAINS``;
    * its lower-cased path does not contain ``ERR404_PATH``;
    * its lower-cased path contains ``goodsdetail`` and the URL either has an
      ``i_code`` query parameter or a ``/goodsdetail/<segment>`` path.

    Args:
        url: Absolute URL string to check.

    Returns:
        ``True`` for a product-page URL, ``False`` otherwise, including for
        empty input and for text that ``urlparse`` rejects.
    """
    if not url:
        return False

    try:
        parsed = urlparse(url)
    except ValueError:
        # Malformed host (e.g. unbalanced "[") - not a usable product URL.
        return False

    if parsed.scheme not in ('http', 'https'):
        return False
    if (parsed.hostname or '').lower() not in MOMO_BASE_DOMAINS:
        return False

    path = (parsed.path or '').lower()
    if ERR404_PATH in path or 'goodsdetail' not in path:
        return False

    # Product pages are either GoodsDetail.jsp?i_code=... or /goodsDetail/<code>;
    # the latter does not necessarily carry a query string.
    if 'i_code' in parse_qs(parsed.query or ''):
        return True
    return bool(re.search(r'/goodsdetail/[^/]+', path))


def normalize_momo_product_url(url: Optional[object], i_code: Optional[object]) -> Optional[str]:
    """
    Normalize a MOMO URL and fall back to i_code product detail URL when invalid.

    The fallback URL is built from the code embedded in ``url`` when that code
    passes ``is_probable_momo_icode``; otherwise from the ``i_code`` argument.

    Args:
        url: Original link. Scheme-relative (``//host/...``) and
            path-relative (``/path``) links are made absolute first.
        i_code: Product code for fallback URL.

    Returns:
        The normalized URL when ``is_valid_momo_product_url`` accepts it;
        otherwise the fallback URL, or ``None`` when no plausible product
        code is available either.
    """
    # build_momo_product_url() validates its input, so an implausible code
    # found in the URL no longer blocks the caller-supplied i_code.
    fallback = build_momo_product_url(extract_momo_i_code(url)) or build_momo_product_url(i_code)

    if not url:
        return fallback

    normalized = _normalize_quoted_url(str(url).strip())
    if not normalized:
        return fallback

    # Script pseudo-links are never product pages.
    if normalized.lower().startswith(('javascript:', 'void(')):
        return fallback

    try:
        valid = is_valid_momo_product_url(normalized)
    except ValueError:
        # URL parsing can reject malformed hosts; treat that as broken data.
        valid = False

    # Any URL that is not a recognizable product page is treated as broken
    # data and replaced by the fallback.
    return normalized if valid else fallback