Files
2026FIFAWorldCup/platform/backend/app/analytics/localization.py
wooo 3994996ea0
All checks were successful
2026 World Cup Quant Platform - Production Deployment / Code Quality, Security Gate & Testing (push) Successful in 4m32s
2026 World Cup Quant Platform - Production Deployment / Deploy to Production VM via Gitea CD (push) Successful in 1m15s
fix: normalize world cup match feed quality
2026-06-18 13:49:33 +08:00

300 lines
7.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Traditional Chinese localization helpers for public API payloads."""
from __future__ import annotations
import re
import unicodedata
from typing import Any
# English team name -> Traditional Chinese display name.
# Insertion order is significant: `_lookup` walks this mapping in order when it
# falls back to comparing normalized keys, and the first match wins.
TEAM_NAMES: dict[str, str] = {
    "Argentina": "阿根廷",
    "Australia": "澳洲",
    "Austria": "奧地利",
    "Belgium": "比利時",
    "Brazil": "巴西",
    "Canada": "加拿大",
    "Cape Verde": "維德角",
    "Colombia": "哥倫比亞",
    "Croatia": "克羅埃西亞",
    "Curaçao": "古拉索",
    "Czech Republic": "捷克",
    "DR Congo": "剛果民主共和國",
    "Ecuador": "厄瓜多",
    "Egypt": "埃及",
    "England": "英格蘭",
    "France": "法國",
    "Germany": "德國",
    "Ghana": "迦納",
    "Iran": "伊朗",
    "Iraq": "伊拉克",
    "Japan": "日本",
    "Jordan": "約旦",
    "Mexico": "墨西哥",
    "Netherlands": "荷蘭",
    "New Zealand": "紐西蘭",
    "Norway": "挪威",
    "Panama": "巴拿馬",
    "Portugal": "葡萄牙",
    "Qatar": "卡達",
    "Saudi Arabia": "沙烏地阿拉伯",
    "Senegal": "塞內加爾",
    "South Africa": "南非",
    "South Korea": "南韓",
    "Spain": "西班牙",
    "Sweden": "瑞典",
    "Tunisia": "突尼西亞",
    # Both the official and the common English spelling are accepted.
    "Türkiye": "土耳其",
    "Turkey": "土耳其",
    "United States": "美國",
    "USA": "美國",
    "Uruguay": "烏拉圭",
    "Uzbekistan": "烏茲別克",
}
# English country name -> Traditional Chinese display name.
COUNTRIES: dict[str, str] = {
    "United States": "美國",
    "USA": "美國",
    "Canada": "加拿大",
    "Mexico": "墨西哥",
}
# English city name -> Traditional Chinese display name.
CITIES: dict[str, str] = {
    "New York": "紐約",
    "New Jersey": "紐澤西",
    "Los Angeles": "洛杉磯",
    "Dallas": "達拉斯",
    "Houston": "休士頓",
    "Kansas City": "堪薩斯城",
    "Miami": "邁阿密",
    "Philadelphia": "費城",
    "Seattle": "西雅圖",
    "Atlanta": "亞特蘭大",
    "Boston": "波士頓",
    "San Francisco": "舊金山",
    "Toronto": "多倫多",
    "Vancouver": "溫哥華",
    "Mexico City": "墨西哥城",
    "Guadalajara": "瓜達拉哈拉",
    "Monterrey": "蒙特雷",
}
# English venue name -> Traditional Chinese display name.
VENUES: dict[str, str] = {
    "MetLife Stadium": "大都會人壽體育場",
    "SoFi Stadium": "SoFi 體育場",
    "AT&T Stadium": "AT&T 體育場",
    "NRG Stadium": "NRG 體育場",
    "Hard Rock Stadium": "硬石體育場",
    "Lumen Field": "流明球場",
    "Mercedes-Benz Stadium": "梅賽德斯-賓士體育場",
    "Lincoln Financial Field": "林肯金融球場",
    "Gillette Stadium": "吉列體育場",
    "Levi's Stadium": "李維斯體育場",
    "BMO Field": "BMO 球場",
    "BC Place": "卑詩體育館",
    "Estadio Azteca": "阿茲特克體育場",
}
# Betting market identifier -> Traditional Chinese label.
# Several feed spellings deliberately share one label.
MARKETS: dict[str, str] = {
    # Match result
    "1x2": "勝平負",
    "h2h": "勝平負",
    "h2h_3_way": "勝平負",
    # Handicap / spread
    "asian_handicap": "亞洲讓球",
    "spreads": "讓球盤",
    "alternate_spreads": "讓球變體盤",
    # Totals
    "ou": "大小球",
    "totals": "大小球",
    "alternate_totals": "大小球變體盤",
    # Other markets
    "btts": "雙方進球",
    "draw_no_bet": "平手退回",
    "team_total": "球隊大小球",
    "team_totals": "球隊大小球",
    "correct_score": "正確比分",
}
# Lower-cased bet selection -> Traditional Chinese label.
# Every value must be non-empty: `localize_selection` returns these labels
# as-is, so a blank entry would surface as an empty string in the payload.
SELECTIONS: dict[str, str] = {
    "home": "主隊",
    "away": "客隊",
    "draw": "平手",
    "tie": "平手",
    "over": "大分",
    "under": "小分",
    # Yes/no selections (e.g. for the both-teams-to-score market).
    "yes": "是",
    "no": "否",
}
# Match status -> Traditional Chinese label.
# Keys are lower-case and hyphenated; `localize_status` lower-cases its input
# and turns underscores into hyphens before looking a status up here.
STATUSES: dict[str, str] = {
    # Not started yet
    "pre-match": "未開賽",
    "scheduled": "未開賽",
    "upcoming": "未開賽",
    # Being played
    "in-play": "進行中",
    "live": "進行中",
    # Completed
    "finished": "已完賽",
    "final": "已完賽",
    # Not played as planned
    "postponed": "延期",
    "cancelled": "取消",
    "canceled": "取消",
}
def _clean(value: Any) -> str:
return str(value or '').strip()
# External feeds and bookmakers use several spellings for the same national team.
# Keep this list intentionally broad so the public API never leaks mixed English names.
# Entries here are merged into TEAM_NAMES and replace any existing entry with
# the same key.
# NOTE(review): this replaces the base table's 'Curaçao' translation
# (古拉索 -> 庫拉索); confirm which rendering is intended.
TEAM_NAMES.update(
    {
        "South Korea": "南韓",
        "Korea Republic": "南韓",
        "Republic of Korea": "南韓",
        "ROK": "南韓",
        "南韓": "南韓",
        "韓國": "南韓",
        "Czechia": "捷克",
        "Czech Republic": "捷克",
        "捷克": "捷克",
        "Bosnia and Herzegovina": "波士尼亞與赫塞哥維納",
        "Bosnia & Herzegovina": "波士尼亞與赫塞哥維納",
        "Bosnia-Herzegovina": "波士尼亞與赫塞哥維納",
        "Bosnia": "波士尼亞與赫塞哥維納",
        "Paraguay": "巴拉圭",
        "Algeria": "阿爾及利亞",
        "Morocco": "摩洛哥",
        "Switzerland": "瑞士",
        "Scotland": "蘇格蘭",
        "Wales": "威爾斯",
        "Poland": "波蘭",
        "Denmark": "丹麥",
        "Serbia": "塞爾維亞",
        "Chile": "智利",
        "Peru": "秘魯",
        "Venezuela": "委內瑞拉",
        "Bolivia": "玻利維亞",
        "Costa Rica": "哥斯大黎加",
        "Honduras": "宏都拉斯",
        "Jamaica": "牙買加",
        "Panama": "巴拿馬",
        "El Salvador": "薩爾瓦多",
        "Guatemala": "瓜地馬拉",
        "Haiti": "海地",
        "Trinidad and Tobago": "千里達及托巴哥",
        "Trinidad & Tobago": "千里達及托巴哥",
        "Curacao": "庫拉索",
        "Curaçao": "庫拉索",
        "Dominican Republic": "多明尼加共和國",
        "New Zealand": "紐西蘭",
        "Australia": "澳洲",
        "Saudi Arabia": "沙烏地阿拉伯",
        "Qatar": "卡達",
        "United Arab Emirates": "阿拉伯聯合大公國",
        "UAE": "阿拉伯聯合大公國",
        "Iraq": "伊拉克",
        "Jordan": "約旦",
        "Oman": "阿曼",
        "Uzbekistan": "烏茲別克",
        "Iran": "伊朗",
        "Japan": "日本",
        "China PR": "中國",
        "China": "中國",
        "North Korea": "北韓",
        "Cameroon": "喀麥隆",
        "Ghana": "迦納",
        "Ivory Coast": "象牙海岸",
        "Cote d'Ivoire": "象牙海岸",
        # Accented spelling with a straight and a typographic (U+2019)
        # apostrophe, so exact-key lookups hit without normalization.
        "Côte d'Ivoire": "象牙海岸",
        "Côte d\u2019Ivoire": "象牙海岸",
        # Apostrophe-less spelling kept for backward compatibility.
        "Côte dIvoire": "象牙海岸",
        "Senegal": "塞內加爾",
        "Nigeria": "奈及利亞",
        "Tunisia": "突尼西亞",
        "Egypt": "埃及",
        "Mali": "馬利",
        "DR Congo": "剛果民主共和國",
        "Congo DR": "剛果民主共和國",
        "South Africa": "南非",
    }
)
# Normalized alias (lower-case, punctuation collapsed to single spaces)
# -> canonical English key used to look the team up in TEAM_NAMES.
# Written as canonical name -> aliases and flattened into alias -> canonical.
TEAM_NAME_ALIASES: dict[str, str] = {
    alias: canonical
    for canonical, aliases in (
        (
            "United States",
            ("usmnt", "usa", "u s a", "united states of america"),
        ),
        ("Mexico", ("mex",)),
        ("Canada", ("can",)),
        ("South Korea", ("kor", "korea republic", "republic of korea")),
        ("Czechia", ("czechia", "czech republic")),
        (
            "Bosnia and Herzegovina",
            ("bosnia and herzegovina", "bosnia herzegovina", "bosnia"),
        ),
        ("Paraguay", ("paraguay",)),
    )
    for alias in aliases
}
def _normalize_lookup_key(value: Any) -> str:
    """Reduce ``value`` to a loose comparison key.

    The cleaned text is NFKD-decomposed and its combining marks are dropped,
    ``&`` is spelled out as ``and``, every run of characters other than ASCII
    letters/digits or U+4E00-U+9FFF becomes one space, and the result is
    trimmed and lower-cased.
    """
    decomposed = unicodedata.normalize("NFKD", _clean(value))
    # Dropping combining marks makes accented letters equal to their base letter.
    base_chars = [ch for ch in decomposed if not unicodedata.combining(ch)]
    spelled_out = "".join(base_chars).replace("&", " and ")
    # Punctuation, symbols and whitespace all act as word separators.
    separated = re.sub(r"[^0-9A-Za-z\u4e00-\u9fff]+", " ", spelled_out)
    collapsed = re.sub(r"\s+", " ", separated)
    return collapsed.strip().lower()
def _lookup(value: Any, mapping: dict[str, str]) -> str:
    """Translate ``value`` through ``mapping``, trying progressively looser matches.

    Resolution order:
      1. the cleaned text, then its lower-cased form, as an exact key;
      2. for ``TEAM_NAMES`` only, the normalized text via ``TEAM_NAME_ALIASES``;
      3. the first key of ``mapping`` whose normalized form equals the
         normalized text.

    Returns '待確認' when the cleaned text is empty, and the cleaned text
    itself when nothing matches.
    """
    text = _clean(value)
    if not text:
        return '待確認'

    # Exact key first, then the lower-cased spelling; falsy hits are skipped.
    for candidate in (text, text.lower()):
        hit = mapping.get(candidate)
        if hit:
            return hit

    normalized = _normalize_lookup_key(text)

    # Alias resolution applies only to the team table (checked by identity).
    if mapping is TEAM_NAMES:
        canonical = TEAM_NAME_ALIASES.get(normalized)
        if canonical:
            via_alias = mapping.get(canonical) or mapping.get(canonical.lower())
            if via_alias:
                return via_alias

    # Last resort: compare normalized keys, first match in mapping order wins.
    return next(
        (
            translated
            for key, translated in mapping.items()
            if _normalize_lookup_key(key) == normalized
        ),
        text,
    )
def localize_team_name(value: Any) -> str:
    """Translate a team name via ``_lookup`` against ``TEAM_NAMES``.

    Blank input yields '待確認'; an unmatched name is returned stripped but
    otherwise unchanged.
    """
    return _lookup(value, mapping=TEAM_NAMES)
def localize_country(value: Any) -> str:
    """Translate a country name via ``_lookup`` against ``COUNTRIES``.

    Blank input yields '待確認'; an unmatched name is returned stripped but
    otherwise unchanged.
    """
    return _lookup(value, mapping=COUNTRIES)
def localize_city(value: Any) -> str:
    """Translate a city name via ``_lookup`` against ``CITIES``.

    Blank input yields '待確認'; an unmatched name is returned stripped but
    otherwise unchanged.
    """
    return _lookup(value, mapping=CITIES)
def localize_venue_name(value: Any) -> str:
    """Translate a venue name via ``_lookup`` against ``VENUES``.

    Blank input yields '待確認'; an unmatched name is returned stripped but
    otherwise unchanged.
    """
    return _lookup(value, mapping=VENUES)
def localize_market_type(value: Any) -> str:
    """Translate a market identifier via ``_lookup`` against ``MARKETS``.

    Blank input yields '待確認'; an unmatched identifier is returned stripped
    but otherwise unchanged.
    """
    return _lookup(value, mapping=MARKETS)
def localize_selection(value: Any) -> str:
    """Translate a bet selection into Traditional Chinese.

    Generic selections are matched case-insensitively against ``SELECTIONS``.
    Anything else is treated as a team name and resolved by
    ``localize_team_name``, so a selection naming a team gets the same alias
    and spelling tolerance as team names elsewhere in the payload.

    Returns '待確認' for blank input, and the cleaned text itself when no
    translation is found.
    """
    text = _clean(value)
    if not text:
        return '待確認'
    label = SELECTIONS.get(text.lower())
    # A missing or empty entry means "no translation"; never emit a blank label.
    if label:
        return label
    return localize_team_name(text)
def localize_status(value: Any) -> str:
    """Translate a match status into Traditional Chinese.

    The cleaned text is lower-cased and its underscores are replaced with
    hyphens before the ``STATUSES`` lookup. Blank input yields '待確認'; an
    unknown status is returned as the cleaned text.
    """
    text = _clean(value)
    if text:
        status_key = text.lower().replace('_', '-')
        return STATUSES.get(status_key, text)
    return '待確認'