Files
2026FIFAWorldCup/platform/backend/app/analytics/vig_remover.py

136 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""莊家抽水Vig去除工具。"""
from __future__ import annotations
from typing import Callable, List, Sequence
import numpy as np
from scipy.optimize import minimize_scalar
def calculate_overround(odds: Sequence[float]) -> float:
    """Return the bookmaker's overround for a set of decimal odds.

    The overround is ``sum(1 / odds_i)``. A value greater than 1 means the
    quoted prices contain a bookmaker margin (vig).

    Args:
        odds: Decimal odds of every outcome in the market. Any sequence
            that NumPy can convert to a float array is accepted, including
            ``numpy.ndarray``.

    Returns:
        The sum of the implied probabilities as a Python ``float``.

    Raises:
        ValueError: If ``odds`` is ``None`` or empty, or if any price is
            not strictly greater than 1 (NaN included).
    """
    if odds is None:
        raise ValueError('odds 不可為空')
    odds_array = np.asarray(odds, dtype=float)
    # Test the converted array's size instead of ``not odds``: truthiness of
    # a multi-element ndarray raises an unrelated "ambiguous" ValueError.
    if odds_array.size == 0:
        raise ValueError('odds 不可為空')
    # ``not all(> 1)`` also rejects NaN, which ``any(<= 1)`` lets through.
    if not np.all(odds_array > 1):
        raise ValueError('賠率必須全部大於 1')
    return float(np.sum(1.0 / odds_array))
def remove_margin_basic(odds: Sequence[float]) -> List[float]:
    """Strip the bookmaker margin by proportional normalisation.

    Each price is converted to its implied probability ``1 / odds_i`` and
    the vector is divided by its sum (the overround) so that the returned
    probabilities add up to 1.

    Args:
        odds: Decimal odds of every outcome in the market.

    Returns:
        Margin-free probabilities, in the same order as ``odds``.

    Raises:
        ValueError: If ``odds`` is empty, or contains a zero, negative or
            NaN price, or if the resulting overround is not positive.
    """
    odds_array = np.asarray(odds, dtype=float)
    # Reject prices that cannot be inverted into a probability. Without this
    # a zero price raises ZeroDivisionError and a negative price silently
    # produces negative "probabilities". ``not all(> 0)`` also catches NaN.
    if odds_array.size == 0 or not np.all(odds_array > 0):
        raise ValueError('無效 odds無法計算去水')
    implied = 1.0 / odds_array
    overround = implied.sum()
    # Only reachable when every price is infinite (all implied values are 0).
    if overround <= 0:
        raise ValueError('無效 odds無法計算去水')
    return [float(p) for p in implied / overround]
def _shin_probabilities(z: float, implied: np.ndarray) -> np.ndarray:
    """Return the Shin-model probabilities for a given insider share ``z``.

    Shin's model links each quoted implied probability ``pi_i = 1 / odds_i``
    to the underlying probability ``p_i`` through

        p_i = (sqrt(z**2 + 4 * (1 - z) * pi_i**2 / B) - z) / (2 * (1 - z))

    where ``B = sum(pi_i)`` is the overround. The expression evaluated here
    is the algebraically equivalent form

        p_i = 2 * a_i / (sqrt(z**2 + 4 * (1 - z) * a_i) + z),  a_i = pi_i**2 / B

    which avoids subtracting two nearly equal numbers and stays finite as
    ``z`` approaches 1.

    The returned vector is NOT normalised: it sums to 1 only for the ``z``
    that solves the model.
    """
    scaled = implied ** 2 / implied.sum()
    return 2.0 * scaled / (np.sqrt(z * z + 4.0 * (1.0 - z) * scaled) + z)


def _shin_objective(z: float, observed: np.ndarray) -> float:
    """Squared violation of the ``sum(p_i) = 1`` constraint at ``z``.

    Args:
        z: Candidate insider share; values outside ``[0, 1)`` are penalised.
        observed: Implied probabilities ``1 / odds_i`` taken from the quotes.

    Returns:
        ``(sum_i p_i(z) - 1) ** 2`` with ``p_i`` from the Shin model, or a
        large penalty (``1e9``) when ``z`` is out of range. Every ``p_i`` is
        decreasing in ``z``, so this objective has a single minimum and its
        minimiser is the Shin ``z`` whenever the overround exceeds 1.
    """
    if not 0.0 <= z < 1:
        return 1e9
    residual = float(_shin_probabilities(z, observed).sum()) - 1.0
    return residual * residual


def remove_margin_shin(odds: Sequence[float]) -> List[float]:
    """Strip the bookmaker margin with Shin's method.

    Steps:
        1. Convert the decimal odds to implied probabilities.
        2. Find the insider share ``z`` for which the Shin-model
           probabilities sum to 1.
        3. Return those probabilities, normalised to remove rounding drift.

    Unlike proportional normalisation, this takes more margin out of
    longshots than out of favourites.

    Args:
        odds: Decimal odds of every outcome; each must be greater than 1.

    Returns:
        Margin-free probabilities, in the same order as ``odds``.

    Raises:
        ValueError: If ``odds`` is empty or any price is not strictly
            greater than 1 (NaN included).
    """
    odds_array = np.asarray(odds, dtype=float)
    if odds_array.size == 0:
        raise ValueError('odds 不可為空')
    # ``not all(> 1)`` rejects NaN as well as prices <= 1.
    if not np.all(odds_array > 1):
        raise ValueError('賠率必須全部大於 1')

    implied = 1.0 / odds_array
    overround = implied.sum()

    if overround <= 1.0:
        # No margin to attribute to insiders: the model has no solution with
        # z >= 0, so fall back to proportional normalisation (z = 0).
        true_prob = implied / overround
    elif implied.size == 2:
        # Two-outcome markets have an exact closed form: Shin's solution
        # removes half of the excess from each side. Both values stay
        # positive because every implied probability is below 1.
        true_prob = implied - (overround - 1.0) / 2.0
    else:
        # General case: one-dimensional search for z over [0, 1].
        result = minimize_scalar(
            _shin_objective,
            args=(implied,),
            bounds=(0.0, 1.0),
            method='bounded',
            options={'xatol': 1e-12},
        )
        if result.success:
            true_prob = _shin_probabilities(float(result.x), implied)
        else:
            # Best effort: keep returning a valid distribution.
            true_prob = implied / overround

    true_prob = true_prob / true_prob.sum()
    return [float(p) for p in true_prob]
def prob_to_decimal_odds(true_probs: Sequence[float]) -> List[float]:
    """Convert margin-free probabilities back into fair decimal odds.

    Each probability ``p`` maps to ``odds = 1 / p``, rounded to four decimal
    places. When the probabilities do not already sum to (approximately) 1
    they are rescaled by their total first.

    Args:
        true_probs: Outcome probabilities; every value must be positive.

    Returns:
        Fair decimal odds in the same order as ``true_probs``.

    Raises:
        ValueError: If any probability is zero or negative.
    """
    weights = np.asarray(true_probs, dtype=float)
    if (weights <= 0).any():
        raise ValueError('機率需大於 0')

    mass = weights.sum()
    # Rescale only when the total is measurably different from 1.
    if np.isclose(mass, 1.0, atol=1e-6):
        normalised = weights
    else:
        normalised = weights / mass

    fair_odds = []
    for share in normalised:
        fair_odds.append(round(float(1.0 / share), 4))
    return fair_odds
def compare_bookmaker_true_prob(
    implied_odds: Sequence[float],
    transform: Callable[[Sequence[float]], Sequence[float]] = remove_margin_shin,
) -> dict[str, list[float]]:
    """Put the quoted odds next to their margin-free counterparts.

    The result is a plain dictionary intended for direct display by a
    front end.

    Args:
        implied_odds: Decimal odds as quoted by the bookmaker.
        transform: Margin-removal function that maps the odds to
            probabilities; defaults to ``remove_margin_shin``.

    Returns:
        A dictionary with three entries:
            ``implied_prob``: raw ``1 / odds`` values, margin included.
            ``true_implied_prob``: the output of ``transform``.
            ``true_decimal_odds``: fair odds rebuilt from that output.
    """
    # Run the transform first so its validation errors surface before any
    # raw inversion is attempted.
    fair_probs = transform(implied_odds)
    quoted_probs = [float(1.0 / price) for price in implied_odds]
    fair_odds = prob_to_decimal_odds(fair_probs)

    comparison = {}
    comparison['implied_prob'] = quoted_probs
    comparison['true_implied_prob'] = fair_probs
    comparison['true_decimal_odds'] = fair_odds
    return comparison