Files
2026FIFAWorldCup/platform/backend/app/analytics/crawler.py

135 lines
5.2 KiB
Python

import asyncio
import httpx
import os
import logging
from datetime import datetime, timezone
from sqlalchemy.future import select
from sqlalchemy.exc import SQLAlchemyError
from app.db.base import SessionFactory
from app.db.models import Match, MatchStatus, OddsHistory, Team, Venue, Bookmaker
# Root logging is configured at INFO as an import-time side effect; the crawler
# then logs through its own named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("fifa2026-crawler")

# The Odds API settings. The key and sport key are read from the environment;
# an empty key makes fetch_odds() skip the network call.
THE_ODDS_BASE = "https://api.the-odds-api.com/v4"
THE_ODDS_API_KEY = os.getenv("THE_ODDS_API_KEY", "")
THE_ODDS_SPORT_KEY = os.getenv("THE_ODDS_SPORT_KEY", "soccer_fifa_world_cup")
async def fetch_odds():
    """Fetch current odds from The Odds API and pass them to ``process_odds_data``.

    Returns early (with a warning) when ``THE_ODDS_API_KEY`` is empty or still
    set to the placeholder value. Network, HTTP-status, JSON-decoding and
    persistence failures are logged rather than raised, so this coroutine
    always returns ``None``.
    """
    if not THE_ODDS_API_KEY or THE_ODDS_API_KEY == "your_the_odds_api_key":
        logger.warning("No valid THE_ODDS_API_KEY found. Crawler will not fetch real data.")
        return

    url = f"{THE_ODDS_BASE}/sports/{THE_ODDS_SPORT_KEY}/odds"
    params = {
        "apiKey": THE_ODDS_API_KEY,
        "regions": "eu",
        "markets": "h2h,spreads,totals",
        "oddsFormat": "decimal",
    }

    # Only the base URL is logged; the API key travels in the query string.
    logger.info("Fetching odds from %s", url)
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(url, params=params, timeout=15.0)
            response.raise_for_status()
            data = response.json()
    except httpx.HTTPStatusError as e:
        # str(e) contains the full request URL, including the apiKey query
        # parameter, so log only the status code to keep the secret out of logs.
        logger.error("Failed to fetch odds: HTTP %s", e.response.status_code)
        return
    except (httpx.HTTPError, ValueError) as e:
        # ValueError covers a response body that is not valid JSON. Redact the
        # key defensively in case the message echoes the request URL.
        logger.error("Failed to fetch odds: %s", str(e).replace(THE_ODDS_API_KEY, "***"))
        return

    # Persistence runs after the HTTP client is closed and under its own guard,
    # so a processing failure is not misreported as a fetch failure. The crawler
    # stays best-effort: the error is logged with a traceback, not raised.
    try:
        await process_odds_data(data)
    except Exception:
        logger.exception("Failed to process odds data")
async def process_odds_data(data: list[dict]):
    """Persist a batch of odds events and their bookmaker prices.

    For each event a ``Match`` row is created if one with the event's id does
    not exist yet (existing matches are left unchanged), and one
    ``OddsHistory`` row is added per bookmaker/market/outcome price. The whole
    batch is committed in a single transaction.

    Malformed records are skipped instead of aborting the batch: events
    lacking an id, a team name or a parseable ``commence_time``; bookmakers
    without a ``key``; outcomes whose ``price`` is not a number.

    Args:
        data: Decoded event list. Each event is read for the keys ``id``,
            ``home_team``, ``away_team``, ``commence_time`` and
            ``bookmakers``. An empty or ``None`` value is a no-op.

    Returns:
        None. A ``SQLAlchemyError`` is rolled back and logged, not raised.
    """
    if not data:
        return

    async with SessionFactory() as session:
        try:
            # Every match is attached to the same placeholder venue, so it is
            # resolved once instead of once per event.
            venue = await get_or_create_venue(session, "Unknown Stadium", "Unknown", "Unknown")

            for event in data:
                match_id = event.get("id")
                home_team_name = event.get("home_team")
                away_team_name = event.get("away_team")
                kickoff = _parse_commence_time(event.get("commence_time"))
                if not (match_id and home_team_name and away_team_name and kickoff):
                    logger.warning("Skipping malformed odds event (id=%r)", match_id)
                    continue

                home_team = await get_or_create_team(session, home_team_name)
                away_team = await get_or_create_team(session, away_team_name)

                # Insert the match only if it is not already stored.
                match = await session.get(Match, match_id)
                if not match:
                    match = Match(
                        id=match_id,
                        home_team_id=home_team.id,
                        away_team_id=away_team.id,
                        venue_id=venue.id,
                        match_time_utc=kickoff,
                        status=MatchStatus.PRE_MATCH,
                    )
                    session.add(match)

                # "or []" also covers keys that are present but null.
                for bm in event.get("bookmakers") or []:
                    bm_key = bm.get("key")
                    if not bm_key:
                        continue
                    bookmaker = await get_or_create_bookmaker(session, bm_key, bm.get("title"))
                    for market in bm.get("markets") or []:
                        market_key = market.get("key")
                        for outcome in market.get("outcomes") or []:
                            price = outcome.get("price")
                            # bool is an int subclass; reject it explicitly.
                            if isinstance(price, bool) or not isinstance(price, (int, float)):
                                continue
                            session.add(
                                OddsHistory(
                                    match_id=match.id,
                                    bookmaker_id=bookmaker.id,
                                    market_type=market_key,
                                    selection=outcome.get("name"),
                                    decimal_odds=price,
                                    implied_probability=_implied_probability(price),
                                    recorded_at=datetime.now(timezone.utc),
                                )
                            )

            await session.commit()
            logger.info("Successfully updated odds in the database.")
        except SQLAlchemyError as e:
            await session.rollback()
            logger.error(f"Database error while saving odds: {e}")


def _parse_commence_time(value):
    """Parse an ISO-8601 string (optionally ``Z``-suffixed); return ``None`` if unusable."""
    if not isinstance(value, str):
        return None
    try:
        return datetime.fromisoformat(value.replace("Z", "+00:00"))
    except ValueError:
        return None


def _implied_probability(price):
    """Return ``1 / price`` for decimal odds above 1, otherwise 0."""
    return 1.0 / price if price > 1 else 0
async def get_or_create_team(session, name: str) -> Team:
    """Return the ``Team`` whose name equals *name*, adding one if none is found.

    A newly created team gets a random UUID4 string as its id and is flushed
    to the session; committing is left to the caller.
    """
    query = select(Team).where(Team.name == name)
    existing = (await session.execute(query)).scalars().first()
    if existing:
        return existing

    import uuid

    created = Team(id=str(uuid.uuid4()), name=name)
    session.add(created)
    await session.flush()
    return created
async def get_or_create_venue(session, name: str, city: str, country: str) -> Venue:
    """Return the ``Venue`` whose name equals *name*, adding one if none is found.

    The lookup matches on name only; *city* and *country* are used solely when
    a new venue is created. New venues get a random UUID4 string id and the
    timezone ``"UTC"``, and are flushed to the session without committing.
    """
    query = select(Venue).where(Venue.name == name)
    existing = (await session.execute(query)).scalars().first()
    if existing:
        return existing

    import uuid

    created = Venue(
        id=str(uuid.uuid4()),
        name=name,
        city=city,
        country=country,
        timezone="UTC",
    )
    session.add(created)
    await session.flush()
    return created
async def get_or_create_bookmaker(session, id: str, name: str) -> Bookmaker:
    """Return the ``Bookmaker`` with primary key *id*, adding one if none is found.

    *name* is used only when a new bookmaker is created. The new row is
    flushed to the session; committing is left to the caller.
    """
    existing = await session.get(Bookmaker, id)
    if existing:
        return existing

    created = Bookmaker(id=id, name=name)
    session.add(created)
    await session.flush()
    return created
if __name__ == "__main__":
    # Script entry point: run a single fetch-and-store cycle, then exit.
    crawl = fetch_odds()
    asyncio.run(crawl)