diff --git a/golgg_fetch_all_game_ids.py b/golgg_fetch_all_game_ids.py
new file mode 100644
index 0000000..2fd91b7
--- /dev/null
+++ b/golgg_fetch_all_game_ids.py
@@ -0,0 +1,45 @@
+from src.utils.scrapers.golgg import GolggScraper
+from tqdm.asyncio import tqdm_asyncio
+import json
+
+
+async def main():
+    async with GolggScraper() as scraper:
+        tournaments = await scraper.get_tournaments_in_season(10)
+
+        # Step 1: Extract matches from tournaments
+        all_games = set()
+        print("Extracting matches from tournaments...")
+        match_tasks = [
+            scraper.get_matches_in_tournament(tournament["trname"])
+            for tournament in tournaments
+        ]
+        matches_by_tournament = await tqdm_asyncio.gather(
+            *match_tasks, desc="Tournaments", leave=True
+        )
+
+        # Flatten to a set of match IDs (the match dicts themselves are not hashable)
+        all_matches = {
+            match["match_id"]
+            for matches in matches_by_tournament
+            for match in matches
+        }
+
+        # Step 2: Extract games from matches
+        print("Extracting games from matches...")
+        game_tasks = [
+            scraper.get_games_in_match(match_id) for match_id in all_matches
+        ]
+        games_by_match = await tqdm_asyncio.gather(
+            *game_tasks, desc="Matches", leave=True
+        )
+
+        # Combine all game IDs
+        for games in games_by_match:
+            all_games.update(game["game_id"] for game in games)
+
+        games = list(all_games)
+        print(f"Found {len(games)} games.")
+        with open("games.json", "w") as f:
+            json.dump(games, f, indent=4)
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(main())
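Note that tqdm_asyncio.gather launches every task at once, so politeness toward gol.gg rests entirely on the semaphore inside GolggScraper. If an explicit cap were wanted at the call site instead, a sketch like the following would work (gather_bounded and its limit are illustrative helpers, not part of the scraper's API):

    import asyncio
    from tqdm.asyncio import tqdm_asyncio

    async def gather_bounded(coros, limit=15, desc=""):
        # allow at most `limit` coroutines to run concurrently
        sem = asyncio.Semaphore(limit)

        async def bounded(coro):
            async with sem:
                return await coro

        return await tqdm_asyncio.gather(*(bounded(c) for c in coros), desc=desc)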
diff --git a/requirements.txt b/requirements.txt
index d1884e7..2d64c42 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,19 +1,106 @@
-attrs==24.2.0
-cattrs==24.1.2
-certifi==2024.8.30
-charset-normalizer==3.4.0
+absl-py==2.1.0
+annotated-types==0.7.0
+anyio==4.7.0
+asttokens==3.0.0
+beautifulsoup4==4.12.3
+certifi==2024.12.14
+charset-normalizer==3.4.1
+colorama==0.4.6
+comm==0.2.2
+contourpy==1.3.1
 cssselect==1.2.0
+cycler==0.12.1
+dacite==1.8.1
+debugpy==1.8.12
+decorator==5.1.1
+distro==1.9.0
+dnspython==2.7.0
+executing==2.1.0
+filelock==3.17.0
+fonttools==4.55.3
+fsspec==2025.2.0
+greenlet==3.1.1
+grpcio==1.70.0
+h11==0.14.0
+h5py==3.13.0
+httpcore==1.0.7
+httpx==0.28.1
 idna==3.10
+ipykernel==6.29.5
+ipython==8.31.0
+jedi==0.19.2
+Jinja2==3.1.5
+jiter==0.8.2
 jmespath==1.0.1
+joblib==1.4.2
+jupyter_client==8.6.3
+jupyter_core==5.7.2
+kiwisolver==1.4.8
+llvmlite==0.44.0
 lxml==5.3.0
+Markdown==3.7
+markdownify==0.14.1
+MarkupSafe==3.0.2
+matplotlib==3.10.0
+matplotlib-inline==0.1.7
+motor==3.7.0
+mpmath==1.3.0
+nest-asyncio==1.6.0
+networkx==3.4.2
+numba==0.61.0
+numpy==2.1.3
+openai==1.63.0
+openskill==6.0.2
 packaging==24.2
+pandas==2.2.3
 parsel==1.9.1
+parso==0.8.4
+pillow==11.0.0
 platformdirs==4.3.6
-PoroPilot==0.2.10
+playwright==1.51.0
+prompt_toolkit==3.0.48
+protobuf==5.29.3
+psutil==6.1.1
+psycopg2-binary==2.9.10
+pure_eval==0.2.3
+pydantic==2.10.6
+pydantic_core==2.27.2
+pyee==12.1.1
+Pygments==2.19.1
+pymongo==4.11
+pynndescent==0.5.13
+pyparsing==3.2.0
+python-dateutil==2.9.0.post0
 python-dotenv==1.0.1
+pytz==2024.2
+pywin32==308
+pyzmq==26.2.0
 requests==2.32.3
-requests-cache==1.2.1
-six==1.16.0
-url-normalize==1.4.3
-urllib3==2.2.3
+scikit-learn==1.6.1
+scipy==1.15.1
+seaborn==0.13.2
+setuptools==75.8.0
+six==1.17.0
+sniffio==1.3.1
+soupsieve==2.6
+SQLAlchemy==2.0.40
+stack-data==0.6.3
+sympy==1.13.1
+tenacity==9.0.0
+tensorboard==2.19.0
+tensorboard-data-server==0.7.2
+threadpoolctl==3.5.0
+torch==2.6.0
+torchaudio==2.6.0
+torchvision==0.21.0
+tornado==6.4.2
+tqdm==4.67.1
+traitlets==5.14.3
+trueskill==0.4.5
+typing_extensions==4.12.2
+tzdata==2024.2
+umap-learn==0.5.7
+urllib3==2.3.0
 w3lib==2.2.1
+wcwidth==0.2.13
+Werkzeug==3.1.3
diff --git a/src/scripts/esport/calc_elo.py b/src/scripts/esport/calc_elo.py
new file mode 100644
index 0000000..80a80cd
--- /dev/null
+++ b/src/scripts/esport/calc_elo.py
@@ -0,0 +1,445 @@
+import json
+from collections import defaultdict
+from pathlib import Path
+import numpy as np
+from sklearn.metrics import auc, roc_curve
+from tqdm import tqdm
+from trueskill import Rating
+from openskill.models import PlackettLuce
+import matplotlib.pyplot as plt
+import math
+import csv
+import trueskill
+from utils.rankings._elo import expected_win_elo, update_team_elo
+from utils.rankings._os import update_openskill
+from utils.rankings._ts import team_rating, update_trueskill, expected_trueskill_win
+import statistics as stat
+
+model = PlackettLuce()
+elo_ratings = defaultdict(lambda: 1500)
+trueskill_ratings = defaultdict(lambda: Rating())
+openskill_ratings = defaultdict(lambda: model.rating())
+THRESHOLD = 0.5
+# configure the global TrueSkill environment; assigning to
+# trueskill.DRAW_PROBABILITY has no effect on rate()
+trueskill.setup(draw_probability=0.0)
+
+
+def plot_roc_auc(y_true, elo_probs, ts_probs, os_probs):
+    """
+    Plots ROC curves for three models and prints their AUCs.
+
+    y_true: list of 0/1 true outcomes (1 = team1 won)
+    elo_probs: list of predicted P(team1 wins) from ELO
+    ts_probs: list of predicted P(team1 wins) from TrueSkill
+    os_probs: list of predicted P(team1 wins) from OpenSkill
+    """
+    plt.figure(figsize=(8, 6))
+    for probs, name in (
+        (elo_probs, "ELO"),
+        (ts_probs, "TrueSkill"),
+        (os_probs, "OpenSkill"),
+    ):
+        fpr, tpr, _ = roc_curve(y_true, probs)
+        model_auc = auc(fpr, tpr)
+        plt.plot(fpr, tpr, lw=2, label=f"{name} (AUC = {model_auc:.3f})")
+
+    # random-guess line
+    plt.plot([0, 1], [0, 1], linestyle="--", color="gray", label="Chance")
+
+    plt.xlim(0, 1)
+    plt.ylim(0, 1.05)
+    plt.xlabel("False Positive Rate")
+    plt.ylabel("True Positive Rate")
+    plt.title("ROC Curves")
+    plt.legend(loc="lower right")
+    plt.grid(True)
+    plt.show()
+
+
+def plot_final_ratings(
+    ratings_dict, title, name_map: dict[str, str], top_n=10, getter=lambda r: r
+):
+    ratings = [(p, getter(r)) for p, r in ratings_dict.items()]
+    ratings.sort(key=lambda x: x[1], reverse=True)
+    top_players = ratings[:top_n]
+
+    names = [name_map[p] for p, _ in top_players]
+    values = [v for _, v in top_players]
+
+    plt.figure(figsize=(10, 6))
+    plt.barh(names[::-1], values[::-1])
+    plt.xlabel("Rating")
+    plt.title(title)
+    plt.grid(True)
+    plt.tight_layout()
+    plt.savefig(f"viz/{title}.png")
+
+
+def get_id_to_name_mapping(games):
+    mapping = {}
+    for game in games:
+        for role, player in game["t1_players"].items():
+            mapping[player["player_id"]] = player["player_name"]
+        for role, player in game["t2_players"].items():
+            mapping[player["player_id"]] = player["player_name"]
+    return mapping
+
+
+def get_name_to_id_mapping(games):
+    mapping = {}
+    for game in games:
+        for role, player in game["t1_players"].items():
+            mapping[player["player_name"]] = player["player_id"]
+        for role, player in game["t2_players"].items():
+            mapping[player["player_name"]] = player["player_id"]
+    return mapping
+
+
+def save_ratings(name_map, player_game_count):
+    rating_path = Path("ratings.csv")
+    data = []
+    for p in elo_ratings:
+        player_data = {
+            "player_id": p,
+            "player_name": name_map[p],
+            "games_played": player_game_count[p],
+            "elo_rating": elo_ratings[p],
+            "trueskill_mu": trueskill_ratings[p].mu,
+            "trueskill_sigma": trueskill_ratings[p].sigma,
+            "openskill_mu": openskill_ratings[p].mu,
+            "openskill_sigma": openskill_ratings[p].sigma,
+        }
+
+        data.append(player_data)
+
+    with open(rating_path, "w", newline="") as f:
+        csv_writer = csv.DictWriter(f, fieldnames=data[0].keys(), delimiter=";")
+        csv_writer.writeheader()
+        csv_writer.writerows(data)
+
+
+def count_player_games(games):
+    """
+    Count the number of games each player participated in.
+    Returns a defaultdict mapping player_id -> number of games.
+    """
+    game_counter = defaultdict(int)
+    for game in tqdm(games):
+        # Count games for team1 players
+        for role, player in game["t1_players"].items():
+            game_counter[player["player_id"]] += 1
+        # Count games for team2 players
+        for role, player in game["t2_players"].items():
+            game_counter[player["player_id"]] += 1
+    return game_counter
+
+
+def plot_rating_diff_histogram(win_diffs, title, filename):
+    plt.figure(figsize=(10, 6))
+    plt.hist(win_diffs, bins=100, alpha=0.6)
+    plt.title(title)
+    plt.xlabel("Rating Difference (T1 - T2)")
+    plt.ylabel("Frequency")
+    plt.grid(True)
+    plt.tight_layout()
+    plt.savefig(f"viz/{filename}.png")
+
+
+def plot_percentage_accuracy(
+    perc_count: defaultdict, perc_sum: defaultdict, num_of_games: int, name: str = "elo"
+):
+    """
+    Plots a histogram of predicted confidence percentages and the accuracy for
+    each percentage bucket.
+
+    Parameters:
+    - perc_count: dict mapping predicted percentage (int) to count of predictions
+    - perc_sum: dict mapping predicted percentage (int) to count of correct predictions
+    - num_of_games: int total number of predictions
+    """
+    # Sort percentage buckets
+    percents = sorted(perc_count.keys())
+    counts = [perc_count[p] / num_of_games for p in percents]
+    accuracies = [perc_sum.get(p, 0) / perc_count[p] for p in percents]
+
+    # Histogram of counts
+    plt.figure()
+    plt.bar(percents, counts)
+    plt.xlabel("Predicted Percentage")
+    plt.ylabel("% of Population")
+    plt.title(f"Histogram of Predicted Confidence Percentages ({name})")
+
+    # Plot of accuracy per percentage
+    plt.figure()
+    plt.plot(percents, accuracies, marker="o")
+    plt.xlabel("Predicted Percentage")
+    plt.ylabel("Accuracy")
+    plt.title(f"Accuracy per Predicted Percentage ({name})")
+    plt.ylim(0.45, 1)
+    plt.xlim(50, 100)
+
+    # 5%-interval ticks
+    # plt.xticks(np.arange(50, 101, 5))
+    plt.yticks(np.arange(0.5, 1.01, 0.05))
+
+    # grid on major ticks
+    plt.grid(which="major", linestyle="--", linewidth=0.5)
+
+    # Print overall accuracy
+    total_correct = sum(perc_sum.values())
+    overall_accuracy = total_correct / num_of_games if num_of_games > 0 else 0
+    print(f"Overall accuracy: {overall_accuracy:.2%} ({total_correct}/{num_of_games})")
+
+
+elo_diffs = []
+elo_pred = 0
+count_elo = 0
+elo_perc_count = defaultdict(int)
+elo_perc_sum = defaultdict(int)
+
+
+def elo_rating(t1, t2, t1_win):
+    global elo_pred, count_elo
+    r1, r2 = [elo_ratings[p] for p in t1], [elo_ratings[p] for p in t2]
+    t1_avg = sum(r1) / len(r1)
+    t2_avg = sum(r2) / len(r2)
+    t1_win_pred = expected_win_elo(t1_avg, t2_avg)
+    if t1_win_pred > THRESHOLD:
+        elo_pred += 1 if t1_win else 0
+        count_elo += 1
+    elif t1_win_pred < (1 - THRESHOLD):
+        elo_pred += 1 if not t1_win else 0
+        count_elo += 1
+
+    elo_diff = t1_avg - t2_avg
+    elo_diffs.append(elo_diff)
+
+    t1_up, t2_up = update_team_elo(r1, r2, t1_win)
+    for player_id, new_ranking in zip(t1 + t2, t1_up + t2_up):
+        elo_ratings[player_id] = new_ranking
+
+    # bucket each prediction by the confidence in the predicted winner
+    wp = t1_win_pred
+    tw = t1_win
+    if t1_win_pred < 0.5:
+        wp = 1 - t1_win_pred
+        tw = not t1_win
+    wp1 = round(wp * 100)
+
+    elo_perc_count[wp1] += 1
+    elo_perc_sum[wp1] += 1 if tw else 0
+
+    return t1_win_pred
+
+
+ts_perc_count = defaultdict(int)
+ts_perc_sum = defaultdict(int)
+ts_diffs = []
+ts_pred = 0
+count_ts = 0
+
+
+def ts_rating(t1, t2, t1_win):
+    global ts_pred, count_ts
+    r1, r2 = [trueskill_ratings[p] for p in t1], [trueskill_ratings[p] for p in t2]
+    t1_win_pred = expected_trueskill_win(r1, r2)
+
+    if t1_win_pred > THRESHOLD:
+        ts_pred += 1 if t1_win else 0
+        count_ts += 1
+    elif t1_win_pred < (1 - THRESHOLD):
+        ts_pred += 1 if not t1_win else 0
+        count_ts += 1
+
+    ts_mu1, ts_var1 = team_rating(r1)
+    ts_mu2, ts_var2 = team_rating(r2)
+    ts_denom = math.sqrt(len(r1) * trueskill.BETA**2 + ts_var1 + ts_var2)
+    ts_diff = (ts_mu1 - ts_mu2) / ts_denom
+
+    ts_diffs.append(ts_diff)
+
+    t1_up, t2_up = update_trueskill(r1, r2, t1_win)
+    for player_id, new_ranking in zip(t1 + t2, t1_up + t2_up):
+        trueskill_ratings[player_id] = new_ranking
+
+    wp = t1_win_pred
+    tw = t1_win
+    if t1_win_pred < 0.5:
+        wp = 1 - t1_win_pred
+        tw = not t1_win
+    # bucket by confidence in the predicted winner, as for Elo
+    wp1 = round(wp * 100)
+
+    ts_perc_count[wp1] += 1
+    ts_perc_sum[wp1] += 1 if tw else 0
+
+    return t1_win_pred
+
+
+os_diffs = []
+os_pred = 0
+os_perc_count = defaultdict(int)
+os_perc_sum = defaultdict(int)
+count_os = 0
+
+
+def os_rating(t1, t2, t1_win):
+    global os_pred, count_os
+    r1, r2 = [openskill_ratings[p] for p in t1], [openskill_ratings[p] for p in t2]
+    wp1, wp2 = model.predict_win([r1, r2])
+
+    if wp1 > THRESHOLD:
+        os_pred += 1 if t1_win else 0
+        count_os += 1
+    elif wp2 > THRESHOLD:
+        os_pred += 1 if not t1_win else 0
+        count_os += 1
+
+    os_mu1, os_var1 = team_rating(r1)
+    os_mu2, os_var2 = team_rating(r2)
+    os_denom = math.sqrt(len(r1) * model.beta**2 + os_var1 + os_var2)
+    os_diff = (os_mu1 - os_mu2) / os_denom
+    os_diffs.append(os_diff)
+    t1_up, t2_up = update_openskill(r1, r2, t1_win, model=model)
+    for player_id, new_ranking in zip(t1 + t2, t1_up + t2_up):
+        openskill_ratings[player_id] = new_ranking
+
+    wp = wp1
+    tw = t1_win
+    if wp1 < wp2:
+        wp = wp2
+        tw = not t1_win
+
+    max_wp = round(wp * 100)
+    os_perc_count[max_wp] += 1
+    os_perc_sum[max_wp] += 1 if tw else 0
+
+    return wp1
+
+
+def main():
+    global elo_ratings, trueskill_ratings, openskill_ratings
+    global elo_diffs, ts_diffs, os_diffs
+    global elo_pred, ts_pred, os_pred
+    games_path = Path("games.json")
+    if not games_path.exists():
+        print("Games file not found, please download it first.")
+        return
+
+    with open(games_path, "r") as f:
+        games = json.load(f)
+
+    games = sorted(games, key=lambda g: g["date"])
+    name_map = get_id_to_name_mapping(games)
+    player_game_count = count_player_games(games)
+    y_true = []
+    elo_probs = []
+    ts_probs = []
+    os_probs = []
+    for game in tqdm(games, desc="Processing games"):
+        t1 = [game["t1_players"][role]["player_id"] for role in game["t1_players"]]
+        t2 = [game["t2_players"][role]["player_id"] for role in game["t2_players"]]
+        t1_win = game["t1_win"]
+        t1_name = game["t1_name"]
+        t2_name = game["t2_name"]
+
+        p_elo = elo_rating(t1, t2, t1_win)
+        p_ts = ts_rating(t1, t2, t1_win)
+        p_os = os_rating(t1, t2, t1_win)
+        y_true.append(1 if t1_win else 0)
+
+        elo_probs.append(p_elo)
+        ts_probs.append(p_ts)
+        os_probs.append(p_os)
+        # if abs(elo_diff) > 300 or abs(ts_diff) > 3 or abs(os_diff) > 3:
+        #     print(
+        #         f"Game: {t1_name} vs {t2_name} (Won: {t1_name if t1_win else t2_name}) | Elo diff: {elo_diff:.2f} | TrueSkill diff: {ts_diff:.2f} | OpenSkill diff: {os_diff:.2f}"
+        #     )
+    print("len(y_true) =", len(y_true))
+    print("len(elo_probs)=", len(elo_probs))
+    print("len(ts_probs) =", len(ts_probs))
+    print("len(os_probs) =", len(os_probs))
+    print(f"Elo prediction accuracy: {elo_pred / count_elo:.2%}")
+    print(f"TrueSkill prediction accuracy: {ts_pred / count_ts:.2%}")
+    print(f"OpenSkill prediction accuracy: {os_pred / count_os:.2%}")
+    print(f"Total Elo games: {count_elo} ({count_elo / len(games) * 100:.2f}%)")
+    print(f"Total TrueSkill games: {count_ts} ({count_ts / len(games) * 100:.2f}%)")
+    print(f"Total OpenSkill games: {count_os} ({count_os / len(games) * 100:.2f}%)")
+
+    name_to_id = get_name_to_id_mapping(games)
+    team_1 = "T1 Academy"
+    team_2 = "Nongshim Esports Academy"
+    t1 = ["Haetae", "Vincenzo", "Poby", "Cypher", "Cloud"]
+    t2 = ["Kangin", "Sylvie", "Calix", "Vital", "Crack"]
+    t1 = [name_to_id[name] for name in t1]
+    t2 = [name_to_id[name] for name in t2]
+    r1 = [elo_ratings[p] for p in t1]
+    r2 = [elo_ratings[p] for p in t2]
+    avg_elo1 = sum(r1) / len(r1)
+    avg_elo2 = sum(r2) / len(r2)
+    ep = expected_win_elo(avg_elo1, avg_elo2)
+    print(f"Elo prediction for {team_1} vs {team_2}: {ep:.2f}")
+    r1, r2 = [trueskill_ratings[p] for p in t1], [trueskill_ratings[p] for p in t2]
+    tp = expected_trueskill_win(r1, r2)
+    print(f"TrueSkill prediction for {team_1} vs {team_2}: {tp:.2f}")
+    r1, r2 = [openskill_ratings[p] for p in t1], [openskill_ratings[p] for p in t2]
+    op, _ = model.predict_win([r1, r2])
+    print(f"OpenSkill prediction for {team_1} vs {team_2}: {op:.2f}")
+
+    # Elo final rating chart
+    # plot_final_ratings(elo_ratings, "Top 10 Final Elo Ratings", name_map)
+
+    # # TrueSkill mu chart
+    # plot_final_ratings(
+    #     trueskill_ratings,
+    #     "Top 10 Final TrueSkill Ratings",
+    #     name_map,
+    #     getter=lambda r: r.mu - 3 * r.sigma,
+    # )
+
+    # # OpenSkill mu chart
+    # plot_final_ratings(
+    #     openskill_ratings,
+    #     "Top 10 Final OpenSkill Ratings",
+    #     name_map,
+    #     getter=lambda r: r.mu - 3 * r.sigma,
+    # )
+    plot_roc_auc(y_true, elo_probs, ts_probs, os_probs)
+    # plot_percentage_accuracy(elo_perc_count, elo_perc_sum, len(games), "elo")
+    # plot_percentage_accuracy(ts_perc_count, ts_perc_sum, len(games), "ts")
+    # plot_percentage_accuracy(os_perc_count, os_perc_sum, len(games), "os")
+    plt.show()
+
+    # mean = stat.mean(elo_diffs)
+    # std = stat.stdev(elo_diffs)
+    # # estimate the sigma of the full distribution
+    # sigma_hat = mean * math.sqrt(math.pi / 2)
+
+    # # theoretical half-normal parameters
+    # mean_theoretical = sigma_hat * math.sqrt(2 / math.pi)
+    # std_theoretical = sigma_hat * math.sqrt(1 - 2 / math.pi)
+
+    # print("Estimated full-normal σ:", sigma_hat)
+    # print("Half-normal theoretical mean:", mean_theoretical)
+    # print("Half-normal theoretical std:", std_theoretical)
+    # plot_rating_diff_histogram(
+    #     elo_diffs,
+    #     "Elo Rating Difference Histogram",
+    #     "elo_rating_diff",
+    # )
+    # plot_rating_diff_histogram(
+    #     ts_diffs,
+    #     "TrueSkill Mu Difference Histogram",
+    #     "trueskill_diff",
+    # )
+    # plot_rating_diff_histogram(
+    #     os_diffs,
+    #     "OpenSkill Mu Difference Histogram",
+    #     "openskill_diff",
+    # )
+    # plt.show()
+
+    save_ratings(name_map, player_game_count)
+
+
+if __name__ == "__main__":
+    main()
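As a sanity check on the Elo side, here is the expectation formula from utils/rankings/_elo.py evaluated by hand; the ratings below are illustrative only:

    def expected_win_elo(r1, r2):
        return 1 / (1 + 10 ** ((r2 - r1) / 400))

    e = expected_win_elo(1500, 1600)  # a 100-point gap -> roughly a 64/36 split
    k = 32
    print(round(e, 3))                  # 0.36
    print(round(k * (1 - e), 1))        # +20.5 for the underdog if they win
    print(round(k * (0 - (1 - e)), 1))  # -20.5 for the favourite: updates are zero-sum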
diff --git a/src/scripts/esport/scrap_golgg.py b/src/scripts/esport/scrap_golgg.py
new file mode 100644
index 0000000..345bf2a
--- /dev/null
+++ b/src/scripts/esport/scrap_golgg.py
@@ -0,0 +1,127 @@
+from utils.scrapers.golgg import GolggScraper
+import json
+import asyncio
+import math
+import statistics
+from pathlib import Path
+from tqdm.asyncio import tqdm
+from tqdm import tqdm as stqdm
+
+CONCURRENCY_LIMIT = 15
+MAX_RETRIES = 5
+
+
+async def get_tournament_matches(
+    scraper: GolggScraper, tournaments: list[dict]
+) -> list[dict]:
+    all_matches = []
+    for tournament in tqdm(tournaments):
+        tournament_name = tournament["trname"]
+        matches = await scraper.get_matches_in_tournament(tournament_name)
+        all_matches.extend(matches)
+    return all_matches
+
+
+async def fetch_match_games(
+    scraper: GolggScraper, match: dict, semaphore: asyncio.Semaphore
+) -> list[dict]:
+    match_id = match.get("match_id")
+    if not match_id:
+        print("Match ID not found, skipping...")
+        return []
+
+    for attempt in range(1, MAX_RETRIES + 1):
+        async with semaphore:
+            try:
+                return await scraper.get_games_in_match(match_id)
+            except Exception as e:
+                print(f"[!][Match {match_id}] attempt {attempt} failed:", e)
+                await asyncio.sleep(attempt)  # simple back-off before retrying
+
+    print(f"[!][Match {match_id}] giving up after {MAX_RETRIES} attempts")
+    return []
+
+
+async def get_games(matches, games_path: Path) -> list[dict]:
+    semaphore = asyncio.Semaphore(CONCURRENCY_LIMIT)
+    all_games = []
+
+    async with GolggScraper(max_pages=40) as scraper:
+        tasks = [fetch_match_games(scraper, match, semaphore) for match in matches]
+
+        for coro in tqdm(asyncio.as_completed(tasks), total=len(tasks)):
+            result = await coro
+            all_games.extend(result)
+
+    with open(games_path, "w") as f:
+        json.dump(all_games, f, indent=4)
+
+    return all_games
+
+
+async def main():
+    tournaments_path = Path("data/tournaments.json")
+    matches_path = Path("data/matches.json")
+    games_path = Path("data/games.json")
+    # if tournaments_path.exists():
+    #     print("Tournaments already downloaded, skipping...")
+    #     return
+    tournaments = []
+    matches = []
+    if tournaments_path.exists():
+        with open(tournaments_path, "r") as f:
+            tournaments = json.load(f)
+
+    if matches_path.exists():
+        with open(matches_path, "r") as f:
+            matches = json.load(f)
+
+    if not tournaments and not matches:
+        async with GolggScraper(max_pages=3) as scraper:
+            tournaments = await scraper.get_all_tournaments()
+
+            with open(tournaments_path, "w") as f:
+                json.dump(tournaments, f, indent=4)
+
+            # NaN check on floats; an equality test against float("nan")
+            # can never fire, since NaN != NaN
+            is_nan = any(math.isnan(float(t["nbgames"])) for t in tournaments)
+            nbgames = [int(t["nbgames"]) for t in tournaments]
+            mean = statistics.mean(nbgames)
+            median = statistics.median(nbgames)
+            accumulated = sum(nbgames)
+            minimum = min(nbgames)
+            maximum = max(nbgames)
+
+            print("Tournaments saved to tournaments.json")
+            print("Total tournaments:", len(tournaments))
+            print("Mean number of games:", mean)
+            print("Median number of games:", median)
+            print("Total number of games:", accumulated)
+            print("Minimum number of games:", minimum)
+            print("Maximum number of games:", maximum)
+            print("Contains NaN:", is_nan)
+
+            matches = await get_tournament_matches(scraper, tournaments=tournaments)
+            with open(matches_path, "w") as f:
+                json.dump(matches, f, indent=4)
+
+            print("Matches saved to matches.json")
+            print("Total matches:", len(matches))
+
+    games = await get_games(matches, games_path)
+
+    updated_games = []
+    for match in stqdm(matches):
+        new_mgames = []
+        for game in games:
+            if game["match_id"] != match["match_id"]:
+                continue
+
+            game["date"] = match["date"]
+            game["tournament"] = match["tournament_name"]
+            new_mgames.append(game)
+
+        updated_games.extend(new_mgames)
+
+    with open(games_path, "w") as f:
+        json.dump(updated_games, f, indent=4)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
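One detail in fetch_match_games is worth spelling out: the coroutine is re-created inside the retry loop, which matters because a coroutine object can only be awaited once. The same pattern in isolation (a minimal sketch; with_retries is not part of this codebase):

    import asyncio

    async def with_retries(coro_factory, retries=5):
        # coro_factory must return a *fresh* coroutine per call,
        # since an already-awaited coroutine cannot be awaited again
        for attempt in range(1, retries + 1):
            try:
                return await coro_factory()
            except Exception as exc:
                print(f"attempt {attempt} failed: {exc}")
                await asyncio.sleep(attempt)  # linear back-off: 1s, 2s, 3s, ...
        return None

    # usage: await with_retries(lambda: scraper.get_games_in_match("56264"))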
diff --git a/src/scripts/esport/viz_elo.py b/src/scripts/esport/viz_elo.py
new file mode 100644
index 0000000..d988fd4
--- /dev/null
+++ b/src/scripts/esport/viz_elo.py
@@ -0,0 +1,244 @@
+import json
+from collections import defaultdict
+import matplotlib.pyplot as plt
+import pandas as pd
+from tqdm import tqdm
+import itertools
+import statistics
+
+# Load your games list (assuming JSON format)
+with open("games.json", "r") as f:
+    games = json.load(f)
+
+# Sort games by date (assuming date format "YYYY-MM-DD")
+games.sort(key=lambda g: g["date"])
+
+# Per-year rating history: year -> player_id -> list of post-game ratings
+player_rating_history = defaultdict(lambda: defaultdict(list))
+# Full trajectory per player as (date, rating), used for the line plot below
+player_trajectories = defaultdict(list)
+
+
+def expected(p1, p2):
+    return 1 / (1 + 10 ** ((p2 - p1) / 400))
+
+
+# Two Elo variants are tracked side by side; starting rating set to 1500
+current_ratings_new = defaultdict(lambda: 1500)
+current_ratings_old = defaultdict(lambda: 1500)
+
+# Lists to collect pre-match avg rating differences
+diffs_new = []
+diffs_old = []
+
+
+def update_elo_new(team1_ids, team2_ids, t1_win, k=32):
+    t1_avg = sum(current_ratings_new[p] for p in team1_ids) / len(team1_ids)
+    t2_avg = sum(current_ratings_new[p] for p in team2_ids) / len(team2_ids)
+    outcome = 1 if t1_win else 0
+    for p in team1_ids:
+        current_ratings_new[p] += k * (
+            outcome - expected(current_ratings_new[p], t2_avg)
+        )
+    for p in team2_ids:
+        current_ratings_new[p] += k * (
+            (1 - outcome) - expected(current_ratings_new[p], t1_avg)
+        )
+
+
+def update_elo_old(team1_ids, team2_ids, t1_win, k=32):
+    t1_avg = sum(current_ratings_old[p] for p in team1_ids) / len(team1_ids)
+    t2_avg = sum(current_ratings_old[p] for p in team2_ids) / len(team2_ids)
+    expected_t1 = 1 / (1 + 10 ** ((t2_avg - t1_avg) / 400))
+    outcome = 1 if t1_win else 0
+    for p in team1_ids:
+        current_ratings_old[p] += k * (outcome - expected_t1)
+    for p in team2_ids:
+        current_ratings_old[p] += k * ((1 - outcome) - (1 - expected_t1))
+
+
+# Process each game
+for game in tqdm(games):
+    # parse teams & result
+    team1 = [game["t1_players"][r]["player_id"] for r in game["t1_players"]]
+    team2 = [game["t2_players"][r]["player_id"] for r in game["t2_players"]]
+    t1_win = game["t1_win"]
+
+    # compute and record the pre-match diff
+    t1_new_avg = sum(current_ratings_new[p] for p in team1) / len(team1)
+    t2_new_avg = sum(current_ratings_new[p] for p in team2) / len(team2)
+    diffs_new.append(t1_new_avg - t2_new_avg)
+
+    t1_old_avg = sum(current_ratings_old[p] for p in team1) / len(team1)
+    t2_old_avg = sum(current_ratings_old[p] for p in team2) / len(team2)
+    diffs_old.append(t1_old_avg - t2_old_avg)
+
+    # update both systems
+    update_elo_new(team1, team2, t1_win)
+    update_elo_old(team1, team2, t1_win)
+    game_date = pd.to_datetime(game["date"])
+    for player_id in team1 + team2:
+        player_rating_history[game_date.year][player_id].append(
+            current_ratings_new[player_id]
+        )
+        player_trajectories[player_id].append(
+            (game_date, current_ratings_new[player_id])
+        )
+
+
+# Helper function to create a mapping from player_id to player_name
+def get_player_names(games):
+    mapping = {}
+    for game in games:
+        for role, player in game["t1_players"].items():
+            mapping[player["player_id"]] = player["player_name"]
+        for role, player in game["t2_players"].items():
+            mapping[player["player_id"]] = player["player_name"]
+    return mapping
+
+
+player_names = get_player_names(games)
+
+
+def get_top_10_each_year(player_rating_history):
+    """
+    Return two dictionaries mapping each year to that year's top 10 players
+    (player_id, rating): one ranked by the maximum rating reached within the
+    year, one by the average rating held across it.
+    """
+    top_10_by_year_max = {}
+    top_10_by_year_avg = {}
+    all_years = player_rating_history.keys()
+    min_year = min(all_years)
+    max_year = max(all_years)
+
+    for year in tqdm(range(min_year, max_year + 1)):
+        players = player_rating_history[year]
+        players_avg = {p: statistics.mean(ratings) for p, ratings in players.items()}
+        players_max = {p: max(ratings) for p, ratings in players.items()}
+
+        sorted_players_avg = sorted(
+            players_avg.items(), key=lambda x: x[1], reverse=True
+        )
+        sorted_players_max = sorted(
+            players_max.items(), key=lambda x: x[1], reverse=True
+        )
+        top_10_by_year_max[year] = sorted_players_max[:10]
+        top_10_by_year_avg[year] = sorted_players_avg[:10]
+    return top_10_by_year_max, top_10_by_year_avg
+
+
+top_10_each_year_max, top_10_each_year_avg = get_top_10_each_year(player_rating_history)
+
+
+for year, players in top_10_each_year_max.items():
+    print(f"Year {year} Top 10 Players (Max Rating):")
+    for player_id, rating in players:
+        player_name = player_names.get(player_id, "Unknown")
+        print(f"  Player ID: {player_id}, Name: {player_name} Rating: {rating:.2f}")
+
+
+for year, players in top_10_each_year_avg.items():
+    print(f"Year {year} Top 10 Players (Avg Rating):")
+    for player_id, rating in players:
+        player_name = player_names.get(player_id, "Unknown")
+        print(f"  Player ID: {player_id}, Name: {player_name} Rating: {rating:.2f}")
+
+
+# -----------------------------
+# Plotting the Rating Trajectories
+# -----------------------------
+
+# Define your highlighted set (as strings of player IDs)
+highlighted = {
+    "1250",  # Showmaker
+    # "1618",  # Czekolad
+    "48",  # Faker
+    "392",  # Peanut
+    "1501",  # Inspierd
+    "1075",  # TheShy
+    "171",  # Uzi
+    "470",  # Doinb
+    # "5947",  # Baus
+    "1629",  # Chovy
+    "3247",  # Gumayusi
+}
+
+plt.figure(figsize=(12, 8))
+
+# First, plot non-highlighted players in gray (with lower zorder)
+for player_id, history in player_trajectories.items():
+    if player_id in highlighted:
+        continue  # Skip highlighted players for now
+    df = pd.DataFrame(history, columns=["date", "rating"])
+    df["date"] = pd.to_datetime(df["date"])
+    df.sort_values("date", inplace=True)
+    plt.plot(df["date"], df["rating"], color="gray", linewidth=1, alpha=0.3, zorder=1)
+
+
+# Helper to check if a hex color is gray (R == G == B)
+def is_gray(color):
+    if color.startswith("#") and len(color) == 7:
+        r, g, b = color[1:3], color[3:5], color[5:7]
+        return r.lower() == g.lower() == b.lower()
+    return False
+
+
+# Get default color cycle and filter out gray colors
+default_colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]
+filtered_colors = [c for c in default_colors if not is_gray(c)]
+color_cycle = itertools.cycle(filtered_colors)
+
+# Plot highlighted players with distinct non-gray colors and a higher zorder.
+for player_id in highlighted:
+    if player_id not in player_trajectories:
+        print(f"Player ID {player_id} not found in player_trajectories.")
+        continue
+    history = player_trajectories[player_id]
+    df = pd.DataFrame(history, columns=["date", "rating"])
+    df["date"] = pd.to_datetime(df["date"])
+    df.sort_values("date", inplace=True)
+    color = next(color_cycle)
+    print(f"Plotting {player_id} with color {color} and rating: {df['rating']}")
+    plt.plot(
+        df["date"],
+        df["rating"],
+        label=player_names.get(player_id, player_id),
+        linewidth=1,
+        zorder=3,
+        color=color,
+    )
+
+plt.title("Player Elo Rating Trajectories Over Time")
+plt.xlabel("Date")
+plt.ylabel("Elo Rating")
+plt.legend()
+plt.tight_layout()
+
+diff_abs = [abs(diff) for diff in diffs_new]
+
+abs_mean = statistics.mean(diff_abs)
+abs_std = statistics.stdev(diff_abs)
+print(f"Abs Mean: {abs_mean:.2f}, Std: {abs_std:.2f}")
+new_mean = statistics.mean(diffs_new)
+old_mean = statistics.mean(diffs_old)
+new_std = statistics.stdev(diffs_new)
+old_std = statistics.stdev(diffs_old)
+print(f"New Elo Mean: {new_mean:.2f}, Std: {new_std:.2f}")
+print(f"Old Elo Mean: {old_mean:.2f}, Std: {old_std:.2f}")
+
+plt.figure(figsize=(10, 6))
+plt.hist(
+    diffs_new, bins=30, alpha=0.5, label="New-Elo Δ", edgecolor="black", density=True
+)
+plt.hist(
+    diffs_old, bins=30, alpha=0.5, label="Old-Elo Δ", edgecolor="black", density=True
+)
+# plt.hist(diff_abs, bins=30, alpha=0.5, label="Abs Δ", edgecolor="black", density=True)
+plt.title("Histogram of Team Avg-Rating Differences\n(New vs. Old Elo Update)")
+plt.xlabel("Team1 Avg Rating - Team2 Avg Rating")
+plt.ylabel("Density")
+plt.legend()
+plt.tight_layout()
+plt.show()
diff --git a/src/utils/rankings/_elo.py b/src/utils/rankings/_elo.py
new file mode 100644
index 0000000..417f7cf
--- /dev/null
+++ b/src/utils/rankings/_elo.py
@@ -0,0 +1,32 @@
+def expected_win_elo(r1, r2) -> float:
+    return 1 / (1 + 10 ** ((r2 - r1) / 400))
+
+
+# For Elo we use our simple update function (unchanged)
+def update_elo(p1, p2, p1_win, k=32):
+    outcome = 1 if p1_win else 0
+
+    p1_up = k * (outcome - expected_win_elo(p1, p2))
+    p2_up = k * ((1 - outcome) - expected_win_elo(p2, p1))
+
+    return p1_up, p2_up
+
+
+def update_team_elo(team1, team2, t1_win: bool, k=64):
+    t1_avg = sum(team1) / len(team1)
+    t2_avg = sum(team2) / len(team2)
+    win = 1 if t1_win else 0
+    t1_up = []
+    for pi in team1:
+        # pass k through so the team-level K-factor is actually honoured
+        p_up, _ = update_elo(pi, t2_avg, win, k)
+        t1_up.append(pi + p_up)
+
+    t2_up = []
+    for pi in team2:
+        p_up, _ = update_elo(pi, t1_avg, 1 - win, k)
+        t2_up.append(pi + p_up)
+
+    return t1_up, t2_up
diff --git a/src/utils/rankings/_os.py b/src/utils/rankings/_os.py
new file mode 100644
index 0000000..f501b5d
--- /dev/null
+++ b/src/utils/rankings/_os.py
@@ -0,0 +1,12 @@
+from openskill.models import PlackettLuce, PlackettLuceRating
+
+
+def update_openskill(
+    team1: list[PlackettLuceRating],
+    team2: list[PlackettLuceRating],
+    t1_win: bool,
+    model: PlackettLuce,
+) -> tuple[list[PlackettLuceRating], list[PlackettLuceRating]]:
+    teams = [team1, team2] if t1_win else [team2, team1]
+    updated = model.rate(teams)
+    return updated if t1_win else updated[::-1]
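A usage sketch for the wrapper above (model.rate ranks teams in the order given, so the winner is listed first and the original order is restored afterwards; the 25.0 below is openskill's default mu):

    from openskill.models import PlackettLuce
    from utils.rankings._os import update_openskill

    model = PlackettLuce()
    team1 = [model.rating() for _ in range(5)]
    team2 = [model.rating() for _ in range(5)]

    p1, p2 = model.predict_win([team1, team2])  # 0.5 / 0.5 for fresh ratings
    team1, team2 = update_openskill(team1, team2, t1_win=True, model=model)
    print(p1, team1[0].mu > 25.0)  # winners' mu rises above the 25.0 default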
diff --git a/src/utils/rankings/_ts.py b/src/utils/rankings/_ts.py
new file mode 100644
index 0000000..5371cc8
--- /dev/null
+++ b/src/utils/rankings/_ts.py
@@ -0,0 +1,27 @@
+from math import sqrt
+from trueskill import BETA, Rating, rate
+from trueskill.backends import cdf
+
+
+def team_rating(team):
+    team_mu = sum(player.mu for player in team)
+    team_sigma_squared = sum(player.sigma**2 for player in team)
+    return team_mu, team_sigma_squared
+
+
+def expected_trueskill_win(team1, team2):
+    team1_mu, team1_sigma_squared = team_rating(team1)
+    team2_mu, team2_sigma_squared = team_rating(team2)
+    delta_mu = team1_mu - team2_mu
+    # standard TrueSkill win probability: one BETA**2 performance-variance
+    # term per player, not per team
+    size = len(team1) + len(team2)
+    denom = sqrt(size * (BETA**2) + team1_sigma_squared + team2_sigma_squared)
+    return cdf(delta_mu / denom)
+
+
+def update_trueskill(team1: list[Rating], team2: list[Rating], t1_win):
+    ranked = [team1, team2] if t1_win else [team2, team1]
+    new_ratings = rate(ranked)
+
+    t1 = new_ratings[0] if t1_win else new_ratings[1]
+    t2 = new_ratings[1] if t1_win else new_ratings[0]
+
+    return t1, t2
diff --git a/src/utils/scrapers/__init__.py b/src/utils/scrapers/__init__.py
new file mode 100644
index 0000000..e69de29
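A quick check of these helpers (a sketch assuming trueskill's default environment, where every fresh Rating has mu=25 and sigma=25/3):

    from trueskill import Rating
    from utils.rankings._ts import expected_trueskill_win, update_trueskill

    t1 = [Rating() for _ in range(5)]  # mu=25, sigma=25/3 each
    t2 = [Rating() for _ in range(5)]
    print(expected_trueskill_win(t1, t2))  # 0.5 for two identical teams

    t1, t2 = update_trueskill(t1, t2, t1_win=True)
    print(expected_trueskill_win(t1, t2) > 0.5)  # True: winners are now favoured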
diff --git a/src/utils/scrapers/golgg.py b/src/utils/scrapers/golgg.py
new file mode 100644
index 0000000..de2509a
--- /dev/null
+++ b/src/utils/scrapers/golgg.py
@@ -0,0 +1,435 @@
+import asyncio
+import json
+import re
+from typing import Self
+from urllib.parse import quote
+
+import httpx
+import parsel
+from playwright.async_api import async_playwright, Page
+
+GOLGG_URL = "https://gol.gg"
+GOLGG_TOURNAMENT_API = "https://gol.gg/tournament/ajax.trlist.php"
+GOLGG_MATCH_SUMMARY = "https://gol.gg/game/stats/{}/page-summary/"
+INDEX_TO_ROLE = {
+    0: "TOP",
+    1: "JUNGLE",
+    2: "MID",
+    3: "ADC",
+    4: "SUPPORT",
+}
+
+
+class GolggScraper:
+    def __init__(self, max_pages: int = 20):
+        self.semaphore = asyncio.Semaphore(max_pages)
+
+    async def start(self, headless: bool = True) -> Self:
+        self.client = httpx.AsyncClient(
+            headers={
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)",
+            }
+        )
+        self.playwright = await async_playwright().start()
+        self.browser = await self.playwright.chromium.launch(headless=headless)
+        return self
+
+    async def stop(self):
+        await self.browser.close()
+        await self.playwright.stop()
+        if self.client:
+            await self.client.aclose()
+
+    async def __aenter__(self) -> Self:
+        return await self.start()
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        await self.stop()
+
+    async def click_consent(self, page: Page):
+        # Example if consent handling is needed
+        buttons = await page.query_selector_all("button.fc-button")
+        if len(buttons) > 1:
+            await buttons[1].click()
+
+    async def get_tournaments_in_season(self, season: int = 9) -> list[dict]:
+        response = await self.client.post(
+            GOLGG_TOURNAMENT_API,
+            data={"season": f"S{season}"},
+        )
+        data = response.json()
+        return data
+
+    async def get_all_tournaments(self) -> list[dict]:
+        result = []
+        for season in range(2, 16):
+            data = await self.get_tournaments_in_season(season)
+            result.extend(data)
+        return result
+
+    async def get_matches_in_tournament(self, tournament_name: str) -> list[dict]:
+        """Fetch and parse the match list for a single tournament."""
+        failed = []
+        encoded_trname = quote(tournament_name)
+        s = f"tournament-matchlist/{encoded_trname}/"
+
+        url = f"{GOLGG_URL}/tournament/{s}"
+
+        response = await self.client.get(url)
+        html = response.content.decode("utf-8")
+
+        sel = parsel.Selector(text=html)
+        tables = sel.css(".table_list")
+        matches_table = next(
+            (table for table in tables if "data-sort" in table.attrib), None
+        )
+
+        if not matches_table:
+            print("Couldn't find games table in", url)
+            return []
+
+        # Extract links and match IDs
+        rows = matches_table.css("tbody tr")
+        matches = []
+
+        for row in rows:
+            try:
+                href = row.css("a::attr(href)").get()
+
+                if not href:
+                    continue
+                link_text = row.css("a::text").get()
+                # Extract match ID using regex
+                pattern = r"stats/(\d+)/"
+                match = re.search(pattern, href)
+                if not match:
+                    print("Couldn't extract match id from", href)
+                    continue
+
+                match_id = match.group(1)
+                team_a, team_b = link_text.split(" vs ")
+
+                team_a = team_a.strip()
+                team_b = team_b.strip()
+                won = row.css("td.text_victory::text").get()
+                lost = row.css("td.text_defeat::text").get()
+                score = row.css("td:nth-child(3)::text").get().strip()
+
+                team_a_score, team_b_score = score.split("-")
+                team_a_score = int(team_a_score.strip())
+                team_b_score = int(team_b_score.strip())
+
+                patch = row.css("td:nth-child(6)::text").get()
+                date = row.css("td:nth-child(7)::text").get()
+                data = {
+                    "match_id": match_id,
+                    "tournament_name": tournament_name,
+                    "link": href,
+                    "sname_t1": team_a,
+                    "sname_t2": team_b,
+                    "won": won,
+                    "lost": lost,
+                    "score": score,
+                    "t1_score": team_a_score,
+                    "t2_score": team_b_score,
+                    "patch": patch,
+                    "date": date,
+                }
+            except Exception as e:
+                print("Error processing row:", e)
+                failed.append(row.get())
+                continue
+            matches.append(data)
+
+        if failed:
+            with open("failed.html", "w") as f:
+                f.write("\n".join(failed))
+        return matches
+
+    async def get_game_selector(self, game_id: str) -> parsel.Selector:
+        s = f"/game/stats/{game_id}/page-game/"
+        url = f"{GOLGG_URL}{s}"
+
+        async with self.semaphore:
+            response = await self.client.get(url)
+            html = response.content.decode("utf-8")
+
+        sel = parsel.Selector(text=html)
+        return sel
+
+    async def get_players_stats(self, game_id: str) -> dict[str, dict]:
+        s = f"/game/stats/{game_id}/page-fullstats/"
+        url = f"{GOLGG_URL}{s}"
+        async with self.semaphore:
+            response = await self.client.get(url)
+            html = response.content.decode("utf-8")
+        sel = parsel.Selector(text=html)
+        table = sel.css(".completestats")
+        rows = table.xpath("./tr")
+        result = {
+            "blue": {
+                "TOP": {},
+                "JUNGLE": {},
+                "MID": {},
+                "ADC": {},
+                "SUPPORT": {},
+            },
+            "red": {
+                "TOP": {},
+                "JUNGLE": {},
+                "MID": {},
+                "ADC": {},
+                "SUPPORT": {},
+            },
+        }
+
+        for row in rows:
+            tds = row.css("td::text").getall()
+            title, *stats = tds
+            title = title.strip()
+            title = title.replace(" ", "_").replace(":", "").replace("'", "").lower()
+            if len(stats) != 10:
+                stats = [None] * 10
+
+            if title.endswith("%"):
+                stats = [float(s[:-1]) / 100 if s else None for s in stats]
+            else:
+                stats = [
+                    (
+                        float(s)
+                        if s and s.replace("-", "").replace(".", "").isnumeric()
+                        else s
+                    )
+                    for s in stats
+                ]
+
+            blue_stats = stats[:5]
+            red_stats = stats[5:]
+
+            for i, (bs, rs) in enumerate(zip(blue_stats, red_stats)):
+                role = INDEX_TO_ROLE.get(i, "UNKNOWN")
+
+                result["blue"][role][title] = bs
+                result["red"][role][title] = rs
+
+        return result
    async def get_players_in_game(self, game_sel: parsel.Selector) -> dict[str, dict]:
        """Get the players in a game, keyed by team id and role."""

        team_table = game_sel.css(".col-cadre")[0]
        team_row = team_table.xpath("./*")[1]
        team_1_block, team_2_block = team_row.xpath("./*")
        team_1_info = team_1_block.xpath("./*")[0]
        team_2_info = team_2_block.xpath("./*")[0]
        team_1_link = team_1_info.css("a::attr(href)").get()
        team_2_link = team_2_info.css("a::attr(href)").get()
        team_1_id = re.search(r"teams/team-stats/(\d+)/", team_1_link).group(1)
        team_2_id = re.search(r"teams/team-stats/(\d+)/", team_2_link).group(1)
        t1_players_table, t2_players_table = game_sel.css(".playersInfosLine")

        def parse_players(rows) -> dict[str, dict]:
            # rows are ordered top-to-bottom, matching INDEX_TO_ROLE
            team = {}
            for i, player in enumerate(rows):
                player_td = player.css("td")[0]
                player_link = player_td.css("a")[1]
                href = player_link.css("::attr(href)").get()
                player_name = player_link.css("::text").get()
                player_id = re.search(r"player-stats/(\d+)/", href).group(1)
                role = INDEX_TO_ROLE.get(i, "UNKNOWN")
                team[role] = {
                    "player_id": player_id,
                    "player_name": player_name,
                }
            return team

        return {
            team_1_id: parse_players(t1_players_table.xpath("./tr")),
            team_2_id: parse_players(t2_players_table.xpath("./tr")),
        }

    async def get_team_stats(self, game_sel: parsel.Selector) -> dict[str, dict]:
        """Parse per-side objective stats and the game duration."""
        result = {
            "blue_id": None,
            "red_id": None,
            "gameDuration": 0,
            "blue": {
                "kills": 0,
                "towers": 0,
                "dragons": 0,
                "nashors": 0,
                "gold": 0,
            },
            "red": {
                "kills": 0,
                "towers": 0,
                "dragons": 0,
                "nashors": 0,
                "gold": 0,
            },
        }

        team_info = game_sel.css(".col-cadre")[0]
        dur_row, stats_row = team_info.xpath("./div")
        dur_text = dur_row.css("h1::text").get().strip()
        duration = None
        if dur_text:
            minutes, seconds = dur_text.split(":")
            duration = int(minutes) * 60 + int(seconds)
        result["gameDuration"] = duration
        blue, red = stats_row.xpath("./*")
        bteam, bstats, _ = blue.xpath("./*")
        rteam, rstats, _ = red.xpath("./*")
        bteam_id = bteam.css("a::attr(href)").get().strip()
        rteam_id = rteam.css("a::attr(href)").get().strip()
        bteam_id = re.search(r"teams/team-stats/(\d+)/", bteam_id).group(1)
        rteam_id = re.search(r"teams/team-stats/(\d+)/", rteam_id).group(1)
        result["blue_id"] = bteam_id
        result["red_id"] = rteam_id

        # dragon and nashor counters can be absent from the page entirely
        kills, towers, dragons, nashors, gold, _ = bstats.xpath("./*")
        kills = kills.css("span::text").get().strip()
        towers = towers.css("span::text").get().strip()
        dragons = dragons.css("span::text").get()
        nashors = nashors.css("span::text").get()
        dragons = int(dragons.strip()) if dragons else None
        nashors = int(nashors.strip()) if nashors else None

        gold = gold.css("::text").get().strip()
        result["blue"]["kills"] = int(kills) if kills else 0
        result["blue"]["towers"] = int(towers) if towers else 0
        result["blue"]["dragons"] = dragons if dragons else 0
        result["blue"]["nashors"] = nashors if nashors else 0
        result["blue"]["gold"] = float(gold[:-1]) * 1000 if gold else 0

        kills, towers, dragons, nashors, gold, _ = rstats.xpath("./*")
        kills = kills.css("span::text").get().strip()
        towers = towers.css("span::text").get().strip()
        dragons = dragons.css("span::text").get()
        nashors = nashors.css("span::text").get()
        gold = gold.css("span::text").get().strip()
        dragons = int(dragons.strip()) if dragons else None
        nashors = int(nashors.strip()) if nashors else None

        result["red"]["kills"] = int(kills) if kills else 0
        result["red"]["towers"] = int(towers) if towers else 0
        result["red"]["dragons"] = dragons if dragons else 0
        result["red"]["nashors"] = nashors if nashors else 0
        result["red"]["gold"] = float(gold[:-1]) * 1000 if gold else 0

        return result

    async def get_games_in_match(self, match_id) -> list[dict]:
        """Get the games in a match, with players, stats, and the winner."""
        s = f"/game/stats/{match_id}/page-summary/"
        url = f"{GOLGG_URL}{s}"
        async with self.semaphore:
            response = await self.client.get(url)
            html = response.content.decode("utf-8")

        sel = parsel.Selector(text=html)
        navbar = sel.css("#gameMenuToggler")
        if not navbar:
            print("Couldn't find navbar in", url)
            return []

        game_links = [
            el
            for el in navbar.css("li > a")
            if (el.css("::text").get() or "").strip().lower().startswith("game")
        ]
        match_table = sel.css(".col-cadre")[0]
        teams, *games_sel = match_table.xpath("./*")
        t1_link, t2_link = teams.css("a")
        t1_href = t1_link.css("::attr(href)").get()
        t2_href = t2_link.css("::attr(href)").get()
        pattern = r"teams/team-stats/(\d+)/"
        t1_id = re.search(pattern, t1_href).group(1)
        t2_id = re.search(pattern, t2_href).group(1)
        t1_name = t1_link.css("::text").get()
        t2_name = t2_link.css("::text").get()
        games = []
        for i, game_sel in enumerate(games_sel):
            team_1, _, team_2 = game_sel.xpath("./*")
            game_link = game_links[i]
            game_href = game_link.css("::attr(href)").get()
            pattern = r"game/stats/(\d+)/"
            game_id = re.search(pattern, game_href).group(1)
            t1_win = bool(team_1.css(".text_victory"))

            game_sel = await self.get_game_selector(game_id)
            tstats = await self.get_team_stats(game_sel=game_sel)
            t1_side = "blue" if t1_id == tstats["blue_id"] else "red"
            t2_side = "red" if t1_side == "blue" else "blue"

            players = await self.get_players_in_game(game_sel=game_sel)
            pstats = await self.get_players_stats(game_id=game_id)
            for role, player in players[t1_id].items():
                player["stats"] = pstats[t1_side][role]

            for role, player in players[t2_id].items():
                player["stats"] = pstats[t2_side][role]

            games.append(
                {
                    "game_id": game_id,
                    "match_id": match_id,
                    "t1_id": t1_id,
                    "t2_id": t2_id,
                    "t1_name": t1_name,
                    "t2_name": t2_name,
                    "t1_win": t1_win,
                    "t2_win": not t1_win,
                    "t1_players": players[t1_id],
                    "t2_players": players[t2_id],
                    "t1_stats": tstats[t1_side],
                    "t2_stats": tstats[t2_side],
                    "game_duration": tstats["gameDuration"],
                }
            )

        return games


async def main():
    match_id = "56264"
    async with GolggScraper() as scraper:
        games = await scraper.get_games_in_match(match_id=match_id)
        with open("test_games.json", "w") as f:
            json.dump(games, f, indent=4)


if __name__ == "__main__":
    asyncio.run(main())
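A quick way to sanity-check scraper output before handing it to calc_elo.py is to assert the per-game keys built in get_games_in_match above (a small sketch; note that the date and tournament fields are only added later, when scrap_golgg.py merges in match metadata):

    import json

    REQUIRED = {
        "game_id", "match_id", "t1_id", "t2_id", "t1_name", "t2_name",
        "t1_win", "t2_win", "t1_players", "t2_players",
        "t1_stats", "t2_stats", "game_duration",
    }

    with open("test_games.json") as f:
        games = json.load(f)

    for game in games:
        missing = REQUIRED - game.keys()
        assert not missing, f"game {game.get('game_id')} is missing {missing}"
    print(f"{len(games)} games OK")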