Spaces:
Sleeping
Sleeping
from sqlalchemy import create_engine, Column, Integer, String, ForeignKey, Text, Float, Boolean, UniqueConstraint | |
from sqlalchemy.orm import declarative_base, sessionmaker | |
import pandas as pd | |
import os | |
import json | |
# SQLite database file; the path in the URL is relative to the directory the
# process is started from. SQL echoing is switched off.
engine = create_engine(
    "sqlite:///../data/games.db",
    echo=False,
)

# Declarative base class shared by every ORM model below.
Base = declarative_base()
class Game(Base):
    """ORM model for the ``games`` table: one row per match."""

    __tablename__ = "games"

    id = Column(Integer, primary_key=True)
    timestamp = Column(String)

    # Result: the raw score value plus one column per side.
    score = Column(String)
    goal_home = Column(Integer)
    goal_away = Column(Integer)

    round = Column(String)

    # Both sides of the fixture reference rows in ``teams``.
    home_team_id = Column(Integer, ForeignKey("teams.id"))
    away_team_id = Column(Integer, ForeignKey("teams.id"))

    # Fixture metadata, all stored as text.
    venue = Column(String)
    referee = Column(String)
    attendance = Column(String)
    date = Column(String)
    season = Column(String)

    league_id = Column(Integer, ForeignKey("leagues.id"))
class GameLineup(Base):
    """ORM model for ``game_lineup``: one person (player or coach) in one game."""

    __tablename__ = "game_lineup"

    id = Column(Integer, primary_key=True)
    game_id = Column(Integer, ForeignKey("games.id"))
    team_id = Column(Integer, ForeignKey("teams.id"))
    # ``players.hash`` is a String primary key, so the referencing column must
    # be a String too. Declaring it Integer gives the SQLite column INTEGER
    # affinity, which can rewrite numeric-looking hashes and break the join.
    player_id = Column(String, ForeignKey("players.hash"))
    shirt_number = Column(String)
    position = Column(String)
    starting = Column(Boolean)
    captain = Column(Boolean)
    # True for the row that describes the team's coach rather than a player.
    coach = Column(Boolean)
    tactics = Column(String)

    # A person may appear at most once per game.
    __table_args__ = (
        UniqueConstraint("game_id", "player_id", name="uc_game_id_player_id"),
    )
class Team(Base):
    """ORM model for the ``teams`` table: a numeric id and the team name."""

    __tablename__ = "teams"

    id = Column(Integer, primary_key=True)
    name = Column(String)
class Player(Base):
    """ORM model for the ``players`` table, keyed by the player's hash string."""

    __tablename__ = "players"

    # The hash string is the primary key; there is no numeric id column.
    hash = Column(String, primary_key=True)
    name = Column(String)
    country = Column(String)
class Caption(Base):
    """ORM model for the ``captions`` table: one captioned moment of a game."""

    __tablename__ = "captions"

    id = Column(Integer, primary_key=True)
    game_id = Column(Integer, ForeignKey("games.id"))

    # When the caption applies: clock value, period, and frame position.
    game_time = Column(String)
    period = Column(Integer)
    frame_stamp = Column(Integer)

    # What the caption says.
    label = Column(String)
    description = Column(Text)

    # Flags attached to the caption.
    important = Column(Boolean)
    visibility = Column(Boolean)
class Commentary(Base):
    """ORM model for the ``commentary`` table: one timed commentary segment."""

    __tablename__ = "commentary"

    id = Column(Integer, primary_key=True)
    game_id = Column(Integer, ForeignKey("games.id"))
    period = Column(Integer)

    # Start and end of the segment, stored as floats.
    event_time_start = Column(Float)
    event_time_end = Column(Float)

    description = Column(Text)
class League(Base):
    """ORM model for the ``leagues`` table: a numeric id and the league name."""

    __tablename__ = "leagues"

    id = Column(Integer, primary_key=True)
    name = Column(String)
class Event(Base):
    """ORM model for the ``events`` table: one labelled action within a game."""

    __tablename__ = "events"

    id = Column(Integer, primary_key=True)
    game_id = Column(Integer, ForeignKey("games.id"))

    # Position of the action: period, clock value, and frame position.
    period = Column(Integer)
    game_time = Column(Integer)

    team_id = Column(Integer, ForeignKey("teams.id"))
    frame_stamp = Column(Integer)
    label = Column(String)
    visibility = Column(Boolean)
class Augmented_Team(Base):
    """ORM model for ``augmented_teams``: an additional name attached to a team."""

    __tablename__ = "augmented_teams"

    id = Column(Integer, primary_key=True)
    team_id = Column(Integer, ForeignKey("teams.id"))
    augmented_name = Column(String)
class Augmented_League(Base):
    """ORM model for ``augmented_leagues``: an additional name attached to a league."""

    __tablename__ = "augmented_leagues"

    id = Column(Integer, primary_key=True)
    league_id = Column(Integer, ForeignKey("leagues.id"))
    augmented_name = Column(String)
class Player_Event_Label(Base):
    """ORM model for ``player_event_labels``: lookup table of player-event types."""

    __tablename__ = "player_event_labels"

    id = Column(Integer, primary_key=True)
    label = Column(String)
class Player_Event(Base):
    """ORM model for ``player_events``: one event attributed to a player in a game."""

    __tablename__ = "player_events"

    id = Column(Integer, primary_key=True)
    game_id = Column(Integer, ForeignKey("games.id"))
    # ``players.hash`` is a String primary key, so both columns that reference
    # it are Strings. Declaring them Integer gives the SQLite columns INTEGER
    # affinity, which can rewrite numeric-looking hashes and break the join.
    player_id = Column(String, ForeignKey("players.hash"))
    # Minute of the game; kept as text so stoppage-time values such as
    # "45+2" survive.
    time = Column(String)
    type = Column(Integer, ForeignKey("player_event_labels.id"))
    # Second player involved in the event, if any (for example the other
    # player in a substitution).
    linked_player = Column(String, ForeignKey("players.hash"))
# Create every table declared on ``Base`` that does not exist yet.
Base.metadata.create_all(engine)

# Factory for database sessions bound to the engine above.
Session = sessionmaker(bind=engine)
def extract_time_from_player_event(time: str) -> str:
    """Return the part of *time* that precedes the first apostrophe.

    The result stays a string because stoppage-time values such as ``45+2``
    are not plain integers. When *time* contains no apostrophe it is returned
    unchanged.
    """
    minute, _, _ = time.partition("'")
    return minute
def get_or_create(session, model, **kwargs):
    """Return the first *model* row matching *kwargs*, creating it if absent.

    Args:
        session: Session used for the lookup and, if needed, the insert.
        model: Mapped class to query and instantiate.
        **kwargs: Column values used both as the filter and as the
            constructor arguments of a new row.

    Returns:
        The existing row, or the newly added one. A new row is committed
        before it is returned.
    """
    existing = session.query(model).filter_by(**kwargs).first()
    if existing:
        return existing

    created = model(**kwargs)
    session.add(created)
    session.commit()
    return created
def process_game_data(data, data2, league, season):
    """Store one game together with its lineups, player events and captions.

    Args:
        data: Parsed caption-label document. Required keys: ``gameHomeTeam``,
            ``gameAwayTeam``, ``score``, ``round``, ``venue``, ``gameDate``,
            ``timestamp``, ``lineup`` and ``annotations``. Optional keys:
            ``referee_found``, ``referee`` and ``attendance``.
        data2: Second label document. Currently unused; kept so existing
            callers keep working.
        league: League name; the matching ``League`` row is created on demand.
        season: Season label stored on the game row.

    Returns:
        Tuple ``(game_id, home_team_id, away_team_id)``.
    """
    session = Session()
    try:
        home_team = data["gameHomeTeam"]
        away_team = data["gameAwayTeam"]
        score = data["score"]
        # NOTE(review): this takes the first and last element of ``score``.
        # If ``score`` is a string, a two-digit goal count would be cut to a
        # single character -- confirm the upstream format.
        home_score = score[0]
        away_score = score[-1]
        round_ = data["round"]
        venue = data["venue"][0]
        referee = data.get("referee_found", None)
        referee = referee[0] if referee else data.get("referee", None)
        date = data["gameDate"]
        timestamp = data["timestamp"]
        attendance = data.get("attendance", [])
        attendance = attendance[0] if attendance else None

        home_team = get_or_create(session, Team, name=home_team)
        away_team = get_or_create(session, Team, name=away_team)

        # A game is identified by its timestamp and home team.
        game = session.query(Game).filter_by(timestamp=timestamp, home_team_id=home_team.id).first()
        league = get_or_create(session, League, name=league)
        if not game:
            game = Game(timestamp=timestamp, score=score, goal_home=home_score, goal_away=away_score,
                        round=round_, home_team_id=home_team.id, away_team_id=away_team.id,
                        venue=venue, date=date, attendance=attendance, season=season,
                        league_id=league.id, referee=referee)
            session.add(game)
            session.commit()

        # Lineups, per-player events and the coach for each side.
        for side in ("home", "away"):
            team_id = home_team.id if side == "home" else away_team.id
            team_lineup = data["lineup"][side]
            tactic = team_lineup["tactic"]

            for player_data in team_lineup["players"]:
                player_hash = player_data["hash"]
                name = player_data["long_name"]
                if " " not in name:
                    # Some players are missing their first name; the
                    # placeholder helps with later name searches.
                    name = "NULL " + name
                number = player_data["shirt_number"]
                captain = player_data["captain"] == "(C)"
                starting = player_data["starting"]
                country = player_data["country"]
                position = player_data["lineup"]
                facts = player_data.get("facts", None)  # Facts might be empty

                player = get_or_create(session, Player, hash=player_hash, name=name, country=country)
                game_lineup = GameLineup(game_id=game.id, team_id=team_id, player_id=player.hash,
                                         shirt_number=number, position=position, starting=starting,
                                         captain=captain, coach=False, tactics=tactic)
                if facts:
                    for fact in facts:
                        fact_type = fact["type"]
                        fact_time = extract_time_from_player_event(fact["time"])
                        event_label = get_or_create(session, Player_Event_Label, id=int(fact_type))
                        linked_player = fact.get("linked_player_hash", None)
                        player_event = Player_Event(game_id=game.id, player_id=player.hash,
                                                    time=fact_time, type=event_label.id,
                                                    linked_player=linked_player)
                        session.add(player_event)
                session.add(game_lineup)

            # The coach is stored as a player row plus a lineup row flagged
            # ``coach=True``.
            coach = team_lineup["coach"][0]
            coach_hash = coach["hash"]
            coach_name = coach["long_name"]
            if " " not in coach_name:
                # Same placeholder as for players. The original assigned it to
                # ``name``, so it never reached the coach row.
                coach_name = "NULL " + coach_name
            coach_country = coach["country"]
            coach_player = get_or_create(session, Player, hash=coach_hash, name=coach_name,
                                         country=coach_country)
            coach_lineup = GameLineup(game_id=game.id, team_id=team_id, player_id=coach_player.hash,
                                      shirt_number=None, position=None, starting=None, captain=False,
                                      coach=True, tactics=tactic)
            session.add(coach_lineup)

        # Commit all lineup changes at once.
        session.commit()

        # Captions.
        for annotation in data["annotations"]:
            period, game_time = convert_to_seconds(annotation["gameTime"])
            label = annotation["label"]
            # Rename the terse source labels.
            if label == "soccer-ball":
                label = "goal"
            elif label == "y-card":
                label = "yellow card"
            elif label == "r-card":
                label = "red card"
            description = annotation["description"]
            important = annotation["important"] == "true"
            # True if shown, False if not.
            visible = annotation["visibility"] == "shown"
            position = int(annotation["position"])
            caption = Caption(game_id=game.id, game_time=game_time, period=period, label=label,
                              description=description, important=important, visibility=visible,
                              frame_stamp=position)
            session.add(caption)
        session.commit()

        # The ids are read while the session is still open; the ``finally``
        # clause closes it afterwards.
        return game.id, home_team.id, away_team.id
    finally:
        session.close()
def process_player_data(data):
    """Placeholder: player data is not processed yet; always returns ``None``."""
    return None
def process_ASR_data(data, game_id, period):
    """Store the commentary segments of one period of a game.

    Args:
        data: Parsed ASR document. ``data["segments"]`` maps a key to a
            sequence whose first three items are start time, end time and
            text.
        game_id: Id of the game the commentary belongs to.
        period: Period number stored on every segment.
    """
    session = Session()
    try:
        commentary_events = [
            Commentary(
                game_id=game_id,
                period=period,
                event_time_start=float(segment[0]),
                event_time_end=float(segment[1]),
                description=segment[2],
            )
            for segment in data["segments"].values()
        ]
        # Bulk save objects
        session.bulk_save_objects(commentary_events)
        session.commit()
    finally:
        # Close the session even when parsing or saving fails.
        session.close()
def convert_to_seconds(time_str):
    """Parse a ``"<period> - <minutes>:<seconds>"`` string.

    Returns:
        Tuple ``(period, total_seconds)`` where ``total_seconds`` is
        ``minutes * 60 + seconds``.

    Raises:
        ValueError: If the string does not split into exactly these parts or
            a part is not an integer.
    """
    period_text, clock = time_str.split(" - ")
    minute_text, second_text = clock.split(":")
    return int(period_text), int(minute_text) * 60 + int(second_text)
def parse_labels_v2(data, session, home_team_id, away_team_id, game_id):
    """Store the action annotations of one game as ``Event`` rows.

    Args:
        data: Parsed label document; ``data["annotations"]`` is iterated and
            each item must provide ``gameTime``, ``team``, ``visibility`` and
            ``label``. ``position`` is optional.
        session: Open session the rows are added to; committed at the end.
        home_team_id: Team id used for annotations whose team is ``"home"``.
        away_team_id: Team id used for annotations whose team is ``"away"``.
        game_id: Id of the game the annotations belong to.
    """
    annotations_data = data["annotations"]
    # Annotations that belong to neither side point at a placeholder team.
    no_team = get_or_create(session, Team, name="not applicable")

    for annotation in annotations_data:
        period, game_time = convert_to_seconds(annotation["gameTime"])

        # Determine which team the annotation belongs to
        if annotation["team"] == "home":
            team_id = home_team_id
        elif annotation["team"] == "away":
            team_id = away_team_id
        else:
            team_id = no_team.id

        position = annotation.get("position", None)  # May be missing

        # True if visible, False if not. The original compared twice; the
        # second comparison (bool against "visible") was always False.
        visibility = annotation["visibility"] == "visible"

        annotation_entry = Event(
            game_id=game_id,
            period=period,
            game_time=game_time,  # Already in seconds
            frame_stamp=position,
            team_id=team_id,
            visibility=visibility,
            label=annotation["label"],
        )
        session.add(annotation_entry)

    session.commit()
def process_json_files(directory):
    """Walk *directory* and import every game folder found below it.

    A folder counts as a game folder when it contains a file whose name
    includes ``Labels-caption.json``. The folder is expected to sit at
    ``<league>/<season>/<game>``: its parent directory name is used as the
    season and its grandparent directory name as the league. The folder must
    also contain ``Labels-v2.json``.

    Args:
        directory: Root directory of the dataset.
    """
    session = Session()
    try:
        fill_player_events(session)
        for root, dirs, files in os.walk(directory):
            print(root)
            labels_file = None
            asr_files = []
            # The caption labels are needed first: processing them yields the
            # game id that every other file of the folder is attached to.
            for file in files:
                if 'Labels-caption.json' in file:
                    labels_file = file
                elif file.endswith('.json'):
                    asr_files.append(file)

            if not labels_file:
                continue

            # Take league and season from the directory names using os.path,
            # so this works with any path separator. The original split on
            # "\\" only, which left both names unset outside Windows.
            season_dir = os.path.dirname(os.path.normpath(root))
            season = os.path.basename(season_dir)
            league = os.path.basename(os.path.dirname(season_dir))

            with open(os.path.join(root, labels_file), 'r') as f:
                lb_cap = json.load(f)
            with open(os.path.join(root, "Labels-v2.json"), 'r') as f:
                lb_v2 = json.load(f)
            game_id, home_team_id, away_team_id = process_game_data(lb_cap, lb_v2, league, season)

            for file in asr_files:
                with open(os.path.join(root, file), 'r') as f:
                    asr = json.load(f)
                # Determine the type of file and process accordingly
                if 'Labels-v2' in file:
                    parse_labels_v2(asr, session, home_team_id, away_team_id, game_id)
                elif '1_half-ASR' in file:
                    process_ASR_data(data=asr, game_id=game_id, period=1)
                elif '2_half-ASR' in file:
                    process_ASR_data(data=asr, game_id=game_id, period=2)
        session.commit()
    finally:
        # Close the session even when a file fails to load or process.
        session.close()
def fill_player_events(session):
    """Make sure the known player-event labels exist in ``player_event_labels``.

    Each row gets the fact type code as its id. ``process_game_data`` looks
    labels up by ``id=int(type)``, so the id must equal the code; the original
    relied on insertion order to produce matching auto-generated ids.

    Args:
        session: Open session used for the lookups and inserts.
    """
    fact_id2label = {
        # Example: "time": "71' Ivanovic B. (Unsportsmanlike conduct)", "description": "Yellow Card"
        "1": "Yellow card",
        # Example: "time": "70' Matic N. (Unsportsmanlike conduct)", "description": "Red Card"
        "2": "Red card",
        # Example: "time": "14' Ivanovic B. (Hazard E.)", "description": "Goal"
        "3": "Goal",
        "4": "NA",
        "5": "NA 2",
        # Example: "time": "72'", "description": "Ramires"
        "6": "Substitution home",
        # Example: "time": "86'", "description": "Filipe Luis"
        "7": "Substitution away",
        # Example: "time": "14' Ivanovic B. (Hazard E.)", "description": "Assistance"
        "8": "Assistance",
    }
    for fact_id, label in fact_id2label.items():
        get_or_create(session, Player_Event_Label, id=int(fact_id), label=label)
    session.commit()
def fill_Augmented_Team(file_path):
    """Load additional team names from a CSV file into ``augmented_teams``.

    The CSV must provide the columns ``name`` (matched against ``Team.name``)
    and ``augmented_name``. Rows whose team is not in the database are
    skipped; existing (team, augmented name) pairs are not duplicated.

    Args:
        file_path: Path of the CSV file.
    """
    df = pd.read_csv(file_path)
    session = Session()
    try:
        # For each row, find the team and attach the augmented name.
        for _, row in df.iterrows():
            team_name = row["name"]
            # Strip leading and trailing whitespace
            augmented_name = row["augmented_name"].strip()
            team = session.query(Team).filter_by(name=team_name).first()
            if team:
                get_or_create(session, Augmented_Team, team_id=team.id, augmented_name=augmented_name)
        session.commit()
    finally:
        # Close the session even when a row fails.
        session.close()
def fill_Augmented_League(file_path):
    """Load additional league names from a CSV file into ``augmented_leagues``.

    The CSV must provide the columns ``name`` (matched against
    ``League.name``) and ``augmented_name``. Rows whose league is not in the
    database are skipped; existing (league, augmented name) pairs are not
    duplicated.

    Args:
        file_path: Path of the CSV file.
    """
    df = pd.read_csv(file_path)
    session = Session()
    try:
        # For each row, find the league and attach the augmented name.
        for _, row in df.iterrows():
            league_name = row["name"]
            # Strip leading and trailing whitespace
            augmented_name = row["augmented_name"].strip()
            league = session.query(League).filter_by(name=league_name).first()
            if league:
                get_or_create(session, Augmented_League, league_id=league.id,
                              augmented_name=augmented_name)
        session.commit()
    finally:
        # Close the session even when a row fails.
        session.close()
if __name__ == "__main__":
    # Import the games first, then the additional team and league names,
    # which are matched against the rows created by the import.
    games_directory = '../data/Dataset/SN-ASR_captions_and_actions/'
    team_names_csv = '../data/Dataset/augmented_teams.csv'
    league_names_csv = '../data/Dataset/augmented_leagues.csv'

    process_json_files(games_directory)
    fill_Augmented_Team(team_names_csv)
    fill_Augmented_League(league_names_csv)
    # TODO: rename the event/annotation table to something more descriptive;
    # the generic "events" name gets in the way of everything else.