nesticot committed on
Commit
bbb7691
·
verified ·
1 Parent(s): be5453a

Upload api_scraper.py

Browse files
Files changed (1) hide show
  1. api_scraper.py +747 -0
api_scraper.py ADDED
@@ -0,0 +1,747 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import pandas as pd
3
+ import numpy as np
4
+ from datetime import datetime
5
+ from tqdm import tqdm
6
+ import time
7
+ from pytz import timezone
8
+
9
+
10
+ class MLB_Scrape:
11
+
12
+ # def __init__(self):
13
+ # # Initialize your class here if needed
14
+ # pass
15
+
16
def get_sport_id(self):
    """Return the sports listed by the MLB Stats API, indexed by sport id.

    Returns:
        pandas.DataFrame built from the ``'sports'`` entry of the
        ``/api/v1/sports`` JSON response, with the ``'id'`` column used
        as the index.
    """
    response = requests.get(url='https://statsapi.mlb.com/api/v1/sports')
    sports = response.json()['sports']
    return pd.DataFrame(sports).set_index('id')
19
+
20
def get_sport_id_check(self,sport_id):
    """Tell whether ``sport_id`` appears in the table from ``get_sport_id``.

    Args:
        sport_id: Value looked up in the index of the sports table.

    Returns:
        True when the id is present. Otherwise a hint and the full
        sports table are printed and False is returned.
    """
    known_sports = self.get_sport_id()
    if sport_id in known_sports.index:
        return True

    # Unknown id: show the caller every valid choice before giving up.
    print('Please Select a New Sport ID from the following')
    print(known_sports)
    return False
27
+
28
def get_schedule(self,year_input=2023,
                 sport_id=1,
                 start_date='YYYY-MM-DD',
                 end_date='YYYY-MM-DD',
                 final=True,
                 regular=True,
                 spring=False):
    """Download one season's schedule from the MLB Stats API.

    Args:
        year_input: Season passed to the ``season`` query parameter.
        sport_id: Sport id; validated with ``get_sport_id_check`` first.
        start_date: Inclusive lower bound on the game date as
            ``'YYYY-MM-DD'``. The literal placeholder ``'YYYY-MM-DD'``
            means "no lower bound".
        end_date: Inclusive upper bound on the game date, same format and
            placeholder convention as ``start_date``.
        final: When truthy, keep only games whose coded game state is
            ``'F'``.
        regular: When True, request ``gameTypes=R``. Takes precedence
            over ``spring``.
        spring: When True (and ``regular`` is not), request
            ``gameTypes=S``. With neither flag set no game-type filter is
            sent.

    Returns:
        - ``None`` when the sport id check fails.
        - A message string when the schedule is empty (before or after
          filtering) or when a supplied date is not ``YYYY-MM-DD``.
        - Otherwise a DataFrame with columns ``game_id``, ``time``,
          ``date``, ``away``, ``home``, ``state``, ``venue_id`` and
          ``venue_name``, one row per distinct ``game_id``.
    """
    if not self.get_sport_id_check(sport_id=sport_id):
        return

    # The flags are compared with ``== True`` exactly as before so that
    # non-boolean arguments keep selecting the same branch.
    if regular == True:
        game_types = '&gameTypes=R'
    elif spring == True:
        print('spring')
        game_types = '&gameTypes=S'
    else:
        game_types = ''

    schedule_url = f'https://statsapi.mlb.com/api/v1/schedule/?sportId={sport_id}{game_types}&season={year_input}&hydrate=lineup,players'
    game_call = requests.get(url=schedule_url).json()
    if game_types:
        # The URL is echoed only for the filtered (regular/spring) requests.
        print(schedule_url)

    # Flatten dates -> games in one pass (game id, teams, state, venue).
    schedule_columns = ['game_id', 'time', 'date', 'away', 'home',
                        'state', 'venue_id', 'venue_name']
    schedule_rows = [
        {
            'game_id': game['gamePk'],
            'time': game['gameDate'],
            'date': game['officialDate'],
            'away': game['teams']['away']['team']['name'],
            'home': game['teams']['home']['team']['name'],
            'state': game['status']['codedGameState'],
            'venue_id': game['venue']['id'],
            'venue_name': game['venue']['name'],
        }
        for day in game_call['dates']
        for game in day['games']
    ]
    game_df = pd.DataFrame(schedule_rows, columns=schedule_columns)

    if len(game_df) == 0:
        return 'Schedule Length of 0, please select different parameters.'

    game_df['date'] = pd.to_datetime(game_df['date']).dt.date
    game_df['time'] = pd.to_datetime(game_df['time'])
    eastern = timezone('US/Eastern')
    game_df['time'] = game_df['time'].dt.tz_convert(eastern)
    # NOTE(review): the suffix is the fixed text "EST" even when the
    # converted time falls in daylight saving time; left unchanged because
    # callers may depend on the exact string.
    game_df['time'] = game_df['time'].dt.strftime("%I:%M %p EST")

    # Each bound is parsed on its own so that supplying only one of them
    # works; previously the untouched placeholder was fed to strptime and
    # the caller got the "wrong format" message for a valid date.
    placeholder = 'YYYY-MM-DD'
    try:
        earliest = (None if start_date == placeholder
                    else datetime.strptime(start_date, "%Y-%m-%d").date())
        latest = (None if end_date == placeholder
                  else datetime.strptime(end_date, "%Y-%m-%d").date())
    except ValueError:
        return 'Please use YYYY-MM-DD Format for Start and End Dates'

    if earliest is not None:
        game_df = game_df[game_df['date'] >= earliest]
    if latest is not None:
        game_df = game_df[game_df['date'] <= latest]

    if final:
        game_df = game_df[game_df['state'] == 'F']

    game_df = game_df.drop_duplicates(subset='game_id').reset_index(drop=True)

    if len(game_df) == 0:
        return 'Schedule Length of 0, please select different parameters.'

    return game_df
101
+
102
def get_data(self,game_list_input = [748540]):
    """Download the live-feed JSON for every game id supplied.

    Args:
        game_list_input: Iterable of game ids (list, tuple, pandas Series,
            ...). The default list is only read, never mutated.

    Returns:
        list with one decoded JSON payload per game id, in input order.
    """
    data_total = []
    print('This May Take a While. Progress Bar shows Completion of Data Retrieval.')
    # Iterate over the values themselves rather than ``input[i]``: on a
    # pandas Series ``[i]`` is a label lookup, which fails (or returns the
    # wrong game) once the Series has been filtered and its index is no
    # longer 0..n-1.
    game_ids = list(game_list_input)
    for game_id in tqdm(game_ids, desc="Processing", unit="iteration"):
        r = requests.get(f'https://statsapi.mlb.com/api/v1.1/game/{game_id}/feed/live')
        data_total.append(r.json())
    return data_total
114
+
115
def get_data_df(self,data_list):
    """Flatten live-feed game payloads into one row per recorded play event.

    A row is produced for every play event that is a pitch or whose
    ``details`` contain a ``'call'``, and for any other event whose count
    shows four balls. Every other event is skipped. Values missing from
    the payload are stored as ``np.nan``.

    Corrections relative to the earlier implementation:
      * ``pitcher_team_id`` is taken from the fielding team (it used to
        repeat the batting team's id).
      * ``is_ball`` is read from ``details['isBall']`` (it used to read
        ``isOut``).
      * In the four-ball branch ``strikes`` and ``balls`` are no longer
        swapped.
      * Missing ``matchup``, ``breaks``, ``coordinates``, ``code`` or
        ``pitchNumber`` entries give ``np.nan`` instead of a ``KeyError``
        or unequal column lengths.

    Args:
        data_list: Iterable of decoded live-feed payloads, e.g. the list
            returned by ``get_data``.

    Returns:
        pandas.DataFrame with the columns listed in ``columns`` below.
    """
    swing_codes = ['X','F','S','D','E','T','W']
    whiff_codes = ['S','T','W']
    print('Converting Data to Dataframe.')

    # Output columns, in the order they appear in the returned frame.
    columns = [
        'game_id', 'game_date',
        'batter_id', 'batter_name', 'batter_hand', 'batter_team', 'batter_team_id',
        'pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitcher_team', 'pitcher_team_id',
        'play_description', 'play_code', 'in_play', 'is_strike', 'is_swing',
        'is_whiff', 'is_out', 'is_ball', 'is_review', 'pitch_type',
        'pitch_description', 'strikes', 'balls', 'outs',
        'start_speed', 'end_speed', 'sz_top', 'sz_bot', 'x', 'y',
        'ax', 'ay', 'az', 'pfxx', 'pfxz', 'px', 'pz',
        'vx0', 'vy0', 'vz0', 'x0', 'y0', 'z0',
        'zone', 'type_confidence', 'plate_time', 'extension',
        'spin_rate', 'spin_direction', 'ivb', 'hb',
        'launch_speed', 'launch_angle', 'launch_distance', 'launch_location',
        'trajectory', 'hardness', 'hit_x', 'hit_y',
        'index_play', 'play_id', 'start_time', 'end_time', 'is_pitch', 'type_type',
        'type_ab', 'event', 'event_type', 'rbi', 'away_score', 'home_score',
    ]

    # column -> key path below ab_list['matchup']
    matchup_paths = {
        'batter_id': ('batter', 'id'),
        'batter_name': ('batter', 'fullName'),
        'batter_hand': ('batSide', 'code'),
        'pitcher_id': ('pitcher', 'id'),
        'pitcher_name': ('pitcher', 'fullName'),
        'pitcher_hand': ('pitchHand', 'code'),
    }
    # column -> key path below the play event's 'details'
    detail_paths = {
        'play_description': ('description',),
        'play_code': ('code',),
        'in_play': ('isInPlay',),
        'is_strike': ('isStrike',),
        'is_ball': ('isBall',),
        'is_review': ('hasReview',),
        'pitch_type': ('type', 'code'),
        'pitch_description': ('type', 'description'),
    }
    # column -> key path below the play event's 'pitchData'
    pitch_paths = {
        'start_speed': ('startSpeed',),
        'end_speed': ('endSpeed',),
        'sz_top': ('strikeZoneTop',),
        'sz_bot': ('strikeZoneBottom',),
        'x': ('coordinates', 'x'),
        'y': ('coordinates', 'y'),
        'ax': ('coordinates', 'aX'),
        'ay': ('coordinates', 'aY'),
        'az': ('coordinates', 'aZ'),
        'pfxx': ('coordinates', 'pfxX'),
        'pfxz': ('coordinates', 'pfxZ'),
        'px': ('coordinates', 'pX'),
        'pz': ('coordinates', 'pZ'),
        'vx0': ('coordinates', 'vX0'),
        'vy0': ('coordinates', 'vY0'),
        'vz0': ('coordinates', 'vZ0'),
        'x0': ('coordinates', 'x0'),
        'y0': ('coordinates', 'y0'),
        'z0': ('coordinates', 'z0'),
        'zone': ('zone',),
        'type_confidence': ('typeConfidence',),
        'plate_time': ('plateTime',),
        'extension': ('extension',),
        'spin_rate': ('breaks', 'spinRate'),
        'spin_direction': ('breaks', 'spinDirection'),
        'ivb': ('breaks', 'breakVerticalInduced'),
        'hb': ('breaks', 'breakHorizontal'),
    }
    # column -> key path below the play event's 'hitData'
    hit_paths = {
        'launch_speed': ('launchSpeed',),
        'launch_angle': ('launchAngle',),
        'launch_distance': ('totalDistance',),
        'launch_location': ('location',),
        'trajectory': ('trajectory',),
        'hardness': ('hardness',),
        'hit_x': ('coordinates', 'coordX'),
        'hit_y': ('coordinates', 'coordY'),
    }
    # column -> key read directly from the play event
    event_keys = {
        'index_play': 'index',
        'play_id': 'playId',
        'start_time': 'startTime',
        'end_time': 'endTime',
        'is_pitch': 'isPitch',
        'type_type': 'type',
    }
    # column -> key read from ab_list['result'] on the final event of a play
    result_keys = {
        'type_ab': 'type',
        'event': 'event',
        'event_type': 'eventType',
        'rbi': 'rbi',
        'away_score': 'awayScore',
        'home_score': 'homeScore',
        'is_out': 'isOut',
    }

    def pick(node, *path):
        """Follow ``path`` through nested dicts; np.nan if any key is absent."""
        for key in path:
            if not isinstance(node, dict) or key not in node:
                return np.nan
            node = node[key]
        return node

    def base_row(data, ab_list, play_event):
        """Build a row holding the fields shared by both recorded event kinds."""
        row = dict.fromkeys(columns, np.nan)
        row['game_id'] = data['gamePk']
        row['game_date'] = data['gameData']['datetime']['officialDate']

        for column, path in matchup_paths.items():
            row[column] = pick(ab_list, 'matchup', *path)

        # Top of the inning: away team bats and home team pitches.
        teams = data['gameData']['teams']
        if ab_list['about']['isTopInning']:
            batting, fielding = 'away', 'home'
        else:
            batting, fielding = 'home', 'away'
        row['batter_team'] = pick(teams, batting, 'abbreviation')
        row['batter_team_id'] = pick(teams, batting, 'id')
        row['pitcher_team'] = pick(teams, fielding, 'abbreviation')
        row['pitcher_team_id'] = pick(teams, fielding, 'id')

        for column, key in event_keys.items():
            row[column] = pick(play_event, key)
        return row

    rows = []
    for data in data_list:
        for ab_list in data['liveData']['plays']['allPlays']:
            play_events = ab_list['playEvents']
            last_position = len(play_events) - 1

            for n, play_event in enumerate(play_events):
                details = pick(play_event, 'details')

                if play_event.get('isPitch') is True or (isinstance(details, dict) and 'call' in details):
                    row = base_row(data, ab_list, play_event)

                    for column, path in detail_paths.items():
                        row[column] = pick(details, *path)

                    call_code = pick(details, 'code')
                    row['is_swing'] = True if call_code in swing_codes else np.nan
                    row['is_whiff'] = True if call_code in whiff_codes else np.nan

                    # NOTE(review): 'atBatIndex' is looked up on the play
                    # event itself, as before; confirm the feed stores it
                    # there.
                    row['ab_number'] = pick(play_event, 'atBatIndex')

                    # Count *before* this event: zero on the first pitch
                    # (or the first event, which has no predecessor),
                    # otherwise the count stored on the previous event.
                    if n == 0 or play_event.get('pitchNumber') == 1:
                        row['strikes'] = 0
                        row['balls'] = 0
                        row['outs'] = 0
                    else:
                        previous_event = play_events[n - 1]
                        row['strikes'] = pick(previous_event, 'count', 'strikes')
                        row['balls'] = pick(previous_event, 'count', 'balls')
                        row['outs'] = pick(previous_event, 'count', 'outs')

                    for column, path in pitch_paths.items():
                        row[column] = pick(play_event, 'pitchData', *path)
                    for column, path in hit_paths.items():
                        row[column] = pick(play_event, 'hitData', *path)

                    # The play's result is attached only to its last event.
                    if n == last_position:
                        for column, key in result_keys.items():
                            row[column] = pick(ab_list, 'result', key)

                    rows.append(row)

                elif pick(play_event, 'count', 'balls') == 4:
                    # Non-pitch event recorded with four balls on the count:
                    # keep the play's event and the count as stored on this
                    # event; all pitch/hit measurements stay np.nan.
                    row = base_row(data, ab_list, play_event)
                    row['event'] = pick(ab_list, 'result', 'event')
                    row['event_type'] = pick(ab_list, 'result', 'eventType')
                    row['strikes'] = pick(play_event, 'count', 'strikes')
                    row['balls'] = pick(play_event, 'count', 'balls')
                    row['outs'] = pick(play_event, 'count', 'outs')
                    rows.append(row)

    # Pivot to one list per column so the frame is built the same way as
    # before (dict of equal-length lists).
    table = {column: [row[column] for row in rows] for column in columns}
    print({column: len(values) for column, values in table.items()})
    df = pd.DataFrame(data=table)
    return df
638
+
639
def get_players(self, sport_id=1):
    """
    Retrieve the player list for a sport from the MLB Stats API.

    Parameters:
    - sport_id (int): Sport identifier interpolated into the request URL.
      Defaults to 1.

    Returns:
    - pd.DataFrame: One row per entry of the response's 'people' list with
      the columns 'player_id', 'name', 'position' (primary position
      abbreviation), 'team' (current team id) and 'age'.

    Raises:
    - KeyError: If the response has no 'people' key or an entry lacks one of
      the fields read below.
    """
    endpoint = f'https://statsapi.mlb.com/api/v1/sports/{sport_id}/players'
    payload = requests.get(url=endpoint).json()
    people = payload['people']

    # Keep only the fields that help tell one player from another.
    names = [person['fullName'] for person in people]
    ids = [person['id'] for person in people]
    positions = [person['primaryPosition']['abbreviation'] for person in people]
    team_ids = [person['currentTeam']['id'] for person in people]
    ages = [person['currentAge'] for person in people]

    columns = {
        'player_id': ids,
        'name': names,
        'position': positions,
        'team': team_ids,
        'age': ages,
    }
    return pd.DataFrame(data=columns)
655
+
656
def get_teams(self):
    """
    Retrieve all teams from the MLB Stats API as a DataFrame.

    Identity fields (id, city, name, franchise, abbreviation) are only read
    for entries that carry a 'franchiseName' key; other entries get no
    'team_id' and are dropped. A team without a parent organization is
    recorded as its own parent.

    Returns:
    - pd.DataFrame: Sorted by 'team_id', with the columns 'team_id', 'city',
      'name', 'franchise', 'abbreviation', 'parent_org_id', 'parent_org',
      'league_id', 'league_name' and 'parent_org_abbreviation'.

    Raises:
    - KeyError: If the response has no 'teams' key, or an entry that has
      'franchiseName' lacks one of the other identity fields.
    """
    teams = requests.get(url='https://statsapi.mlb.com/api/v1/teams/').json()['teams']

    def identity_field(team, key):
        # Entries without 'franchiseName' are blanked out so that the
        # dropna on 'team_id' below removes them.
        return team[key] if 'franchiseName' in team else None

    def league_field(team, key):
        # Tolerate a missing 'league' object the same way the other optional
        # fields tolerate missing keys.
        return (team.get('league') or {}).get(key)

    # Create a dataframe of all the teams
    mlb_teams_df = pd.DataFrame(data={
        'team_id': [identity_field(team, 'id') for team in teams],
        # 'city' comes from 'franchiseName'; it previously duplicated the
        # 'franchise' column by mistake.
        'city': [identity_field(team, 'franchiseName') for team in teams],
        'name': [identity_field(team, 'teamName') for team in teams],
        'franchise': [identity_field(team, 'name') for team in teams],
        'abbreviation': [identity_field(team, 'abbreviation') for team in teams],
        'parent_org_id': [team.get('parentOrgId') for team in teams],
        'parent_org': [team.get('parentOrgName') for team in teams],
        'league_id': [league_field(team, 'id') for team in teams],
        'league_name': [league_field(team, 'name') for team in teams],
    }).drop_duplicates().dropna(subset=['team_id']).reset_index(drop=True).sort_values('team_id')

    # A team with no parent organization is treated as its own parent.
    missing_parent_id = mlb_teams_df['parent_org_id'].isnull()
    mlb_teams_df.loc[missing_parent_id, 'parent_org_id'] = mlb_teams_df.loc[missing_parent_id, 'team_id']
    missing_parent_name = mlb_teams_df['parent_org'].isnull()
    mlb_teams_df.loc[missing_parent_name, 'parent_org'] = mlb_teams_df.loc[missing_parent_name, 'franchise']

    # Look up the parent's abbreviation through its team id.
    abbreviation_by_team_id = mlb_teams_df.set_index('team_id')['abbreviation'].to_dict()
    mlb_teams_df['parent_org_abbreviation'] = mlb_teams_df['parent_org_id'].map(abbreviation_by_team_id)

    return mlb_teams_df
700
+
701
def get_leagues(self):
    """
    Retrieve all leagues from the MLB Stats API as a DataFrame.

    Returns:
    - pd.DataFrame: One row per entry of the response's 'leagues' list with
      the columns 'league_id', 'league_name', 'league_abbreviation' and
      'sport_id'. A field missing from an entry is stored as None.
    """
    response = requests.get(url='https://statsapi.mlb.com/api/v1/leagues/')
    league_entries = response.json()['leagues']

    # The sport id is nested one level down, so it is resolved separately.
    sport_ids = [
        entry['sport']['id'] if 'sport' in entry else None
        for entry in league_entries
    ]

    columns = {
        'league_id': [entry.get('id') for entry in league_entries],
        'league_name': [entry.get('name') for entry in league_entries],
        'league_abbreviation': [entry.get('abbreviation') for entry in league_entries],
        'sport_id': sport_ids,
    }
    return pd.DataFrame(data=columns)
719
+
720
def get_player_games_list(self, player_id=691587, season=2023):
    """
    List the game ids found in a player's game log for one season.

    Parameters:
    - player_id (int): Player identifier interpolated into the request URL.
    - season (int): Season requested in the gameLog hydration. Defaults to
      2023, the value that used to be hard-coded.

    Returns:
    - list: The 'gamePk' of every split in the first stats group of the
      response; an empty list when the person entry carries no 'stats'.

    Raises:
    - KeyError / IndexError: If the response contains no 'people' entry.
    """
    # https keeps this call consistent with the other Stats API requests.
    url = (
        f'https://statsapi.mlb.com/api/v1/people/{player_id}'
        f'?hydrate=stats(type=gameLog,season={season}),hydrations'
    )
    person = requests.get(url=url).json()['people'][0]

    stats = person.get('stats')
    if not stats:
        # No game log in the response: report no games rather than failing.
        return []

    return [split['game']['gamePk'] for split in stats[0]['splits']]
723
+
724
def get_team_schedule(self, year=2023, sport_id=1, mlb_team='Toronto Blue Jays'):
    """
    Build the schedule of games in state 'F' for one organization's team.

    Parameters:
    - year (int): Passed to get_schedule as year_input.
    - sport_id (int): Sport to look up; validated with get_sport_id_check.
    - mlb_team (str): Value matched against the 'parent_org' column of the
      teams table.

    Returns:
    - tuple: (schedule_df, teams_df). schedule_df holds the rows in which
      the selected team is 'home' or 'away' and 'state' equals 'F';
      teams_df is the merged teams/leagues/sports table restricted to
      sport_id. Returns (False, False) when sport_id fails validation.

    Raises:
    - IndexError: If no team of the requested sport has mlb_team as its
      'parent_org'.
    """
    if not self.get_sport_id_check(sport_id=sport_id):
        print('Please Select a New Sport ID from the following')
        print(self.get_sport_id())
        return False, False

    schedule_df = self.get_schedule(year_input=year, sport_id=sport_id)

    # Sports are joined on their index; clashing sport columns get the
    # '_sport' suffix (e.g. 'abbreviation_sport').
    teams_df = (
        self.get_teams()
        .merge(self.get_leagues())
        .merge(self.get_sport_id(), left_on=['sport_id'], right_index=True, suffixes=['', '_sport'])
    )
    teams_df = teams_df[teams_df['sport_id'] == sport_id]

    org_teams = teams_df[teams_df['parent_org'] == mlb_team]
    if org_teams.empty:
        # Same exception type the bare `.values[0]` lookup raised, but with a
        # message that names the actual problem.
        raise IndexError(
            f'No team with parent organization {mlb_team!r} found for sport_id {sport_id}'
        )
    team_name_select = org_teams['franchise'].values[0]

    involves_team = (schedule_df.away == team_name_select) | (schedule_df.home == team_name_select)
    in_state_f = schedule_df.state == 'F'
    schedule_df = schedule_df[involves_team & in_state_f].reset_index(drop=True)
    return schedule_df, teams_df
737
+
738
def get_team_game_data(self, year=2023, sport_id=1, mlb_team='Toronto Blue Jays'):
    """
    Collect the game data for every scheduled game of one organization's team.

    Parameters:
    - year (int): Forwarded to get_team_schedule.
    - sport_id (int): Forwarded to get_team_schedule.
    - mlb_team (str): Value matched against the 'parent_org' column of the
      teams table.

    Returns:
    - pd.DataFrame or None: The frame produced by get_data_df with two added
      columns, 'mlb_team' (parent organization abbreviation) and 'level'
      (sport abbreviation). None when get_team_schedule reports failure.
    """
    schedule_df, teams_df = self.get_team_schedule(year=year, sport_id=sport_id, mlb_team=mlb_team)

    # get_team_schedule signals failure with the literal False. Compare by
    # identity: truth-testing a DataFrame raises ValueError.
    if schedule_df is False:
        return None

    data = self.get_data(schedule_df['game_id'][:])
    df = self.get_data_df(data_list=data)

    org_rows = teams_df[teams_df['parent_org'] == mlb_team]
    df['mlb_team'] = org_rows['parent_org_abbreviation'].values[0]
    df['level'] = org_rows['abbreviation_sport'].values[0]

    return df