Spaces:
Running
Running
Upload 23 files
Browse files- Dockerfile +11 -15
- __pycache__/api_scraper.cpython-39.pyc +0 -0
- __pycache__/app.cpython-39.pyc +0 -0
- __pycache__/pitch_summary_functions.cpython-39.pyc +0 -0
- api_scraper.py +747 -0
- app.py +682 -454
- joblib_model/barrel_model.joblib +3 -0
- joblib_model/in_zone.joblib +3 -0
- joblib_model/model_attack_zone.joblib +3 -0
- joblib_model/no_swing.joblib +3 -0
- joblib_model/swing.joblib +3 -0
- joblib_model/tjloc_model_20240311.joblib +3 -0
- joblib_model/tjstuff_model_20240123.joblib +3 -0
- joblib_model/tjstuff_model_20240317.joblib +3 -0
- joblib_model/tjstuff_model_20240318.joblib +3 -0
- joblib_model/xwoba_model.joblib +3 -0
- pitch_summary_functions.py +1005 -0
- statcast_pitch_summary.csv +18 -0
- team_logos.csv +32 -0
Dockerfile
CHANGED
@@ -1,20 +1,16 @@
|
|
1 |
FROM python:3.9
|
2 |
|
3 |
-
|
|
|
|
|
4 |
|
5 |
-
|
|
|
|
|
|
|
6 |
|
7 |
-
|
|
|
8 |
|
9 |
-
#
|
10 |
-
|
11 |
-
USER user
|
12 |
-
ENV HOME=/home/user \
|
13 |
-
PATH=/home/user/.local/bin:$PATH
|
14 |
-
|
15 |
-
|
16 |
-
COPY . .
|
17 |
-
|
18 |
-
EXPOSE 7860
|
19 |
-
|
20 |
-
CMD ["shiny", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
1 |
FROM python:3.9
|
2 |
|
3 |
+
# Install dependencies
|
4 |
+
COPY requirements.txt /app/
|
5 |
+
RUN pip install -r /app/requirements.txt
|
6 |
|
7 |
+
# Copy app files
|
8 |
+
COPY app_name /app/app_name/
|
9 |
+
COPY static/ /app/static/
|
10 |
+
COPY templates/ /app/templates/
|
11 |
|
12 |
+
# Set working directory
|
13 |
+
WORKDIR /app/app_name
|
14 |
|
15 |
+
# Set the command to run the app
|
16 |
+
CMD ["python", "app.py"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__pycache__/api_scraper.cpython-39.pyc
ADDED
Binary file (22.8 kB). View file
|
|
__pycache__/app.cpython-39.pyc
CHANGED
Binary files a/__pycache__/app.cpython-39.pyc and b/__pycache__/app.cpython-39.pyc differ
|
|
__pycache__/pitch_summary_functions.cpython-39.pyc
ADDED
Binary file (26.7 kB). View file
|
|
api_scraper.py
ADDED
@@ -0,0 +1,747 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from datetime import datetime
|
5 |
+
from tqdm import tqdm
|
6 |
+
import time
|
7 |
+
from pytz import timezone
|
8 |
+
|
9 |
+
|
10 |
+
class MLB_Scrape:
|
11 |
+
|
12 |
+
# def __init__(self):
|
13 |
+
# # Initialize your class here if needed
|
14 |
+
# pass
|
15 |
+
|
16 |
+
def get_sport_id(self):
    """Return the MLB Stats API ``sports`` listing as a DataFrame.

    Returns:
        pandas.DataFrame built from the ``sports`` array of the
        ``/api/v1/sports`` response, indexed by each entry's ``id``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    # A timeout keeps a stalled connection from blocking the caller forever;
    # requests waits indefinitely when none is given.
    response = requests.get(url='https://statsapi.mlb.com/api/v1/sports', timeout=30)
    # Surface HTTP failures directly instead of as a KeyError on 'sports'.
    response.raise_for_status()
    return pd.DataFrame(response.json()['sports']).set_index('id')
|
19 |
+
|
20 |
+
def get_sport_id_check(self, sport_id):
    """Tell whether ``sport_id`` appears in the sports listing.

    The listing is obtained from ``self.get_sport_id()``. When the id is not
    in that listing's index, a prompt and the full listing are printed so the
    user can pick a valid id.

    Args:
        sport_id: Candidate id, looked up in the listing's index.

    Returns:
        True when the id is present in the index, False otherwise.
    """
    known_sports = self.get_sport_id()
    if sport_id in known_sports.index:
        return True

    # Unknown id: show the valid choices before reporting failure.
    print('Please Select a New Sport ID from the following')
    print(known_sports)
    return False
|
27 |
+
|
28 |
+
def get_schedule(self, year_input=2023,
                 sport_id=1,
                 start_date='YYYY-MM-DD',
                 end_date='YYYY-MM-DD',
                 final=True,
                 regular=True,
                 spring=False):
    """Fetch a season schedule from the MLB Stats API as a DataFrame.

    Args:
        year_input: Season passed to the API as ``season``.
        sport_id: Sport id passed to the API as ``sportId``; it is validated
            with ``get_sport_id_check`` first.
        start_date: Inclusive lower date bound in ``YYYY-MM-DD`` form. The
            literal placeholder ``'YYYY-MM-DD'`` means "no lower bound".
        end_date: Inclusive upper date bound in ``YYYY-MM-DD`` form. The
            literal placeholder ``'YYYY-MM-DD'`` means "no upper bound".
        final: When truthy, keep only games whose coded state is ``'F'``.
        regular: When truthy, request ``gameTypes=R``. Takes precedence over
            ``spring``.
        spring: When truthy (and ``regular`` is falsy), request
            ``gameTypes=S``.

    Returns:
        A DataFrame with the columns ``game_id``, ``time``, ``date``,
        ``away``, ``home``, ``state``, ``venue_id`` and ``venue_name``, one
        row per unique game id.
        ``None`` when ``sport_id`` fails validation.
        A message string when no games match or a date bound is malformed.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    placeholder = 'YYYY-MM-DD'
    empty_message = 'Schedule Length of 0, please select different parameters.'
    columns = ['game_id', 'time', 'date', 'away', 'home',
               'state', 'venue_id', 'venue_name']

    if not self.get_sport_id_check(sport_id=sport_id):
        return None

    # Build the request URL once; only the gameTypes filter differs.
    base_url = f'https://statsapi.mlb.com/api/v1/schedule/?sportId={sport_id}'
    tail = f'&season={year_input}&hydrate=lineup,players'
    if regular:
        url = f'{base_url}&gameTypes=R{tail}'
    elif spring:
        print('spring')
        url = f'{base_url}&gameTypes=S{tail}'
    else:
        url = f'{base_url}{tail}'

    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url=url, timeout=30)
    response.raise_for_status()
    game_call = response.json()
    if regular or spring:
        print(url)

    # Flatten dates -> games in a single pass over the payload.
    rows = [
        {
            'game_id': game['gamePk'],
            'time': game['gameDate'],
            'date': game['officialDate'],
            'away': game['teams']['away']['team']['name'],
            'home': game['teams']['home']['team']['name'],
            'state': game['status']['codedGameState'],
            'venue_id': game['venue']['id'],
            'venue_name': game['venue']['name'],
        }
        for day in game_call.get('dates', [])
        for game in day['games']
    ]
    if not rows:
        return empty_message

    game_df = pd.DataFrame(rows, columns=columns)
    game_df['date'] = pd.to_datetime(game_df['date']).dt.date

    # Convert the API timestamps to US/Eastern wall-clock strings.
    # NOTE(review): the label is the literal text "EST" even when US/Eastern
    # is observing daylight time; kept as-is because callers may rely on the
    # exact string.
    eastern = timezone('US/Eastern')
    game_df['time'] = (
        pd.to_datetime(game_df['time'])
        .dt.tz_convert(eastern)
        .dt.strftime("%I:%M %p EST")
    )

    # Apply each date bound independently so that supplying only one of them
    # filters on that side instead of failing to parse the placeholder.
    try:
        if start_date != placeholder:
            lower = datetime.strptime(start_date, "%Y-%m-%d").date()
            game_df = game_df[game_df['date'] >= lower]
        if end_date != placeholder:
            upper = datetime.strptime(end_date, "%Y-%m-%d").date()
            game_df = game_df[game_df['date'] <= upper]
    except ValueError:
        return 'Please use YYYY-MM-DD Format for Start and End Dates'

    if final:
        game_df = game_df[game_df['state'] == 'F']

    game_df = game_df.drop_duplicates(subset='game_id').reset_index(drop=True)

    if game_df.empty:
        return empty_message

    return game_df
|
101 |
+
|
102 |
+
def get_data(self, game_list_input=None):
    """Download the live-feed JSON for each game id.

    Args:
        game_list_input: Iterable of game ids. Defaults to ``[748540]`` when
            omitted.

    Returns:
        A list with one decoded JSON payload per game id, in input order.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    # Avoid a shared mutable default; keep the original single-game default.
    if game_list_input is None:
        game_list_input = [748540]

    data_total = []
    print('This May Take a While. Progress Bar shows Completion of Data Retrieval.')

    # One session reuses the connection across the many same-host requests.
    with requests.Session() as session:
        # Iterate the ids directly rather than by position, so any iterable
        # with a length works regardless of how it is indexed.
        for game_id in tqdm(game_list_input, desc="Processing", unit="iteration"):
            response = session.get(
                f'https://statsapi.mlb.com/api/v1.1/game/{game_id}/feed/live',
                timeout=30,
            )
            # Fail here with a clear HTTP error rather than storing an error
            # payload that lacks the keys read later.
            response.raise_for_status()
            data_total.append(response.json())

    return data_total
|
114 |
+
|
115 |
+
def get_data_df(self,data_list):
|
116 |
+
|
117 |
+
swing_list = ['X','F','S','D','E','T','W']
|
118 |
+
whiff_list = ['S','T','W']
|
119 |
+
print('Converting Data to Dataframe.')
|
120 |
+
game_id = []
|
121 |
+
game_date = []
|
122 |
+
batter_id = []
|
123 |
+
batter_name = []
|
124 |
+
batter_hand = []
|
125 |
+
batter_team = []
|
126 |
+
batter_team_id = []
|
127 |
+
pitcher_id = []
|
128 |
+
pitcher_name = []
|
129 |
+
pitcher_hand = []
|
130 |
+
pitcher_team = []
|
131 |
+
pitcher_team_id = []
|
132 |
+
|
133 |
+
play_description = []
|
134 |
+
play_code = []
|
135 |
+
in_play = []
|
136 |
+
is_strike = []
|
137 |
+
is_swing = []
|
138 |
+
is_whiff = []
|
139 |
+
is_out = []
|
140 |
+
is_ball = []
|
141 |
+
is_review = []
|
142 |
+
pitch_type = []
|
143 |
+
pitch_description = []
|
144 |
+
strikes = []
|
145 |
+
balls = []
|
146 |
+
outs = []
|
147 |
+
|
148 |
+
start_speed = []
|
149 |
+
end_speed = []
|
150 |
+
sz_top = []
|
151 |
+
sz_bot = []
|
152 |
+
x = []
|
153 |
+
y = []
|
154 |
+
ax = []
|
155 |
+
ay = []
|
156 |
+
az = []
|
157 |
+
pfxx = []
|
158 |
+
pfxz = []
|
159 |
+
px = []
|
160 |
+
pz = []
|
161 |
+
vx0 = []
|
162 |
+
vy0 = []
|
163 |
+
vz0 = []
|
164 |
+
x0 = []
|
165 |
+
y0 = []
|
166 |
+
z0 = []
|
167 |
+
zone = []
|
168 |
+
type_confidence = []
|
169 |
+
plate_time = []
|
170 |
+
extension = []
|
171 |
+
spin_rate = []
|
172 |
+
spin_direction = []
|
173 |
+
ivb = []
|
174 |
+
hb = []
|
175 |
+
|
176 |
+
launch_speed = []
|
177 |
+
launch_angle = []
|
178 |
+
launch_distance = []
|
179 |
+
launch_location = []
|
180 |
+
trajectory = []
|
181 |
+
hardness = []
|
182 |
+
hit_x = []
|
183 |
+
hit_y = []
|
184 |
+
|
185 |
+
index_play = []
|
186 |
+
play_id = []
|
187 |
+
start_time = []
|
188 |
+
end_time = []
|
189 |
+
is_pitch = []
|
190 |
+
type_type = []
|
191 |
+
|
192 |
+
|
193 |
+
type_ab = []
|
194 |
+
ab_number = []
|
195 |
+
event = []
|
196 |
+
event_type = []
|
197 |
+
rbi = []
|
198 |
+
away_score = []
|
199 |
+
home_score = []
|
200 |
+
|
201 |
+
#data[0]['liveData']['plays']['allPlays'][32]['playEvents'][-1]['details']['call']['code'] in ['VP']
|
202 |
+
|
203 |
+
for data in data_list:
|
204 |
+
for ab_id in range(len(data['liveData']['plays']['allPlays'])):
|
205 |
+
ab_list = data['liveData']['plays']['allPlays'][ab_id]
|
206 |
+
for n in range(len(ab_list['playEvents'])):
|
207 |
+
if ab_list['playEvents'][n]['isPitch'] == True or 'call' in ab_list['playEvents'][n]['details']:
|
208 |
+
|
209 |
+
game_id.append(data['gamePk'])
|
210 |
+
game_date.append(data['gameData']['datetime']['officialDate'])
|
211 |
+
if 'matchup' in ab_list:
|
212 |
+
batter_id.append(ab_list['matchup']['batter']['id'] if 'batter' in ab_list['matchup'] else np.nan)
|
213 |
+
if 'batter' in ab_list['matchup']:
|
214 |
+
batter_name.append(ab_list['matchup']['batter']['fullName'] if 'fullName' in ab_list['matchup']['batter'] else np.nan)
|
215 |
+
else:
|
216 |
+
batter_name.append(np.nan)
|
217 |
+
|
218 |
+
batter_hand.append(ab_list['matchup']['batSide']['code'] if 'batSide' in ab_list['matchup'] else np.nan)
|
219 |
+
pitcher_id.append(ab_list['matchup']['pitcher']['id'] if 'pitcher' in ab_list['matchup'] else np.nan)
|
220 |
+
if 'pitcher' in ab_list['matchup']:
|
221 |
+
pitcher_name.append(ab_list['matchup']['pitcher']['fullName'] if 'fullName' in ab_list['matchup']['pitcher'] else np.nan)
|
222 |
+
else:
|
223 |
+
pitcher_name.append(np.nan)
|
224 |
+
#pitcher_name.append(ab_list['matchup']['pitcher']['fullName'] if 'pitcher' in ab_list['matchup'] else np.nan)
|
225 |
+
pitcher_hand.append(ab_list['matchup']['pitchHand']['code'] if 'pitchHand' in ab_list['matchup'] else np.nan)
|
226 |
+
|
227 |
+
|
228 |
+
# batter_id.append(ab_list['matchup']['batter']['id'] if 'batter' in ab_list['matchup'] else np.nan)
|
229 |
+
# batter_name.append(ab_list['matchup']['batter']['fullName'] if 'batter' in ab_list['matchup'] else np.nan)
|
230 |
+
# batter_hand.append(ab_list['matchup']['batSide']['code'] if 'batSide' in ab_list['matchup'] else np.nan)
|
231 |
+
# pitcher_id.append(ab_list['matchup']['pitcher']['id'] if 'pitcher' in ab_list['matchup'] else np.nan)
|
232 |
+
# pitcher_name.append(ab_list['matchup']['pitcher']['fullName'] if 'pitcher' in ab_list['matchup'] else np.nan)
|
233 |
+
# pitcher_hand.append(ab_list['matchup']['pitchHand']['code'] if 'pitchHand' in ab_list['matchup'] else np.nan)
|
234 |
+
|
235 |
+
if ab_list['about']['isTopInning']:
|
236 |
+
batter_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else np.nan)
|
237 |
+
batter_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else np.nan)
|
238 |
+
pitcher_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else np.nan)
|
239 |
+
pitcher_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else np.nan)
|
240 |
+
|
241 |
+
else:
|
242 |
+
batter_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else np.nan)
|
243 |
+
batter_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else np.nan)
|
244 |
+
pitcher_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else np.nan)
|
245 |
+
pitcher_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else np.nan)
|
246 |
+
|
247 |
+
play_description.append(ab_list['playEvents'][n]['details']['description'] if 'description' in ab_list['playEvents'][n]['details'] else np.nan)
|
248 |
+
play_code.append(ab_list['playEvents'][n]['details']['code'] if 'code' in ab_list['playEvents'][n]['details'] else np.nan)
|
249 |
+
in_play.append(ab_list['playEvents'][n]['details']['isInPlay'] if 'isInPlay' in ab_list['playEvents'][n]['details'] else np.nan)
|
250 |
+
is_strike.append(ab_list['playEvents'][n]['details']['isStrike'] if 'isStrike' in ab_list['playEvents'][n]['details'] else np.nan)
|
251 |
+
|
252 |
+
if 'details' in ab_list['playEvents'][n]:
|
253 |
+
is_swing.append(True if ab_list['playEvents'][n]['details']['code'] in swing_list else np.nan)
|
254 |
+
is_whiff.append(True if ab_list['playEvents'][n]['details']['code'] in whiff_list else np.nan)
|
255 |
+
else:
|
256 |
+
is_swing.append(np.nan)
|
257 |
+
is_whiff.append(np.nan)
|
258 |
+
|
259 |
+
#is_out.append(ab_list['playEvents'][n]['details']['isBall'] if 'isBall' in ab_list['playEvents'][n]['details'] else np.nan)
|
260 |
+
is_ball.append(ab_list['playEvents'][n]['details']['isOut'] if 'isOut' in ab_list['playEvents'][n]['details'] else np.nan)
|
261 |
+
is_review.append(ab_list['playEvents'][n]['details']['hasReview'] if 'hasReview' in ab_list['playEvents'][n]['details'] else np.nan)
|
262 |
+
pitch_type.append(ab_list['playEvents'][n]['details']['type']['code'] if 'type' in ab_list['playEvents'][n]['details'] else np.nan)
|
263 |
+
pitch_description.append(ab_list['playEvents'][n]['details']['type']['description'] if 'type' in ab_list['playEvents'][n]['details'] else np.nan)
|
264 |
+
|
265 |
+
#if ab_list['playEvents'][n]['isPitch'] == True:
|
266 |
+
if ab_list['playEvents'][n]['pitchNumber'] == 1:
|
267 |
+
ab_number.append(ab_list['playEvents'][n]['atBatIndex'] if 'atBatIndex' in ab_list['playEvents'][n] else np.nan)
|
268 |
+
strikes.append(0)
|
269 |
+
balls.append(0)
|
270 |
+
outs.append(0)
|
271 |
+
else:
|
272 |
+
ab_number.append(ab_list['playEvents'][n]['atBatIndex'] if 'atBatIndex' in ab_list['playEvents'][n] else np.nan)
|
273 |
+
strikes.append(ab_list['playEvents'][n-1]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n-1]['count'] else np.nan)
|
274 |
+
balls.append(ab_list['playEvents'][n-1]['count']['balls'] if 'balls' in ab_list['playEvents'][n-1]['count'] else np.nan)
|
275 |
+
outs.append(ab_list['playEvents'][n-1]['count']['outs'] if 'outs' in ab_list['playEvents'][n-1]['count'] else np.nan)
|
276 |
+
|
277 |
+
if 'pitchData' in ab_list['playEvents'][n]:
|
278 |
+
|
279 |
+
start_speed.append(ab_list['playEvents'][n]['pitchData']['startSpeed'] if 'startSpeed' in ab_list['playEvents'][n]['pitchData'] else np.nan)
|
280 |
+
end_speed.append(ab_list['playEvents'][n]['pitchData']['endSpeed'] if 'endSpeed' in ab_list['playEvents'][n]['pitchData'] else np.nan)
|
281 |
+
|
282 |
+
sz_top.append(ab_list['playEvents'][n]['pitchData']['strikeZoneTop'] if 'strikeZoneTop' in ab_list['playEvents'][n]['pitchData'] else np.nan)
|
283 |
+
sz_bot.append(ab_list['playEvents'][n]['pitchData']['strikeZoneBottom'] if 'strikeZoneBottom' in ab_list['playEvents'][n]['pitchData'] else np.nan)
|
284 |
+
x.append(ab_list['playEvents'][n]['pitchData']['coordinates']['x'] if 'x' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
285 |
+
y.append(ab_list['playEvents'][n]['pitchData']['coordinates']['y'] if 'y' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
286 |
+
|
287 |
+
ax.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aX'] if 'aX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
288 |
+
ay.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aY'] if 'aY' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
289 |
+
az.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aZ'] if 'aZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
290 |
+
pfxx.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pfxX'] if 'pfxX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
291 |
+
pfxz.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pfxZ'] if 'pfxZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
292 |
+
px.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pX'] if 'pX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
293 |
+
pz.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pZ'] if 'pZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
294 |
+
vx0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vX0'] if 'vX0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
295 |
+
vy0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vY0'] if 'vY0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
296 |
+
vz0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vZ0'] if 'vZ0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
297 |
+
x0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['x0'] if 'x0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
298 |
+
y0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['y0'] if 'y0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
299 |
+
z0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['z0'] if 'z0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else np.nan)
|
300 |
+
|
301 |
+
zone.append(ab_list['playEvents'][n]['pitchData']['zone'] if 'zone' in ab_list['playEvents'][n]['pitchData'] else np.nan)
|
302 |
+
type_confidence.append(ab_list['playEvents'][n]['pitchData']['typeConfidence'] if 'typeConfidence' in ab_list['playEvents'][n]['pitchData'] else np.nan)
|
303 |
+
plate_time.append(ab_list['playEvents'][n]['pitchData']['plateTime'] if 'plateTime' in ab_list['playEvents'][n]['pitchData'] else np.nan)
|
304 |
+
extension.append(ab_list['playEvents'][n]['pitchData']['extension'] if 'extension' in ab_list['playEvents'][n]['pitchData'] else np.nan)
|
305 |
+
|
306 |
+
if 'breaks' in ab_list['playEvents'][n]['pitchData']:
|
307 |
+
spin_rate.append(ab_list['playEvents'][n]['pitchData']['breaks']['spinRate'] if 'spinRate' in ab_list['playEvents'][n]['pitchData']['breaks'] else np.nan)
|
308 |
+
spin_direction.append(ab_list['playEvents'][n]['pitchData']['breaks']['spinDirection'] if 'spinDirection' in ab_list['playEvents'][n]['pitchData']['breaks'] else np.nan)
|
309 |
+
ivb.append(ab_list['playEvents'][n]['pitchData']['breaks']['breakVerticalInduced'] if 'breakVerticalInduced' in ab_list['playEvents'][n]['pitchData']['breaks'] else np.nan)
|
310 |
+
hb.append(ab_list['playEvents'][n]['pitchData']['breaks']['breakHorizontal'] if 'breakHorizontal' in ab_list['playEvents'][n]['pitchData']['breaks'] else np.nan)
|
311 |
+
|
312 |
+
else:
|
313 |
+
start_speed.append(np.nan)
|
314 |
+
end_speed.append(np.nan)
|
315 |
+
|
316 |
+
sz_top.append(np.nan)
|
317 |
+
sz_bot.append(np.nan)
|
318 |
+
x.append(np.nan)
|
319 |
+
y.append(np.nan)
|
320 |
+
|
321 |
+
ax.append(np.nan)
|
322 |
+
ay.append(np.nan)
|
323 |
+
az.append(np.nan)
|
324 |
+
pfxx.append(np.nan)
|
325 |
+
pfxz.append(np.nan)
|
326 |
+
px.append(np.nan)
|
327 |
+
pz.append(np.nan)
|
328 |
+
vx0.append(np.nan)
|
329 |
+
vy0.append(np.nan)
|
330 |
+
vz0.append(np.nan)
|
331 |
+
x0.append(np.nan)
|
332 |
+
y0.append(np.nan)
|
333 |
+
z0.append(np.nan)
|
334 |
+
|
335 |
+
zone.append(np.nan)
|
336 |
+
type_confidence.append(np.nan)
|
337 |
+
plate_time.append(np.nan)
|
338 |
+
extension.append(np.nan)
|
339 |
+
spin_rate.append(np.nan)
|
340 |
+
spin_direction.append(np.nan)
|
341 |
+
ivb.append(np.nan)
|
342 |
+
hb.append(np.nan)
|
343 |
+
|
344 |
+
if 'hitData' in ab_list['playEvents'][n]:
|
345 |
+
launch_speed.append(ab_list['playEvents'][n]['hitData']['launchSpeed'] if 'launchSpeed' in ab_list['playEvents'][n]['hitData'] else np.nan)
|
346 |
+
launch_angle.append(ab_list['playEvents'][n]['hitData']['launchAngle'] if 'launchAngle' in ab_list['playEvents'][n]['hitData'] else np.nan)
|
347 |
+
launch_distance.append(ab_list['playEvents'][n]['hitData']['totalDistance'] if 'totalDistance' in ab_list['playEvents'][n]['hitData'] else np.nan)
|
348 |
+
launch_location.append(ab_list['playEvents'][n]['hitData']['location'] if 'location' in ab_list['playEvents'][n]['hitData'] else np.nan)
|
349 |
+
|
350 |
+
trajectory.append(ab_list['playEvents'][n]['hitData']['trajectory'] if 'trajectory' in ab_list['playEvents'][n]['hitData'] else np.nan)
|
351 |
+
hardness.append(ab_list['playEvents'][n]['hitData']['hardness'] if 'hardness' in ab_list['playEvents'][n]['hitData'] else np.nan)
|
352 |
+
hit_x.append(ab_list['playEvents'][n]['hitData']['coordinates']['coordX'] if 'coordX' in ab_list['playEvents'][n]['hitData']['coordinates'] else np.nan)
|
353 |
+
hit_y.append(ab_list['playEvents'][n]['hitData']['coordinates']['coordY'] if 'coordY' in ab_list['playEvents'][n]['hitData']['coordinates'] else np.nan)
|
354 |
+
else:
|
355 |
+
launch_speed.append(np.nan)
|
356 |
+
launch_angle.append(np.nan)
|
357 |
+
launch_distance.append(np.nan)
|
358 |
+
launch_location.append(np.nan)
|
359 |
+
trajectory.append(np.nan)
|
360 |
+
hardness.append(np.nan)
|
361 |
+
hit_x.append(np.nan)
|
362 |
+
hit_y.append(np.nan)
|
363 |
+
|
364 |
+
index_play.append(ab_list['playEvents'][n]['index'] if 'index' in ab_list['playEvents'][n] else np.nan)
|
365 |
+
play_id.append(ab_list['playEvents'][n]['playId'] if 'playId' in ab_list['playEvents'][n] else np.nan)
|
366 |
+
start_time.append(ab_list['playEvents'][n]['startTime'] if 'startTime' in ab_list['playEvents'][n] else np.nan)
|
367 |
+
end_time.append(ab_list['playEvents'][n]['endTime'] if 'endTime' in ab_list['playEvents'][n] else np.nan)
|
368 |
+
is_pitch.append(ab_list['playEvents'][n]['isPitch'] if 'isPitch' in ab_list['playEvents'][n] else np.nan)
|
369 |
+
type_type.append(ab_list['playEvents'][n]['type'] if 'type' in ab_list['playEvents'][n] else np.nan)
|
370 |
+
|
371 |
+
|
372 |
+
|
373 |
+
if n == len(ab_list['playEvents']) - 1 :
|
374 |
+
|
375 |
+
type_ab.append(data['liveData']['plays']['allPlays'][ab_id]['result']['type'] if 'type' in data['liveData']['plays']['allPlays'][ab_id]['result'] else np.nan)
|
376 |
+
event.append(data['liveData']['plays']['allPlays'][ab_id]['result']['event'] if 'event' in data['liveData']['plays']['allPlays'][ab_id]['result'] else np.nan)
|
377 |
+
event_type.append(data['liveData']['plays']['allPlays'][ab_id]['result']['eventType'] if 'eventType' in data['liveData']['plays']['allPlays'][ab_id]['result'] else np.nan)
|
378 |
+
rbi.append(data['liveData']['plays']['allPlays'][ab_id]['result']['rbi'] if 'rbi' in data['liveData']['plays']['allPlays'][ab_id]['result'] else np.nan)
|
379 |
+
away_score.append(data['liveData']['plays']['allPlays'][ab_id]['result']['awayScore'] if 'awayScore' in data['liveData']['plays']['allPlays'][ab_id]['result'] else np.nan)
|
380 |
+
home_score.append(data['liveData']['plays']['allPlays'][ab_id]['result']['homeScore'] if 'homeScore' in data['liveData']['plays']['allPlays'][ab_id]['result'] else np.nan)
|
381 |
+
is_out.append(data['liveData']['plays']['allPlays'][ab_id]['result']['isOut'] if 'isOut' in data['liveData']['plays']['allPlays'][ab_id]['result'] else np.nan)
|
382 |
+
|
383 |
+
else:
|
384 |
+
|
385 |
+
type_ab.append(np.nan)
|
386 |
+
event.append(np.nan)
|
387 |
+
event_type.append(np.nan)
|
388 |
+
rbi.append(np.nan)
|
389 |
+
away_score.append(np.nan)
|
390 |
+
home_score.append(np.nan)
|
391 |
+
is_out.append(np.nan)
|
392 |
+
|
393 |
+
elif ab_list['playEvents'][n]['count']['balls'] == 4:
|
394 |
+
|
395 |
+
event.append(data['liveData']['plays']['allPlays'][ab_id]['result']['event'])
|
396 |
+
event_type.append(data['liveData']['plays']['allPlays'][ab_id]['result']['eventType'])
|
397 |
+
|
398 |
+
|
399 |
+
game_id.append(data['gamePk'])
|
400 |
+
game_date.append(data['gameData']['datetime']['officialDate'])
|
401 |
+
batter_id.append(ab_list['matchup']['batter']['id'] if 'batter' in ab_list['matchup'] else np.nan)
|
402 |
+
batter_name.append(ab_list['matchup']['batter']['fullName'] if 'batter' in ab_list['matchup'] else np.nan)
|
403 |
+
batter_hand.append(ab_list['matchup']['batSide']['code'] if 'batSide' in ab_list['matchup'] else np.nan)
|
404 |
+
pitcher_id.append(ab_list['matchup']['pitcher']['id'] if 'pitcher' in ab_list['matchup'] else np.nan)
|
405 |
+
pitcher_name.append(ab_list['matchup']['pitcher']['fullName'] if 'pitcher' in ab_list['matchup'] else np.nan)
|
406 |
+
pitcher_hand.append(ab_list['matchup']['pitchHand']['code'] if 'pitchHand' in ab_list['matchup'] else np.nan)
|
407 |
+
if ab_list['about']['isTopInning']:
|
408 |
+
batter_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else np.nan)
|
409 |
+
batter_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else np.nan)
|
410 |
+
pitcher_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else np.nan)
|
411 |
+
pitcher_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else np.nan)
|
412 |
+
else:
|
413 |
+
batter_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else np.nan)
|
414 |
+
batter_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else np.nan)
|
415 |
+
pitcher_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else np.nan)
|
416 |
+
pitcher_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else np.nan)
|
417 |
+
|
418 |
+
play_description.append(np.nan)
|
419 |
+
play_code.append(np.nan)
|
420 |
+
in_play.append(np.nan)
|
421 |
+
is_strike.append(np.nan)
|
422 |
+
is_ball.append(np.nan)
|
423 |
+
is_review.append(np.nan)
|
424 |
+
pitch_type.append(np.nan)
|
425 |
+
pitch_description.append(np.nan)
|
426 |
+
strikes.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else np.nan)
|
427 |
+
balls.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else np.nan)
|
428 |
+
outs.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else np.nan)
|
429 |
+
index_play.append(ab_list['playEvents'][n]['index'] if 'index' in ab_list['playEvents'][n] else np.nan)
|
430 |
+
play_id.append(ab_list['playEvents'][n]['playId'] if 'playId' in ab_list['playEvents'][n] else np.nan)
|
431 |
+
start_time.append(ab_list['playEvents'][n]['startTime'] if 'startTime' in ab_list['playEvents'][n] else np.nan)
|
432 |
+
end_time.append(ab_list['playEvents'][n]['endTime'] if 'endTime' in ab_list['playEvents'][n] else np.nan)
|
433 |
+
is_pitch.append(ab_list['playEvents'][n]['isPitch'] if 'isPitch' in ab_list['playEvents'][n] else np.nan)
|
434 |
+
type_type.append(ab_list['playEvents'][n]['type'] if 'type' in ab_list['playEvents'][n] else np.nan)
|
435 |
+
|
436 |
+
|
437 |
+
|
438 |
+
is_swing.append(np.nan)
|
439 |
+
is_whiff.append(np.nan)
|
440 |
+
start_speed.append(np.nan)
|
441 |
+
end_speed.append(np.nan)
|
442 |
+
sz_top.append(np.nan)
|
443 |
+
sz_bot.append(np.nan)
|
444 |
+
x.append(np.nan)
|
445 |
+
y.append(np.nan)
|
446 |
+
ax.append(np.nan)
|
447 |
+
ay.append(np.nan)
|
448 |
+
az.append(np.nan)
|
449 |
+
pfxx.append(np.nan)
|
450 |
+
pfxz.append(np.nan)
|
451 |
+
px.append(np.nan)
|
452 |
+
pz.append(np.nan)
|
453 |
+
vx0.append(np.nan)
|
454 |
+
vy0.append(np.nan)
|
455 |
+
vz0.append(np.nan)
|
456 |
+
x0.append(np.nan)
|
457 |
+
y0.append(np.nan)
|
458 |
+
z0.append(np.nan)
|
459 |
+
zone.append(np.nan)
|
460 |
+
type_confidence.append(np.nan)
|
461 |
+
plate_time.append(np.nan)
|
462 |
+
extension.append(np.nan)
|
463 |
+
spin_rate.append(np.nan)
|
464 |
+
spin_direction.append(np.nan)
|
465 |
+
ivb.append(np.nan)
|
466 |
+
hb.append(np.nan)
|
467 |
+
launch_speed.append(np.nan)
|
468 |
+
launch_angle.append(np.nan)
|
469 |
+
launch_distance.append(np.nan)
|
470 |
+
launch_location.append(np.nan)
|
471 |
+
trajectory.append(np.nan)
|
472 |
+
hardness.append(np.nan)
|
473 |
+
hit_x.append(np.nan)
|
474 |
+
hit_y.append(np.nan)
|
475 |
+
type_ab.append(np.nan)
|
476 |
+
ab_number.append(np.nan)
|
477 |
+
|
478 |
+
rbi.append(np.nan)
|
479 |
+
away_score.append(np.nan)
|
480 |
+
home_score.append(np.nan)
|
481 |
+
is_out.append(np.nan)
|
482 |
+
print({
|
483 |
+
'game_id':len(game_id),
|
484 |
+
'game_date':len(game_date),
|
485 |
+
'batter_id':len(batter_id),
|
486 |
+
'batter_name':len(batter_name),
|
487 |
+
'batter_hand':len(batter_hand),
|
488 |
+
'batter_team':len(batter_team),
|
489 |
+
'batter_team_id':len(batter_team_id),
|
490 |
+
'pitcher_id':len(pitcher_id),
|
491 |
+
'pitcher_name':len(pitcher_name),
|
492 |
+
'pitcher_hand':len(pitcher_hand),
|
493 |
+
'pitcher_team':len(pitcher_team),
|
494 |
+
'pitcher_team_id':len(pitcher_team_id),
|
495 |
+
'play_description':len(play_description),
|
496 |
+
'play_code':len(play_code),
|
497 |
+
'in_play':len(in_play),
|
498 |
+
'is_strike':len(is_strike),
|
499 |
+
'is_swing':len(is_swing),
|
500 |
+
'is_whiff':len(is_whiff),
|
501 |
+
'is_out':len(is_out),
|
502 |
+
'is_ball':len(is_ball),
|
503 |
+
'is_review':len(is_review),
|
504 |
+
'pitch_type':len(pitch_type),
|
505 |
+
'pitch_description':len(pitch_description),
|
506 |
+
'strikes':len(strikes),
|
507 |
+
'balls':len(balls),
|
508 |
+
'outs':len(outs),
|
509 |
+
'start_speed':len(start_speed),
|
510 |
+
'end_speed':len(end_speed),
|
511 |
+
'sz_top':len(sz_top),
|
512 |
+
'sz_bot':len(sz_bot),
|
513 |
+
'x':len(x),
|
514 |
+
'y':len(y),
|
515 |
+
'ax':len(ax),
|
516 |
+
'ay':len(ay),
|
517 |
+
'az':len(az),
|
518 |
+
'pfxx':len(pfxx),
|
519 |
+
'pfxz':len(pfxz),
|
520 |
+
'px':len(px),
|
521 |
+
'pz':len(pz),
|
522 |
+
'vx0':len(vx0),
|
523 |
+
'vy0':len(vy0),
|
524 |
+
'vz0':len(vz0),
|
525 |
+
'x0':len(x0),
|
526 |
+
'y0':len(y0),
|
527 |
+
'z0':len(z0),
|
528 |
+
'zone':len(zone),
|
529 |
+
'type_confidence':len(type_confidence),
|
530 |
+
'plate_time':len(plate_time),
|
531 |
+
'extension':len(extension),
|
532 |
+
'spin_rate':len(spin_rate),
|
533 |
+
'spin_direction':len(spin_direction),
|
534 |
+
'ivb':len(ivb),
|
535 |
+
'hb':len(hb),
|
536 |
+
'launch_speed':len(launch_speed),
|
537 |
+
'launch_angle':len(launch_angle),
|
538 |
+
'launch_distance':len(launch_distance),
|
539 |
+
'launch_location':len(launch_location),
|
540 |
+
'trajectory':len(trajectory),
|
541 |
+
'hardness':len(hardness),
|
542 |
+
'hit_x':len(hit_x),
|
543 |
+
'hit_y':len(hit_y),
|
544 |
+
'index_play':len(index_play),
|
545 |
+
'play_id':len(play_id),
|
546 |
+
'start_time':len(start_time),
|
547 |
+
'end_time':len(end_time),
|
548 |
+
'is_pitch':len(is_pitch),
|
549 |
+
'type_type':len(type_type),
|
550 |
+
'type_ab':len(type_ab),
|
551 |
+
'event':len(event),
|
552 |
+
'event_type':len(event_type),
|
553 |
+
'rbi':len(rbi),
|
554 |
+
'away_score':len(away_score),
|
555 |
+
'home_score':len(home_score),
|
556 |
+
}
|
557 |
+
|
558 |
+
|
559 |
+
)
|
560 |
+
df = pd.DataFrame(data={
|
561 |
+
'game_id':game_id,
|
562 |
+
'game_date':game_date,
|
563 |
+
'batter_id':batter_id,
|
564 |
+
'batter_name':batter_name,
|
565 |
+
'batter_hand':batter_hand,
|
566 |
+
'batter_team':batter_team,
|
567 |
+
'batter_team_id':batter_team_id,
|
568 |
+
'pitcher_id':pitcher_id,
|
569 |
+
'pitcher_name':pitcher_name,
|
570 |
+
'pitcher_hand':pitcher_hand,
|
571 |
+
'pitcher_team':pitcher_team,
|
572 |
+
'pitcher_team_id':pitcher_team_id,
|
573 |
+
'play_description':play_description,
|
574 |
+
'play_code':play_code,
|
575 |
+
'in_play':in_play,
|
576 |
+
'is_strike':is_strike,
|
577 |
+
'is_swing':is_swing,
|
578 |
+
'is_whiff':is_whiff,
|
579 |
+
'is_out':is_out,
|
580 |
+
'is_ball':is_ball,
|
581 |
+
'is_review':is_review,
|
582 |
+
'pitch_type':pitch_type,
|
583 |
+
'pitch_description':pitch_description,
|
584 |
+
'strikes':strikes,
|
585 |
+
'balls':balls,
|
586 |
+
'outs':outs,
|
587 |
+
'start_speed':start_speed,
|
588 |
+
'end_speed':end_speed,
|
589 |
+
'sz_top':sz_top,
|
590 |
+
'sz_bot':sz_bot,
|
591 |
+
'x':x,
|
592 |
+
'y':y,
|
593 |
+
'ax':ax,
|
594 |
+
'ay':ay,
|
595 |
+
'az':az,
|
596 |
+
'pfxx':pfxx,
|
597 |
+
'pfxz':pfxz,
|
598 |
+
'px':px,
|
599 |
+
'pz':pz,
|
600 |
+
'vx0':vx0,
|
601 |
+
'vy0':vy0,
|
602 |
+
'vz0':vz0,
|
603 |
+
'x0':x0,
|
604 |
+
'y0':y0,
|
605 |
+
'z0':z0,
|
606 |
+
'zone':zone,
|
607 |
+
'type_confidence':type_confidence,
|
608 |
+
'plate_time':plate_time,
|
609 |
+
'extension':extension,
|
610 |
+
'spin_rate':spin_rate,
|
611 |
+
'spin_direction':spin_direction,
|
612 |
+
'ivb':ivb,
|
613 |
+
'hb':hb,
|
614 |
+
'launch_speed':launch_speed,
|
615 |
+
'launch_angle':launch_angle,
|
616 |
+
'launch_distance':launch_distance,
|
617 |
+
'launch_location':launch_location,
|
618 |
+
'trajectory':trajectory,
|
619 |
+
'hardness':hardness,
|
620 |
+
'hit_x':hit_x,
|
621 |
+
'hit_y':hit_y,
|
622 |
+
'index_play':index_play,
|
623 |
+
'play_id':play_id,
|
624 |
+
'start_time':start_time,
|
625 |
+
'end_time':end_time,
|
626 |
+
'is_pitch':is_pitch,
|
627 |
+
'type_type':type_type,
|
628 |
+
'type_ab':type_ab,
|
629 |
+
'event':event,
|
630 |
+
'event_type':event_type,
|
631 |
+
'rbi':rbi,
|
632 |
+
'away_score':away_score,
|
633 |
+
'home_score':home_score,
|
634 |
+
|
635 |
+
}
|
636 |
+
)
|
637 |
+
return df
|
638 |
+
|
639 |
+
def get_players(self,sport_id=1):
    """Return the players listed by the MLB Stats API for one sport.

    Parameters
    ----------
    sport_id : int, default 1
        Value interpolated into the ``/api/v1/sports/{sport_id}/players`` URL.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the response's ``people`` list, with columns
        ``player_id``, ``name``, ``position``, ``team`` and ``age``.  A field
        that is absent from an entry is stored as ``None`` instead of
        aborting the whole request with a ``KeyError``.
    """
    player_data = requests.get(url=f'https://statsapi.mlb.com/api/v1/sports/{sport_id}/players').json()
    people = player_data['people']

    #Select relevant data that will help distinguish players from one another
    player_df = pd.DataFrame(data={
        'player_id': [person.get('id') for person in people],
        'name': [person.get('fullName') for person in people],
        # Nested objects are looked up defensively: one incomplete record
        # should not discard every other player in the response.
        'position': [person.get('primaryPosition', {}).get('abbreviation') for person in people],
        'team': [person.get('currentTeam', {}).get('id') for person in people],
        'age': [person.get('currentAge') for person in people],
    })
    return player_df
|
655 |
+
|
656 |
+
def get_teams(self):
    """Return the teams listed by the MLB Stats API as a DataFrame.

    Returns
    -------
    pandas.DataFrame
        Columns ``team_id``, ``city``, ``name``, ``franchise``,
        ``abbreviation``, ``parent_org_id``, ``parent_org``, ``league_id``,
        ``league_name`` and ``parent_org_abbreviation``, sorted by
        ``team_id``.  Teams without a parent organisation are treated as
        their own parent.
    """
    teams = requests.get(url='https://statsapi.mlb.com/api/v1/teams/').json()
    team_entries = teams['teams']

    def _franchise_field(key):
        # Entries lacking 'franchiseName' get None for every core field; their
        # team_id is therefore None and the dropna() below removes the row.
        return [entry.get(key) if 'franchiseName' in entry else None for entry in team_entries]

    def _optional_field(key):
        # Fields that are legitimately absent on some entries.
        return [entry.get(key) for entry in team_entries]

    def _league_field(key):
        # 'league' itself may be missing, so guard both levels.
        return [entry.get('league', {}).get(key) for entry in team_entries]

    #Create a dataframe of all the teams
    mlb_teams_df = pd.DataFrame(data={'team_id':_franchise_field('id'),
                                      # 'city' is the API's franchiseName; it was
                                      # previously a copy of the 'franchise' column.
                                      'city':_franchise_field('franchiseName'),
                                      'name':_franchise_field('teamName'),
                                      'franchise':_franchise_field('name'),
                                      'abbreviation':_franchise_field('abbreviation'),
                                      'parent_org_id':_optional_field('parentOrgId'),
                                      'parent_org':_optional_field('parentOrgName'),
                                      'league_id':_league_field('id'),
                                      'league_name':_league_field('name'),
                                      }).drop_duplicates().dropna(subset=['team_id']).reset_index(drop=True).sort_values('team_id')

    # A team with no parent organisation is its own parent.
    no_parent_id = mlb_teams_df['parent_org_id'].isnull()
    mlb_teams_df.loc[no_parent_id,'parent_org_id'] = mlb_teams_df.loc[no_parent_id,'team_id']
    no_parent_name = mlb_teams_df['parent_org'].isnull()
    mlb_teams_df.loc[no_parent_name,'parent_org'] = mlb_teams_df.loc[no_parent_name,'franchise']

    # Resolve each parent id to that parent's abbreviation.
    abbreviation_by_id = mlb_teams_df.set_index('team_id')['abbreviation'].to_dict()
    mlb_teams_df['parent_org_abbreviation'] = mlb_teams_df['parent_org_id'].map(abbreviation_by_id)

    return mlb_teams_df
|
700 |
+
|
701 |
+
def get_leagues(self):
    """Return the leagues listed by the MLB Stats API as a DataFrame.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the response's ``leagues`` list, with columns
        ``league_id``, ``league_name``, ``league_abbreviation`` and
        ``sport_id``.  A key missing from an entry is stored as ``None``.
    """
    response = requests.get(url='https://statsapi.mlb.com/api/v1/leagues/')
    league_entries = response.json()['leagues']

    def pluck(key):
        # None stands in for any entry that lacks the key.
        return [entry[key] if key in entry else None for entry in league_entries]

    columns = {
        'league_id': pluck('id'),
        'league_name': pluck('name'),
        'league_abbreviation': pluck('abbreviation'),
        # The sport id is nested one level down, under 'sport'.
        'sport_id': [entry['sport']['id'] if 'sport' in entry else None for entry in league_entries],
    }
    return pd.DataFrame(data=columns)
|
719 |
+
|
720 |
+
def get_player_games_list(self,player_id=691587,season=2023):
    """Return the game ids from a player's game log for one season.

    Parameters
    ----------
    player_id : int, default 691587
        Player id interpolated into the ``/api/v1/people/{player_id}`` URL.
    season : int, default 2023
        Season requested in the ``gameLog`` hydration.  It was previously
        fixed at 2023; the default keeps existing callers unchanged.

    Returns
    -------
    list
        The ``gamePk`` of every split in the first stats group of the first
        person in the response.
    """
    # https, like every other Stats API request in this class.
    url = f'https://statsapi.mlb.com/api/v1/people/{player_id}?hydrate=stats(type=gameLog,season={season}),hydrations'
    response = requests.get(url=url).json()
    splits = response['people'][0]['stats'][0]['splits']
    player_game_list = [split['game']['gamePk'] for split in splits]
    return player_game_list
|
723 |
+
|
724 |
+
def get_team_schedule(self,year=2023,sport_id=1,mlb_team='Toronto Blue Jays'):
    """Return the finished games of one organisation's team at a sport level.

    Parameters
    ----------
    year : int, default 2023
        Passed to ``self.get_schedule`` as ``year_input``.
    sport_id : int, default 1
        Sport to look at; validated with ``self.get_sport_id_check``.
    mlb_team : str, default 'Toronto Blue Jays'
        Matched against the ``parent_org`` column of the teams table.

    Returns
    -------
    tuple
        ``(schedule_df, teams_df)``: the schedule rows whose ``state`` is
        ``'F'`` and where the selected team is home or away, and the teams
        table restricted to ``sport_id``.  ``(False, False)`` when the sport
        id is rejected.

    Raises
    ------
    IndexError
        If no team at this sport level has ``mlb_team`` as parent organisation.
    """
    if not self.get_sport_id_check(sport_id=sport_id):
        print('Please Select a New Sport ID from the following')
        print(self.get_sport_id())
        return False, False

    schedule_df = self.get_schedule(year_input=year,sport_id=sport_id)
    # teams -> leagues (adds sport_id) -> sports (adds the *_sport columns)
    teams_df = self.get_teams().merge(self.get_leagues()).merge(self.get_sport_id(),left_on=['sport_id'],right_index=True,suffixes=('','_sport'))
    teams_df = teams_df[teams_df['sport_id'] == sport_id]

    franchises = teams_df.loc[teams_df['parent_org'] == mlb_team,'franchise']
    if franchises.empty:
        # Same exception type the bare `.values[0]` produced, with a usable message.
        raise IndexError(f'No team with parent organisation {mlb_team!r} found for sport_id={sport_id}')
    team_name_select = franchises.values[0]

    involves_team = (schedule_df.away == team_name_select) | (schedule_df.home == team_name_select)
    schedule_df = schedule_df[involves_team & (schedule_df.state == 'F')].reset_index(drop=True)
    return schedule_df,teams_df
|
737 |
+
|
738 |
+
def get_team_game_data(self,year=2023,sport_id=1,mlb_team='Toronto Blue Jays'):
    """Return the flattened game data for every finished game of one team.

    Parameters
    ----------
    year, sport_id, mlb_team
        Forwarded unchanged to ``self.get_team_schedule``.

    Returns
    -------
    pandas.DataFrame or None
        The frame built by ``self.get_data_df`` from the scheduled game ids,
        with two added columns: ``mlb_team`` (the parent organisation's
        abbreviation) and ``level`` (the sport's abbreviation).  ``None``
        when ``get_team_schedule`` did not return a schedule.
    """
    schedule_df,teams_df = self.get_team_schedule(year=year,sport_id=sport_id,mlb_team=mlb_team)
    # get_team_schedule signals failure with (False, False).  `not schedule_df`
    # cannot be used here: the truth value of a DataFrame is ambiguous and
    # raises ValueError, which made every successful lookup fail.
    if not isinstance(schedule_df, pd.DataFrame):
        return None

    data = self.get_data(schedule_df['game_id'][:])
    df = self.get_data_df(data_list = data)

    team_rows = teams_df[teams_df['parent_org'] == mlb_team]
    df['mlb_team'] = team_rows['parent_org_abbreviation'].values[0]
    df['level'] = team_rows['abbreviation_sport'].values[0]

    return df
|
app.py
CHANGED
@@ -1,551 +1,779 @@
|
|
1 |
-
from shiny import ui, render, App
|
2 |
-
import matplotlib.image as mpimg
|
3 |
import pandas as pd
|
4 |
-
import pygsheets
|
5 |
-
import pytz
|
6 |
-
from datetime import datetime
|
7 |
import numpy as np
|
8 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
df_2024 = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)
|
15 |
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
-
# print('Starting')
|
18 |
-
# df_2024 = pd.read_csv('2024_spring_data.csv',index_col=[0])
|
19 |
-
print('Starting')
|
20 |
spring_teams = df_2024.groupby(['pitcher_id']).tail(1)[['pitcher_id','pitcher_team']].set_index(['pitcher_id'])['pitcher_team'].to_dict()
|
21 |
|
|
|
|
|
|
|
|
|
22 |
|
23 |
-
df_2024['vy_f'] = -(df_2024['vy0']**2 - (2 * df_2024['ay'] * (df_2024['y0'] - 17/12)))**0.5
|
24 |
-
df_2024['t'] = (df_2024['vy_f'] - df_2024['vy0']) / df_2024['ay']
|
25 |
-
df_2024['vz_f'] = (df_2024['vz0']) + (df_2024['az'] * df_2024['t'])
|
26 |
-
df_2024['vaa'] = -np.arctan(df_2024['vz_f'] / df_2024['vy_f']) * (180 / np.pi)
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
|
34 |
-
|
|
|
35 |
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
return x.quantile(n)
|
40 |
-
percentile_.__name__ = 'percentile_{:02.0f}'.format(n*100)
|
41 |
-
return percentile_
|
42 |
|
43 |
-
def df_clean(df):
|
44 |
-
df_copy = df.copy()
|
45 |
-
df_copy.loc[df_copy['pitcher_hand'] == 'L','hb'] *= -1
|
46 |
-
df_copy.loc[df_copy['pitcher_hand'] == 'L','x0'] *= -1
|
47 |
-
df_copy.loc[df_copy['pitcher_hand'] == 'L','spin_direction'] = 360 - df_copy.loc[df_copy['pitcher_hand'] == 'L','spin_direction']
|
48 |
|
49 |
-
|
50 |
-
df_copy['bat_l'] = [1 if x == 'L' else 0 for x in df_copy['batter_hand']]
|
51 |
-
df_copy = df_copy[~df_copy.pitch_type.isin(["EP", "PO", "KN", "FO", "CS", "SC", "FA"])].reset_index(drop=True)
|
52 |
-
df_copy['pitch_type'] = df_copy['pitch_type'].replace({'FT':'SI','KC':'CU','ST':'SL','SV':'SL'})
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
-
df_copy = df_copy.merge(df_copy_fb_sum,left_on='pitcher_id',right_index=True,how='left')
|
71 |
|
72 |
-
df_copy['fb_velo_diff'] = df_copy['start_speed']- df_copy['fb_velo']
|
73 |
-
df_copy['fb_max_ivb_diff'] = df_copy['ivb']- df_copy['fb_max_ivb']
|
74 |
-
df_copy['fb_max_hb_diff'] = df_copy['hb']- df_copy['fb_max_x']
|
75 |
-
df_copy['fb_min_hb_diff'] = df_copy['hb']- df_copy['fb_min_x']
|
76 |
-
df_copy['fb_max_velo_diff'] = df_copy['start_speed']- df_copy['fb_max_velo']
|
77 |
-
df_copy['fb_axis_diff'] = df_copy['spin_direction']- df_copy['fb_axis']
|
78 |
|
79 |
-
# df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_velo_diff'] = 0
|
80 |
-
# df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_max_ivb_diff'] = 0
|
81 |
-
# df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_max_hb_diff'] = 0
|
82 |
-
# df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_min_hb_diff'] = 0
|
83 |
-
# df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_max_velo_diff'] = 0
|
84 |
-
# df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_axis_diff'] = 0
|
85 |
|
86 |
|
87 |
-
df_copy['max_speed'] = df_copy.groupby(['pitcher_id'])['start_speed'].transform('max')
|
88 |
-
df_copy['max_speed_diff'] = df_copy['start_speed'] - df_copy['max_speed']
|
89 |
|
90 |
-
df_copy['max_ivb'] = df_copy.groupby(['pitcher_id'])['ivb'].transform('max')
|
91 |
-
df_copy['max_ivb_diff'] = df_copy['ivb'] - df_copy['max_ivb']
|
92 |
|
93 |
-
df_copy['vy_f'] = -(df_copy['vy0']**2 - (2 * df_copy['ay'] * (df_copy['y0'] - 17/12)))**0.5
|
94 |
-
df_copy['t'] = (df_copy['vy_f'] - df_copy['vy0']) / df_copy['ay']
|
95 |
-
df_copy['vz_f'] = (df_copy['vz0']) + (df_copy['az'] * df_copy['t'])
|
96 |
-
df_copy['vaa'] = -np.arctan(df_copy['vz_f'] / df_copy['vy_f']) * (180 / np.pi)
|
97 |
|
98 |
-
#df_copy['vy_f'] = -(df_copy['vy0']**2 - (2 * df_copy['ay'] * (df_copy['y0'] - 17/12)))**0.5
|
99 |
-
#df_copy['t'] = (df_copy['vy_f'] - df_copy['vy0']) / df_copy['ay']
|
100 |
-
df_copy['vx_f'] = (df_copy['vx0']) + (df_copy['ax'] * df_copy['t'])
|
101 |
-
df_copy['haa'] = -np.arctan(df_copy['vx_f'] / df_copy['vy_f']) * (180 / np.pi)
|
102 |
|
103 |
-
# df_copy['x_diff'] = df_copy['x0'] - df_copy['px']
|
104 |
-
# df_copy['z_diff'] = df_copy['z0'] - df_copy['pz']
|
105 |
|
106 |
-
|
107 |
-
|
108 |
|
109 |
-
|
110 |
-
|
111 |
|
|
|
|
|
112 |
|
113 |
-
|
114 |
-
ui.layout_sidebar(
|
115 |
-
|
116 |
-
ui.panel_sidebar(
|
117 |
-
ui.input_date_range("date_range_id", "Date range input",start = df_2024.game_date.min(),
|
118 |
end = df_2024.game_date.max(),width=2,min=df_2024.game_date.min(),
|
119 |
-
max=df_2024.game_date.max()),
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
ui.
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
|
141 |
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
146 |
|
147 |
|
|
|
|
|
|
|
|
|
148 |
|
149 |
|
150 |
-
#
|
151 |
-
|
|
|
|
|
152 |
|
153 |
-
|
154 |
-
# @render.data_frame
|
155 |
-
# def raw_table():
|
156 |
|
157 |
-
|
158 |
-
|
159 |
-
# width='fit-content',
|
160 |
-
# height=750,
|
161 |
-
# filters=True,
|
162 |
-
# )
|
163 |
|
164 |
-
|
165 |
-
|
166 |
-
|
|
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
pitches = ('start_speed','count'),
|
171 |
-
|
172 |
-
start_speed = ('start_speed','mean'),
|
173 |
-
ivb = ('ivb','mean'),
|
174 |
-
hb = ('hb','mean'),
|
175 |
-
spin_rate = ('spin_rate','mean'),
|
176 |
-
vaa = ('vaa','mean'),
|
177 |
-
haa = ('haa','mean'),
|
178 |
-
horizontal_release = ('x0','mean'),
|
179 |
-
vertical_release = ('z0','mean'),
|
180 |
-
extension = ('extension','mean')).round(1).reset_index()
|
181 |
-
#grouped_ivb = grouped_ivb.set_index(['pitcher_id']).reset_index()
|
182 |
-
# return grouped_ivb
|
183 |
-
return render.DataGrid(
|
184 |
-
grouped_ivb,
|
185 |
-
width='fit-content',
|
186 |
-
height=750,
|
187 |
-
filters=True,
|
188 |
-
)
|
189 |
|
190 |
-
@output
|
191 |
-
@render.data_frame
|
192 |
-
def table_daily():
|
193 |
|
194 |
-
|
195 |
-
|
196 |
-
pitches = ('start_speed','count'),
|
197 |
-
|
198 |
-
start_speed = ('start_speed','mean'),
|
199 |
-
ivb = ('ivb','mean'),
|
200 |
-
hb = ('hb','mean'),
|
201 |
-
spin_rate = ('spin_rate','mean'),
|
202 |
-
vaa = ('vaa','mean'),
|
203 |
-
haa = ('haa','mean'),
|
204 |
-
horizontal_release = ('x0','mean'),
|
205 |
-
vertical_release = ('z0','mean'),
|
206 |
-
extension = ('extension','mean')).round(1).reset_index()
|
207 |
-
#grouped_ivb = grouped_ivb.set_index(['pitcher_id']).reset_index()
|
208 |
-
# return grouped_ivb
|
209 |
-
return render.DataGrid(
|
210 |
-
grouped_ivb,
|
211 |
-
width='fit-content',
|
212 |
-
height=750,
|
213 |
-
filters=True,
|
214 |
-
)
|
215 |
-
|
216 |
-
#return grouped_ivb
|
217 |
|
218 |
-
@output
|
219 |
-
@render.data_frame
|
220 |
-
def table_2023():
|
221 |
-
grouped_ivb = df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])&
|
222 |
-
(pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])].groupby(['pitcher_id','pitcher_name','pitcher_hand','pitch_type']).agg(
|
223 |
-
pitches = ('start_speed','count'),
|
224 |
-
|
225 |
-
start_speed = ('start_speed','mean'),
|
226 |
-
ivb = ('ivb','mean'),
|
227 |
-
hb = ('hb','mean'),
|
228 |
-
spin_rate = ('spin_rate','mean'),
|
229 |
-
vaa = ('vaa','mean'),
|
230 |
-
haa = ('haa','mean'),
|
231 |
-
horizontal_release = ('x0','mean'),
|
232 |
-
vertical_release = ('z0','mean'),
|
233 |
-
extension = ('extension','mean')).round(1).reset_index()
|
234 |
-
grouped_ivb = grouped_ivb.set_index(['pitcher_id','pitch_type'])
|
235 |
|
236 |
-
|
|
|
237 |
|
238 |
-
#####
|
239 |
-
ivb_merged = grouped_ivb_2023.merge(right=grouped_ivb,
|
240 |
-
left_index=True,
|
241 |
-
right_index=True,
|
242 |
-
how='right',suffixes=['_2023','_spring']).reset_index()
|
243 |
-
|
244 |
-
ivb_merged['pitcher_name'] = ivb_merged['pitcher_name_spring']
|
245 |
-
ivb_merged['pitcher_hand'] = ivb_merged['pitcher_hand_spring']
|
246 |
-
#ivb_merged['pitch_type'] = ivb_merged['pitch_type_spring']
|
247 |
-
|
248 |
-
|
249 |
-
# ivb_merged = ivb_merged[['pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitch_type',
|
250 |
-
# 'pitches_spring', 'start_speed_spring', 'ivb_spring',
|
251 |
-
# 'hb_spring', 'spin_rate_spring', 'horizontal_release_spring',
|
252 |
-
# 'vertical_release_spring', 'extension_spring']]
|
253 |
-
ivb_merged['pitcher_team'] = ivb_merged['pitcher_id'].map(spring_teams)
|
254 |
-
ivb_merged = ivb_merged.set_index(['pitcher_id', 'pitcher_name','pitcher_team', 'pitcher_hand', 'pitch_type',])
|
255 |
-
|
256 |
-
return render.DataGrid(
|
257 |
-
ivb_merged[['pitches_2023','start_speed_2023', 'ivb_2023', 'hb_2023',
|
258 |
-
'spin_rate_2023', 'vaa_2023','haa_2023', 'horizontal_release_2023', 'vertical_release_2023',
|
259 |
-
'extension_2023','pitches_spring','start_speed_spring', 'ivb_spring', 'hb_spring',
|
260 |
-
'spin_rate_spring','vaa_spring','haa_spring', 'horizontal_release_spring', 'vertical_release_spring',
|
261 |
-
'extension_spring',]].reset_index(),
|
262 |
-
width='fit-content',
|
263 |
-
height=750,
|
264 |
-
filters=True,
|
265 |
-
)
|
266 |
-
|
267 |
-
@output
|
268 |
-
@render.data_frame
|
269 |
-
def table_difference():
|
270 |
-
grouped_ivb = df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])&
|
271 |
-
(pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])].groupby(['pitcher_id','pitcher_name','pitcher_hand','pitch_type']).agg(
|
272 |
-
pitches = ('start_speed','count'),
|
273 |
-
|
274 |
-
start_speed = ('start_speed','mean'),
|
275 |
-
ivb = ('ivb','mean'),
|
276 |
-
hb = ('hb','mean'),
|
277 |
-
spin_rate = ('spin_rate','mean'),
|
278 |
-
vaa = ('vaa','mean'),
|
279 |
-
haa = ('haa','mean'),
|
280 |
-
horizontal_release = ('x0','mean'),
|
281 |
-
vertical_release = ('z0','mean'),
|
282 |
-
extension = ('extension','mean')).round(1).reset_index()
|
283 |
-
grouped_ivb = grouped_ivb.set_index(['pitcher_id','pitch_type'])
|
284 |
|
285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
# grouped_ivb = df_2024.groupby(['pitcher_id','pitcher_name','pitcher_hand','pitch_type']).agg(
|
328 |
-
# pitches = ('start_speed','count'),
|
329 |
|
330 |
-
# start_speed = ('start_speed','mean'),
|
331 |
-
# ivb = ('ivb','mean'),
|
332 |
-
# hb = ('hb','mean'),
|
333 |
-
# spin_rate = ('spin_rate','mean'),
|
334 |
-
# vaa = ('vaa','mean'),
|
335 |
-
# haa = ('haa','mean'),
|
336 |
-
# horizontal_release = ('x0','mean'),
|
337 |
-
# vertical_release = ('z0','mean'),
|
338 |
-
# extension = ('extension','mean')).round(1).reset_index()
|
339 |
-
# grouped_ivb = grouped_ivb.set_index(['pitcher_id','pitch_type'])
|
340 |
-
|
341 |
-
# grouped_ivb_2023 = pd.read_csv('2023_pitch_group_data.csv',index_col=[0,3])
|
342 |
-
|
343 |
-
# #####
|
344 |
-
# ivb_merged = grouped_ivb_2023.merge(right=grouped_ivb,
|
345 |
-
# left_index=True,
|
346 |
-
# right_index=True,
|
347 |
-
# how='right',suffixes=['_2023','_spring']).reset_index()
|
348 |
-
|
349 |
-
# ivb_merged['pitcher_name'] = ivb_merged['pitcher_name_spring']
|
350 |
-
# ivb_merged['pitcher_hand'] = ivb_merged['pitcher_hand_spring']
|
351 |
-
# #ivb_merged['pitch_type'] = ivb_merged['pitch_type_spring']
|
352 |
-
|
353 |
-
|
354 |
-
# # ivb_merged = ivb_merged[['pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitch_type',
|
355 |
-
# # 'pitches_spring', 'start_speed_spring', 'ivb_spring',
|
356 |
-
# # 'hb_spring', 'spin_rate_spring', 'horizontal_release_spring',
|
357 |
-
# # 'vertical_release_spring', 'extension_spring']]
|
358 |
-
# ivb_merged['pitcher_team'] = ivb_merged['pitcher_id'].map(spring_teams)
|
359 |
-
# ivb_merged = ivb_merged.set_index(['pitcher_id', 'pitcher_name','pitcher_team', 'pitcher_hand', 'pitch_type',])
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
# ivb_merged[['start_speed_difference', 'ivb_difference', 'hb_difference','spin_rate_difference','vaa_difference','haa_difference',
|
367 |
-
# 'horizontal_release_difference', 'vertical_release_difference',
|
368 |
-
# 'extension_difference']] = ivb_merged[['start_speed_spring', 'ivb_spring', 'hb_spring',
|
369 |
-
# 'spin_rate_spring', 'vaa_spring','haa_spring','horizontal_release_spring', 'vertical_release_spring',
|
370 |
-
# 'extension_spring']].values - ivb_merged[['start_speed_2023', 'ivb_2023', 'hb_2023',
|
371 |
-
# 'spin_rate_2023', 'vaa_2023','haa_2023','horizontal_release_2023', 'vertical_release_2023',
|
372 |
-
# 'extension_2023']].values
|
373 |
-
# ivb_merged_new = ivb_merged.reset_index()
|
374 |
-
|
375 |
-
# ivb_merged_new = ivb_merged_new[
|
376 |
-
# pd.isnull(ivb_merged_new['pitches_2023']) &
|
377 |
-
# pd.notnull(ivb_merged_new['pitches_spring']) &
|
378 |
-
# ivb_merged_new['pitcher_id'].isin(ivb_merged_new[pd.notnull(ivb_merged_new['pitches_2023'])]['pitcher_id'])
|
379 |
-
# ][
|
380 |
-
|
381 |
-
# ['pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitch_type',
|
382 |
-
# 'pitches_spring', 'start_speed_spring', 'ivb_spring',
|
383 |
-
# 'hb_spring', 'spin_rate_spring', 'vaa_spring','haa_spring', 'horizontal_release_spring',
|
384 |
-
# 'vertical_release_spring', 'extension_spring']
|
385 |
-
# ]#.reset_index()
|
386 |
-
|
387 |
-
# # ivb_merged_new = ivb_merged.copy().reset_index()
|
388 |
-
# ivb_merged_new['pitcher_team'] = ivb_merged_new['pitcher_id'].map(spring_teams)
|
389 |
-
# ivb_merged_new = ivb_merged_new.set_index(['pitcher_id', 'pitcher_name','pitcher_team', 'pitcher_hand', 'pitch_type',])
|
390 |
-
# #ivb_merged_new.to_clipboard(header=False)
|
391 |
-
# df_2024_date_min = df_2024.groupby(['pitcher_id','pitcher_name','pitcher_hand','pitch_type','game_date'])[['game_date']].min()
|
392 |
-
# ivb_merged_new = ivb_merged_new.merge(right=df_2024_date_min,
|
393 |
-
# left_index=True,
|
394 |
-
# right_index=True)
|
395 |
-
# ivb_merged_new = ivb_merged_new.drop(columns=['game_date'])
|
396 |
-
|
397 |
-
# return render.DataGrid(
|
398 |
-
# ivb_merged_new.reset_index(),
|
399 |
-
# width='fit-content',
|
400 |
-
# height=750,
|
401 |
-
# filters=True,
|
402 |
-
# )
|
403 |
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
|
|
408 |
|
409 |
-
df_2024_update = df_clean(df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])&
|
410 |
-
(pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])])
|
411 |
|
412 |
-
|
|
|
|
|
|
|
413 |
|
414 |
-
|
415 |
|
416 |
|
417 |
-
|
418 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
419 |
|
420 |
|
421 |
-
y_pred_mean = -0.0023964706
|
422 |
-
y_pred_std =0.0057581966
|
423 |
-
# y_pred_mean = -0.0136602735
|
424 |
-
# y_pred_std = 0.006434487
|
425 |
|
426 |
-
## tjStuff+
|
427 |
-
df_2024_stuff = df_2024_update.groupby(['pitcher_id','pitcher_name','pitcher_team']).agg(
|
428 |
-
pitches = ('y_pred','count'),
|
429 |
-
run_exp = ('y_pred','mean'),)
|
430 |
-
# run_exp_loc = ('y_pred_loc','mean'))
|
431 |
|
432 |
-
|
433 |
-
|
434 |
|
435 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
436 |
|
437 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
438 |
|
|
|
439 |
|
440 |
-
df_2024_stuff_pitch = df_2024_update.groupby(['pitcher_id','pitcher_name','pitcher_team','pitch_type']).agg(
|
441 |
-
pitches = ('y_pred','count'),
|
442 |
-
run_exp = ('y_pred','mean'),)
|
443 |
-
# run_exp_loc = ('y_pred_loc','mean'))
|
444 |
|
445 |
-
|
446 |
-
|
447 |
|
448 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
449 |
|
450 |
-
|
|
|
|
|
451 |
|
452 |
-
df_2024_stuff_pitch_50_pivot = df_2024_stuff_pitch_50.reset_index().pivot(index=['pitcher_id','pitcher_name','pitcher_team'],
|
453 |
-
columns=['pitch_type'],
|
454 |
-
values=['tj_stuff_plus'])
|
455 |
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
df_2024_stuff_pitch_50_pivot.columns = column_list
|
464 |
-
df_2024_stuff_pitch_50_pivot = df_2024_stuff_pitch_50_pivot.applymap(lambda x: int(x) if not pd.isna(x) else x)
|
465 |
-
df_2024_stuff_pitch_50_pivot = df_2024_stuff_pitch_50_pivot.reset_index()
|
466 |
|
467 |
-
return render.DataGrid(
|
468 |
-
df_2024_stuff_pitch_50_pivot,
|
469 |
-
width='fit-content',
|
470 |
-
height=750,
|
471 |
-
filters=True)
|
472 |
-
|
473 |
-
@output
|
474 |
-
@render.data_frame
|
475 |
-
def table_stuff_day():
|
476 |
|
477 |
|
478 |
-
df_2024_update = df_clean(df_2024[(pd.to_datetime(df_2024['game_date']).dt.date>=input.date_range_id()[0])&
|
479 |
-
(pd.to_datetime(df_2024['game_date']).dt.date<=input.date_range_id()[1])])
|
480 |
-
print('made it here')
|
481 |
|
482 |
-
features = ['start_speed','spin_rate','extension','ivb','hb','x0','z0','fb_max_velo_diff','fb_max_ivb_diff','fb_max_hb_diff']
|
483 |
|
484 |
-
|
|
|
485 |
|
|
|
|
|
|
|
|
|
486 |
|
487 |
-
|
488 |
-
|
489 |
|
490 |
|
491 |
-
|
492 |
-
|
493 |
-
# y_pred_mean = -0.0136602735
|
494 |
-
# y_pred_std = 0.006434487
|
495 |
|
496 |
-
## tjStuff+
|
497 |
-
df_2024_stuff_daily = df_2024_update.groupby(['pitcher_id','pitcher_name','pitcher_team','game_date']).agg(
|
498 |
-
pitches = ('y_pred','count'),
|
499 |
-
run_exp = ('y_pred','mean'),)
|
500 |
-
# run_exp_loc = ('y_pred_loc','mean'))
|
501 |
|
502 |
-
|
503 |
-
|
504 |
|
505 |
-
df_2024_stuff_daily_50 = df_2024_stuff_daily[df_2024_stuff_daily.pitches >= 1]
|
506 |
|
507 |
-
|
|
|
508 |
|
509 |
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
#
|
|
|
|
|
|
|
|
|
|
|
514 |
|
515 |
-
df_2024_stuff_daily_pitch['run_exp_mean'] = y_pred_mean
|
516 |
-
df_2024_stuff_daily_pitch['run_exp_std'] = y_pred_std
|
517 |
|
518 |
-
|
|
|
|
|
|
|
|
|
519 |
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
print(df_2024_stuff_daily_pitch_50_pivot)
|
529 |
|
530 |
-
|
531 |
-
column_list = list(df_2024_stuff_daily_pitch_50_pivot.columns[:-1])
|
532 |
-
column_list.append('All')
|
533 |
-
df_2024_stuff_daily_pitch_50_pivot.columns = column_list
|
534 |
-
df_2024_stuff_daily_pitch_50_pivot = df_2024_stuff_daily_pitch_50_pivot.applymap(lambda x: int(x) if not pd.isna(x) else x)
|
535 |
-
df_2024_stuff_daily_pitch_50_pivot = df_2024_stuff_daily_pitch_50_pivot.reset_index()
|
536 |
|
537 |
-
return render.DataGrid(
|
538 |
-
df_2024_stuff_daily_pitch_50_pivot,
|
539 |
-
width='fit-content',
|
540 |
-
height=750,
|
541 |
-
filters=True)
|
542 |
|
543 |
|
544 |
|
|
|
|
|
|
|
545 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
546 |
|
547 |
|
|
|
|
|
|
|
|
|
548 |
|
549 |
|
|
|
550 |
|
551 |
app = App(app_ui, server)
|
|
|
|
|
|
|
1 |
import pandas as pd
|
|
|
|
|
|
|
2 |
import numpy as np
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import seaborn as sns
|
5 |
+
import pitch_summary_functions as psf
|
6 |
+
import requests
|
7 |
+
import matplotlib
|
8 |
+
from api_scraper import MLB_Scrape
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
colour_palette = ['#FFB000','#648FFF','#785EF0',
|
13 |
+
'#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']
|
14 |
+
|
15 |
+
df_2024 = pd.read_csv('C:/Users/thoma/Google Drive/Python/Baseball/season_stats/2024/2024_regular_data.csv',index_col=[0])
|
16 |
+
df_2024 = df_2024.drop_duplicates(subset=['play_id'],keep='last')
|
17 |
+
|
18 |
+
# ### Import Datasets
|
19 |
+
# import datasets
|
20 |
+
# from datasets import load_dataset
|
21 |
+
# dataset = load_dataset('nesticot/mlb_data', data_files=['mlb_pitch_data_2020.csv' ])
|
22 |
+
# dataset_train = dataset['train']
|
23 |
+
# df_2024 = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)
|
24 |
+
|
25 |
+
### PITCH COLOURS ###
|
26 |
+
pitch_colours = {
|
27 |
+
'Four-Seam Fastball':'#FF007D',#BC136F
|
28 |
+
'Sinker':'#98165D',#DC267F
|
29 |
+
'Cutter':'#BE5FA0',
|
30 |
+
|
31 |
+
'Changeup':'#F79E70',#F75233
|
32 |
+
'Splitter':'#FE6100',#F75233
|
33 |
+
'Screwball':'#F08223',
|
34 |
+
'Forkball':'#FFB000',
|
35 |
+
|
36 |
+
'Slider':'#67E18D',#1BB999#785EF0
|
37 |
+
'Sweeper':'#1BB999',#37CD85#904039
|
38 |
+
'Slurve':'#376748',#785EF0#549C07#BEABD8
|
39 |
|
40 |
+
'Knuckle Curve':'#311D8B',
|
41 |
+
'Curveball':'#3025CE',
|
42 |
+
'Slow Curve':'#274BFC',
|
43 |
+
'Eephus':'#648FFF',
|
|
|
44 |
|
45 |
+
'Knuckleball':'#867A08',
|
46 |
+
|
47 |
+
'Pitch Out':'#472C30',
|
48 |
+
'Other':'#9C8975',
|
49 |
+
}
|
50 |
|
|
|
|
|
|
|
51 |
spring_teams = df_2024.groupby(['pitcher_id']).tail(1)[['pitcher_id','pitcher_team']].set_index(['pitcher_id'])['pitcher_team'].to_dict()
|
52 |
|
53 |
+
season_start = '2024-03-20'
|
54 |
+
season_end = '2024-09-29'
|
55 |
+
season_fg=2024
|
56 |
+
chad_fg = requests.get(f'https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=0&season={season_fg}&season={season_fg}&month=1000&season1={season_fg}&ind=0&pageitems=2000000000&pagenum=1&ind=0&rost=0&players=&type=36&postseason=&sortdir=default&sortstat=sp_pitching').json()
|
57 |
|
|
|
|
|
|
|
|
|
58 |
|
59 |
+
chadwick_df_small = pd.DataFrame(data={
|
60 |
+
'key_mlbam':[x['xMLBAMID'] for x in chad_fg['data']],
|
61 |
+
'key_fangraphs':[x['playerid'] for x in chad_fg['data']],
|
62 |
+
'Name':[x['PlayerName'] for x in chad_fg['data']],
|
63 |
+
})
|
64 |
|
65 |
+
pitcher_dicts = chadwick_df_small.set_index('key_mlbam')['Name'].sort_values().to_dict()
|
66 |
+
mlb_fg_dicts = chadwick_df_small.set_index('key_mlbam')['key_fangraphs'].sort_values().to_dict()
|
67 |
|
68 |
|
69 |
+
statcast_pitch_summary = pd.read_csv('statcast_pitch_summary.csv')
|
70 |
+
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
|
|
|
|
|
|
|
71 |
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
+
df_2024_codes = psf.df_update_code(df_2024)
|
|
|
|
|
|
|
74 |
|
75 |
+
df_2024_update = psf.df_clean(df_2024_codes)
|
76 |
+
import joblib
|
77 |
+
model = joblib.load('joblib_model/tjstuff_model_20240318.joblib')
|
78 |
+
y_pred_mean = 0.0011434511
|
79 |
+
y_pred_std = 0.006554768
|
80 |
+
|
81 |
+
features = ['start_speed','spin_rate','extension','ivb','hb','x0','z0','fb_max_velo_diff','fb_max_ivb_diff','fb_max_hb_diff']
|
82 |
+
|
83 |
+
targets = ['delta_run_exp_mean']
|
84 |
+
|
85 |
+
|
86 |
+
df_2024_update['y_pred'] = model.predict(df_2024_update[features])
|
87 |
+
|
88 |
+
df_2024_update['tj_stuff_plus'] = 100 + 10*((-df_2024_update.y_pred +y_pred_mean) / y_pred_std)
|
89 |
+
|
90 |
+
team_logos = pd.read_csv('team_logos.csv')
|
91 |
+
|
92 |
+
mlb_stats = MLB_Scrape()
|
93 |
+
teams_df = mlb_stats.get_teams()
|
94 |
+
team_logo_dict = teams_df.set_index(['team_id'])['parent_org_id'].to_dict()
|
95 |
+
|
96 |
+
font_properties = {'family': 'calibi', 'size': 12}
|
97 |
+
font_properties_titles = {'family': 'calibi', 'size': 20}
|
98 |
+
font_properties_axes = {'family': 'calibi', 'size': 16}
|
99 |
+
df_plot = []
|
100 |
+
ax2_loc = []
|
101 |
+
gs = []
|
102 |
+
fig = []
|
103 |
+
|
104 |
+
function_dict={
|
105 |
+
'velocity_kde':'Velocity Distributions',
|
106 |
+
'break_plot':'Pitch Movement',
|
107 |
+
'rolling_tj_stuff':'Rolling tjStuff+',
|
108 |
+
'location_lhb':'Locations vs LHB',
|
109 |
+
'location_rhb':'Locations vs RHB',
|
110 |
+
}
|
111 |
+
|
112 |
+
split_dict = {'all':'All',
|
113 |
+
'left':'LHB',
|
114 |
+
'right':'RHB'}
|
115 |
+
|
116 |
+
split_dict_hand = {'all':['L','R'],
|
117 |
+
'left':['L'],
|
118 |
+
'right':['R']}
|
119 |
+
|
120 |
+
ball_dict = {'0':'0',
|
121 |
+
'1':'1',
|
122 |
+
'2':'2',
|
123 |
+
'3':'3'}
|
124 |
+
|
125 |
+
strike_dict = {'0':'0',
|
126 |
+
'1':'1',
|
127 |
+
'2':'2'}
|
128 |
+
|
129 |
+
# count_dict = {'0_0':'Through 0-0',
|
130 |
+
# '0_1':'Through 0-1',
|
131 |
+
# '0_2':'Through 0-2',
|
132 |
+
# '1_0':'Through 1-0',
|
133 |
+
# '1_1':'Through 1-1',
|
134 |
+
# '1_2':'Through 1-2',
|
135 |
+
# '2_1':'Through 2-1',
|
136 |
+
# '2_0':'Through 2-0',
|
137 |
+
# '3_0':'Through 3-0',
|
138 |
+
# '3_1':'Through 3-1',
|
139 |
+
# '2_2':'Through 2-2',
|
140 |
+
# '3_2':'Through 3-2'}
|
141 |
+
|
142 |
+
# count_dict_fg = {'0_0':'',
|
143 |
+
# '0_1':'61',
|
144 |
+
# '0_2':'62',
|
145 |
+
# '1_0':'63',
|
146 |
+
# '1_1':'64',
|
147 |
+
# '1_2':'65',
|
148 |
+
# '2_1':'66',
|
149 |
+
# '2_0':'67',
|
150 |
+
# '3_0':'68',
|
151 |
+
# '3_1':'69',
|
152 |
+
# '2_2':'70',
|
153 |
+
# '3_2':'71'}
|
154 |
|
155 |
+
from urllib.request import Request, urlopen
|
156 |
+
from shiny import App, reactive, ui, render
|
157 |
+
from shiny.ui import h2, tags
|
158 |
+
# importing OpenCV(cv2) module
|
159 |
+
app_ui = ui.page_fluid(
|
160 |
+
ui.layout_sidebar(
|
161 |
+
|
162 |
+
ui.panel_sidebar(
|
163 |
+
ui.row(
|
164 |
+
ui.column(6,
|
165 |
+
ui.input_select('player_id','Select Player',pitcher_dicts,selectize=True,multiple=False)),
|
166 |
+
ui.column(6, ui.output_ui('test','Select Game'))),
|
167 |
+
|
168 |
+
ui.row(
|
169 |
+
ui.column(4,
|
170 |
+
ui.input_select('plot_id_1','Plot Left',function_dict,multiple=False,selected='velocity_kde')),
|
171 |
+
ui.column(4,
|
172 |
+
ui.input_select('plot_id_2','Plot Middle',function_dict,multiple=False,selected='rolling_tj_stuff')),
|
173 |
+
ui.column(4,
|
174 |
+
ui.input_select('plot_id_3','Plot Right',function_dict,multiple=False,selected='break_plot'))),
|
175 |
+
|
176 |
+
# ui.input_select('count_id','Count',count_dict,multiple=True,selectize=True,selected='0_0'),
|
177 |
+
|
178 |
+
ui.row(
|
179 |
+
ui.column(6,
|
180 |
+
ui.input_select('ball_id','Balls',ball_dict,multiple=False,selected='0'),
|
181 |
+
ui.input_radio_buttons(
|
182 |
+
"count_id_balls",
|
183 |
+
"Count Filter Balls",
|
184 |
+
{
|
185 |
+
"exact": "Exact Balls",
|
186 |
+
"greater": ">= Balls",
|
187 |
+
"lesser": "<= Balls",
|
188 |
+
},selected='greater')),
|
189 |
+
ui.column(6,
|
190 |
+
ui.input_select('strike_id','Strikes',strike_dict,multiple=False,selected='0'),
|
191 |
+
ui.input_radio_buttons(
|
192 |
+
"count_id_strikes",
|
193 |
+
"Count Filter Strikes",
|
194 |
+
{
|
195 |
+
"exact": "Exact Strikes",
|
196 |
+
"greater": ">= Strikes",
|
197 |
+
"lesser": "<= Strikes",
|
198 |
+
},selected='greater'))),
|
199 |
+
ui.row(
|
200 |
+
ui.column(6,
|
201 |
+
ui.input_select('split_id','Select Split',split_dict,multiple=False)),
|
202 |
+
ui.column(6,
|
203 |
+
ui.input_numeric('rolling_window','Rolling Window (for tjStuff+ Plot)',min=1,value=10))),
|
204 |
+
|
205 |
+
|
206 |
+
|
207 |
+
ui.input_action_button("go", "Generate",class_="btn-primary"),
|
208 |
+
|
209 |
+
|
210 |
+
width=4)
|
211 |
+
,
|
212 |
+
ui.panel_main(
|
213 |
+
ui.navset_tab(
|
214 |
+
# ui.nav("Raw Data",
|
215 |
+
# ui.output_data_frame("raw_table")),
|
216 |
+
ui.nav("Season Summary",
|
217 |
+
ui.output_plot('plot',
|
218 |
+
width='2000px',
|
219 |
+
height='2000px')),
|
220 |
+
ui.nav("Game Summary",
|
221 |
+
ui.output_plot('plot_game',
|
222 |
+
width='2000px',
|
223 |
+
height='2000px'))
|
224 |
+
,id="my_tabs"))))
|
225 |
|
|
|
226 |
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
|
229 |
|
|
|
|
|
230 |
|
|
|
|
|
231 |
|
|
|
|
|
|
|
|
|
232 |
|
|
|
|
|
|
|
|
|
233 |
|
|
|
|
|
234 |
|
235 |
+
#print(app_ui)
|
236 |
+
def server(input, output, session):
|
237 |
|
238 |
+
@render.ui
|
239 |
+
def test():
|
240 |
|
241 |
+
# @reactive.Effect
|
242 |
+
if input.my_tabs() == 'Season Summary':
|
243 |
|
244 |
+
return ui.input_date_range("date_range_id", "Date range input",start = df_2024.game_date.min(),
|
|
|
|
|
|
|
|
|
245 |
end = df_2024.game_date.max(),width=2,min=df_2024.game_date.min(),
|
246 |
+
max=df_2024.game_date.max()),
|
247 |
+
# @reactive.Effect
|
248 |
+
if input.my_tabs() == 'Game Summary':
|
249 |
+
pitcher_id_select = int(input.player_id())
|
250 |
+
df_plot = df_2024_update[(df_2024_update['pitcher_id']==pitcher_id_select)]
|
251 |
+
|
252 |
+
|
253 |
+
# ax0.text(x=0.5,y=0.30,s=f'2024 Spring Training',fontname='Calibri',ha='center',fontsize=30,va='top')
|
254 |
+
df_plot['game_opp'] = df_plot['game_date'].astype(str) + ' vs ' + df_plot['batter_team'].astype(str)
|
255 |
+
#print(df_plot['game_opp'])
|
256 |
+
|
257 |
+
date_dict = pd.concat([df_plot.drop_duplicates(subset=['pitcher_id','game_id','game_opp'])[['game_id','game_opp']]]).set_index('game_id').to_dict()
|
258 |
+
return ui.input_select("game_id", "Select Game",date_dict,selectize=True)
|
259 |
+
|
260 |
+
@output
|
261 |
+
@render.plot
|
262 |
+
@reactive.event(input.go, ignore_none=False)
|
263 |
+
def plot():
|
264 |
+
#fig, ax = plt.subplots(3, 2, figsize=(9, 9))
|
265 |
+
|
266 |
+
font_properties = {'family': 'calibi', 'size': 12}
|
267 |
+
font_properties_titles = {'family': 'calibi', 'size': 20}
|
268 |
+
font_properties_axes = {'family': 'calibi', 'size': 16}
|
269 |
+
|
270 |
+
if len((input.player_id()))<1:
|
271 |
+
fig, ax = plt.subplots(1, 1, figsize=(9, 9))
|
272 |
+
ax.text(x=0.5,y=0.5,s='Please Select\nA Player',fontsize=150,ha='center')
|
273 |
+
ax.grid('off')
|
274 |
+
return
|
275 |
+
|
276 |
+
pitcher_id_select = int(input.player_id())
|
277 |
+
|
278 |
+
|
279 |
+
df_plot = df_2024_update[(df_2024_update['pitcher_id']==pitcher_id_select)]
|
280 |
+
|
281 |
+
df_plot = df_plot[df_plot['batter_hand'].isin(split_dict_hand[input.split_id()])]
|
282 |
+
|
283 |
+
if input.count_id_balls()=='greater' and input.count_id_strikes()=='greater' and int(input.ball_id())==0 and int(input.strike_id())==0:
|
284 |
+
ball_title = ''
|
285 |
+
strike_title = ''
|
286 |
+
else:
|
287 |
+
if input.count_id_balls()=='exact':
|
288 |
+
df_plot = df_plot[df_plot['balls']==int(input.ball_id())]
|
289 |
+
ball_title = str(f'{(input.ball_id())} Ball Count; ')
|
290 |
+
elif input.count_id_balls()=='greater':
|
291 |
+
df_plot = df_plot[df_plot['balls']>=int(input.ball_id())]
|
292 |
+
ball_title = str(f'At Least {(input.ball_id())} Ball Count; ')
|
293 |
+
elif input.count_id_balls()=='lesser':
|
294 |
+
df_plot = df_plot[df_plot['balls']<=int(input.ball_id())]
|
295 |
+
ball_title = str(f'At Most {(input.ball_id())} Ball Count; ')
|
296 |
+
|
297 |
+
if input.count_id_strikes()=='exact':
|
298 |
+
df_plot = df_plot[df_plot['strikes']==int(input.strike_id())]
|
299 |
+
strike_title = str(f'{(input.strike_id())} Strike Count; ')
|
300 |
+
elif input.count_id_strikes()=='greater':
|
301 |
+
df_plot = df_plot[df_plot['strikes']>=int(input.strike_id())]
|
302 |
+
strike_title = str(f'At Least {(input.strike_id())} Strike Count; ')
|
303 |
+
elif input.count_id_strikes()=='lesser':
|
304 |
+
df_plot = df_plot[df_plot['strikes']<=int(input.strike_id())]
|
305 |
+
strike_title = str(f'At Most {(input.strike_id())} Strike Count; ')
|
306 |
+
|
307 |
+
|
308 |
+
|
309 |
+
if input.split_id() == 'all':
|
310 |
+
split_title = ''
|
311 |
+
|
312 |
+
elif input.split_id() == 'left':
|
313 |
+
split_title = 'vs. LHH'
|
314 |
+
|
315 |
+
elif input.split_id() == 'right':
|
316 |
+
split_title = 'vs. RHH'
|
317 |
+
|
318 |
+
|
319 |
+
if len(df_plot)<1:
|
320 |
+
fig, ax = plt.subplots(1, 1, figsize=(9, 9))
|
321 |
+
ax.text(x=0.5,y=0.5,s='Please Select\nOther Parameters',fontsize=150,ha='center')
|
322 |
+
ax.grid('off')
|
323 |
+
return
|
324 |
+
|
325 |
+
df_plot['pitch_type_count'] = df_plot.groupby(['pitcher_id'])['pitch_type'].cumcount()+1
|
326 |
+
df_plot['pitch_type_count_each'] = df_plot.groupby(['pitch_type'])['pitch_type'].cumcount()+1
|
327 |
+
#df_plot = df_plot.merge(df_2024_update[['tj_stuff_plus','play_id']],left_on=['play_id'],right_on=['play_id'],how='left')
|
328 |
+
df_plot = df_plot.sort_values(by=['pitch_description'])
|
329 |
+
|
330 |
+
grouped_ivb = psf.group_ivb_update(df=df_plot,agg_list=['pitcher_id','pitcher_name','pitcher_hand','pitch_type','pitch_description'])
|
331 |
+
grouped_ivb_all = psf.group_ivb_update(df=df_plot,agg_list=['pitcher_id','pitcher_name','pitcher_hand'])
|
332 |
+
|
333 |
+
|
334 |
+
|
335 |
+
from matplotlib.gridspec import GridSpec
|
336 |
+
plt.rcParams['font.family'] = 'Calibri'
|
337 |
+
df_plot['prop'] = df_plot.groupby("pitch_type")["is_pitch"].transform("sum")
|
338 |
+
label_labels = df_plot.sort_values(by=['prop','pitch_type'],ascending=[False,True]).pitch_description.unique()
|
339 |
+
|
340 |
+
#plt.rcParams["figure.figsize"] = [10,10]
|
341 |
+
fig = plt.figure(figsize=(20, 20))
|
342 |
+
plt.rcParams.update({'figure.autolayout': True})
|
343 |
+
fig.set_facecolor('white')
|
344 |
+
sns.set_theme(style="whitegrid", palette=colour_palette)
|
345 |
+
print('this is the one plot')
|
346 |
+
# gs = GridSpec(7, 2, width_ratios=[1,1], height_ratios=[1.5,1,1,1,1,1,2.5])
|
347 |
+
gs = GridSpec(5, 5, height_ratios=[150,75,225,325,50],width_ratios=[1,100,100,100,1])
|
348 |
+
#### NO FG
|
349 |
+
####gs = GridSpec(5, 5, height_ratios=[225,0,225,325,50],width_ratios=[1,100,100,100,1])
|
350 |
+
#gs = GridSpec(4, 1, width_ratios=[1], height_ratios=[1,0.75,7-len(label_labels)/4,1+len(label_labels)/4])
|
351 |
+
|
352 |
+
gs.update(hspace=0.2, wspace=0.3)
|
353 |
+
|
354 |
+
# Add subplots to the grid
|
355 |
+
ax0 = fig.add_subplot(gs[0, :])
|
356 |
+
ax1_table = fig.add_subplot(gs[1, :])
|
357 |
+
ax2_left = fig.add_subplot(gs[2, 1])
|
358 |
+
ax2_middle = fig.add_subplot(gs[2, 2])
|
359 |
+
ax2_right = fig.add_subplot(gs[2, 3])
|
360 |
+
ax3 = fig.add_subplot(gs[-2, :])
|
361 |
+
#axfooter = fig.add_subplot(gs[-1, :])
|
362 |
+
|
363 |
+
ax1_table.axis('off')
|
364 |
+
|
365 |
+
sns.set_theme(style="whitegrid", palette=colour_palette)
|
366 |
+
fig.set_facecolor('white')
|
367 |
+
|
368 |
+
font_properties = {'family': 'calibi', 'size': 12}
|
369 |
+
font_properties_titles = {'family': 'calibi', 'size': 20}
|
370 |
+
font_properties_axes = {'family': 'calibi', 'size': 16}
|
371 |
+
|
372 |
+
## FANGRAPHS TABLE ###
|
373 |
+
data_pull = psf.fangraphs_scrape(pitcher_id=pitcher_id_select,
|
374 |
+
split=input.split_id(),
|
375 |
+
start_date=input.date_range_id()[0],
|
376 |
+
end_date=input.date_range_id()[1])
|
377 |
+
|
378 |
+
|
379 |
+
psf.fangraphs_table(data=data_pull,
|
380 |
+
stats=['IP','WHIP','ERA','FIP','TBF','K%','BB%','K-BB%'],
|
381 |
+
ax=ax1_table)
|
382 |
+
|
383 |
+
for x,y,z in zip([input.plot_id_1(),input.plot_id_2(),input.plot_id_3()],[ax2_left,ax2_middle,ax2_right],[1,2,3]):
|
384 |
+
if x == 'velocity_kde':
|
385 |
+
psf.velocity_kdes(df=df_plot,ax=y,gs=gs,gs_list=z,fig=fig)
|
386 |
+
if x == 'rolling_tj_stuff':
|
387 |
+
psf.tj_stuff_roling(df = df_plot,window = int(input.rolling_window()),ax=y)
|
388 |
+
if x == 'break_plot':
|
389 |
+
psf.break_plot(df=df_plot,ax=y)
|
390 |
+
if x == 'location_lhb':
|
391 |
+
psf.location_plot(df=df_plot,ax=y,hand='L')
|
392 |
+
if x == 'location_rhb':
|
393 |
+
psf.location_plot(df=df_plot,ax=y,hand='R')
|
394 |
+
|
395 |
+
pitches_list = df_plot['pitch_description'].unique()
|
396 |
+
colour_pitches = [pitch_colours[x] for x in pitches_list]
|
397 |
+
|
398 |
+
# handles, labels = ax2_right.get_legend_handles_labels()
|
399 |
+
|
400 |
+
# # Manually create handles and labels for each pitch-color pair
|
401 |
+
handles = [plt.scatter([], [], color=color, marker='o', s=100) for color in colour_pitches]
|
402 |
+
labels = pitches_list
|
403 |
+
|
404 |
+
|
405 |
|
406 |
|
407 |
+
### FANGRAPHS TABLE ###
|
408 |
+
psf.table_summary(df=df_plot.copy(),
|
409 |
+
pitcher_id=pitcher_id_select,
|
410 |
+
ax=ax3,
|
411 |
+
df_group=grouped_ivb.copy(),
|
412 |
+
df_group_all=grouped_ivb_all.copy(),
|
413 |
+
statcast_pitch_summary=statcast_pitch_summary.copy())
|
414 |
|
415 |
|
416 |
+
# ############ FOOTER ################
|
417 |
+
# #fig.text(x=0.5,y=0.05,s='Note: Colour Coding Compares to League Average By Pitch',ha='center',fontname='Calibri',fontsize=10)
|
418 |
+
# axfooter.text(x=0.05,y=1,s='By: Thomas Nestico\n @TJStats',fontname='Calibri',ha='left',fontsize=24,va='top')
|
419 |
+
# axfooter.text(x=1-0.05,y=1,s='Data: MLB, Fangraphs',ha='right',fontname='Calibri',fontsize=24,va='top')
|
420 |
|
421 |
|
422 |
+
# axfooter.text(x=0.5,y=0.8,s='Colour Coding Compares to League Average By Pitch\ntjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type\ntjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10',
|
423 |
+
# ha='center',va='center',fontname='Calibri',fontsize=16)
|
424 |
+
# axfooter.axis('off')
|
425 |
+
# #fig.tight_layout()
|
426 |
|
427 |
+
|
|
|
|
|
428 |
|
429 |
+
# Get value counts of the column and sort in descending order
|
430 |
+
sorted_value_counts = df_plot['pitch_description'].value_counts().sort_values(ascending=False)
|
|
|
|
|
|
|
|
|
431 |
|
432 |
+
# Get the list of items ordered from most to least frequent
|
433 |
+
items_in_order = sorted_value_counts.index.tolist()
|
434 |
+
# Create a dictionary to map names to colors
|
435 |
+
name_to_color = dict(zip(labels, handles))
|
436 |
|
437 |
+
# Order the colors based on the correct order of names
|
438 |
+
ordered_colors = [name_to_color[name] for name in items_in_order]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
439 |
|
|
|
|
|
|
|
440 |
|
441 |
+
ax3.legend(ordered_colors, items_in_order, bbox_to_anchor=(0.1, 0.81, 0.8, 0.2), ncol=5,
|
442 |
+
fancybox=True,loc='lower center',fontsize=20,framealpha=1.0, markerscale=2,prop={'family': 'calibi', 'size': 20})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
443 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
444 |
|
445 |
+
################## Title ##########
|
446 |
+
title_spot = f'{df_plot.pitcher_name.values[0]}'
|
447 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
448 |
|
449 |
+
ax0.text(x=0.5,y=0.8,s=title_spot,fontname='Calibri',ha='center',fontsize=56,va='top')
|
450 |
+
ax0.text(x=0.5,y=0.5,s='Season Pitching Summary',fontname='Calibri',ha='center',fontsize=40,va='top',fontstyle='italic')
|
451 |
+
|
452 |
+
|
453 |
+
#ax0.text(x=0.5,y=0.25,s=f'2024 Spring Training',fontname='Calibri',ha='center',fontsize=30,va='top')
|
454 |
+
# ax0.text(x=0.5,y=0.25,s=f'{season_fg} MLB Season',fontname='Calibri',ha='center',fontsize=30,va='top')
|
455 |
+
# ax0.axis('off')
|
456 |
|
457 |
+
|
458 |
+
ax0.text(x=0.5,y=0.25,s=f'{input.date_range_id()[0]} to {input.date_range_id()[1]}',fontname='Calibri',ha='center',fontsize=30,va='top',fontstyle='italic')
|
459 |
+
|
460 |
+
ax0.text(x=0.5,y=0.05,s=f'{ball_title}{strike_title}{split_title}',fontname='Calibri',ha='center',fontsize=20,va='top')
|
461 |
+
ax0.axis('off')
|
462 |
+
|
463 |
+
from matplotlib.offsetbox import (OffsetImage, AnnotationBbox)
|
464 |
+
import urllib
|
465 |
+
import urllib.request
|
466 |
+
import urllib.error
|
467 |
+
from urllib.error import HTTPError
|
468 |
+
|
469 |
+
try:
|
470 |
+
url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/{pitcher_id_select}/headshot/67/current.png'
|
471 |
+
test_mage = plt.imread(url)
|
472 |
+
except urllib.error.HTTPError as err:
|
473 |
+
url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/1/headshot/67/current.png'
|
474 |
+
imagebox = OffsetImage(test_mage, zoom = 0.5)
|
475 |
+
ab = AnnotationBbox(imagebox, (0.125, 0.4), frameon = False)
|
476 |
+
ax0.add_artist(ab)
|
477 |
+
|
478 |
+
player_bio = requests.get(url=f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id_select}&hydrate=currentTeam").json()
|
479 |
+
|
480 |
+
|
481 |
+
|
482 |
+
|
483 |
+
if 'currentTeam' in player_bio['people'][0]:
|
484 |
+
try:
|
485 |
+
url = team_logos[team_logos['id'] == team_logo_dict[player_bio['people'][0]['currentTeam']['id']]]['imageLink'].values[0]
|
486 |
+
|
487 |
+
im = plt.imread(url)
|
488 |
+
# response = requests.get(url)
|
489 |
+
# im = Image.open(BytesIO(response.content))
|
490 |
+
# im = plt.imread(team_logos[team_logos['id'] == player_bio['people'][0]['currentTeam']['parentOrgId']]['imageLink'].values[0])
|
491 |
+
# ax = fig.add_axes([0,0,1,0.85], anchor='C', zorder=1)
|
492 |
+
imagebox = OffsetImage(im, zoom = 0.4)
|
493 |
+
ab = AnnotationBbox(imagebox, (0.875, 0.40), frameon = False)
|
494 |
+
ax0.add_artist(ab)
|
495 |
+
except IndexError:
|
496 |
+
print()
|
|
|
|
|
497 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
498 |
|
499 |
+
############ FOOTER ################
|
500 |
+
#fig.text(x=0.5,y=0.05,s='Note: Colour Coding Compares to League Average By Pitch',ha='center',fontname='Calibri',fontsize=10)
|
501 |
+
axfooter = fig.add_subplot(gs[-1, :])
|
502 |
+
axfooter.text(x=0.05,y=1,s='By: Thomas Nestico\n @TJStats',fontname='Calibri',ha='left',fontsize=24,va='top')
|
503 |
+
axfooter.text(x=1-0.05,y=1,s='Data: MLB, Fangraphs',ha='right',fontname='Calibri',fontsize=24,va='top')
|
504 |
|
|
|
|
|
505 |
|
506 |
+
axfooter.text(x=0.5,y=0.8,s='Colour Coding Compares to League Average By Pitch\ntjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type\ntjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10',
|
507 |
+
ha='center',va='center',fontname='Calibri',fontsize=16)
|
508 |
+
axfooter.axis('off')
|
509 |
+
#fig.tight_layout()
|
510 |
|
511 |
+
fig.subplots_adjust(left=0.03, right=0.97, top=0.97, bottom=0.03)
|
512 |
|
513 |
|
514 |
+
@output
@render.plot
@reactive.event(input.go, ignore_none=False)
def plot_game():
    """Draw the single-game pitching summary figure for the selected pitcher.

    Reads the player, game, batter-hand split, ball/strike count filters and
    the three panel choices from the Shiny ``input`` object, filters
    ``df_2024_update`` accordingly and lays the summary out on a 20x20 figure:
    title row, FanGraphs stat table, three selectable panels, pitch table with
    legend, and footer.

    Returns:
        None. The figure is left as the current pyplot figure. When no player
        is selected, or the filters leave no pitches, a placeholder figure
        with a prompt is drawn instead.
    """
    # Imports stay function-local, as in the original, because the file's
    # top-of-file import block is not visible from this section.
    import operator
    import urllib.error

    from matplotlib.gridspec import GridSpec
    from matplotlib.offsetbox import AnnotationBbox, OffsetImage

    if len(input.player_id()) < 1:
        fig, ax = plt.subplots(1, 1, figsize=(9, 9))
        ax.text(x=0.5, y=0.5, s='Please Select\nA Player', fontsize=150, ha='center')
        # The original passed the string 'off', which is truthy; a real bool
        # is what Axes.grid documents for hiding the grid lines.
        ax.grid(False)
        return

    pitcher_id_select = int(input.player_id())

    # Pitches thrown by this pitcher in the chosen game, restricted to the
    # batter-hand split.
    df_plot = df_2024_update[
        (df_2024_update['pitcher_id'] == pitcher_id_select)
        & (df_2024_update['game_id'] == int(input.game_id()))
    ]
    df_plot = df_plot[df_plot['batter_hand'].isin(split_dict_hand[input.split_id()])]

    # ---- Ball / strike count filters -------------------------------------
    # Titles default to '' so they are always bound, even if a selector
    # returns an unexpected value.
    ball_title = ''
    strike_title = ''
    balls_mode = input.count_id_balls()
    strikes_mode = input.count_id_strikes()
    balls_n = int(input.ball_id())
    strikes_n = int(input.strike_id())

    # mode -> (comparison applied to the count column, title prefix)
    count_filters = {
        'exact': (operator.eq, ''),
        'greater': (operator.ge, 'At Least '),
        'lesser': (operator.le, 'At Most '),
    }

    # "at least 0 balls and at least 0 strikes" matches every pitch, so that
    # combination is treated as "no count filter" and adds nothing to the title.
    unfiltered = (
        balls_mode == 'greater'
        and strikes_mode == 'greater'
        and balls_n == 0
        and strikes_n == 0
    )
    if not unfiltered:
        if balls_mode in count_filters:
            compare, prefix = count_filters[balls_mode]
            df_plot = df_plot[compare(df_plot['balls'], balls_n)]
            ball_title = f'{prefix}{input.ball_id()} Ball Count; '
        if strikes_mode in count_filters:
            compare, prefix = count_filters[strikes_mode]
            df_plot = df_plot[compare(df_plot['strikes'], strikes_n)]
            strike_title = f'{prefix}{input.strike_id()} Strike Count; '

    split_title = {'all': '', 'left': 'vs. LHH', 'right': 'vs. RHH'}.get(input.split_id(), '')

    if df_plot.empty:
        fig, ax = plt.subplots(1, 1, figsize=(9, 9))
        ax.text(x=0.5, y=0.5, s='Please Select\nOther Parameters', fontsize=150, ha='center')
        ax.grid(False)
        return

    # Work on an independent copy: the column assignments below would
    # otherwise target a filtered slice of df_2024_update (pandas
    # chained-assignment hazard / SettingWithCopyWarning).
    df_plot = df_plot.copy()
    df_plot['pitch_type_count'] = df_plot.groupby(['pitcher_id'])['pitch_type'].cumcount() + 1
    df_plot['pitch_type_count_each'] = df_plot.groupby(['pitch_type'])['pitch_type'].cumcount() + 1
    df_plot = df_plot.sort_values(by=['pitch_description'])

    # Sub-title text, e.g. "<game_date> vs <batter_team>".
    df_plot['game_opp'] = df_plot['game_date'].astype(str) + ' vs ' + df_plot['batter_team'].astype(str)

    grouped_ivb = psf.group_ivb_update(
        df=df_plot,
        agg_list=['pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitch_type', 'pitch_description'],
    )
    grouped_ivb_all = psf.group_ivb_update(
        df=df_plot,
        agg_list=['pitcher_id', 'pitcher_name', 'pitcher_hand'],
    )

    plt.rcParams['font.family'] = 'Calibri'
    # Per-pitch-type sum of is_pitch; kept on the frame because df_plot is
    # handed to the psf plotting helpers below.
    df_plot['prop'] = df_plot.groupby("pitch_type")["is_pitch"].transform("sum")

    # ---- Figure layout ----------------------------------------------------
    fig = plt.figure(figsize=(20, 20))
    plt.rcParams.update({'figure.autolayout': True})
    fig.set_facecolor('white')
    sns.set_theme(style="whitegrid", palette=colour_palette)

    # Rows: title, FanGraphs table, three panels, pitch table, footer.
    # The thin outer columns act as side margins for the three panels.
    gs = GridSpec(5, 5, height_ratios=[150, 75, 225, 325, 50], width_ratios=[1, 100, 100, 100, 1])
    gs.update(hspace=0.2, wspace=0.3)

    ax0 = fig.add_subplot(gs[0, :])
    ax1_table = fig.add_subplot(gs[1, :])
    ax2_left = fig.add_subplot(gs[2, 1])
    ax2_middle = fig.add_subplot(gs[2, 2])
    ax2_right = fig.add_subplot(gs[2, 3])
    ax3 = fig.add_subplot(gs[-2, :])
    ax1_table.axis('off')

    # ---- FanGraphs table (single game: start and end date are the same) ---
    game_date = df_plot['game_date'].values[0]
    data_pull = psf.fangraphs_scrape(pitcher_id=pitcher_id_select,
                                     split=input.split_id(),
                                     start_date=game_date,
                                     end_date=game_date)
    psf.fangraphs_table(data=data_pull,
                        stats=['IP', 'WHIP', 'ERA', 'FIP', 'TBF', 'K%', 'BB%', 'K-BB%'],
                        ax=ax1_table)

    # ---- Three user-selectable panels -------------------------------------
    panel_choices = [input.plot_id_1(), input.plot_id_2(), input.plot_id_3()]
    panel_axes = [ax2_left, ax2_middle, ax2_right]
    for slot, (choice, panel_ax) in enumerate(zip(panel_choices, panel_axes), start=1):
        if choice == 'velocity_kde':
            psf.velocity_kdes(df=df_plot, ax=panel_ax, gs=gs, gs_list=slot, fig=fig)
        elif choice == 'rolling_tj_stuff':
            psf.tj_stuff_roling(df=df_plot, window=int(input.rolling_window()), ax=panel_ax)
        elif choice == 'break_plot':
            psf.break_plot(df=df_plot, ax=panel_ax)
        elif choice == 'location_lhb':
            psf.location_plot(df=df_plot, ax=panel_ax, hand='L')
        elif choice == 'location_rhb':
            psf.location_plot(df=df_plot, ax=panel_ax, hand='R')

    # ---- Legend handles: one empty scatter per pitch, in its pitch colour --
    pitches_list = df_plot['pitch_description'].unique()
    handles = [
        plt.scatter([], [], color=pitch_colours[pitch], marker='o', s=100)
        for pitch in pitches_list
    ]

    # ---- Pitch summary table ----------------------------------------------
    psf.table_summary(df=df_plot.copy(),
                      pitcher_id=pitcher_id_select,
                      ax=ax3,
                      df_group=grouped_ivb.copy(),
                      df_group_all=grouped_ivb_all.copy(),
                      statcast_pitch_summary=statcast_pitch_summary.copy())

    # Legend entries ordered from most- to least-thrown pitch.
    items_in_order = (
        df_plot['pitch_description'].value_counts().sort_values(ascending=False).index.tolist()
    )
    handle_by_pitch = dict(zip(pitches_list, handles))
    ordered_handles = [handle_by_pitch[name] for name in items_in_order]

    # Font family was misspelled 'calibi'; every other text call in this
    # function uses 'Calibri'.
    ax3.legend(ordered_handles, items_in_order, bbox_to_anchor=(0.1, 0.81, 0.8, 0.2), ncol=5,
               fancybox=True, loc='lower center', fontsize=20, framealpha=1.0, markerscale=2,
               prop={'family': 'Calibri', 'size': 20})

    # ---- Title ------------------------------------------------------------
    title_spot = f'{df_plot.pitcher_name.values[0]}'
    ax0.text(x=0.5, y=0.8, s=title_spot, fontname='Calibri', ha='center', fontsize=56, va='top')
    ax0.text(x=0.5, y=0.5, s='Game Pitching Summary', fontname='Calibri', ha='center',
             fontsize=40, va='top', fontstyle='italic')
    ax0.text(x=0.5, y=0.25, s=df_plot['game_opp'].values[0], fontname='Calibri', ha='center',
             fontstyle='italic', fontsize=30, va='top')
    ax0.text(x=0.5, y=0.05, s=f'{ball_title}{strike_title}{split_title}', fontname='Calibri',
             ha='center', fontsize=20, va='top')
    ax0.axis('off')

    # ---- Player headshot --------------------------------------------------
    # NOTE(review): plt.imread with a URL is deprecated in recent matplotlib
    # releases - confirm the pinned version still accepts it.
    headshot_url = ('https://img.mlbstatic.com/mlb-photos/image/upload/'
                    'd_people:generic:headshot:67:current.png/w_213,q_auto:best/'
                    'v1/people/{player}/headshot/67/current.png')
    try:
        headshot = plt.imread(headshot_url.format(player=pitcher_id_select))
    except urllib.error.HTTPError:
        # Fall back to the placeholder person id 1. The original only
        # reassigned the URL here and never loaded it, so the image variable
        # was unbound and the next statement raised NameError.
        headshot = plt.imread(headshot_url.format(player=1))
    ax0.add_artist(AnnotationBbox(OffsetImage(headshot, zoom=0.5), (0.125, 0.4), frameon=False))

    # ---- Team logo --------------------------------------------------------
    player_bio = requests.get(
        url=f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id_select}&hydrate=currentTeam"
    ).json()
    person = player_bio['people'][0]
    if 'currentTeam' in person:
        try:
            logo_id = team_logo_dict[person['currentTeam']['id']]
            logo_url = team_logos[team_logos['id'] == logo_id]['imageLink'].values[0]
            logo = plt.imread(logo_url)
            ax0.add_artist(AnnotationBbox(OffsetImage(logo, zoom=0.4), (0.875, 0.40), frameon=False))
        except (IndexError, KeyError):
            # Best effort: a team id missing from team_logo_dict (KeyError) or
            # with no row in team_logos (IndexError) just means no logo.
            pass

    # ---- Footer -----------------------------------------------------------
    axfooter = fig.add_subplot(gs[-1, :])
    axfooter.text(x=0.05, y=1, s='By: Thomas Nestico\n @TJStats', fontname='Calibri',
                  ha='left', fontsize=24, va='top')
    axfooter.text(x=1-0.05, y=1, s='Data: MLB, Fangraphs', ha='right', fontname='Calibri',
                  fontsize=24, va='top')
    axfooter.text(x=0.5, y=0.8, s='Colour Coding Compares to League Average By Pitch\ntjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type\ntjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10',
                  ha='center', va='center', fontname='Calibri', fontsize=16)
    axfooter.axis('off')

    fig.subplots_adjust(left=0.03, right=0.97, top=0.97, bottom=0.03)
|
778 |
|
779 |
app = App(app_ui, server)
|
joblib_model/barrel_model.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9428e89f2a408148377efb3cd169dc8790bcc89df9495cb895b9db5a955e8fb7
|
3 |
+
size 11447
|
joblib_model/in_zone.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5300b15a6ccfb1dd1e79c85bd9ea478a1945c454845e6be31cd8815e4063a3e
|
3 |
+
size 54459064
|
joblib_model/model_attack_zone.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2671d4db2606cfee299dcffba2a94138fce77c1b7ef6ad14695a972a38dda3c8
|
3 |
+
size 50570139
|
joblib_model/no_swing.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3da3e7ab2b513b87d05e90ae30c788ac819dfcaa7cc1cd9943fc13d2958a00f
|
3 |
+
size 279409
|
joblib_model/swing.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fef4a66363e5f3fdc70ae45c5382bd986c800ff8bf9296a1f9b334461e70fd4
|
3 |
+
size 262137
|
joblib_model/tjloc_model_20240311.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a3c0bf3adc88f904fedc22117f7aa6802926519e1587729107cd2902bace574
|
3 |
+
size 121855
|
joblib_model/tjstuff_model_20240123.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8f5c09c7c37ef8bba7476cd3c864c0060a196aa6c9591850352190eab0458b4
|
3 |
+
size 121388
|
joblib_model/tjstuff_model_20240317.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8f5c09c7c37ef8bba7476cd3c864c0060a196aa6c9591850352190eab0458b4
|
3 |
+
size 121388
|
joblib_model/tjstuff_model_20240318.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef4a03c6d8877e68b4098ff139292c75ee9fab2f58ee8687cd4484182e15c0ca
|
3 |
+
size 432529
|
joblib_model/xwoba_model.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05bade9c0420657d3f0dfe35f0b1adbd2d5ae25c87a07bdf6629987f29926438
|
3 |
+
size 10684246
|
pitch_summary_functions.py
ADDED
@@ -0,0 +1,1005 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import json
|
5 |
+
from matplotlib.ticker import FuncFormatter
|
6 |
+
from matplotlib.ticker import MaxNLocator
|
7 |
+
import math
|
8 |
+
from matplotlib.patches import Ellipse
|
9 |
+
import matplotlib.transforms as transforms
|
10 |
+
import matplotlib.colors
|
11 |
+
import matplotlib.colors as mcolors
|
12 |
+
import seaborn as sns
|
13 |
+
import matplotlib.pyplot as plt
|
14 |
+
import requests
|
15 |
+
|
16 |
+
# Shared font settings for plot text, titles and axis labels.
# NOTE(review): 'calibi' looks like a misspelling of 'Calibri'; left unchanged
# here because the consumers of these dicts are outside this section - confirm.
font_properties = {'family': 'calibi', 'size': 12}
font_properties_titles = {'family': 'calibi', 'size': 20}
font_properties_axes = {'family': 'calibi', 'size': 16}

# Colour cycle handed to seaborn themes.
colour_palette = ['#FFB000', '#648FFF', '#785EF0',
                  '#DC267F', '#FE6100', '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED']

season_start = '2024-03-20'
season_end = '2024-09-29'
season_fg = 2024

# FanGraphs pitching leaderboard for season_fg, fetched once at import time.
# A timeout is set so a stalled connection cannot block the import forever;
# requests applies no timeout unless one is given.
chad_fg = requests.get(
    f'https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=0&season={season_fg}&season={season_fg}&month=1000&season1={season_fg}&ind=0&pageitems=2000000000&pagenum=1&ind=0&rost=0&players=&type=36&postseason=&sortdir=default&sortstat=sp_pitching',
    timeout=30,
).json()

# Blue -> white -> orange colormap.
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000'])

# One row per leaderboard entry: MLBAM id, FanGraphs id and player name.
chadwick_df_small = pd.DataFrame(data={
    'key_mlbam': [x['xMLBAMID'] for x in chad_fg['data']],
    'key_fangraphs': [x['playerid'] for x in chad_fg['data']],
    'Name': [x['PlayerName'] for x in chad_fg['data']],
})

# MLBAM id -> player name, and MLBAM id -> FanGraphs id.
pitcher_dicts = chadwick_df_small.set_index('key_mlbam')['Name'].sort_values().to_dict()
mlb_fg_dicts = chadwick_df_small.set_index('key_mlbam')['key_fangraphs'].sort_values().to_dict()
|
38 |
+
|
39 |
+
|
40 |
+
### DF UPDATE CODE ###
|
41 |
+
def df_update_code(df):
    """Derive per-pitch outcome flags and batted-ball metrics from raw pitch data.

    The input frame is copied first, so the caller's frame is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per pitch. The columns read here are ``vx0``, ``vy0``,
        ``vz0``, ``ax``, ``ay``, ``az``, ``y0``, ``event_type``, ``play_id``,
        ``start_speed``, ``zone``, ``play_description``, ``play_code``,
        ``launch_speed`` and ``launch_angle``.

    Returns
    -------
    pandas.DataFrame
        De-duplicated on ``play_id``, restricted to rows with a
        ``start_speed`` and a recognised ``play_description``, with these
        columns added: ``vy_f``, ``t``, ``vz_f``, ``vaa``, ``vx_f``, ``haa``,
        ``pa``, ``k``, ``bb``, ``k_minus_bb``, ``in_zone``, ``bip``,
        ``barrel``, ``sweet_spot``, ``hard_hit``, ``tb``, ``woba``,
        ``woba_codes``, ``pitches``, ``whiffs``, ``csw``, ``swings``,
        ``out_zone``, ``zone_swing``, ``zone_contact``, ``ozone_swing`` and
        ``ozone_contact``.
    """
    print('Starting')

    # Work on a copy: the original added columns to the caller's frame and
    # then carried on with a filtered copy, leaving the input half-updated.
    df = df.copy()

    # Approach angles, from the velocity components at y = 17/12
    # (presumably the front edge of home plate, in feet -- TODO confirm).
    df['vy_f'] = -(df['vy0']**2 - (2 * df['ay'] * (df['y0'] - 17/12)))**0.5
    df['t'] = (df['vy_f'] - df['vy0']) / df['ay']
    df['vz_f'] = df['vz0'] + df['az'] * df['t']
    df['vaa'] = -np.arctan(df['vz_f'] / df['vy_f']) * (180 / np.pi)
    df['vx_f'] = df['vx0'] + df['ax'] * df['t']
    df['haa'] = -np.arctan(df['vx_f'] / df['vy_f']) * (180 / np.pi)

    # Event types that end a plate appearance.
    end_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                 'double', 'sac_fly', 'force_out', 'home_run',
                 'grounded_into_double_play', 'fielders_choice', 'field_error',
                 'triple', 'sac_bunt', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play', 'catcher_interf', 'other_out']

    # Event types counted in the wOBA denominator.
    woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                  'double', 'sac_fly', 'force_out', 'home_run',
                  'grounded_into_double_play', 'fielders_choice', 'field_error',
                  'triple', 'sac_bunt', 'double_play',
                  'fielders_choice_out', 'strikeout_double_play',
                  'sac_fly_double_play', 'other_out']

    # Pitch descriptions that count as a swing.
    swings_in = ['Swinging Strike', 'In play, no out',
                 'Foul', 'In play, out(s)',
                 'In play, run(s)', 'Swinging Strike (Blocked)',
                 'Foul Bunt', 'Foul Tip', 'Missed Bunt', 'Foul Pitchout', 'Swinging Pitchout']

    # Pitch descriptions kept in the output; everything else is dropped.
    codes_in = ['In play, out(s)', 'Swinging Strike', 'Ball', 'Foul',
                'In play, no out', 'Called Strike', 'Foul Tip',
                'In play, run(s)', 'Hit By Pitch', 'Ball In Dirt', 'Pitchout',
                'Swinging Strike (Blocked)', 'Foul Bunt', 'Missed Bunt',
                'Foul Pitchout', 'Intent Ball', 'Swinging Pitchout']

    df['pa'] = df['event_type'].isin(end_codes)

    # Any event type whose name contains 'strikeout' / 'walk' counts
    # (e.g. 'strikeout_double_play', 'intent_walk'); missing events do not.
    event_text = df['event_type'].fillna('None').astype(str)
    df['k'] = event_text.str.contains('strikeout', regex=False)
    df['bb'] = event_text.str.contains('walk', regex=False)
    df['k_minus_bb'] = df['k'].astype(np.float32) - df['bb'].astype(np.float32)

    # One row per pitch, and only pitches with a measured release speed.
    df = df.drop_duplicates(subset=['play_id']).dropna(subset=['start_speed']).copy()

    # NOTE(review): a missing ``zone`` compares as False here, so such pitches
    # are treated as out of zone and the dropna() on ``in_zone`` below never
    # removes anything -- confirm whether they should be dropped instead.
    df['in_zone'] = df['zone'] < 10

    df_codes = df[df['play_description'].isin(codes_in)].dropna(subset=['in_zone']).copy()

    launch_speed = df_codes['launch_speed']
    launch_angle = df_codes['launch_angle']
    df_codes['bip'] = launch_speed.notna()

    def _flag(missing, hit):
        # 0.0 where the measurement is missing, 1.0 where the condition holds
        # and NaN otherwise.
        # NOTE(review): batted balls that fail the condition come out as NaN,
        # not 0 -- kept as-is because downstream aggregations may rely on it.
        return np.select([missing, hit], [False, True], default=np.nan)

    df_codes['barrel'] = _flag(
        launch_speed.isna(),
        (launch_speed * 1.5 - launch_angle >= 117)
        & (launch_speed + launch_angle >= 124)
        & (launch_speed > 98)
        & (launch_angle >= 8)
        & (launch_angle <= 50),
    )
    df_codes['sweet_spot'] = _flag(
        launch_angle.isna(),
        (launch_angle >= 8) & (launch_angle <= 32),
    )
    df_codes['hard_hit'] = _flag(launch_speed.isna(), launch_speed >= 94.5)

    event = df_codes['event_type']

    # Total bases per hit type; NaN for everything else.
    total_bases = {'single': 1, 'double': 2, 'triple': 3, 'home_run': 4}
    df_codes['tb'] = np.select([event == name for name in total_bases],
                               list(total_bases.values()), default=np.nan)

    # wOBA weight per event type; NaN for everything else.
    woba_weights = {'walk': 0.705, 'hit_by_pitch': 0.688, 'single': 0.897,
                    'double': 1.233, 'triple': 1.612, 'home_run': 2.013}
    df_codes['woba'] = np.select([event == name for name in woba_weights],
                                 list(woba_weights.values()), default=np.nan)

    # 1 where the event counts towards the wOBA denominator, NaN otherwise.
    df_codes['woba_codes'] = np.select([event.isin(woba_codes)], [1], default=np.nan)

    df_codes['pitches'] = 1
    df_codes['whiffs'] = df_codes['play_code'].isin(['S', 'W', 'T']).astype(int)
    df_codes['csw'] = df_codes['play_code'].isin(['S', 'W', 'T', 'C']).astype(int)
    df_codes['swings'] = df_codes['play_description'].isin(swings_in).astype(int)

    in_zone = df_codes['in_zone']
    swung = df_codes['swings'] == 1
    made_contact = swung & (df_codes['whiffs'] == 0)

    df_codes['out_zone'] = ~in_zone
    df_codes['zone_swing'] = in_zone & swung
    df_codes['zone_contact'] = in_zone & made_contact
    df_codes['ozone_swing'] = ~in_zone & swung
    df_codes['ozone_contact'] = ~in_zone & made_contact

    return df_codes
|
217 |
+
|
218 |
+
### GET COLOURS##
|
219 |
+
def get_color(value, normalize, cmap_sum):
    """Return the hex colour that ``cmap_sum`` assigns to ``value``.

    ``normalize`` is applied to ``value`` first, and its result is passed to
    the colour map ``cmap_sum``.
    """
    return mcolors.to_hex(cmap_sum(normalize(value)))
|
222 |
+
|
223 |
+
### PERCENTILE ###
|
224 |
+
def percentile(n):
    """Build an aggregation function that returns the ``n``-quantile.

    ``n`` is a fraction (0.9 for the 90th percentile). The returned function
    is named ``percentile_NN`` so that several of them can be told apart
    when passed to an aggregation.
    """
    def percentile_(x):
        return x.quantile(n)

    percentile_.__name__ = f'percentile_{n * 100:02.0f}'
    return percentile_
|
229 |
+
|
230 |
+
### TJ STUFF+ DF CLEAN ###
|
231 |
+
def df_clean(df):
    """Return a cleaned copy of ``df`` with engineered per-pitch features.

    The input frame is not modified. On the copy:

    * left-handed pitchers have ``hb`` and ``x0`` negated and
      ``spin_direction`` replaced by ``360 - spin_direction``;
    * ``pitch_l`` / ``bat_l`` flag left-handed pitchers / batters;
    * pitch types ``FT``, ``SV`` and ``FO`` are folded into ``SI``, ``SL``
      and ``FS``;
    * each pitch is compared against its pitcher's fastball (``FF``/``FC``/
      ``SI``) summary and against the pitcher's maximum speed and ``ivb``;
    * approach angles ``vaa`` and ``haa`` are computed;
    * rows without a ``pitch_type`` are dropped and every remaining missing
      value is filled with 0.
    """
    cleaned = df.copy()

    # Mirror left-handers.
    lefty = cleaned['pitcher_hand'] == 'L'
    cleaned.loc[lefty, 'hb'] *= -1
    cleaned.loc[lefty, 'x0'] *= -1
    cleaned.loc[lefty, 'spin_direction'] = 360 - cleaned.loc[lefty, 'spin_direction']

    cleaned['pitch_l'] = lefty.astype('int64')
    cleaned['bat_l'] = (cleaned['batter_hand'] == 'L').astype('int64')

    pitch_type_aliases = {'FT': 'SI',
                          'SV': 'SL',
                          'FO': 'FS'}
    cleaned['pitch_type'] = cleaned['pitch_type'].replace(pitch_type_aliases)

    # Per-pitcher fastball baseline.
    fastballs = cleaned[cleaned.pitch_type.isin(["FF", "FC", "SI"])]
    fastball_summary = fastballs.groupby(['pitcher_id']).agg(
        fb_velo=('start_speed', 'mean'),
        fb_max_ivb=('ivb', percentile(0.9)),
        fb_max_x=('hb', percentile(0.9)),
        fb_min_x=('hb', percentile(0.1)),
        fb_max_velo=('start_speed', percentile(0.9)),
        fb_axis=('spin_direction', 'mean'),
    )
    cleaned = cleaned.merge(fastball_summary, left_on='pitcher_id', right_index=True, how='left')

    # Differences between each pitch and its pitcher's fastball baseline.
    speed = cleaned['start_speed']
    cleaned['fb_velo_diff'] = speed - cleaned['fb_velo']
    cleaned['fb_max_ivb_diff'] = cleaned['ivb'] - cleaned['fb_max_ivb']
    cleaned['fb_max_hb_diff'] = -abs(cleaned['hb'] - cleaned['fb_max_x'])
    cleaned['fb_min_hb_diff'] = cleaned['hb'] - cleaned['fb_min_x']
    cleaned['fb_max_velo_diff'] = speed - cleaned['fb_max_velo']
    cleaned['fb_axis_diff'] = cleaned['spin_direction'] - cleaned['fb_axis']

    # Differences against the pitcher's own maxima over all pitch types.
    by_pitcher = cleaned.groupby(['pitcher_id'])
    cleaned['max_speed'] = by_pitcher['start_speed'].transform('max')
    cleaned['max_speed_diff'] = cleaned['start_speed'] - cleaned['max_speed']
    cleaned['max_ivb'] = cleaned.groupby(['pitcher_id'])['ivb'].transform('max')
    cleaned['max_ivb_diff'] = cleaned['ivb'] - cleaned['max_ivb']

    # Approach angles from the velocity components at y = 17/12.
    plate_y = 17/12
    cleaned['vy_f'] = -(cleaned['vy0']**2 - (2 * cleaned['ay'] * (cleaned['y0'] - plate_y)))**0.5
    cleaned['t'] = (cleaned['vy_f'] - cleaned['vy0']) / cleaned['ay']
    cleaned['vz_f'] = cleaned['vz0'] + cleaned['az'] * cleaned['t']
    cleaned['vaa'] = -np.arctan(cleaned['vz_f'] / cleaned['vy_f']) * (180 / np.pi)
    cleaned['vx_f'] = cleaned['vx0'] + cleaned['ax'] * cleaned['t']
    cleaned['haa'] = -np.arctan(cleaned['vx_f'] / cleaned['vy_f']) * (180 / np.pi)

    with_pitch_type = cleaned.dropna(subset=['pitch_type'])
    return with_pitch_type.fillna(0)
|
297 |
+
|
298 |
+
### PITCH COLOURS ###
|
299 |
+
# Display colour (hex) for each pitch description.
# Trailing hex codes in comments are carried over unchanged from the source.
pitch_colours = {
    # Fastballs
    'Four-Seam Fastball': '#FF007D',  # BC136F
    'Sinker': '#98165D',  # DC267F
    'Cutter': '#BE5FA0',

    # Changeup / splitter family
    'Changeup': '#F79E70',  # F75233
    'Splitter': '#FE6100',  # F75233
    'Screwball': '#F08223',
    'Forkball': '#FFB000',

    # Slider family
    'Slider': '#67E18D',  # 1BB999 785EF0
    'Sweeper': '#1BB999',  # 37CD85 904039
    'Slurve': '#376748',  # 785EF0 549C07 BEABD8

    # Curveball family
    'Knuckle Curve': '#311D8B',
    'Curveball': '#3025CE',
    'Slow Curve': '#274BFC',
    'Eephus': '#648FFF',

    'Knuckleball': '#867A08',

    'Pitch Out': '#472C30',
    'Other': '#9C8975',
}
|
323 |
+
|
324 |
+
### PITCH ELLIPSE ###
|
325 |
+
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Draw the covariance confidence ellipse of *x* and *y* on *ax*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data. Both must expose ``.size`` and have the same size.

    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.

    n_std : float
        The number of standard deviations used for the ellipse's radii.

    facecolor : color
        Fill colour of the ellipse.

    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`

    Returns
    -------
    matplotlib.patches.Ellipse or None
        The patch added to *ax*, or ``None`` when building the ellipse
        raised a ``ValueError``.

    Raises
    ------
    ValueError
        If *x* and *y* differ in size.
    """
    if x.size != y.size:
        raise ValueError("x and y must be the same size")

    try:
        covariance = np.cov(x, y)
        var_x, var_y = covariance[0, 0], covariance[1, 1]
        pearson = covariance[0, 1] / np.sqrt(var_x * var_y)

        # For a two-dimensional dataset the radii of the unit-variance
        # ellipse follow directly from the correlation coefficient.
        ellipse = Ellipse(
            (0, 0),
            width=np.sqrt(1 + pearson) * 2,
            height=np.sqrt(1 - pearson) * 2,
            facecolor=facecolor,
            linewidth=2,
            linestyle='--',
            **kwargs,
        )

        # Rotate the unit ellipse, stretch it to n_std standard deviations
        # along each axis, then move it onto the mean of the data.
        placement = transforms.Affine2D()
        placement.rotate_deg(45)
        placement.scale(np.sqrt(var_x) * n_std, np.sqrt(var_y) * n_std)
        placement.translate(np.mean(x), np.mean(y))

        ellipse.set_transform(placement + ax.transData)
    except ValueError:
        return None

    return ax.add_patch(ellipse)
|
385 |
+
|
386 |
+
# DEFINE STRIKE ZONE
|
387 |
+
# Closed outline of the strike-zone rectangle: the first corner is repeated
# at the end so that plotting the rows in order draws all four sides.
_STRIKE_ZONE_CORNERS = [
    (-0.9, 1.5),
    (-0.9, 3.5),
    (0.9, 3.5),
    (0.9, 1.5),
    (-0.9, 1.5),
]
strike_zone = pd.DataFrame(_STRIKE_ZONE_CORNERS, columns=['PlateLocSide', 'PlateLocHeight'])
|
391 |
+
|
392 |
+
### STRIKE ZONE ###
|
393 |
+
def draw_line(axis, alpha_spot=1, catcher_p=True):
    """Draw the strike-zone outline and a home-plate outline on ``axis``.

    Parameters
    ----------
    axis : matplotlib axes
        Axes to draw on.
    alpha_spot : float
        Opacity applied to every line.
    catcher_p : bool
        Selects which of the two home-plate outlines is drawn: the one used
        when true sits between y = 0.15 and y = 0.5, the other between
        y = -0.35 and y = 0.4.
    """
    axis.plot(strike_zone['PlateLocSide'], strike_zone['PlateLocHeight'],
              color='black', linewidth=1.3, zorder=3, alpha=alpha_spot)

    # Each entry is one straight edge of the plate: (x values, y values).
    if catcher_p:
        plate_edges = (
            ([-0.708, 0.708], [0.15, 0.15]),
            ([-0.708, -0.708], [0.15, 0.3]),
            ([-0.708, 0], [0.3, 0.5]),
            ([0, 0.708], [0.5, 0.3]),
            ([0.708, 0.708], [0.3, 0.15]),
        )
    else:
        plate_edges = (
            ([-0.708, 0.708], [0.4, 0.4]),
            ([-0.708, -0.9], [0.4, -0.1]),
            ([-0.9, 0], [-0.1, -0.35]),
            ([0, 0.9], [-.35, -0.1]),
            ([0.9, 0.708], [-0.1, 0.4]),
        )

    for edge_x, edge_y in plate_edges:
        axis.plot(edge_x, edge_y, color='black', linewidth=1, alpha=alpha_spot, zorder=1)
|
415 |
+
|
416 |
+
|
417 |
+
|
418 |
+
### FANGRAPHS STATS DICT ###
|
419 |
+
# (FanGraphs stat key, mathtext label, format spec) for every stat the summary
# table can show. Labels are raw strings so that the mathtext escapes ``\/``
# and ``\%`` are not invalid Python escape sequences.
_FANGRAPHS_STAT_SPECS = (
    ('IP', 'IP', '.1f'),
    ('TBF', 'PA', '.0f'),
    ('AVG', 'AVG', '.3f'),
    ('K/9', r'K\/9', '.2f'),
    ('BB/9', r'BB\/9', '.2f'),
    ('K/BB', r'K\/BB', '.2f'),
    ('HR/9', r'HR\/9', '.2f'),
    ('K%', r'K\%', '.1%'),
    ('BB%', r'BB\%', '.1%'),
    ('K-BB%', r'K-BB\%', '.1%'),
    ('WHIP', 'WHIP', '.2f'),
    ('BABIP', 'BABIP', '.3f'),
    ('LOB%', r'LOB\%', '.1%'),
    ('xFIP', 'xFIP', '.2f'),
    ('FIP', 'FIP', '.2f'),
    ('H', 'H', '.0f'),
    ('2B', '2B', '.0f'),
    ('3B', '3B', '.0f'),
    ('R', 'R', '.0f'),
    ('ER', 'ER', '.0f'),
    ('HR', 'HR', '.0f'),
    ('BB', 'BB', '.0f'),
    ('IBB', 'IBB', '.0f'),
    ('HBP', 'HBP', '.0f'),
    ('SO', 'SO', '.0f'),
    # OBP and SLG are rates like AVG and wOBA; '.0f' rounded them to "0"/"1".
    ('OBP', 'OBP', '.3f'),
    ('SLG', 'SLG', '.3f'),
    ('ERA', 'ERA', '.2f'),
    ('wOBA', 'wOBA', '.3f'),
    ('G', 'G', '.0f'),
)

# Stat key -> bold mathtext column header and the format spec for its value.
fangraphs_stats_dict = {
    key: {'table_header': r'$\bf{' + label + '}$', 'format': fmt}
    for key, label, fmt in _FANGRAPHS_STAT_SPECS
}
|
449 |
+
|
450 |
+
|
451 |
+
## Fangraphs Table
|
452 |
+
|
453 |
+
### FANGRAPHS SPLITS SCRAPE ###
|
454 |
+
# Split name -> value sent as ``strSplitArr`` to the FanGraphs splits API.
# 'all' applies no split; '5' and '6' are the codes used for 'left' / 'right'.
split_dict = dict(
    all=[],
    left=['5'],
    right=['6'],
)
|
458 |
+
|
459 |
+
def fangraphs_scrape(pitcher_id=808967,
                     split='all',
                     start_date='2024-03-20',
                     end_date='2024-09-29'):
    """Fetch one pitcher's FanGraphs splits-leaderboard stats for a date range.

    Two requests are made to the splits-leaders endpoint, with ``strType``
    "2" and then "1", and the first row of each response is merged into one
    dict. Keys from the "1" response win on conflict.

    Parameters
    ----------
    pitcher_id : int
        MLBAM id; translated to a FanGraphs id through ``mlb_fg_dicts``.
    split : str
        Key of ``split_dict`` ('all', 'left' or 'right').
    start_date, end_date : str or datetime-like
        Range bounds; anything ``pandas.to_datetime`` accepts.

    Returns
    -------
    dict
        Stat name -> value for the pitcher.

    Raises
    ------
    KeyError
        If ``pitcher_id`` is not in ``mlb_fg_dicts`` or ``split`` is not in
        ``split_dict``.
    IndexError
        If a response contains no rows.
    requests.exceptions.Timeout
        If FanGraphs does not answer within the timeout.
    """
    url = "https://www.fangraphs.com/api/leaders/splits/splits-leaders"
    headers = {'Content-Type': 'application/json'}

    # Everything except ``strType`` is shared by the two requests.
    base_payload = {
        "strPlayerId": str(mlb_fg_dicts[pitcher_id]),
        "strSplitArr": split_dict[split],
        "strGroup": "season",
        "strPosition": "P",
        "strType": None,  # filled in per request
        "strStartDate": str(pd.to_datetime(start_date).strftime('%Y-%m-%d')),
        "strEndDate": str(pd.to_datetime(end_date).strftime('%Y-%m-%d')),
        "strSplitTeams": False,
        "dctFilters": [],
        "strStatType": "player",
        "strAutoPt": False,
        "arrPlayerId": [],
        "strSplitArrPitch": [],
        "arrWxTemperature": None,
        "arrWxPressure": None,
        "arrWxAirDensity": None,
        "arrWxElevation": None,
        "arrWxWindSpeed": None
    }

    def _fetch_first_row(stat_type):
        # POST the shared payload with the given strType; return the first row.
        payload = {**base_payload, "strType": stat_type}
        response = requests.post(url, data=json.dumps(payload), headers=headers, timeout=30)
        return response.json()['data'][0]

    data_pull = _fetch_first_row("2")
    data_pull.update(_fetch_first_row("1"))

    return data_pull
|
521 |
+
|
522 |
+
|
523 |
+
### FANGRAPHS TABLE PLOT ###
|
524 |
+
def fangraphs_table(data,
                    stats,
                    ax):
    """Render a one-row table of FanGraphs stats on ``ax``.

    Parameters
    ----------
    data : dict
        Stat name -> value, as returned by ``fangraphs_scrape``.
    stats : sequence of str
        Stat names to show, in column order. Each name present in ``data``
        must be a key of ``fangraphs_stats_dict``.
    ax : matplotlib axes
        Axes that receives the table; its axis decorations are turned off.

    Returns
    -------
    matplotlib.table.Table
        The table that was added to ``ax``.

    Notes
    -----
    A stat missing from ``data`` shows '---' as both its header and its
    value. A stat whose value is ``None`` shows '---' as its value.
    """
    missing = '---'
    stats = list(stats)

    # Build the cell strings directly; routing them through a DataFrame meant
    # writing strings into numeric columns and collapsed repeated stat names.
    cells = []
    headers = []
    for stat in stats:
        if stat in data:
            value = data[stat]
            headers.append(fangraphs_stats_dict[stat]['table_header'])
        else:
            value = missing
            headers.append(missing)

        if value is None or (isinstance(value, str) and value == missing):
            cells.append(missing)
        else:
            cells.append(format(value, fangraphs_stats_dict[stat]['format']))

    table_fg = ax.table(cellText=[cells], colLabels=stats, cellLoc='center',
                        bbox=[0.04, 0.2, 0.92, 0.8])

    min_font_size = 20
    table_fg.set_fontsize(min_font_size)

    # Replace the plain stat names in the header row with the mathtext labels.
    table_cells = table_fg.get_celld()
    for column, header in enumerate(headers):
        table_cells[(0, column)].get_text().set_text(header)

    ax.axis('off')

    return table_fg
|
549 |
+
|
550 |
+
### VELOCITY KDES ###
|
551 |
+
def velocity_kdes(df,
                  ax,
                  gs,
                  gs_list,
                  fig):
    """Draw one stacked velocity distribution per pitch type.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitches; reads ``pitch_type``, ``start_speed`` and
        ``pitch_description``.
    ax : matplotlib axes
        Outer axes: hidden, but used to carry the title.
    gs : matplotlib GridSpec
        Figure grid; the panels are placed inside ``gs[2, gs_list]``.
    gs_list : int or slice
        Column selector into ``gs``.
    fig : matplotlib Figure
        Figure the per-pitch axes are added to.

    Pitch types are stacked from most to least thrown. A pitch type with a
    single distinct speed is drawn as a vertical line in place of a density
    curve. Nothing beyond the title is drawn when ``df`` has no pitch types.
    """
    import matplotlib.gridspec as gridspec

    ax.axis('off')
    # 'Calibri' matches the font family used elsewhere in this file; the
    # previous value 'calibi' is not a font name.
    ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'Calibri', 'size': 20})

    # Pitch types ordered from most to least frequent.
    items_in_order = df['pitch_type'].value_counts().sort_values(ascending=False).index.tolist()
    if not items_in_order:
        # A zero-row grid cannot be built and there would be no last panel.
        return

    inner_grid = gridspec.GridSpecFromSubplotSpec(len(items_in_order), 1, subplot_spec=gs[2, gs_list])
    ax_top = [fig.add_subplot(cell) for cell in inner_grid]

    # Shared x range, snapped outwards to multiples of 5.
    x_low = math.floor(df['start_speed'].min() / 5) * 5
    x_high = math.ceil(df['start_speed'].max() / 5) * 5
    x_ticks = range(x_low, x_high, 5)

    last_panel = len(items_in_order) - 1
    for position, (pitch_type, panel) in enumerate(zip(items_in_order, ax_top)):
        pitches = df[df['pitch_type'] == pitch_type]
        speeds = pitches['start_speed']
        colour = pitch_colours[pitches['pitch_description'].values[0]]

        distinct_speeds = np.unique(speeds)
        if distinct_speeds.size == 1:
            # A density estimate needs more than one distinct value.
            panel.plot([distinct_speeds[0], distinct_speeds[0]], [0, 1], linewidth=4,
                       color=colour, zorder=20)
        else:
            sns.kdeplot(speeds, ax=panel, fill=True,
                        clip=(speeds.min(), speeds.max()),
                        color=colour)

        panel.set_xlim(x_low, x_high)
        panel.set_xlabel('')
        panel.set_ylabel('')

        if position < last_panel:
            # Only the bottom panel keeps visible x tick labels.
            panel.spines['top'].set_visible(False)
            panel.spines['right'].set_visible(False)
            panel.spines['left'].set_visible(False)
            panel.tick_params(axis='x', colors='none')

        panel.set_xticks(x_ticks)
        panel.set_yticks([])
        panel.grid(axis='x', linestyle='--')
        panel.text(-0.01, 0.5, pitch_type, transform=panel.transAxes,
                   fontsize=14, va='center', ha='right')

    bottom = ax_top[-1]
    bottom.spines['top'].set_visible(False)
    bottom.spines['right'].set_visible(False)
    bottom.spines['left'].set_visible(False)
    bottom.set_xticks(list(x_ticks))
    bottom.set_xlabel('Velocity (mph)')
|
609 |
+
|
610 |
+
### TJ STUFF+ ROLLING ###
|
611 |
+
def tj_stuff_roling(df,
                    window,
                    ax):
    """Plot the rolling mean of ``tj_stuff_plus`` for each pitch type.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitches; reads ``pitch_type``, ``pitch_type_count_each``,
        ``tj_stuff_plus`` and ``pitch_description``.
    window : int
        Number of pitches per rolling window. Pitch types whose largest
        ``pitch_type_count_each`` is below ``window`` are not drawn.
    ax : matplotlib axes
        Axes to draw on.
    """
    # Pitch types ordered from most to least frequent.
    pitch_order = df['pitch_type'].value_counts().sort_values(ascending=False).index.tolist()

    for pitch_type in pitch_order:
        pitches = df[df['pitch_type'] == pitch_type]
        thrown = max(pitches['pitch_type_count_each'])
        if thrown >= window:
            rolling_stuff = pitches['tj_stuff_plus'].rolling(window).sum() / window
            line_colour = pitch_colours[pitches['pitch_description'].values[0]]
            sns.lineplot(x=range(1, thrown + 1),
                         y=rolling_stuff,
                         color=line_colour,
                         ax=ax, linewidth=3)

    # The first complete window ends at pitch number ``window``.
    ax.set_xlim(window, max(df['pitch_type_count_each']))
    ax.set_ylim(70, 130)

    ax.set_xlabel('Pitches', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)

    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
|
638 |
+
|
639 |
+
### BREAK PLOT ###
|
640 |
+
def break_plot(df,
               ax):
    """Scatter horizontal vs. induced vertical break, coloured by pitch.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitches of one pitcher; reads ``prop``, ``pitch_type``,
        ``pitch_description``, ``pitcher_hand``, ``hb`` and ``ivb``. The
        handedness is taken from the first row.
    ax : matplotlib axes
        Axes to draw on.

    Every pitch description with more than four pitches also gets a shaded
    2-standard-deviation ellipse. For a left-handed pitcher the x axis is
    inverted. The frame itself is not modified.
    """
    hand = df['pitcher_hand'].values[0]

    ordered = df.sort_values(by=['prop', 'pitch_type'], ascending=[False, True])
    for label in ordered.pitch_description.unique():
        subset = df[df['pitch_description'] == label]
        if len(subset) <= 4:
            continue
        try:
            confidence_ellipse(subset['hb'], subset['ivb'], ax=ax,
                               edgecolor=pitch_colours[label], n_std=2,
                               facecolor=pitch_colours[label], alpha=0.2)
        except ValueError:
            # One ellipse that cannot be drawn must not abandon the plot.
            continue

    # The scatter is the same for either hand; only the axis direction and
    # the side labels below depend on handedness.
    sns.scatterplot(ax=ax, x=df.hb*1, y=df.ivb*1, hue=df.pitch_description,
                    palette=pitch_colours, ec='black', alpha=1, zorder=2)

    ax.set_xlim((-25, 25))
    ax.set_ylim((-25, 25))

    # Dashed cross-hair through the origin.
    ax.hlines(y=0, xmin=-50, xmax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.vlines(x=0, ymin=-50, ymax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.set_xlabel('Horizontal Break (in)', fontdict=font_properties_axes)
    ax.set_ylabel('Induced Vertical Break (in)', fontdict=font_properties_axes)
    ax.set_title("Pitch Breaks", fontdict=font_properties_titles)

    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    ax.set_xticklabels(ax.get_xticks(), fontdict=font_properties)
    ax.set_yticklabels(ax.get_yticks(), fontdict=font_properties)

    label_box = dict(facecolor='white', edgecolor='black')
    if hand == 'R':
        ax.text(-24.5, -24.5, s='← Glove Side', fontstyle='italic', ha='left', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)
        ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)
    elif hand == 'L':
        ax.invert_xaxis()
        ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)
        ax.text(-24.5, -24.5, s='Glove Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)

    ax.set_aspect('equal', adjustable='box')

    # Show whole-number tick labels.
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
    ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
|
706 |
+
|
707 |
+
### TABLE SUMMARY ###
|
708 |
+
def table_summary(df,
|
709 |
+
pitcher_id,
|
710 |
+
ax,
|
711 |
+
df_group,
|
712 |
+
df_group_all,
|
713 |
+
statcast_pitch_summary):
|
714 |
+
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
|
715 |
+
|
716 |
+
ax.axis('off')
|
717 |
+
df_group['spin_direction_adj'] = [(x + 180) for x in df_group['spin_direction']]
|
718 |
+
#(((df_group.groupby('pitch_description').mean()[['spin_direction_adj']] %360 % 30 / 30 /100 *60).round(2) *10).round(0)//1.5/4 )
|
719 |
+
clock_time = ((df_group.groupby('pitch_description').mean()['spin_direction_adj']) %360 // 30 )+ (((df_group.groupby('pitch_description').mean()['spin_direction_adj'] %360 % 30 / 30 /100 *60).round(2) *10).round(0)//1.5/4 )
|
720 |
+
# print('Clocks')
|
721 |
+
# print(clock_time)
|
722 |
+
clock_time = (clock_time.astype(int) + clock_time%1*60/100).round(2).astype(str).str.replace('.',':').str.replace(':0',':00').str.replace(':3',':30').str.replace('0:','12:').str.replace('112:','10:').to_frame()
|
723 |
+
df_group = df_group.merge(right=clock_time,left_on='pitch_description',right_index=True,suffixes=['','_clock'])
|
724 |
+
|
725 |
+
|
726 |
+
plot_table = df_group[df_group['pitcher_id']==pitcher_id].sort_values(
|
727 |
+
by=['pitches'],ascending=False)[['pitch_description','pitches','start_speed','ivb',
|
728 |
+
'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release',
|
729 |
+
'extension','tj_stuff_plus','spin_direction_adj_clock','zone_percent','chase_percent','whiff_rate']]
|
730 |
+
|
731 |
+
# if df['pitcher_hand'].values[0] == 'L':
|
732 |
+
# plot_table['hb'] = plot_table['hb']*-1
|
733 |
+
|
734 |
+
#if df['pitcher_hand'].values[0] == 'R':
|
735 |
+
plot_table['horizontal_release'] = plot_table['horizontal_release']*-1
|
736 |
+
|
737 |
+
plot_table['pitch_percent'] = plot_table['pitches'] / plot_table['pitches'].sum()
|
738 |
+
|
739 |
+
plot_table = plot_table[['pitch_description','pitches','pitch_percent','start_speed','ivb',
|
740 |
+
'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release',
|
741 |
+
'extension','spin_direction_adj_clock','tj_stuff_plus','zone_percent','chase_percent','whiff_rate']]
|
742 |
+
|
743 |
+
plot_table_all = pd.DataFrame(data={'pitch_description': 'All',
|
744 |
+
'pitches': plot_table['pitches'].sum(),
|
745 |
+
'pitch_percent': 1.0,
|
746 |
+
'start_speed': '—',
|
747 |
+
'ivb': '—',
|
748 |
+
'hb': '—',
|
749 |
+
'spin_rate': '—',
|
750 |
+
'vaa': '—',
|
751 |
+
'haa': '—',
|
752 |
+
'vertical_release': '—',
|
753 |
+
'horizontal_release': '—',
|
754 |
+
'extension': df['extension'].mean(),
|
755 |
+
'spin_direction_adj_clock': '—',
|
756 |
+
'tj_stuff_plus': df[df['pitcher_id']==pitcher_id]['tj_stuff_plus'].mean(),
|
757 |
+
'zone_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['zone_percent'].values[0],
|
758 |
+
'chase_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['chase_percent'].values[0],
|
759 |
+
'whiff_rate': df_group_all[df_group_all['pitcher_id']==pitcher_id]['whiff_rate'].values[0],
|
760 |
+
|
761 |
+
|
762 |
+
},index=[0]
|
763 |
+
)
|
764 |
+
|
765 |
+
plot_table = pd.concat([plot_table,plot_table_all]).fillna('—')
|
766 |
+
|
767 |
+
|
768 |
+
|
769 |
+
plt.rcParams['font.family'] = 'Calibri'
|
770 |
+
table = ax.table(cellText=plot_table.values, colLabels=plot_table.columns, cellLoc='center',
|
771 |
+
colWidths=[2.3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], bbox=[0.04, 0, 0.92, 0.8])
|
772 |
+
|
773 |
+
min_font_size = 14
|
774 |
+
# Set table properties
|
775 |
+
table.auto_set_font_size(False)
|
776 |
+
#table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10)))
|
777 |
+
table.set_fontsize(min_font_size)
|
778 |
+
table.scale(1, 0.5)
|
779 |
+
|
780 |
+
min_font_size = 20
|
781 |
+
# Set font size for values
|
782 |
+
# Adjust the font size as needed
|
783 |
+
for i in range(len(plot_table)+1):
|
784 |
+
for j in range(len(plot_table.columns)):
|
785 |
+
if i > 0: # Skip the header row
|
786 |
+
cell = table.get_celld()[i, j]
|
787 |
+
cell.set_fontsize(min_font_size)
|
788 |
+
|
789 |
+
|
790 |
+
for i in range(len(plot_table)):
|
791 |
+
|
792 |
+
if table.get_celld()[(i+1, 0)].get_text().get_text() != 'All':
|
793 |
+
table.get_celld()[(i+1, 0)].set_facecolor(pitch_colours[table.get_celld()[(i+1, 0)].get_text().get_text()]) # Header cell color
|
794 |
+
if table.get_celld()[(i+1, 0)].get_text().get_text() in ['Split-Finger','Slider','Changeup']:
|
795 |
+
table.get_celld()[(i+1, 0)].set_text_props(color='#000000',fontweight='bold')
|
796 |
+
else:
|
797 |
+
table.get_celld()[(i+1, 0)].set_text_props(color='#ffffff',fontweight='bold')
|
798 |
+
if table.get_celld()[(i+1, 0)].get_text().get_text() == 'Four-Seam Fastball':
|
799 |
+
table.get_celld()[(i+1, 0)].get_text().set_text('4-Seam Fastball')
|
800 |
+
|
801 |
+
select_df = statcast_pitch_summary[statcast_pitch_summary['pitch_description'] == plot_table['pitch_description'].values[i]]
|
802 |
+
|
803 |
+
normalize = mcolors.Normalize(vmin=select_df['start_speed'].mean()-select_df.pitch_velocity_std.mean(),
|
804 |
+
vmax=select_df['start_speed'].mean()+select_df.pitch_velocity_std.mean()) # Define the range of values
|
805 |
+
|
806 |
+
if table.get_celld()[(i+1, 3)].get_text().get_text() != '—':
|
807 |
+
table.get_celld()[(i+1, 3)].set_facecolor(get_color(float(table.get_celld()[(i+1, 3)].get_text().get_text()),normalize,cmap_sum)) # Header cell color
|
808 |
+
|
809 |
+
|
810 |
+
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
|
811 |
+
normalize = mcolors.Normalize(vmin=select_df['extension'].mean()*0.9, vmax=select_df['extension'].mean()*1.1)
|
812 |
+
if table.get_celld()[(i+1,11)].get_text().get_text() != '—':
|
813 |
+
table.get_celld()[(i+1,11)].set_facecolor(get_color(float(table.get_celld()[(i+1, 11)].get_text().get_text()),normalize,cmap_sum)) # Header cell color
|
814 |
+
|
815 |
+
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
|
816 |
+
normalize = mcolors.Normalize(vmin=80, vmax=120)
|
817 |
+
print(normalize)
|
818 |
+
if table.get_celld()[(i+1,13)].get_text().get_text() != '—':
|
819 |
+
|
820 |
+
table.get_celld()[(i+1,13)].set_facecolor(get_color(float(table.get_celld()[(i+1, 13)].get_text().get_text()),normalize,cmap_sum)) # Header cell color
|
821 |
+
|
822 |
+
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
|
823 |
+
normalize = mcolors.Normalize(vmin=select_df['zone_percent'].mean()*0.7, vmax=select_df['zone_percent'].mean()*1.3)
|
824 |
+
if table.get_celld()[(i+1,14)].get_text().get_text() != '—':
|
825 |
+
table.get_celld()[(i+1,14)].set_facecolor(get_color(float(table.get_celld()[(i+1, 14)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color
|
826 |
+
|
827 |
+
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
|
828 |
+
normalize = mcolors.Normalize(vmin=select_df['chase_percent'].mean()*0.7, vmax=select_df['chase_percent'].mean()*1.3)
|
829 |
+
if table.get_celld()[(i+1,15)].get_text().get_text() != '—':
|
830 |
+
table.get_celld()[(i+1,15)].set_facecolor(get_color(float(table.get_celld()[(i+1, 15)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color
|
831 |
+
|
832 |
+
|
833 |
+
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
|
834 |
+
normalize = mcolors.Normalize(vmin=select_df['whiff_rate'].mean()*0.7, vmax=select_df['whiff_rate'].mean()*1.3)
|
835 |
+
if table.get_celld()[(i+1,16)].get_text().get_text() != '—':
|
836 |
+
table.get_celld()[(i+1,16)].set_facecolor(get_color(float(table.get_celld()[(i+1, 16)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color
|
837 |
+
|
838 |
+
table.get_celld()[(len(plot_table), 0)].set_text_props(color='#000000',fontweight='bold')
|
839 |
+
|
840 |
+
|
841 |
+
new_column_names = ['$\\bf{Pitch\ Name}$',
|
842 |
+
'$\\bf{Count}$',
|
843 |
+
'$\\bf{Pitch\%}$',
|
844 |
+
'$\\bf{Velocity}$',
|
845 |
+
'$\\bf{iVB}$',
|
846 |
+
'$\\bf{HB}$',
|
847 |
+
'$\\bf{Spin}$',
|
848 |
+
'$\\bf{VAA}$',
|
849 |
+
'$\\bf{HAA}$',
|
850 |
+
'$\\bf{vRel}$',
|
851 |
+
'$\\bf{hRel}$',
|
852 |
+
|
853 |
+
'$\\bf{Ext.}$',
|
854 |
+
'$\\bf{Axis}$',
|
855 |
+
'$\\bf{tjStuff+}$',
|
856 |
+
'$\\bf{Zone\%}$',
|
857 |
+
'$\\bf{Chase\%}$',
|
858 |
+
'$\\bf{Whiff\%}$',
|
859 |
+
]
|
860 |
+
|
861 |
+
for i, col_name in enumerate(new_column_names):
|
862 |
+
table.get_celld()[(0, i)].get_text().set_text(col_name)
|
863 |
+
|
864 |
+
float_list = ['start_speed','ivb',
|
865 |
+
'hb', 'vaa', 'haa', 'vertical_release','horizontal_release', 'extension']
|
866 |
+
for fl in float_list:
|
867 |
+
# Subset of column names
|
868 |
+
subset_columns = [fl]
|
869 |
+
|
870 |
+
# Get the list of column indices
|
871 |
+
column_indices = [plot_table.columns.get_loc(col) for col in subset_columns]
|
872 |
+
|
873 |
+
# # print(column_indices)
|
874 |
+
for row_l in range(1,len(plot_table)+1):
|
875 |
+
# print(row_l)
|
876 |
+
if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—':
|
877 |
+
# print()
|
878 |
+
# print(fl)
|
879 |
+
table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.1f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%'))))
|
880 |
+
|
881 |
+
|
882 |
+
|
883 |
+
percent_list = ['pitch_percent','zone_percent','chase_percent','whiff_rate']
|
884 |
+
for fl in percent_list:
|
885 |
+
# Subset of column names
|
886 |
+
subset_columns = [fl]
|
887 |
+
|
888 |
+
# Get the list of column indices
|
889 |
+
column_indices = [plot_table.columns.get_loc(col) for col in subset_columns]
|
890 |
+
|
891 |
+
# # print(column_indices)
|
892 |
+
for row_l in range(1,len(plot_table)+1):
|
893 |
+
# print(row_l)
|
894 |
+
if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—':
|
895 |
+
|
896 |
+
# print(fl)
|
897 |
+
table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.1%}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%'))))
|
898 |
+
|
899 |
+
|
900 |
+
int_list = ['tj_stuff_plus','spin_rate']
|
901 |
+
for fl in int_list:
|
902 |
+
# Subset of column names
|
903 |
+
subset_columns = [fl]
|
904 |
+
|
905 |
+
# Get the list of column indices
|
906 |
+
column_indices = [plot_table.columns.get_loc(col) for col in subset_columns]
|
907 |
+
|
908 |
+
# # print(column_indices)
|
909 |
+
for row_l in range(1,len(plot_table)+1):
|
910 |
+
# print(row_l)
|
911 |
+
if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—':
|
912 |
+
# print(fl)
|
913 |
+
|
914 |
+
table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.0f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%'))))
|
915 |
+
|
916 |
+
return table
|
917 |
+
|
918 |
+
### GROUPED IVB CREATION ###
|
919 |
+
def group_ivb_update(df, agg_list=None):
    """Aggregate pitch-level rows into one summary row per group.

    Args:
        df: DataFrame with one row per pitch. It must contain the grouping
            columns plus ``start_speed``, ``ivb``, ``hb``, ``spin_rate``,
            ``vaa``, ``haa``, ``x0``, ``z0``, ``extension``,
            ``spin_direction``, ``tj_stuff_plus`` and the count columns
            ``swings``, ``in_zone``, ``out_zone``, ``whiffs``,
            ``zone_swing``, ``zone_contact``, ``ozone_swing`` and
            ``ozone_contact``.
        agg_list: Column name(s) to group by. Defaults to
            ``['pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitch_type',
            'pitch_description']``.

    Returns:
        DataFrame with the grouping columns, the pitch count (``pitches``),
        the per-group means (``x0``/``z0`` are exposed as
        ``horizontal_release``/``vertical_release``), the per-group sums of
        the count columns, and the derived rate columns. A rate is NaN
        whenever its denominator is zero.
    """
    if agg_list is None:
        agg_list = ['pitcher_id', 'pitcher_name', 'pitcher_hand',
                    'pitch_type', 'pitch_description']

    def _safe_ratio(numerator, denominator):
        # Mask zero denominators to NaN first so the division never hits 0.
        return numerator / denominator.where(denominator != 0)

    grouped_ivb = df.groupby(agg_list).agg(
        pitches=('start_speed', 'count'),
        start_speed=('start_speed', 'mean'),
        ivb=('ivb', 'mean'),
        hb=('hb', 'mean'),
        spin_rate=('spin_rate', 'mean'),
        vaa=('vaa', 'mean'),
        haa=('haa', 'mean'),
        horizontal_release=('x0', 'mean'),
        vertical_release=('z0', 'mean'),
        extension=('extension', 'mean'),
        spin_direction=('spin_direction', 'mean'),
        tj_stuff_plus=('tj_stuff_plus', 'mean'),
        swings=('swings', 'sum'),
        in_zone=('in_zone', 'sum'),
        out_zone=('out_zone', 'sum'),
        whiffs=('whiffs', 'sum'),
        zone_swing=('zone_swing', 'sum'),
        zone_contact=('zone_contact', 'sum'),
        ozone_swing=('ozone_swing', 'sum'),
        ozone_contact=('ozone_contact', 'sum'),
    ).reset_index()

    pitches = grouped_ivb['pitches']
    in_zone = grouped_ivb['in_zone']
    swings = grouped_ivb['swings']
    whiffs = grouped_ivb['whiffs']
    zone_swing = grouped_ivb['zone_swing']
    ozone_swing = grouped_ivb['ozone_swing']

    grouped_ivb['zone_contact_percent'] = _safe_ratio(grouped_ivb['zone_contact'], zone_swing)
    # Guard on the real denominator (in_zone); the previous check looked at
    # `pitches`, so a group with no in-zone pitches divided by zero.
    grouped_ivb['zone_swing_percent'] = _safe_ratio(zone_swing, in_zone)
    grouped_ivb['zone_percent'] = _safe_ratio(in_zone, pitches)
    # Out-of-zone pitches are taken as pitches - in_zone (not the summed
    # `out_zone` column), matching the original computation.
    grouped_ivb['chase_percent'] = _safe_ratio(ozone_swing, pitches - in_zone)
    grouped_ivb['chase_contact'] = _safe_ratio(grouped_ivb['ozone_contact'], ozone_swing)
    grouped_ivb['swing_percent'] = _safe_ratio(swings, pitches)
    grouped_ivb['whiff_rate'] = _safe_ratio(whiffs, swings)
    grouped_ivb['swstr_rate'] = _safe_ratio(whiffs, pitches)

    return grouped_ivb
|
964 |
+
|
965 |
+
|
966 |
+
#### Pitch locations by batter hand (LHH/RHH)
|
967 |
+
def location_plot(df, ax, hand):
    """Draw a pitch-location summary for one batter hand onto ``ax``.

    For every pitch description in ``df``, a filled 1.5-standard-deviation
    confidence ellipse of (``px``, ``pz``) is drawn when at least five
    pitches of that type were thrown to batters of the given hand. The mean
    location of each pitch type is then over-plotted as a marker whose size
    scales with that pitch type's share of pitches to that hand.

    Args:
        df: Pitch-level DataFrame. The code reads the columns ``prop``,
            ``pitch_type``, ``pitch_description``, ``batter_hand``,
            ``start_speed``, ``px`` and ``pz``.
        ax: Matplotlib axes to draw on; it is modified in place.
        hand: Value matched against ``batter_hand``; it is also interpolated
            into the title as ``"{hand}HB"``.

    Returns:
        None.
    """
    # Ordering comes from the whole frame (not only this hand's pitches).
    label_labels = df.sort_values(by=['prop', 'pitch_type'],
                                  ascending=[False, True]).pitch_description.unique()

    # The batter-hand filter does not depend on the label, so apply it once.
    hand_df = df[df['batter_hand'] == hand]

    for label in label_labels:
        subset = hand_df[hand_df['pitch_description'] == label]
        # Skip pitch types with fewer than five pitches to this hand.
        if len(subset) >= 5:
            confidence_ellipse(subset['px'], subset['pz'], ax=ax,
                               edgecolor=pitch_colours[label], n_std=1.5,
                               facecolor=pitch_colours[label], alpha=0.3)

    pitch_location_group = hand_df.groupby(['pitch_description']).agg(
        pitches=('start_speed', 'count'),
        px=('px', 'mean'),
        pz=('pz', 'mean')).reset_index()

    pitch_location_group['pitch_percent'] = (
        pitch_location_group['pitches'] / pitch_location_group['pitches'].sum())

    ## Location Plot
    sns.scatterplot(ax=ax, x=pitch_location_group['px'],
                    y=pitch_location_group['pz'],
                    hue=pitch_location_group['pitch_description'],
                    palette=pitch_colours, ec='black',
                    s=pitch_location_group['pitch_percent'] * 750,
                    linewidth=2,
                    zorder=2)

    ax.axis('square')
    draw_line(ax, alpha_spot=0.75, catcher_p=False)
    ax.axis('off')
    ax.set_xlim((-2.75, 2.75))
    ax.set_ylim((-0.5, 5))

    # Remove the hue legend only when one exists: get_legend() returns None
    # if no legend was created, and calling .remove() on None would raise.
    legend = ax.get_legend()
    if len(pitch_location_group['px']) > 0 and legend is not None:
        legend.remove()

    ax.grid(False)
    ax.set_title(f"Pitch Locations vs {hand}HB\n{pitch_location_group['pitches'].sum()} Pitches", fontdict=font_properties_titles)
|
statcast_pitch_summary.csv
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pitch_description,pitches,start_speed,ivb,hb,spin_rate,vaa,haa,horizontal_release,vertical_release,extension,swings,in_zone,out_zone,whiffs,zone_swing,zone_contact,ozone_swing,ozone_contact,zone_contact_percent,zone_swing_percent,zone_percent,chase_percent,chase_contact,swing_percent,whiff_rate,swstr_rate,pitch_velocity_std
|
2 |
+
Changeup,78501,85.45244264,5.696969465,4.756409472,1788.814529,-7.397429532,0.066029801,-0.505147738,5.637483042,6.459312389,39508,30631,47870,12279,23324,18225,16184,9004,0.781383982,0.761450818,0.390198851,0.338082306,0.556351953,0.503280213,0.310797813,0.15641839,3.411101475
|
3 |
+
Curveball,51673,79.28082171,-9.353269599,-3.755365471,2529.177413,-9.685324545,1.128810646,-0.615065727,5.935777432,6.356118766,21550,22503,29170,6681,13243,11063,8307,3806,0.835384732,0.588499311,0.435488553,0.284778882,0.45816781,0.417045652,0.310023202,0.129293828,3.845559953
|
4 |
+
Cutter,55802,89.23017813,7.67180567,-1.473115659,2387.582408,-6.38378616,1.393012805,-0.885454511,5.73043569,6.364762822,27663,28608,27194,6695,19891,16452,7772,4516,0.827107737,0.695295022,0.512669797,0.285798338,0.581060216,0.49573492,0.242020027,0.119977779,3.290564827
|
5 |
+
Eephus,523,47.97782027,16.7292543,-0.099235182,1170.40153,-14.91983588,1.506952393,-1.3219283,7.275302765,4.554736692,227,184,339,7,135,131,92,89,0.97037037,0.733695652,0.351816444,0.271386431,0.967391304,0.434034417,0.030837004,0.013384321,6.869976111
|
6 |
+
Fastball,1140,67.09807018,15.87526316,5.559912281,1638.396309,-8.730726579,1.203451128,-1.423913573,6.371107598,4.816106427,573,543,597,41,405,379,168,153,0.935802469,0.745856354,0.476315789,0.281407035,0.910714286,0.502631579,0.071553229,0.035964912,9.112324944
|
7 |
+
Forkball,778,82.96773779,1.274164524,7.219151671,1079.151436,-8.98002479,1.264533775,-1.651045178,5.833003132,6.46649016,358,188,590,199,144,93,214,66,0.645833333,0.765957447,0.241645244,0.362711864,0.308411215,0.460154242,0.555865922,0.255784062,1.581383091
|
8 |
+
Four-Seam Fastball,230963,94.190831,15.70507094,3.1856687,2282.171475,-4.780780532,0.546832454,-0.72858478,5.693779333,6.512547603,111297,125822,105141,24710,85724,69571,25573,17016,0.811569689,0.681311694,0.54477124,0.243225763,0.665389278,0.481882379,0.222018563,0.106986833,2.47865324
|
9 |
+
Knuckle Ball,190,75.68631579,-1.973157895,3.434736842,336.9684211,-8.570861876,1.201227902,-1.463108102,5.626929407,6.28061513,81,93,97,20,63,48,18,13,0.761904762,0.677419355,0.489473684,0.18556701,0.722222222,0.426315789,0.24691358,0.105263158,4.960630412
|
10 |
+
Knuckle Curve,12153,81.43644368,-9.543701144,-5.97670534,2469.155167,-9.592522092,1.924129612,-1.025192611,5.916888283,6.36037149,5439,5095,7058,1800,3147,2683,2292,956,0.852557992,0.617664377,0.419238048,0.324737886,0.417102967,0.447543816,0.330943188,0.148111577,3.335305502
|
11 |
+
Screwball,74,80.18648649,-4.459459459,9.043243243,2094.554054,-8.648739106,-0.179827995,-1.153713198,6.063270539,6.211306526,34,33,41,5,24,23,10,6,0.958333333,0.727272727,0.445945946,0.243902439,0.6,0.459459459,0.147058824,0.067567568,1.450679985
|
12 |
+
Sinker,110889,93.31860599,7.843805066,6.938142647,2149.451219,-5.886868132,0.131331991,-0.761006834,5.525340998,6.420015363,50199,61433,49456,7185,37708,33570,12491,9444,0.890262013,0.613806912,0.554004455,0.252567939,0.756064366,0.452695939,0.143130341,0.064794524,2.892717921
|
13 |
+
Slider,126080,85.12149032,1.606287278,-3.060486199,2408.269843,-7.741990027,1.624081206,-0.917878953,5.66131706,6.416260991,60686,56660,69420,20416,38692,31069,21994,9201,0.802982529,0.682880339,0.449397208,0.316825122,0.418341366,0.481329315,0.336420262,0.161928934,3.327382065
|
14 |
+
Slow Curve,51,59.1745098,-9.101960784,-0.780392157,2058.285714,-14.41006285,1.056678233,-0.636672072,6.687193223,5.631413586,21,15,36,1,13,13,8,7,1,0.866666667,0.294117647,0.222222222,0.875,0.411764706,0.047619048,0.019607843,11.82586729
|
15 |
+
Slurve,2330,82.12420601,-3.000300429,-4.449828326,2523.909406,-8.360925975,1.378365433,-0.669493725,5.578432792,6.027864672,984,1068,1262,273,619,538,365,173,0.86914378,0.579588015,0.458369099,0.289223455,0.473972603,0.422317597,0.277439024,0.117167382,2.558741806
|
16 |
+
Splitter,15569,86.61465733,3.256830882,10.44272593,1355.748708,-7.739259105,0.553210998,-1.537501892,5.729877252,6.425062745,8129,5689,9880,2825,4542,3517,3587,1787,0.77432849,0.798382844,0.365405614,0.36305668,0.498187901,0.522127304,0.34752122,0.181450318,3.210384888
|
17 |
+
Sweeper,30959,81.87371039,1.235039891,-7.533357021,2573.086585,-7.702769263,2.197097666,-0.997430186,5.419746312,6.442381843,14382,13480,17479,4702,8883,7108,5499,2572,0.800180119,0.658976261,0.435414581,0.314606099,0.467721404,0.464549889,0.326936448,0.151878291,2.916945647
|
18 |
+
All,717675,88.99915324,7.084329815,1.49919922,2248.528397,-6.5395245,0.826992658,-0.783178307,5.67100296,6.439312301,341131,352045,365630,87839,236557,194483,104574,58809,0.822140118,0.671951029,0.490535409,0.286010448,0.562367319,0.475327969,0.257493456,0.122393841,6.109454214
|
team_logos.csv
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
id,city,name,franchise,abbreviation,imageAbbreviation,imageLink
|
2 |
+
108,Los Angeles Angels,Angels,Los Angeles Angels,LAA,LAA,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/LAA.png&h=400&w=400
|
3 |
+
109,Arizona Diamondbacks,D-backs,Arizona Diamondbacks,AZ,ARI,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ARI.png&h=400&w=400
|
4 |
+
110,Baltimore Orioles,Orioles,Baltimore Orioles,BAL,BAL,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/BAL.png&h=400&w=400
|
5 |
+
111,Boston Red Sox,Red Sox,Boston Red Sox,BOS,BOS,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/BOS.png&h=400&w=400
|
6 |
+
112,Chicago Cubs,Cubs,Chicago Cubs,CHC,CHC,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/CHC.png&h=400&w=400
|
7 |
+
113,Cincinnati Reds,Reds,Cincinnati Reds,CIN,CIN,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/CIN.png&h=400&w=400
|
8 |
+
114,Cleveland Guardians,Guardians,Cleveland Guardians,CLE,CLE,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/CLE.png&h=400&w=400
|
9 |
+
115,Colorado Rockies,Rockies,Colorado Rockies,COL,COL,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/COL.png&h=400&w=400
|
10 |
+
116,Detroit Tigers,Tigers,Detroit Tigers,DET,DET,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/DET.png&h=400&w=400
|
11 |
+
117,Houston Astros,Astros,Houston Astros,HOU,HOU,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/HOU.png&h=400&w=400
|
12 |
+
118,Kansas City Royals,Royals,Kansas City Royals,KC,KC,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/KC.png&h=400&w=400
|
13 |
+
119,Los Angeles Dodgers,Dodgers,Los Angeles Dodgers,LAD,LAD,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/LAD.png&h=400&w=400
|
14 |
+
120,Washington Nationals,Nationals,Washington Nationals,WSH,WSH,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/WSH.png&h=400&w=400
|
15 |
+
121,New York Mets,Mets,New York Mets,NYM,NYM,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/NYM.png&h=400&w=400
|
16 |
+
133,Oakland Athletics,Athletics,Oakland Athletics,OAK,OAK,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/OAK.png&h=400&w=400
|
17 |
+
134,Pittsburgh Pirates,Pirates,Pittsburgh Pirates,PIT,PIT,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/PIT.png&h=400&w=400
|
18 |
+
135,San Diego Padres,Padres,San Diego Padres,SD,SD,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/SD.png&h=400&w=400
|
19 |
+
136,Seattle Mariners,Mariners,Seattle Mariners,SEA,SEA,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/SEA.png&h=400&w=400
|
20 |
+
137,San Francisco Giants,Giants,San Francisco Giants,SF,SF,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/SF.png&h=400&w=400
|
21 |
+
138,St. Louis Cardinals,Cardinals,St. Louis Cardinals,STL,STL,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/STL.png&h=400&w=400
|
22 |
+
139,Tampa Bay Rays,Rays,Tampa Bay Rays,TB,TB,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/TB.png&h=400&w=400
|
23 |
+
140,Texas Rangers,Rangers,Texas Rangers,TEX,TEX,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/TEX.png&h=400&w=400
|
24 |
+
141,Toronto Blue Jays,Blue Jays,Toronto Blue Jays,TOR,TOR,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/TOR.png&h=400&w=400
|
25 |
+
142,Minnesota Twins,Twins,Minnesota Twins,MIN,MIN,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/MIN.png&h=400&w=400
|
26 |
+
143,Philadelphia Phillies,Phillies,Philadelphia Phillies,PHI,PHI,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/PHI.png&h=400&w=400
|
27 |
+
144,Atlanta Braves,Braves,Atlanta Braves,ATL,ATL,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ATL.png&h=400&w=400
|
28 |
+
145,Chicago White Sox,White Sox,Chicago White Sox,CWS,CHW,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/CHW.png&h=400&w=400
|
29 |
+
146,Miami Marlins,Marlins,Miami Marlins,MIA,MIA,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/MIA.png&h=400&w=400
|
30 |
+
147,New York Yankees,Yankees,New York Yankees,NYY,NYY,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/NYY.png&h=400&w=400
|
31 |
+
158,Milwaukee Brewers,Brewers,Milwaukee Brewers,MIL,MIL,https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/MIL.png&h=400&w=400
|
32 |
+
11,MLB,MLB,Free Agent,FA,MLB,https://a.espncdn.com/combiner/i?img=/i/teamlogos/leagues/500/mlb.png?w=400&h=400&transparent=true
|