Upload 5 files
Browse files- app.py +518 -611
- left.png +0 -0
- pitcher_update.py +562 -0
- right.png +0 -0
app.py
CHANGED
@@ -2,7 +2,7 @@ import pandas as pd
|
|
2 |
import numpy as np
|
3 |
import matplotlib.pyplot as plt
|
4 |
import seaborn as sns
|
5 |
-
import pitch_summary_functions as psf
|
6 |
import requests
|
7 |
import matplotlib
|
8 |
from api_scraper import MLB_Scrape
|
@@ -10,19 +10,20 @@ from shinywidgets import output_widget, render_widget
|
|
10 |
import shinyswatch
|
11 |
|
12 |
|
13 |
-
|
|
|
14 |
colour_palette = ['#FFB000','#648FFF','#785EF0',
|
15 |
'#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']
|
16 |
|
17 |
import datasets
|
18 |
from datasets import load_dataset
|
19 |
### Import Datasets
|
20 |
-
dataset = load_dataset('nesticot/mlb_data', data_files=['
|
21 |
dataset_train = dataset['train']
|
22 |
df_2024 = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True).drop_duplicates(subset=['play_id'],keep='last')
|
23 |
|
|
|
24 |
|
25 |
-
# df_2024.loc[(df_2024['pitcher_id']==804636)&(df_2024['pitch_type'].isin(['FF','FC']),'start_speed'] += 3
|
26 |
# ### Import Datasets
|
27 |
# import datasets
|
28 |
# from datasets import load_dataset
|
@@ -33,7 +34,6 @@ df_2024 = dataset_train.to_pandas().set_index(list(dataset_train.features.keys()
|
|
33 |
### PITCH COLOURS ###
|
34 |
pitch_colours = {
|
35 |
'Four-Seam Fastball':'#FF007D',#BC136F
|
36 |
-
'Fastball':'#FF007D',
|
37 |
'Sinker':'#98165D',#DC267F
|
38 |
'Cutter':'#BE5FA0',
|
39 |
|
@@ -57,117 +57,48 @@ pitch_colours = {
|
|
57 |
'Other':'#9C8975',
|
58 |
}
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
season_end = '2024-09-29'
|
64 |
-
season_fg=2024
|
65 |
-
#chad_fg = requests.get(f'https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=0&season={season_fg}&season={season_fg}&month=1000&season1={season_fg}&ind=0&pageitems=2000000000&pagenum=1&ind=0&rost=0&players=&type=36&postseason=&sortdir=default&sortstat=sp_pitching').json()
|
66 |
-
|
67 |
-
|
68 |
-
# chadwick_df_small = pd.DataFrame(data={
|
69 |
-
# 'key_mlbam':[x['xMLBAMID'] for x in chad_fg['data']],
|
70 |
-
# 'key_fangraphs':[x['playerid'] for x in chad_fg['data']],
|
71 |
-
# 'Name':[x['PlayerName'] for x in chad_fg['data']],
|
72 |
-
# })
|
73 |
-
|
74 |
-
|
75 |
-
# mlb_fg_dicts = chadwick_df_small.set_index('key_mlbam')['key_fangraphs'].sort_values().to_dict()
|
76 |
-
|
77 |
-
|
78 |
-
statcast_pitch_summary = pd.read_csv('statcast_pitch_summary.csv')
|
79 |
-
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
|
80 |
-
|
81 |
-
|
82 |
-
df_2024_codes = psf.df_update_code(df_2024)
|
83 |
-
|
84 |
-
df_2024_update = psf.df_clean(df_2024_codes)
|
85 |
-
import joblib
|
86 |
-
model = joblib.load('joblib_model/tjstuff_model_20240318.joblib')
|
87 |
-
y_pred_mean = 0.0011434511
|
88 |
-
y_pred_std = 0.006554768
|
89 |
|
90 |
-
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')
|
91 |
|
92 |
-
features = ['start_speed','spin_rate','extension','ivb','hb','x0','z0','fb_max_velo_diff','fb_max_ivb_diff','fb_max_hb_diff']
|
93 |
|
94 |
-
|
|
|
|
|
|
|
|
|
95 |
|
|
|
|
|
96 |
|
97 |
-
|
98 |
|
99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
-
|
102 |
-
|
103 |
-
df_2024_update.loc[df_2024_update[['launch_angle','launch_speed']].isnull().sum(axis=1)==0,'woba_pred'] = [sum(x) for x in xwoba_model.predict_proba(df_2024_update.loc[df_2024_update[['launch_angle','launch_speed']].isnull().sum(axis=1)==0][['launch_angle','launch_speed']]) * ([0, 0.883,1.244,1.569,2.004])]
|
104 |
-
|
105 |
-
pitcher_dicts = df_2024_update.set_index('pitcher_id')['pitcher_name'].sort_values().to_dict()
|
106 |
|
107 |
team_logos = pd.read_csv('team_logos.csv')
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
team_logo_dict = teams_df.set_index(['team_id'])['parent_org_id'].to_dict()
|
112 |
-
|
113 |
-
font_properties = {'family': 'calibi', 'size': 12}
|
114 |
-
font_properties_titles = {'family': 'calibi', 'size': 20}
|
115 |
-
font_properties_axes = {'family': 'calibi', 'size': 16}
|
116 |
-
df_plot = []
|
117 |
-
ax2_loc = []
|
118 |
-
gs = []
|
119 |
-
fig = []
|
120 |
-
|
121 |
-
function_dict={
|
122 |
-
'velocity_kde':'Velocity Distributions',
|
123 |
-
'break_plot':'Pitch Movement',
|
124 |
-
'rolling_tj_stuff':'Rolling tjStuff+',
|
125 |
-
'location_lhb':'Locations vs LHB',
|
126 |
-
'location_rhb':'Locations vs RHB',
|
127 |
-
}
|
128 |
-
|
129 |
-
split_dict = {'all':'All',
|
130 |
-
'left':'LHB',
|
131 |
-
'right':'RHB'}
|
132 |
-
|
133 |
-
split_dict_hand = {'all':['L','R'],
|
134 |
-
'left':['L'],
|
135 |
-
'right':['R']}
|
136 |
-
|
137 |
-
ball_dict = {'0':'0',
|
138 |
-
'1':'1',
|
139 |
-
'2':'2',
|
140 |
-
'3':'3'}
|
141 |
-
|
142 |
-
strike_dict = {'0':'0',
|
143 |
-
'1':'1',
|
144 |
-
'2':'2'}
|
145 |
-
|
146 |
-
# count_dict = {'0_0':'Through 0-0',
|
147 |
-
# '0_1':'Through 0-1',
|
148 |
-
# '0_2':'Through 0-2',
|
149 |
-
# '1_0':'Through 1-0',
|
150 |
-
# '1_1':'Through 1-1',
|
151 |
-
# '1_2':'Through 1-2',
|
152 |
-
# '2_1':'Through 2-1',
|
153 |
-
# '2_0':'Through 2-0',
|
154 |
-
# '3_0':'Through 3-0',
|
155 |
-
# '3_1':'Through 3-1',
|
156 |
-
# '2_2':'Through 2-2',
|
157 |
-
# '3_2':'Through 3-2'}
|
158 |
-
|
159 |
-
# count_dict_fg = {'0_0':'',
|
160 |
-
# '0_1':'61',
|
161 |
-
# '0_2':'62',
|
162 |
-
# '1_0':'63',
|
163 |
-
# '1_1':'64',
|
164 |
-
# '1_2':'65',
|
165 |
-
# '2_1':'66',
|
166 |
-
# '2_0':'67',
|
167 |
-
# '3_0':'68',
|
168 |
-
# '3_1':'69',
|
169 |
-
# '2_2':'70',
|
170 |
-
# '3_2':'71'}
|
171 |
|
172 |
from urllib.request import Request, urlopen
|
173 |
from shiny import App, reactive, ui, render
|
@@ -189,74 +120,39 @@ app_ui = ui.page_fluid(
|
|
189 |
shinyswatch.theme.simplex(),
|
190 |
ui.tags.h4("TJStats"),
|
191 |
ui.tags.i("Baseball Analytics and Visualizations"),
|
|
|
192 |
ui.row(
|
193 |
|
194 |
|
195 |
ui.layout_sidebar(
|
196 |
|
197 |
ui.panel_sidebar(
|
198 |
-
ui.row(
|
199 |
-
ui.column(6,
|
200 |
-
ui.input_select('player_id','Select Player',pitcher_dicts,selectize=True,multiple=False)),
|
201 |
-
ui.column(6, ui.output_ui('test','Select Game'))),
|
202 |
|
203 |
-
ui.row(
|
204 |
-
ui.column(4,
|
205 |
-
ui.input_select('plot_id_1','Plot Left',function_dict,multiple=False,selected='velocity_kde')),
|
206 |
-
ui.column(4,
|
207 |
-
ui.input_select('plot_id_2','Plot Middle',function_dict,multiple=False,selected='rolling_tj_stuff')),
|
208 |
-
ui.column(4,
|
209 |
-
ui.input_select('plot_id_3','Plot Right',function_dict,multiple=False,selected='break_plot'))),
|
210 |
-
|
211 |
-
# ui.input_select('count_id','Count',count_dict,multiple=True,selectize=True,selected='0_0'),
|
212 |
-
|
213 |
-
ui.row(
|
214 |
-
ui.column(6,
|
215 |
-
ui.input_select('ball_id','Balls',ball_dict,multiple=False,selected='0'),
|
216 |
-
ui.input_radio_buttons(
|
217 |
-
"count_id_balls",
|
218 |
-
"Count Filter Balls",
|
219 |
-
{
|
220 |
-
"exact": "Exact Balls",
|
221 |
-
"greater": ">= Balls",
|
222 |
-
"lesser": "<= Balls",
|
223 |
-
},selected='greater')),
|
224 |
-
ui.column(6,
|
225 |
-
ui.input_select('strike_id','Strikes',strike_dict,multiple=False,selected='0'),
|
226 |
-
ui.input_radio_buttons(
|
227 |
-
"count_id_strikes",
|
228 |
-
"Count Filter Strikes",
|
229 |
-
{
|
230 |
-
"exact": "Exact Strikes",
|
231 |
-
"greater": ">= Strikes",
|
232 |
-
"lesser": "<= Strikes",
|
233 |
-
},selected='greater'))),
|
234 |
-
ui.row(
|
235 |
-
ui.column(6,
|
236 |
-
ui.input_select('split_id','Select Split',split_dict,multiple=False)),
|
237 |
-
ui.column(6,
|
238 |
-
ui.input_numeric('rolling_window','Rolling Window (for tjStuff+ Plot)',min=1,value=10))),
|
239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
240 |
|
241 |
|
242 |
-
ui.input_action_button("go", "Generate",class_="btn-primary"),
|
243 |
|
244 |
|
245 |
-
|
246 |
-
|
|
|
247 |
ui.panel_main(
|
248 |
ui.navset_tab(
|
249 |
# ui.nav("Raw Data",
|
250 |
# ui.output_data_frame("raw_table")),
|
251 |
ui.nav("Season Summary",
|
252 |
ui.output_plot('plot',
|
253 |
-
width='
|
254 |
-
height='
|
255 |
-
ui.nav("Game Summary",
|
256 |
-
ui.output_plot('plot_game',
|
257 |
-
width='2000px',
|
258 |
-
height='2000px'))
|
259 |
-
,id="my_tabs"))))))
|
260 |
|
261 |
|
262 |
|
@@ -271,7 +167,7 @@ app_ui = ui.page_fluid(
|
|
271 |
def server(input, output, session):
|
272 |
|
273 |
@render.ui
|
274 |
-
def
|
275 |
|
276 |
# @reactive.Effect
|
277 |
if input.my_tabs() == 'Season Summary':
|
@@ -279,19 +175,14 @@ def server(input, output, session):
|
|
279 |
return ui.input_date_range("date_range_id", "Date range input",start = df_2024.game_date.min(),
|
280 |
end = df_2024.game_date.max(),width=2,min=df_2024.game_date.min(),
|
281 |
max=df_2024.game_date.max()),
|
282 |
-
# @reactive.Effect
|
283 |
-
if input.my_tabs() == 'Game Summary':
|
284 |
-
pitcher_id_select = int(input.player_id())
|
285 |
-
df_plot = df_2024_update[(df_2024_update['pitcher_id']==pitcher_id_select)]
|
286 |
-
|
287 |
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
@output
|
296 |
@render.plot
|
297 |
@reactive.event(input.go, ignore_none=False)
|
@@ -307,559 +198,576 @@ def server(input, output, session):
|
|
307 |
ax.text(x=0.5,y=0.5,s='Please Select\nA Player',fontsize=150,ha='center')
|
308 |
ax.grid('off')
|
309 |
return
|
310 |
-
|
311 |
-
pitcher_id_select = int(input.player_id())
|
312 |
|
313 |
|
314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
df_plot = df_plot[(pd.to_datetime(df_plot['game_date']).dt.date>=input.date_range_id()[0])&
|
316 |
(pd.to_datetime(df_plot['game_date']).dt.date<=input.date_range_id()[1])]
|
317 |
|
318 |
-
df_plot = df_plot[df_plot['batter_hand'].isin(split_dict_hand[input.split_id()])]
|
319 |
-
|
320 |
-
if input.count_id_balls()=='greater' and input.count_id_strikes()=='greater' and int(input.ball_id())==0 and int(input.strike_id())==0:
|
321 |
-
ball_title = ''
|
322 |
-
strike_title = ''
|
323 |
-
else:
|
324 |
-
if input.count_id_balls()=='exact':
|
325 |
-
df_plot = df_plot[df_plot['balls']==int(input.ball_id())]
|
326 |
-
ball_title = str(f'{(input.ball_id())} Ball Count; ')
|
327 |
-
elif input.count_id_balls()=='greater':
|
328 |
-
df_plot = df_plot[df_plot['balls']>=int(input.ball_id())]
|
329 |
-
ball_title = str(f'At Least {(input.ball_id())} Ball Count; ')
|
330 |
-
elif input.count_id_balls()=='lesser':
|
331 |
-
df_plot = df_plot[df_plot['balls']<=int(input.ball_id())]
|
332 |
-
ball_title = str(f'At Most {(input.ball_id())} Ball Count; ')
|
333 |
-
|
334 |
-
if input.count_id_strikes()=='exact':
|
335 |
-
df_plot = df_plot[df_plot['strikes']==int(input.strike_id())]
|
336 |
-
strike_title = str(f'{(input.strike_id())} Strike Count; ')
|
337 |
-
elif input.count_id_strikes()=='greater':
|
338 |
-
df_plot = df_plot[df_plot['strikes']>=int(input.strike_id())]
|
339 |
-
strike_title = str(f'At Least {(input.strike_id())} Strike Count; ')
|
340 |
-
elif input.count_id_strikes()=='lesser':
|
341 |
-
df_plot = df_plot[df_plot['strikes']<=int(input.strike_id())]
|
342 |
-
strike_title = str(f'At Most {(input.strike_id())} Strike Count; ')
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
if input.split_id() == 'all':
|
347 |
-
split_title = ''
|
348 |
-
|
349 |
-
elif input.split_id() == 'left':
|
350 |
-
split_title = 'vs. LHH'
|
351 |
-
|
352 |
-
elif input.split_id() == 'right':
|
353 |
-
split_title = 'vs. RHH'
|
354 |
-
|
355 |
-
|
356 |
-
if len(df_plot)<1:
|
357 |
-
fig, ax = plt.subplots(1, 1, figsize=(9, 9))
|
358 |
-
ax.text(x=0.5,y=0.5,s='Please Select\nOther Parameters',fontsize=150,ha='center')
|
359 |
-
ax.grid('off')
|
360 |
-
return
|
361 |
|
362 |
-
|
363 |
-
df_plot
|
364 |
-
|
365 |
-
|
366 |
-
|
|
|
367 |
|
368 |
-
|
369 |
-
|
370 |
|
|
|
|
|
371 |
|
|
|
|
|
|
|
|
|
372 |
|
373 |
-
|
374 |
-
|
375 |
-
df_plot['prop'] = df_plot.groupby("pitch_type")["is_pitch"].transform("sum")
|
376 |
-
label_labels = df_plot.sort_values(by=['prop','pitch_type'],ascending=[False,True]).pitch_description.unique()
|
377 |
|
378 |
-
#
|
379 |
-
|
380 |
-
plt.rcParams.update({'figure.autolayout': True})
|
381 |
-
fig.set_facecolor('white')
|
382 |
-
sns.set_theme(style="whitegrid", palette=colour_palette)
|
383 |
-
print('this is the one plot')
|
384 |
-
# gs = GridSpec(7, 2, width_ratios=[1,1], height_ratios=[1.5,1,1,1,1,1,2.5])
|
385 |
-
gs = GridSpec(5, 5, height_ratios=[150,75,225,325,50],width_ratios=[1,100,100,100,1])
|
386 |
-
#### NO FG
|
387 |
-
####gs = GridSpec(5, 5, height_ratios=[225,0,225,325,50],width_ratios=[1,100,100,100,1])
|
388 |
-
#gs = GridSpec(4, 1, width_ratios=[1], height_ratios=[1,0.75,7-len(label_labels)/4,1+len(label_labels)/4])
|
389 |
|
390 |
-
|
|
|
|
|
|
|
391 |
|
392 |
-
#
|
393 |
-
|
394 |
-
ax1_table = fig.add_subplot(gs[1, :])
|
395 |
-
ax2_left = fig.add_subplot(gs[2, 1])
|
396 |
-
ax2_middle = fig.add_subplot(gs[2, 2])
|
397 |
-
ax2_right = fig.add_subplot(gs[2, 3])
|
398 |
-
ax3 = fig.add_subplot(gs[-2, :])
|
399 |
-
#axfooter = fig.add_subplot(gs[-1, :])
|
400 |
|
401 |
-
|
|
|
|
|
|
|
402 |
|
403 |
-
|
404 |
-
|
|
|
|
|
|
|
|
|
|
|
405 |
|
406 |
-
|
407 |
-
|
408 |
-
font_properties_axes = {'family': 'calibi', 'size': 16}
|
409 |
|
410 |
-
#
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
415 |
|
416 |
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
|
421 |
-
|
422 |
-
end_date = str(pd.to_datetime(input.date_range_id()[1]).strftime('%m/%d/%Y'))
|
423 |
|
|
|
|
|
|
|
424 |
|
425 |
-
|
426 |
|
427 |
-
pitcher_stats_call_header = [x for x in pitcher_stats_call['people'][0]['stats'][0]['splits'][0]['stat']]
|
428 |
-
pitcher_stats_call_values = [pitcher_stats_call['people'][0]['stats'][0]['splits'][0]['stat'][x] for x in pitcher_stats_call['people'][0]['stats'][0]['splits'][0]['stat']]
|
429 |
-
pitcher_stats_call_df = pd.DataFrame(data=dict(zip(pitcher_stats_call_header,pitcher_stats_call_values)),index=[0])
|
430 |
-
pitcher_stats_call_df['k_percent'] = pitcher_stats_call_df['strikeOuts']/pitcher_stats_call_df['battersFaced']
|
431 |
-
pitcher_stats_call_df['bb_percent'] = pitcher_stats_call_df['baseOnBalls']/pitcher_stats_call_df['battersFaced']
|
432 |
-
pitcher_stats_call_df['k_bb_percent'] = pitcher_stats_call_df['k_percent']-pitcher_stats_call_df['bb_percent']
|
433 |
-
pitcher_stats_call_df_small = pitcher_stats_call_df[['inningsPitched','battersFaced','era','whip','k_percent','bb_percent','k_bb_percent']]
|
434 |
|
435 |
-
pitcher_stats_call_df_small['k_percent'] = pitcher_stats_call_df_small['k_percent'].astype(float).apply(lambda x: '{:.1%}'.format(x))
|
436 |
-
pitcher_stats_call_df_small['bb_percent'] = pitcher_stats_call_df_small['bb_percent'].astype(float).apply(lambda x: '{:.1%}'.format(x))
|
437 |
-
pitcher_stats_call_df_small['k_bb_percent'] = pitcher_stats_call_df_small['k_bb_percent'].astype(float).apply(lambda x: '{:.1%}'.format(x))
|
438 |
|
439 |
-
table_fg = ax1_table.table(cellText=pitcher_stats_call_df_small.values, colLabels=pitcher_stats_call_df_small.columns, cellLoc='center',
|
440 |
-
bbox=[0.04, 0.2, 0.92, 0.8])
|
441 |
|
442 |
-
|
443 |
-
table_fg.set_fontsize(min_font_size)
|
444 |
|
445 |
|
446 |
-
|
447 |
-
# #new_column_names = ['Pitch Name', 'Pitch%', 'Velocity', 'Spin Rate','Exit Velocity', 'Whiff%', 'CSW%']
|
448 |
-
for i, col_name in enumerate(new_column_names):
|
449 |
-
table_fg.get_celld()[(0, i)].get_text().set_text(col_name)
|
450 |
|
451 |
-
|
|
|
|
|
|
|
452 |
|
453 |
|
|
|
454 |
|
455 |
-
for x
|
456 |
-
if x == 'velocity_kde':
|
457 |
-
psf.velocity_kdes(df=df_plot,ax=y,gs=gs,gs_list=z,fig=fig)
|
458 |
-
if x == 'rolling_tj_stuff':
|
459 |
-
psf.tj_stuff_roling(df = df_plot,window = int(input.rolling_window()),ax=y)
|
460 |
-
if x == 'break_plot':
|
461 |
-
psf.break_plot(df=df_plot,ax=y)
|
462 |
-
if x == 'location_lhb':
|
463 |
-
psf.location_plot(df=df_plot,ax=y,hand='L')
|
464 |
-
if x == 'location_rhb':
|
465 |
-
psf.location_plot(df=df_plot,ax=y,hand='R')
|
466 |
|
467 |
-
|
468 |
-
colour_pitches = [pitch_colours[x] for x in pitches_list]
|
469 |
-
|
470 |
-
# handles, labels = ax2_right.get_legend_handles_labels()
|
471 |
|
472 |
-
|
473 |
-
handles = [plt.scatter([], [], color=color, marker='o', s=100) for color in colour_pitches]
|
474 |
-
labels = pitches_list
|
475 |
-
|
476 |
-
|
477 |
|
|
|
478 |
|
479 |
-
|
480 |
-
psf.table_summary(df=df_plot.copy(),
|
481 |
-
pitcher_id=pitcher_id_select,
|
482 |
-
ax=ax3,
|
483 |
-
df_group=grouped_ivb.copy(),
|
484 |
-
df_group_all=grouped_ivb_all.copy(),
|
485 |
-
statcast_pitch_summary=statcast_pitch_summary.copy())
|
486 |
|
|
|
487 |
|
488 |
-
|
489 |
-
# #fig.text(x=0.5,y=0.05,s='Note: Colour Coding Compares to League Average By Pitch',ha='center',fontname='Calibri',fontsize=10)
|
490 |
-
# axfooter.text(x=0.05,y=1,s='By: Thomas Nestico\n @TJStats',fontname='Calibri',ha='left',fontsize=24,va='top')
|
491 |
-
# axfooter.text(x=1-0.05,y=1,s='Data: MLB, Fangraphs',ha='right',fontname='Calibri',fontsize=24,va='top')
|
492 |
|
|
|
493 |
|
494 |
-
|
495 |
-
# ha='center',va='center',fontname='Calibri',fontsize=16)
|
496 |
-
# axfooter.axis('off')
|
497 |
-
# #fig.tight_layout()
|
498 |
|
499 |
-
|
500 |
|
501 |
-
|
502 |
-
sorted_value_counts = df_plot['pitch_description'].value_counts().sort_values(ascending=False)
|
503 |
|
504 |
-
|
505 |
-
items_in_order = sorted_value_counts.index.tolist()
|
506 |
-
# Create a dictionary to map names to colors
|
507 |
-
name_to_color = dict(zip(labels, handles))
|
508 |
|
509 |
-
# Order the colors based on the correct order of names
|
510 |
-
ordered_colors = [name_to_color[name] for name in items_in_order]
|
511 |
|
512 |
|
513 |
-
|
514 |
-
fancybox=True,loc='lower center',fontsize=20,framealpha=1.0, markerscale=2,prop={'family': 'calibi', 'size': 20})
|
515 |
|
|
|
516 |
|
517 |
-
|
518 |
-
title_spot = f'{df_plot.pitcher_name.values[0]}'
|
519 |
|
|
|
520 |
|
521 |
-
ax0.text(x=0.5,y=0.8,s=title_spot,fontname='Calibri',ha='center',fontsize=56,va='top')
|
522 |
-
ax0.text(x=0.5,y=0.35,s='A Season Pitching Summary',fontname='Calibri',ha='center',fontsize=40,va='top',fontstyle='italic')
|
523 |
|
524 |
-
|
525 |
-
#ax0.text(x=0.5,y=0.05,s=f'{ball_title}{strike_title}{split_title}',fontname='Calibri',ha='center',fontsize=20,va='top')
|
526 |
-
ax0.axis('off')
|
527 |
-
ax0.text(x=0.5,y=0.5,s=f"{ player_bio['people'][0]['pitchHand']['code']}HP, Age: {player_bio['people'][0]['currentAge']}, {player_bio['people'][0]['height']}/{player_bio['people'][0]['weight']}",fontname='Calibri',ha='center',fontsize=24,va='top')
|
528 |
|
529 |
-
|
530 |
-
# ax0.text(x=0.5,y=0.25,s=f'{season_fg} MLB Season',fontname='Calibri',ha='center',fontsize=30,va='top')
|
531 |
-
# ax0.axis('off')
|
532 |
|
|
|
533 |
|
534 |
-
|
535 |
-
|
536 |
-
ax0.text(x=0.5,y=0.0,s=f'{ball_title}{strike_title}{split_title}',fontname='Calibri',ha='center',fontsize=20,va='top')
|
537 |
-
ax0.axis('off')
|
538 |
|
539 |
-
|
540 |
-
import urllib
|
541 |
-
import urllib.request
|
542 |
-
import urllib.error
|
543 |
-
from urllib.error import HTTPError
|
544 |
|
545 |
-
|
546 |
-
url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_180/v1/people/{pitcher_id_select}/headshot/milb/current.png'
|
547 |
-
test_mage = plt.imread(url)
|
548 |
-
except urllib.error.HTTPError as err:
|
549 |
-
url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/1/headshot/67/current.png'
|
550 |
-
test_mage = plt.imread(url)
|
551 |
-
imagebox = OffsetImage(test_mage, zoom = 0.5)
|
552 |
-
ab = AnnotationBbox(imagebox, (0.125, 0.4), frameon = False)
|
553 |
-
ax0.add_artist(ab)
|
554 |
|
555 |
-
|
556 |
|
|
|
|
|
|
|
557 |
|
|
|
558 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
559 |
|
560 |
-
|
561 |
-
|
562 |
-
|
|
|
|
|
|
|
563 |
|
564 |
-
|
565 |
-
|
566 |
-
# im = Image.open(BytesIO(response.content))
|
567 |
-
# im = plt.imread(team_logos[team_logos['id'] == player_bio['people'][0]['currentTeam']['parentOrgId']]['imageLink'].values[0])
|
568 |
-
# ax = fig.add_axes([0,0,1,0.85], anchor='C', zorder=1)
|
569 |
-
imagebox = OffsetImage(im, zoom = 0.4)
|
570 |
-
ab = AnnotationBbox(imagebox, (0.875, 0.40), frameon = False)
|
571 |
-
ax0.add_artist(ab)
|
572 |
-
except IndexError:
|
573 |
-
print()
|
574 |
-
|
575 |
|
576 |
-
############ FOOTER ################
|
577 |
-
#fig.text(x=0.5,y=0.05,s='Note: Colour Coding Compares to League Average By Pitch',ha='center',fontname='Calibri',fontsize=10)
|
578 |
-
axfooter = fig.add_subplot(gs[-1, :])
|
579 |
-
axfooter.text(x=0.05,y=1,s='By: Thomas Nestico\n @TJStats',fontname='Calibri',ha='left',fontsize=24,va='top')
|
580 |
-
axfooter.text(x=1-0.05,y=1,s='Data: MLB',ha='right',fontname='Calibri',fontsize=24,va='top')
|
581 |
|
582 |
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
#fig.tight_layout()
|
587 |
|
588 |
-
fig.subplots_adjust(left=0.03, right=0.97, top=0.97, bottom=0.03)
|
589 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
590 |
|
591 |
-
@output
|
592 |
-
@render.plot
|
593 |
-
@reactive.event(input.go, ignore_none=False)
|
594 |
-
def plot_game():
|
595 |
-
#fig, ax = plt.subplots(3, 2, figsize=(9, 9))
|
596 |
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
if len((input.player_id()))<1:
|
602 |
-
fig, ax = plt.subplots(1, 1, figsize=(9, 9))
|
603 |
-
ax.text(x=0.5,y=0.5,s='Please Select\nA Player',fontsize=150,ha='center')
|
604 |
-
ax.grid('off')
|
605 |
-
return
|
606 |
-
|
607 |
-
pitcher_id_select = int(input.player_id())
|
608 |
-
|
609 |
-
|
610 |
-
|
611 |
-
|
612 |
-
df_plot = df_2024_update[(df_2024_update['pitcher_id']==pitcher_id_select)&(df_2024_update['game_id']==int(input.game_id()))]
|
613 |
-
df_plot = df_plot[df_plot['batter_hand'].isin(split_dict_hand[input.split_id()])]
|
614 |
-
|
615 |
-
if input.count_id_balls()=='greater' and input.count_id_strikes()=='greater' and int(input.ball_id())==0 and int(input.strike_id())==0:
|
616 |
-
ball_title = ''
|
617 |
-
strike_title = ''
|
618 |
-
else:
|
619 |
-
if input.count_id_balls()=='exact':
|
620 |
-
df_plot = df_plot[df_plot['balls']==int(input.ball_id())]
|
621 |
-
ball_title = str(f'{(input.ball_id())} Ball Count; ')
|
622 |
-
elif input.count_id_balls()=='greater':
|
623 |
-
df_plot = df_plot[df_plot['balls']>=int(input.ball_id())]
|
624 |
-
ball_title = str(f'At Least {(input.ball_id())} Ball Count; ')
|
625 |
-
elif input.count_id_balls()=='lesser':
|
626 |
-
df_plot = df_plot[df_plot['balls']<=int(input.ball_id())]
|
627 |
-
ball_title = str(f'At Most {(input.ball_id())} Ball Count; ')
|
628 |
-
|
629 |
-
if input.count_id_strikes()=='exact':
|
630 |
-
df_plot = df_plot[df_plot['strikes']==int(input.strike_id())]
|
631 |
-
strike_title = str(f'{(input.strike_id())} Strike Count; ')
|
632 |
-
elif input.count_id_strikes()=='greater':
|
633 |
-
df_plot = df_plot[df_plot['strikes']>=int(input.strike_id())]
|
634 |
-
strike_title = str(f'At Least {(input.strike_id())} Strike Count; ')
|
635 |
-
elif input.count_id_strikes()=='lesser':
|
636 |
-
df_plot = df_plot[df_plot['strikes']<=int(input.strike_id())]
|
637 |
-
strike_title = str(f'At Most {(input.strike_id())} Strike Count; ')
|
638 |
-
|
639 |
|
640 |
-
|
641 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
642 |
|
643 |
-
|
644 |
-
|
645 |
|
646 |
-
elif input.split_id() == 'left':
|
647 |
-
split_title = 'vs. LHH'
|
648 |
-
|
649 |
-
elif input.split_id() == 'right':
|
650 |
-
split_title = 'vs. RHH'
|
651 |
|
652 |
-
|
653 |
-
|
654 |
-
|
655 |
-
ax.grid('off')
|
656 |
-
return
|
657 |
|
658 |
|
659 |
-
df_plot['pitch_type_count'] = df_plot.groupby(['pitcher_id'])['pitch_type'].cumcount()+1
|
660 |
-
df_plot['pitch_type_count_each'] = df_plot.groupby(['pitch_type'])['pitch_type'].cumcount()+1
|
661 |
-
#df_plot = df_plot.merge(df_2024_update[['tj_stuff_plus','play_id']],left_on=['play_id'],right_on=['play_id'],how='left')
|
662 |
-
df_plot = df_plot.sort_values(by=['pitch_description'])
|
663 |
-
df_plot = df_plot.sort_values(by=['start_time'])
|
664 |
|
665 |
-
|
666 |
-
|
667 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
668 |
|
669 |
-
#
|
|
|
670 |
|
671 |
-
|
672 |
-
|
|
|
|
|
|
|
673 |
|
|
|
|
|
|
|
|
|
674 |
|
|
|
675 |
|
676 |
-
from matplotlib.gridspec import GridSpec
|
677 |
-
plt.rcParams['font.family'] = 'Calibri'
|
678 |
-
df_plot['prop'] = df_plot.groupby("pitch_type")["is_pitch"].transform("sum")
|
679 |
-
label_labels = df_plot.sort_values(by=['prop','pitch_type'],ascending=[False,True]).pitch_description.unique()
|
680 |
|
681 |
-
#plt.rcParams["figure.figsize"] = [10,10]
|
682 |
-
fig = plt.figure(figsize=(20, 20))
|
683 |
-
plt.rcParams.update({'figure.autolayout': True})
|
684 |
-
fig.set_facecolor('white')
|
685 |
-
sns.set_theme(style="whitegrid", palette=colour_palette)
|
686 |
-
print('this is the one plot')
|
687 |
-
# gs = GridSpec(7, 2, width_ratios=[1,1], height_ratios=[1.5,1,1,1,1,1,2.5])
|
688 |
-
gs = GridSpec(5, 5, height_ratios=[150,75,225,325,50],width_ratios=[1,100,100,100,1])
|
689 |
-
#### NO FG
|
690 |
-
####gs = GridSpec(5, 5, height_ratios=[225,0,225,325,50],width_ratios=[1,100,100,100,1])
|
691 |
-
#gs = GridSpec(4, 1, width_ratios=[1], height_ratios=[1,0.75,7-len(label_labels)/4,1+len(label_labels)/4])
|
692 |
|
693 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
694 |
|
695 |
-
|
696 |
-
|
697 |
-
|
698 |
-
|
699 |
-
|
700 |
-
|
701 |
-
|
702 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
703 |
|
704 |
-
|
|
|
705 |
|
706 |
-
|
707 |
-
|
708 |
|
709 |
-
|
710 |
-
|
711 |
-
font_properties_axes = {'family': 'calibi', 'size': 16}
|
712 |
|
713 |
-
|
714 |
-
|
715 |
-
# data_pull = psf.fangraphs_scrape(pitcher_id=pitcher_id_select,
|
716 |
-
# split=input.split_id(),
|
717 |
-
# start_date=df_plot['game_date'].values[0],
|
718 |
-
# end_date=df_plot['game_date'].values[0])
|
719 |
|
720 |
-
|
721 |
-
|
722 |
|
723 |
|
724 |
-
|
725 |
-
|
726 |
-
pitcher_stats_call_header = [x for x in pitcher_stats_call['people'][0]['stats'][0]['splits'][0]['stat']]
|
727 |
-
pitcher_stats_call_values = [pitcher_stats_call['people'][0]['stats'][0]['splits'][0]['stat'][x] for x in pitcher_stats_call['people'][0]['stats'][0]['splits'][0]['stat']]
|
728 |
-
pitcher_stats_call_df = pd.DataFrame(data=dict(zip(pitcher_stats_call_header,pitcher_stats_call_values)),index=[0])
|
729 |
-
# pitcher_stats_call_df['k_percent'] = pitcher_stats_call_df['strikeOuts']/pitcher_stats_call_df['battersFaced']
|
730 |
-
# pitcher_stats_call_df['bb_percent'] = pitcher_stats_call_df['baseOnBalls']/pitcher_stats_call_df['battersFaced']
|
731 |
-
# pitcher_stats_call_df['k_bb_percent'] = pitcher_stats_call_df['k_percent']-pitcher_stats_call_df['bb_percent']
|
732 |
-
pitcher_stats_call_df_small = pitcher_stats_call_df[['inningsPitched','battersFaced','earnedRuns','hits','strikeOuts','baseOnBalls','hitByPitch','homeRuns']]
|
733 |
-
pitcher_stats_call_df_small['whiffs'] = int(df_plot['is_whiff'].sum())
|
734 |
-
# pitcher_stats_call_df_small['k_percent'] = pitcher_stats_call_df_small['k_percent'].astype(float).apply(lambda x: '{:.1%}'.format(x))
|
735 |
-
# pitcher_stats_call_df_small['bb_percent'] = pitcher_stats_call_df_small['bb_percent'].astype(float).apply(lambda x: '{:.1%}'.format(x))
|
736 |
-
# pitcher_stats_call_df_small['k_bb_percent'] = pitcher_stats_call_df_small['k_bb_percent'].astype(float).apply(lambda x: '{:.1%}'.format(x))
|
737 |
-
|
738 |
-
table_fg = ax1_table.table(cellText=pitcher_stats_call_df_small.values, colLabels=pitcher_stats_call_df_small.columns, cellLoc='center',
|
739 |
-
bbox=[0.04, 0.2, 0.92, 0.8])
|
740 |
-
|
741 |
-
min_font_size = 20
|
742 |
-
table_fg.set_fontsize(min_font_size)
|
743 |
-
|
744 |
-
|
745 |
-
new_column_names = ['$\\bf{IP}$','$\\bf{PA}$','$\\bf{ER}$','$\\bf{H}$','$\\bf{K}$','$\\bf{BB}$','$\\bf{HBP}$','$\\bf{HR}$','$\\bf{Whiffs}$']
|
746 |
-
# #new_column_names = ['Pitch Name', 'Pitch%', 'Velocity', 'Spin Rate','Exit Velocity', 'Whiff%', 'CSW%']
|
747 |
-
for i, col_name in enumerate(new_column_names):
|
748 |
-
table_fg.get_celld()[(0, i)].get_text().set_text(col_name)
|
749 |
-
|
750 |
-
ax1_table.axis('off')
|
751 |
-
|
752 |
-
|
753 |
-
# psf.fangraphs_table(data=data_pull,
|
754 |
-
# stats=['IP','WHIP','ERA','FIP','TBF','K%','BB%','K-BB%'],
|
755 |
-
# ax=ax1_table)
|
756 |
-
|
757 |
-
|
758 |
-
# psf.velocity_kdes(df=df_plot,
|
759 |
-
# ax=ax2_loc,
|
760 |
-
# gs=gs,
|
761 |
-
# fig=fig)
|
762 |
-
|
763 |
-
# # psf.tj_stuff_roling(df = df_plot,
|
764 |
-
# # window = 5,
|
765 |
-
# # ax=ax2_velo)
|
766 |
-
# psf.location_plot(df=df_plot,ax=ax2_velo,hand='L')
|
767 |
-
|
768 |
-
# psf.location_plot(df=df_plot,ax=ax2_loc,hand='R')
|
769 |
-
# # # ## Break Plot
|
770 |
-
# psf.break_plot(df=df_plot,ax=ax2)
|
771 |
-
for x,y,z in zip([input.plot_id_1(),input.plot_id_2(),input.plot_id_3()],[ax2_left,ax2_middle,ax2_right],[1,2,3]):
|
772 |
-
if x == 'velocity_kde':
|
773 |
-
psf.velocity_kdes(df=df_plot,ax=y,gs=gs,gs_list=z,fig=fig)
|
774 |
-
if x == 'rolling_tj_stuff':
|
775 |
-
psf.tj_stuff_roling(df = df_plot,window = int(input.rolling_window()),ax=y)
|
776 |
-
if x == 'break_plot':
|
777 |
-
psf.break_plot(df=df_plot,ax=y)
|
778 |
-
if x == 'location_lhb':
|
779 |
-
psf.location_plot(df=df_plot,ax=y,hand='L')
|
780 |
-
if x == 'location_rhb':
|
781 |
-
psf.location_plot(df=df_plot,ax=y,hand='R')
|
782 |
-
|
783 |
-
pitches_list = df_plot['pitch_description'].unique()
|
784 |
-
colour_pitches = [pitch_colours[x] for x in pitches_list]
|
785 |
-
|
786 |
-
# handles, labels = ax2_right.get_legend_handles_labels()
|
787 |
|
788 |
-
|
789 |
-
|
790 |
-
labels = pitches_list
|
791 |
|
|
|
|
|
|
|
|
|
|
|
792 |
|
793 |
-
### FANGRAPHS TABLE ###
|
794 |
-
psf.table_summary(df=df_plot.copy(),
|
795 |
-
pitcher_id=pitcher_id_select,
|
796 |
-
ax=ax3,
|
797 |
-
df_group=grouped_ivb.copy(),
|
798 |
-
df_group_all=grouped_ivb_all.copy(),
|
799 |
-
statcast_pitch_summary=statcast_pitch_summary.copy())
|
800 |
|
|
|
|
|
|
|
|
|
|
|
801 |
|
|
|
802 |
|
803 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
804 |
|
805 |
-
# Get value counts of the column and sort in descending order
|
806 |
-
sorted_value_counts = df_plot['pitch_description'].value_counts().sort_values(ascending=False)
|
807 |
|
808 |
-
# Get the list of items ordered from most to least frequent
|
809 |
-
items_in_order = sorted_value_counts.index.tolist()
|
810 |
-
# Create a dictionary to map names to colors
|
811 |
-
name_to_color = dict(zip(labels, handles))
|
812 |
|
813 |
-
# Order the colors based on the correct order of names
|
814 |
-
ordered_colors = [name_to_color[name] for name in items_in_order]
|
815 |
|
|
|
|
|
816 |
|
817 |
-
|
818 |
-
|
|
|
819 |
|
820 |
|
821 |
-
|
822 |
-
|
|
|
|
|
|
|
823 |
|
824 |
|
825 |
-
ax0.text(x=0.5,y=0.8,s=title_spot,fontname='Calibri',ha='center',fontsize=56,va='top')
|
826 |
-
ax0.text(x=0.5,y=0.35,s='A Game Pitching Summary',fontname='Calibri',ha='center',fontsize=40,va='top',fontstyle='italic')
|
827 |
|
|
|
|
|
|
|
|
|
|
|
|
|
828 |
|
829 |
-
#
|
830 |
-
#
|
831 |
-
|
832 |
-
|
|
|
833 |
|
834 |
-
|
835 |
-
|
836 |
-
|
837 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
838 |
|
839 |
|
840 |
-
from matplotlib.offsetbox import (OffsetImage, AnnotationBbox)
|
841 |
import urllib
|
842 |
import urllib.request
|
843 |
import urllib.error
|
844 |
from urllib.error import HTTPError
|
845 |
|
846 |
try:
|
847 |
-
url = f'https://img.mlbstatic.com/mlb-photos/image/upload/
|
848 |
test_mage = plt.imread(url)
|
849 |
except urllib.error.HTTPError as err:
|
850 |
url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/1/headshot/67/current.png'
|
851 |
-
|
852 |
-
|
853 |
-
ab
|
854 |
-
ax0.add_artist(ab)
|
855 |
|
856 |
-
|
857 |
|
858 |
-
player_bio = requests.get(url=f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id_select}&hydrate=currentTeam").json()
|
859 |
-
#ax0.text(x=0.5,y=0.05,s=f'{ball_title}{strike_title}{split_title}',fontname='Calibri',ha='center',fontsize=20,va='top')
|
860 |
-
ax0.axis('off')
|
861 |
-
ax0.text(x=0.5,y=0.5,s=f"{ player_bio['people'][0]['pitchHand']['code']}HP, Age: {player_bio['people'][0]['currentAge']}, {player_bio['people'][0]['height']}/{player_bio['people'][0]['weight']}",fontname='Calibri',ha='center',fontsize=24,va='top')
|
862 |
|
|
|
|
|
|
|
|
|
|
|
|
|
863 |
|
864 |
|
865 |
if 'currentTeam' in player_bio['people'][0]:
|
@@ -871,25 +779,24 @@ def server(input, output, session):
|
|
871 |
# im = Image.open(BytesIO(response.content))
|
872 |
# im = plt.imread(team_logos[team_logos['id'] == player_bio['people'][0]['currentTeam']['parentOrgId']]['imageLink'].values[0])
|
873 |
# ax = fig.add_axes([0,0,1,0.85], anchor='C', zorder=1)
|
874 |
-
imagebox = OffsetImage(im, zoom = 0.
|
875 |
-
ab = AnnotationBbox(imagebox, (0.
|
876 |
-
|
877 |
except IndexError:
|
878 |
print()
|
879 |
-
|
880 |
-
############ FOOTER ################
|
881 |
-
#fig.text(x=0.5,y=0.05,s='Note: Colour Coding Compares to League Average By Pitch',ha='center',fontname='Calibri',fontsize=10)
|
882 |
-
axfooter = fig.add_subplot(gs[-1, :])
|
883 |
-
axfooter.text(x=0.05,y=1,s='By: Thomas Nestico\n @TJStats',fontname='Calibri',ha='left',fontsize=24,va='top')
|
884 |
-
axfooter.text(x=1-0.05,y=1,s='Data: MLB',ha='right',fontname='Calibri',fontsize=24,va='top')
|
885 |
|
886 |
|
887 |
-
|
888 |
-
|
889 |
-
|
890 |
-
#fig.tight_layout()
|
891 |
|
|
|
|
|
892 |
|
893 |
-
|
|
|
|
|
|
|
|
|
894 |
|
895 |
app = App(app_ui, server)
|
|
|
2 |
import numpy as np
|
3 |
import matplotlib.pyplot as plt
|
4 |
import seaborn as sns
|
5 |
+
#import pitch_summary_functions as psf
|
6 |
import requests
|
7 |
import matplotlib
|
8 |
from api_scraper import MLB_Scrape
|
|
|
10 |
import shinyswatch
|
11 |
|
12 |
|
13 |
+
season = 2024
|
14 |
+
level = 'mlb'
|
15 |
colour_palette = ['#FFB000','#648FFF','#785EF0',
|
16 |
'#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']
|
17 |
|
18 |
import datasets
|
19 |
from datasets import load_dataset
|
20 |
### Import Datasets
|
21 |
+
dataset = load_dataset('nesticot/mlb_data', data_files=[f'{level}_pitch_data_{season}.csv' ])
|
22 |
dataset_train = dataset['train']
|
23 |
df_2024 = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True).drop_duplicates(subset=['play_id'],keep='last')
|
24 |
|
25 |
+
# df_2024 = pd.read_csv('C:/Users/thoma/Google Drive/Python/Baseball/season_stats/2024/2024_regular_data.csv',index_col=[0])
|
26 |
|
|
|
27 |
# ### Import Datasets
|
28 |
# import datasets
|
29 |
# from datasets import load_dataset
|
|
|
34 |
### PITCH COLOURS ###
|
35 |
pitch_colours = {
|
36 |
'Four-Seam Fastball':'#FF007D',#BC136F
|
|
|
37 |
'Sinker':'#98165D',#DC267F
|
38 |
'Cutter':'#BE5FA0',
|
39 |
|
|
|
57 |
'Other':'#9C8975',
|
58 |
}
|
59 |
|
60 |
+
import pitcher_update as pu
|
61 |
+
df_2024 = pu.df_update(df_2024)
|
62 |
+
df_2024['pitch_count_hand'] = df_2024.groupby(['pitcher_id','batter_hand'])['start_speed'].transform('count')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
|
|
64 |
|
|
|
65 |
|
66 |
+
# DEFINE STRIKE ZONE
|
67 |
+
strike_zone = pd.DataFrame({
|
68 |
+
'PlateLocSide': [-0.9, -0.9, 0.9, 0.9, -0.9],
|
69 |
+
'PlateLocHeight': [1.5, 3.5, 3.5, 1.5, 1.5]
|
70 |
+
})
|
71 |
|
72 |
+
### STRIKE ZONE ###
|
73 |
+
def draw_line(axis,alpha_spot=1,catcher_p = True):
|
74 |
|
75 |
+
axis.plot(strike_zone['PlateLocSide'], strike_zone['PlateLocHeight'], color='black', linewidth=1.3,zorder=3,alpha=alpha_spot,)
|
76 |
|
77 |
+
# ax.plot([-0.2833333, -0.2833333], [1.6, 3.5], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3)
|
78 |
+
# ax.plot([0.2833333, 0.2833333], [1.6, 3.5], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3)
|
79 |
+
# ax.plot([-0.85, 0.85], [2.2, 2.2], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3)
|
80 |
+
# ax.plot([-0.85, 0.85], [2.9, 2.9], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3)
|
81 |
+
if catcher_p:
|
82 |
+
# Add dashed line
|
83 |
+
# Add home plate
|
84 |
+
axis.plot([-0.708, 0.708], [0.15, 0.15], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
|
85 |
+
axis.plot([-0.708, -0.708], [0.15, 0.3], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
|
86 |
+
axis.plot([-0.708, 0], [0.3, 0.5], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
|
87 |
+
axis.plot([0, 0.708], [0.5, 0.3], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
|
88 |
+
axis.plot([0.708, 0.708], [0.3, 0.15], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
|
89 |
+
else:
|
90 |
+
axis.plot([-0.708, 0.708], [0.4, 0.4], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
|
91 |
+
axis.plot([-0.708, -0.9], [0.4, -0.1], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
|
92 |
+
axis.plot([-0.9, 0], [-0.1, -0.35], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
|
93 |
+
axis.plot([0, 0.9], [-.35, -0.1], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
|
94 |
+
axis.plot([0.9, 0.708], [-0.1,0.4], color='black', linewidth=1,alpha=alpha_spot,zorder=1)
|
95 |
|
96 |
+
pitcher_dicts = df_2024.set_index('pitcher_id')['pitcher_name'].sort_values().to_dict()
|
|
|
|
|
|
|
|
|
97 |
|
98 |
team_logos = pd.read_csv('team_logos.csv')
|
99 |
+
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
|
100 |
+
cmap_sum2 = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#FFFFFF','#FFB000',])
|
101 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
from urllib.request import Request, urlopen
|
104 |
from shiny import App, reactive, ui, render
|
|
|
120 |
shinyswatch.theme.simplex(),
|
121 |
ui.tags.h4("TJStats"),
|
122 |
ui.tags.i("Baseball Analytics and Visualizations"),
|
123 |
+
ui.tags.h5("Pitcher Heat Maps"),
|
124 |
ui.row(
|
125 |
|
126 |
|
127 |
ui.layout_sidebar(
|
128 |
|
129 |
ui.panel_sidebar(
|
|
|
|
|
|
|
|
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
|
132 |
+
ui.input_select('player_id','Select Player',pitcher_dicts,selectize=True,multiple=False),
|
133 |
+
ui.output_ui('game_id_select','Date Range'),
|
134 |
+
|
135 |
+
|
136 |
+
ui.output_ui('pitch_type_select','Select Pitch Type'),
|
137 |
+
ui.input_action_button("go", "Generate",class_="btn-primary"),width=2
|
138 |
+
|
139 |
+
|
140 |
+
),
|
141 |
|
142 |
|
|
|
143 |
|
144 |
|
145 |
+
|
146 |
+
|
147 |
+
|
148 |
ui.panel_main(
|
149 |
ui.navset_tab(
|
150 |
# ui.nav("Raw Data",
|
151 |
# ui.output_data_frame("raw_table")),
|
152 |
ui.nav("Season Summary",
|
153 |
ui.output_plot('plot',
|
154 |
+
width='1600px',
|
155 |
+
height='900px')),id="my_tabs"))))))
|
|
|
|
|
|
|
|
|
|
|
156 |
|
157 |
|
158 |
|
|
|
167 |
def server(input, output, session):
|
168 |
|
169 |
@render.ui
|
170 |
+
def game_id_select():
|
171 |
|
172 |
# @reactive.Effect
|
173 |
if input.my_tabs() == 'Season Summary':
|
|
|
175 |
return ui.input_date_range("date_range_id", "Date range input",start = df_2024.game_date.min(),
|
176 |
end = df_2024.game_date.max(),width=2,min=df_2024.game_date.min(),
|
177 |
max=df_2024.game_date.max()),
|
|
|
|
|
|
|
|
|
|
|
178 |
|
179 |
+
@render.ui
|
180 |
+
def pitch_type_select():
|
181 |
+
pitch_dicts = df_2024[(df_2024['pitcher_id']==int(input.player_id()))].set_index('pitch_type')['pitch_description'].sort_values().to_dict()
|
182 |
|
183 |
+
# @reactive.Effect
|
184 |
+
return ui.input_select('pitch_type','Select Pitch Type',pitch_dicts,selectize=True,multiple=False)
|
185 |
+
|
186 |
@output
|
187 |
@render.plot
|
188 |
@reactive.event(input.go, ignore_none=False)
|
|
|
198 |
ax.text(x=0.5,y=0.5,s='Please Select\nA Player',fontsize=150,ha='center')
|
199 |
ax.grid('off')
|
200 |
return
|
|
|
|
|
201 |
|
202 |
|
203 |
+
pitcher_input = int(input.player_id())
|
204 |
+
pitch_input = input.pitch_type()
|
205 |
+
|
206 |
+
df_plot_full = df_2024[(df_2024['pitcher_id']==pitcher_input)]
|
207 |
+
df_plot_full['h_s_b'] = df_plot_full.groupby(['batter_hand','strikes', 'balls']).transform('count')['pitcher_id']
|
208 |
+
df_plot_full['h_s_b_pitch'] = df_plot_full.groupby(['batter_hand','strikes', 'balls','pitch_type']).transform('count')['pitcher_id']
|
209 |
+
df_plot_full['h_s_b_pitch_percent'] = df_plot_full['h_s_b_pitch']/df_plot_full['h_s_b']
|
210 |
+
|
211 |
+
|
212 |
+
df_plot = df_plot_full[(df_plot_full['pitch_type']==pitch_input)]
|
213 |
df_plot = df_plot[(pd.to_datetime(df_plot['game_date']).dt.date>=input.date_range_id()[0])&
|
214 |
(pd.to_datetime(df_plot['game_date']).dt.date<=input.date_range_id()[1])]
|
215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
|
217 |
+
print("THIS IS HERE")
|
218 |
+
print(df_plot)
|
219 |
+
pivot_table_l = df_plot[df_plot['batter_hand'].isin(['L'])].groupby(['batter_hand','strikes', 'balls'])[['h_s_b_pitch_percent']].mean().reset_index().pivot('strikes','balls','h_s_b_pitch_percent')#.fillna(0).style.background_gradient(cmap=cmap_sum2, axis=None).format("{:.0%}")
|
220 |
+
# Create a new index and columns range
|
221 |
+
new_index = range(3)
|
222 |
+
new_columns = range(4)
|
223 |
|
224 |
+
# Reindex the pivot table
|
225 |
+
pivot_table_l = pivot_table_l.reindex(index=new_index, columns=new_columns)
|
226 |
|
227 |
+
# Fill any missing values with 0
|
228 |
+
pivot_table_l = pivot_table_l.fillna(0)
|
229 |
|
230 |
+
pivot_table_l = df_plot[df_plot['batter_hand']=='L'].groupby(['batter_hand','strikes', 'balls'])[['h_s_b_pitch_percent']].mean().reset_index().pivot('strikes','balls','h_s_b_pitch_percent')#.fillna(0).style.background_gradient(cmap=cmap_sum2, axis=None).format("{:.0%}")
|
231 |
+
# Create a new index and columns range
|
232 |
+
new_index = range(3)
|
233 |
+
new_columns = range(4)
|
234 |
|
235 |
+
# Reindex the pivot table
|
236 |
+
pivot_table_l = pivot_table_l.reindex(index=new_index, columns=new_columns)
|
|
|
|
|
237 |
|
238 |
+
# Fill any missing values with 0
|
239 |
+
pivot_table_l = pivot_table_l.fillna(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
240 |
|
241 |
+
pivot_table_r = df_plot[df_plot['batter_hand']=='R'].groupby(['batter_hand','strikes', 'balls'])[['h_s_b_pitch_percent']].mean().reset_index().pivot('strikes','balls','h_s_b_pitch_percent')#.fillna(0).style.background_gradient(cmap=cmap_sum2, axis=None).format("{:.0%}")
|
242 |
+
# Create a new index and columns range
|
243 |
+
new_index = range(3)
|
244 |
+
new_columns = range(4)
|
245 |
|
246 |
+
# Reindex the pivot table
|
247 |
+
pivot_table_r = pivot_table_r.reindex(index=new_index, columns=new_columns)
|
|
|
|
|
|
|
|
|
|
|
|
|
248 |
|
249 |
+
# Fill any missing values with 0
|
250 |
+
pivot_table_r = pivot_table_r.fillna(0)
|
251 |
+
|
252 |
+
|
253 |
|
254 |
+
# Assuming you have a DataFrame called 'df_plot_full' with columns 'pitch_type', 'strikes', and 'balls'
|
255 |
+
|
256 |
+
# Filter the dataset to include only slider pitches
|
257 |
+
# slider_pitches = df_plot_full[df_plot_full['pitch_type'] == 'SL']
|
258 |
+
|
259 |
+
# Group the filtered dataset by strike and ball counts
|
260 |
+
# grouped_counts = slider_pitches.groupby(['pitcher_hand','strikes', 'balls']).size().reset_index(name='total_pitches')
|
261 |
|
262 |
+
# Calculate the proportion of slider pitches for each strike and ball count
|
263 |
+
# grouped_counts['proportion'] = grouped_counts['total_pitches'] / grouped_counts['total_pitches'].sum()
|
|
|
264 |
|
265 |
+
# Print the resulting DataFrame
|
266 |
+
df_summ = df_plot.groupby(['batter_hand']).agg(
|
267 |
+
pitch_count = ('pitch_count_hand','max'),
|
268 |
+
pa = ('pa','sum'),
|
269 |
+
ab = ('ab','sum'),
|
270 |
+
obp_pa = ('obp','sum'),
|
271 |
+
hits = ('hits','sum'),
|
272 |
+
on_base = ('on_base','sum'),
|
273 |
+
k = ('k','sum'),
|
274 |
+
bb = ('bb','sum'),
|
275 |
+
bb_minus_k = ('bb_minus_k','sum'),
|
276 |
+
csw = ('csw','sum'),
|
277 |
+
bip = ('bip','sum'),
|
278 |
+
bip_div = ('bip_div','sum'),
|
279 |
+
tb = ('tb','sum'),
|
280 |
+
woba = ('woba','sum'),
|
281 |
+
woba_contact = ('woba_contact','sum'),
|
282 |
+
xwoba = ('woba_pred','sum'),
|
283 |
+
xwoba_contact = ('woba_pred_contact','sum'),
|
284 |
+
woba_codes = ('woba_codes','sum'),
|
285 |
+
hard_hit = ('hard_hit','sum'),
|
286 |
+
barrel = ('barrel','sum'),
|
287 |
+
sweet_spot = ('sweet_spot','sum'),
|
288 |
+
max_launch_speed = ('launch_speed','max'),
|
289 |
+
launch_speed = ('launch_speed','mean'),
|
290 |
+
launch_angle = ('launch_angle','mean'),
|
291 |
+
pitches = ('is_pitch','sum'),
|
292 |
+
swings = ('swings','sum'),
|
293 |
+
in_zone = ('in_zone','sum'),
|
294 |
+
out_zone = ('out_zone','sum'),
|
295 |
+
whiffs = ('whiffs','sum'),
|
296 |
+
zone_swing = ('zone_swing','sum'),
|
297 |
+
zone_contact = ('zone_contact','sum'),
|
298 |
+
ozone_swing = ('ozone_swing','sum'),
|
299 |
+
ozone_contact = ('ozone_contact','sum'),
|
300 |
+
ground_ball = ('trajectory_ground_ball','sum'),
|
301 |
+
line_drive = ('trajectory_line_drive','sum'),
|
302 |
+
fly_ball =('trajectory_fly_ball','sum'),
|
303 |
+
pop_up = ('trajectory_popup','sum'),
|
304 |
+
attack_zone = ('attack_zone','count'),
|
305 |
+
heart = ('heart','sum'),
|
306 |
+
shadow = ('shadow','sum'),
|
307 |
+
chase = ('chase','sum'),
|
308 |
+
waste = ('waste','sum'),
|
309 |
+
heart_swing = ('heart_swing','sum'),
|
310 |
+
shadow_swing = ('shadow_swing','sum'),
|
311 |
+
chase_swing = ('chase_swing','sum'),
|
312 |
+
waste_swing = ('waste_swing','sum'),
|
313 |
+
heart_whiff = ('heart_whiff','sum'),
|
314 |
+
shadow_whiff = ('shadow_whiff','sum'),
|
315 |
+
chase_whiff = ('chase_whiff','sum'),
|
316 |
+
waste_whiff = ('waste_whiff','sum'),
|
317 |
+
).reset_index()
|
318 |
|
319 |
|
320 |
+
df_summ['avg'] = [df_summ.hits[x]/df_summ.ab[x] if df_summ.ab[x] != 0 else np.nan for x in range(len(df_summ))]
|
321 |
+
df_summ['obp'] = [df_summ.on_base[x]/df_summ.obp_pa[x] if df_summ.obp_pa[x] != 0 else np.nan for x in range(len(df_summ))]
|
322 |
+
df_summ['slg'] = [df_summ.tb[x]/df_summ.ab[x] if df_summ.ab[x] != 0 else np.nan for x in range(len(df_summ))]
|
323 |
|
324 |
+
df_summ['ops'] = df_summ['obp']+df_summ['slg']
|
|
|
325 |
|
326 |
+
df_summ['k_percent'] = [df_summ.k[x]/df_summ.pa[x] if df_summ.pa[x] != 0 else np.nan for x in range(len(df_summ))]
|
327 |
+
df_summ['bb_percent'] =[df_summ.bb[x]/df_summ.pa[x] if df_summ.pa[x] != 0 else np.nan for x in range(len(df_summ))]
|
328 |
+
df_summ['bb_minus_k_percent'] =[(df_summ.bb_minus_k[x])/df_summ.pa[x] if df_summ.pa[x] != 0 else np.nan for x in range(len(df_summ))]
|
329 |
|
330 |
+
df_summ['bb_over_k_percent'] =[df_summ.bb[x]/df_summ.k[x] if df_summ.k[x] != 0 else np.nan for x in range(len(df_summ))]
|
331 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
332 |
|
|
|
|
|
|
|
333 |
|
|
|
|
|
334 |
|
335 |
+
df_summ['csw_percent'] =[df_summ.csw[x]/df_summ.pitches[x] if df_summ.pitches[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
336 |
|
337 |
|
338 |
+
df_summ['sweet_spot_percent'] = [df_summ.sweet_spot[x]/df_summ.bip_div[x] if df_summ.bip_div[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
|
|
|
|
339 |
|
340 |
+
df_summ['woba_percent'] = [df_summ.woba[x]/df_summ.woba_codes[x] if df_summ.woba_codes[x] != 0 else np.nan for x in range(len(df_summ))]
|
341 |
+
df_summ['woba_percent_contact'] = [df_summ.woba_contact[x]/df_summ.bip[x] if df_summ.bip[x] != 0 else np.nan for x in range(len(df_summ))]
|
342 |
+
#df_summ['hard_hit_percent'] = [df_summ.sweet_spot[x]/df_summ.bip[x] if df_summ.bip[x] != 0 else np.nan for x in range(len(df_summ))]
|
343 |
+
df_summ['hard_hit_percent'] = [df_summ.hard_hit[x]/df_summ.bip_div[x] if df_summ.bip_div[x] != 0 else np.nan for x in range(len(df_summ))]
|
344 |
|
345 |
|
346 |
+
df_summ['barrel_percent'] = [df_summ.barrel[x]/df_summ.bip_div[x] if df_summ.bip_div[x] != 0 else np.nan for x in range(len(df_summ))]
|
347 |
|
348 |
+
df_summ['zone_contact_percent'] = [df_summ.zone_contact[x]/df_summ.zone_swing[x] if df_summ.zone_swing[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
349 |
|
350 |
+
df_summ['zone_swing_percent'] = [df_summ.zone_swing[x]/df_summ.in_zone[x] if df_summ.in_zone[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
|
|
|
|
351 |
|
352 |
+
df_summ['zone_percent'] = [df_summ.in_zone[x]/df_summ.pitches[x] if df_summ.pitches[x] > 0 else np.nan for x in range(len(df_summ))]
|
|
|
|
|
|
|
|
|
353 |
|
354 |
+
df_summ['chase_percent'] = [df_summ.ozone_swing[x]/(df_summ.pitches[x] - df_summ.in_zone[x]) if (df_summ.pitches[x]- df_summ.in_zone[x]) != 0 else np.nan for x in range(len(df_summ))]
|
355 |
|
356 |
+
df_summ['chase_contact'] = [df_summ.ozone_contact[x]/df_summ.ozone_swing[x] if df_summ.ozone_swing[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
|
|
|
|
|
|
|
|
|
|
357 |
|
358 |
+
df_summ['swing_percent'] = [df_summ.swings[x]/df_summ.pitches[x] if df_summ.pitches[x] > 0 else np.nan for x in range(len(df_summ))]
|
359 |
|
360 |
+
df_summ['whiff_rate'] = [df_summ.whiffs[x]/df_summ.swings[x] if df_summ.swings[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
|
|
|
|
361 |
|
362 |
+
df_summ['swstr_rate'] = [df_summ.whiffs[x]/df_summ.pitches[x] if df_summ.pitches[x] > 0 else np.nan for x in range(len(df_summ))]
|
363 |
|
364 |
+
df_summ['ground_ball_percent'] = [df_summ.ground_ball[x]/df_summ.bip[x] if df_summ.bip[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
|
|
|
|
365 |
|
366 |
+
df_summ['line_drive_percent'] = [df_summ.line_drive[x]/df_summ.bip[x] if df_summ.bip[x] != 0 else np.nan for x in range(len(df_summ))]
|
367 |
|
368 |
+
df_summ['fly_ball_percent'] = [df_summ.fly_ball[x]/df_summ.bip[x] if df_summ.bip[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
369 |
|
370 |
+
df_summ['pop_up_percent'] = [df_summ.pop_up[x]/df_summ.bip[x] if df_summ.bip[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
|
|
|
|
371 |
|
|
|
|
|
372 |
|
373 |
|
374 |
+
df_summ['heart_zone_percent'] = [df_summ.heart[x]/df_summ.attack_zone[x] if df_summ.attack_zone[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
375 |
|
376 |
+
df_summ['shadow_zone_percent'] = [df_summ.shadow[x]/df_summ.attack_zone[x] if df_summ.attack_zone[x] != 0 else np.nan for x in range(len(df_summ))]
|
377 |
|
378 |
+
df_summ['chase_zone_percent'] = [df_summ.chase[x]/df_summ.attack_zone[x] if df_summ.attack_zone[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
379 |
|
380 |
+
df_summ['waste_zone_percent'] = [df_summ.waste[x]/df_summ.attack_zone[x] if df_summ.attack_zone[x] != 0 else np.nan for x in range(len(df_summ))]
|
381 |
|
|
|
|
|
382 |
|
383 |
+
df_summ['heart_zone_swing_percent'] = [df_summ.heart_swing[x]/df_summ.heart[x] if df_summ.heart[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
|
|
|
|
384 |
|
385 |
+
df_summ['shadow_zone_swing_percent'] = [df_summ.shadow_swing[x]/df_summ.shadow[x] if df_summ.shadow[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
|
|
386 |
|
387 |
+
df_summ['chase_zone_swing_percent'] = [df_summ.chase_swing[x]/df_summ.chase[x] if df_summ.chase[x] != 0 else np.nan for x in range(len(df_summ))]
|
388 |
|
389 |
+
df_summ['waste_zone_swing_percent'] = [df_summ.waste_swing[x]/df_summ.waste[x] if df_summ.waste[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
|
|
|
|
390 |
|
391 |
+
df_summ['heart_zone_whiff_percent'] = [df_summ.heart_whiff[x]/df_summ.heart_swing[x] if df_summ.heart_swing[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
|
|
|
|
|
|
392 |
|
393 |
+
df_summ['shadow_zone_whiff_percent'] = [df_summ.shadow_whiff[x]/df_summ.shadow_swing[x] if df_summ.shadow_swing[x] != 0 else np.nan for x in range(len(df_summ))]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
394 |
|
395 |
+
df_summ['chase_zone_whiff_percent'] = [df_summ.chase_whiff[x]/df_summ.chase_swing[x] if df_summ.chase_swing[x] != 0 else np.nan for x in range(len(df_summ))]
|
396 |
|
397 |
+
df_summ['waste_zone_whiff_percent'] = [df_summ.waste_whiff[x]/df_summ.waste_swing[x] if df_summ.waste_swing[x] != 0 else np.nan for x in range(len(df_summ))]
|
398 |
+
df_summ['xwoba_percent'] = [df_summ.xwoba[x]/df_summ.woba_codes[x] if df_summ.woba_codes[x] != 0 else np.nan for x in range(len(df_summ))]
|
399 |
+
df_summ['xwoba_percent_contact'] = [df_summ.xwoba_contact[x]/df_summ.bip[x] if df_summ.bip[x] != 0 else np.nan for x in range(len(df_summ))]
|
400 |
|
401 |
+
df_summ['pitch_percent'] = [df_summ.pitches[x]/df_summ.pitch_count[x] if df_summ.pitch_count[x] != 0 else np.nan for x in range(len(df_summ))]
|
402 |
|
403 |
+
table_left = df_summ[df_summ['batter_hand']=='L'][['pitch_percent',
|
404 |
+
'pitches',
|
405 |
+
'heart_zone_percent',
|
406 |
+
'shadow_zone_percent',
|
407 |
+
'chase_zone_percent',
|
408 |
+
'waste_zone_percent',
|
409 |
+
'csw_percent',
|
410 |
+
'whiff_rate',
|
411 |
+
'chase_percent',
|
412 |
+
'bip',
|
413 |
+
'xwoba_percent_contact'
|
414 |
+
]]
|
415 |
|
416 |
+
### GET COLOURS##
|
417 |
+
import matplotlib.colors
|
418 |
+
import matplotlib.colors as mcolors
|
419 |
+
def get_color(value,normalize):
|
420 |
+
color = cmap_sum(normalize(value))
|
421 |
+
return mcolors.to_hex(color)
|
422 |
|
423 |
+
normalize = mcolors.Normalize(vmin=table_left['pitch_percent']*0.5,
|
424 |
+
vmax=table_left['pitch_percent']*1.5) # Define the range of values
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
425 |
|
|
|
|
|
|
|
|
|
|
|
426 |
|
427 |
|
428 |
+
df_colour_left = pd.DataFrame(data=[[get_color(x,normalize) for x in pivot_table_l.loc[0]],
|
429 |
+
[get_color(x,normalize) for x in pivot_table_l.loc[1]],
|
430 |
+
[get_color(x,normalize) for x in pivot_table_l.loc[2]]],)
|
|
|
431 |
|
|
|
432 |
|
433 |
+
table_left['pitch_percent'] = table_left['pitch_percent'].map('{:.1%}'.format)
|
434 |
+
table_left['pitches'] = table_left['pitches'].astype(int).astype(str)
|
435 |
+
# table_left['pa'] = table_left['pa'].astype(int).astype(str)
|
436 |
+
# table_left['k_percent'] = table_left['k_percent'].map('{:.1%}'.format)
|
437 |
+
# table_left['bb_percent'] = table_left['bb_percent'].map('{:.1%}'.format)
|
438 |
+
table_left['heart_zone_percent'] = table_left['heart_zone_percent'].map('{:.1%}'.format)
|
439 |
+
table_left['shadow_zone_percent'] = table_left['shadow_zone_percent'].map('{:.1%}'.format)
|
440 |
+
table_left['chase_zone_percent'] = table_left['chase_zone_percent'].map('{:.1%}'.format)
|
441 |
+
table_left['waste_zone_percent'] = table_left['waste_zone_percent'].map('{:.1%}'.format)
|
442 |
+
table_left['csw_percent'] = table_left['csw_percent'].map('{:.1%}'.format)
|
443 |
+
table_left['whiff_rate'] = table_left['whiff_rate'].map('{:.1%}'.format)
|
444 |
+
table_left['chase_percent'] = table_left['chase_percent'].map('{:.1%}'.format)
|
445 |
+
table_left['bip'] = table_left['bip'].astype(int).astype(str)
|
446 |
+
table_left['xwoba_percent_contact'] = table_left['xwoba_percent_contact'].map('{:.3f}'.format)
|
447 |
+
table_left.columns = ['Usage%','Pitches','Heart%','Shadow%','Chase%','Waste%','CSW%','Whiff%','O-Swing%','BBE','xwOBACON']
|
448 |
|
|
|
|
|
|
|
|
|
|
|
449 |
|
450 |
+
table_left = table_left.replace({'nan%':'—'})
|
451 |
+
table_left = table_left.replace({'nan':'—'})
|
452 |
+
table_left = table_left.T
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
453 |
|
|
|
454 |
|
455 |
+
table_right = df_summ[df_summ['batter_hand']=='R'][['pitch_percent',
|
456 |
+
'pitches',
|
457 |
+
'heart_zone_percent',
|
458 |
+
'shadow_zone_percent',
|
459 |
+
'chase_zone_percent',
|
460 |
+
'waste_zone_percent',
|
461 |
+
'csw_percent',
|
462 |
+
'whiff_rate',
|
463 |
+
'chase_percent',
|
464 |
+
'bip',
|
465 |
+
'xwoba_percent_contact'
|
466 |
+
]]
|
467 |
|
468 |
+
normalize = mcolors.Normalize(vmin=table_right['pitch_percent']*0.5,
|
469 |
+
vmax=table_right['pitch_percent']*1.5) # Define the range of values
|
470 |
|
|
|
|
|
|
|
|
|
|
|
471 |
|
472 |
+
df_colour_right = pd.DataFrame(data=[[get_color(x,normalize) for x in pivot_table_r.loc[0]],
|
473 |
+
[get_color(x,normalize) for x in pivot_table_r.loc[1]],
|
474 |
+
[get_color(x,normalize) for x in pivot_table_r.loc[2]]],)
|
|
|
|
|
475 |
|
476 |
|
|
|
|
|
|
|
|
|
|
|
477 |
|
478 |
+
table_right['pitch_percent'] = table_right['pitch_percent'].map('{:.1%}'.format)
|
479 |
+
table_right['pitches'] = table_right['pitches'].astype(int).astype(str)
|
480 |
+
# table_right['pa'] = table_right['pa'].astype(int).astype(str)
|
481 |
+
# table_right['k_percent'] = table_right['k_percent'].map('{:.1%}'.format)
|
482 |
+
# table_right['bb_percent'] = table_right['bb_percent'].map('{:.1%}'.format)
|
483 |
+
table_right['heart_zone_percent'] = table_right['heart_zone_percent'].map('{:.1%}'.format)
|
484 |
+
table_right['shadow_zone_percent'] = table_right['shadow_zone_percent'].map('{:.1%}'.format)
|
485 |
+
table_right['chase_zone_percent'] = table_right['chase_zone_percent'].map('{:.1%}'.format)
|
486 |
+
table_right['waste_zone_percent'] = table_right['waste_zone_percent'].map('{:.1%}'.format)
|
487 |
+
table_right['csw_percent'] = table_right['csw_percent'].map('{:.1%}'.format)
|
488 |
+
table_right['whiff_rate'] = table_right['whiff_rate'].map('{:.1%}'.format)
|
489 |
+
table_right['chase_percent'] = table_right['chase_percent'].map('{:.1%}'.format)
|
490 |
+
table_right['bip'] = table_right['bip'].astype(int).astype(str)
|
491 |
+
table_right['xwoba_percent_contact'] = table_right['xwoba_percent_contact'].map('{:.3f}'.format)
|
492 |
+
table_right.columns = ['Usage%','Pitches','Heart%','Shadow%','Chase%','Waste%','CSW%','Whiff%','O-Swing%','BBE','xwOBACON']
|
493 |
+
|
494 |
+
|
495 |
+
table_right = table_right.replace({'nan%':'—'})
|
496 |
+
table_right = table_right.replace({'nan':'—'})
|
497 |
+
table_right = table_right.T
|
498 |
+
|
499 |
+
|
500 |
+
import matplotlib.pyplot as plt
|
501 |
+
import seaborn as sns
|
502 |
+
import matplotlib.gridspec as gridspec
|
503 |
+
from matplotlib.gridspec import GridSpec
|
504 |
|
505 |
+
# Assuming you have a list of pitch locations called 'pitch_locations'
|
506 |
+
# where each location is a tuple of (x, y) coordinates
|
507 |
|
508 |
+
fig = plt.figure(figsize=(16, 9))
|
509 |
+
fig.set_facecolor('white')
|
510 |
+
sns.set_theme(style="whitegrid", palette=colour_palette)
|
511 |
+
gs = GridSpec(3, 5, height_ratios=[2,9,1],width_ratios=[2,9,0.5,9,2])
|
512 |
+
gs.update(hspace=0.2, wspace=0.2)
|
513 |
|
514 |
+
# Add subplots to the grid
|
515 |
+
axheader = fig.add_subplot(gs[0, :])
|
516 |
+
ax_left = fig.add_subplot(gs[1, 1])
|
517 |
+
ax_right = fig.add_subplot(gs[1, 3])
|
518 |
|
519 |
+
axfooter = fig.add_subplot(gs[-1, :])
|
520 |
|
|
|
|
|
|
|
|
|
521 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
522 |
|
523 |
+
if df_plot[df_plot['batter_hand']=='L'].shape[0] > 3:
|
524 |
+
sns.kdeplot(data=df_plot[df_plot['batter_hand']=='L'],
|
525 |
+
x='px',
|
526 |
+
y='pz',
|
527 |
+
cmap=cmap_sum,
|
528 |
+
shade=True,
|
529 |
+
ax=ax_left,
|
530 |
+
thresh=0.3,
|
531 |
+
bw_adjust=0.5)
|
532 |
+
else:
|
533 |
+
sns.scatterplot(data=df_plot[df_plot['batter_hand']=='L'],
|
534 |
+
x='px',
|
535 |
+
y='pz',
|
536 |
+
cmap=cmap_sum,
|
537 |
+
ax=ax_left,
|
538 |
+
s=125)
|
539 |
|
540 |
+
if df_plot[df_plot['batter_hand']=='R'].shape[0] > 3:
|
541 |
+
sns.kdeplot(data=df_plot[df_plot['batter_hand']=='R'],
|
542 |
+
x='px',
|
543 |
+
y='pz',
|
544 |
+
cmap=cmap_sum,
|
545 |
+
shade=True,
|
546 |
+
ax=ax_right,
|
547 |
+
thresh=0.3,
|
548 |
+
bw_adjust=0.5)
|
549 |
+
else:
|
550 |
+
sns.scatterplot(data=df_plot[df_plot['batter_hand']=='R'],
|
551 |
+
x='px',
|
552 |
+
y='pz',
|
553 |
+
cmap=cmap_sum,
|
554 |
+
ax=ax_right,
|
555 |
+
s=125)
|
556 |
|
557 |
+
draw_line(ax_left,alpha_spot=1,catcher_p = False)
|
558 |
+
draw_line(ax_right,alpha_spot=1,catcher_p = False)
|
559 |
|
560 |
+
ax_left.axis('off')
|
561 |
+
ax_right.axis('off')
|
562 |
|
563 |
+
ax_left.axis('square')
|
564 |
+
ax_right.axis('square')
|
|
|
565 |
|
566 |
+
ax_left.set_xlim(-2.75,2.75)
|
567 |
+
ax_right.set_xlim(-2.75,2.75)
|
|
|
|
|
|
|
|
|
568 |
|
569 |
+
ax_left.set_ylim(-0.5,5)
|
570 |
+
ax_right.set_ylim(-0.5,5)
|
571 |
|
572 |
|
573 |
+
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
574 |
|
575 |
+
import matplotlib.image as mpimg
|
576 |
+
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
|
|
|
577 |
|
578 |
+
# Load the image
|
579 |
+
img = mpimg.imread('left.png')
|
580 |
+
imagebox = OffsetImage(img, zoom=0.7) # adjust zoom as needed
|
581 |
+
ab = AnnotationBbox(imagebox, (1.25, -0.5), box_alignment=(0, 0), frameon=False)
|
582 |
+
ax_left.add_artist(ab)
|
583 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
584 |
|
585 |
+
# Load the image
|
586 |
+
img = mpimg.imread('right.png')
|
587 |
+
imagebox = OffsetImage(img, zoom=0.7) # adjust zoom as needed
|
588 |
+
# Create an AnnotationBbox
|
589 |
+
ab = AnnotationBbox(imagebox, (-1.25, -0.5), box_alignment=(1, 0), frameon=False)
|
590 |
|
591 |
+
ax_right.add_artist(ab)
|
592 |
|
593 |
|
594 |
+
from matplotlib.transforms import Bbox
|
595 |
+
# Create a transformation that converts from data coordinates to axes coordinates
|
596 |
+
trans = ax_left.transData + ax_left.transAxes.inverted()
|
597 |
+
|
598 |
+
# Calculate the bbox in axes coordinates
|
599 |
+
bbox_data = Bbox.from_bounds(-4.2, -0.5, 2.5, 5) # replace width and height with the desired values
|
600 |
+
bbox_axes = trans.transform_bbox(bbox_data)
|
601 |
+
|
602 |
+
|
603 |
+
table_left_plot = ax_left.table(cellText=table_left.reset_index().values,
|
604 |
+
loc='right',
|
605 |
+
cellLoc='center',
|
606 |
+
colWidths=[0.52,0.3],
|
607 |
+
bbox=bbox_axes.bounds,zorder=100)
|
608 |
+
|
609 |
+
|
610 |
+
min_font_size = 14
|
611 |
+
# Set table properties
|
612 |
+
table_left_plot.auto_set_font_size(False)
|
613 |
+
#table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10)))
|
614 |
+
table_left_plot.set_fontsize(min_font_size)
|
615 |
+
#table_left_plot.scale(1,3)
|
616 |
+
# Calculate the bbox in axes coordinates
|
617 |
+
bbox_data = Bbox.from_bounds(-0.75, 5, 2.5, 1) # replace width and height with the desired values
|
618 |
+
bbox_axes = trans.transform_bbox(bbox_data)
|
619 |
+
|
620 |
+
def format_as_percentage(val):
|
621 |
+
return f'{val * 100:.0f}%'
|
622 |
+
|
623 |
+
table_left_plot_pivot = ax_left.table(cellText=[[format_as_percentage(val) for val in row] for row in pivot_table_l.values],
|
624 |
+
colLabels =pivot_table_l.columns,
|
625 |
+
rowLabels =[' 0 ',' 1 ',' 2 '],
|
626 |
+
loc='center',
|
627 |
+
cellLoc='center',
|
628 |
+
colWidths=[0.3,0.3,0.30,0.3],
|
629 |
+
bbox=bbox_axes.bounds,zorder=100,cellColours =df_colour_left.values)
|
630 |
+
|
631 |
+
|
632 |
+
min_font_size = 11
|
633 |
+
# Set table properties
|
634 |
+
table_left_plot_pivot.auto_set_font_size(False)
|
635 |
+
#table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10)))
|
636 |
+
table_left_plot_pivot.set_fontsize(min_font_size)
|
637 |
|
|
|
|
|
638 |
|
|
|
|
|
|
|
|
|
639 |
|
|
|
|
|
640 |
|
641 |
+
# Create a transformation that converts from data coordinates to axes coordinates
|
642 |
+
trans = ax_right.transData + ax_right.transAxes.inverted()
|
643 |
|
644 |
+
# Calculate the bbox in axes coordinates
|
645 |
+
bbox_data = Bbox.from_bounds(1.7, -0.5, 2.5, 5) # replace width and height with the desired values
|
646 |
+
bbox_axes = trans.transform_bbox(bbox_data)
|
647 |
|
648 |
|
649 |
+
table_right_plot = ax_right.table(cellText=table_right.reset_index().values,
|
650 |
+
loc='right',
|
651 |
+
cellLoc='center',
|
652 |
+
colWidths=[0.52,0.3],
|
653 |
+
bbox=bbox_axes.bounds,zorder=100)
|
654 |
|
655 |
|
|
|
|
|
656 |
|
657 |
+
min_font_size = 14
|
658 |
+
# Set table properties
|
659 |
+
table_right_plot.auto_set_font_size(False)
|
660 |
+
#table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10)))
|
661 |
+
table_right_plot.set_fontsize(min_font_size)
|
662 |
+
table_right_plot.scale(0.5,3)
|
663 |
|
664 |
+
# Calculate the bbox in axes coordinates
|
665 |
+
# Create a transformation that converts from data coordinates to axes coordinates
|
666 |
+
trans = ax_right.transData + ax_right.transAxes.inverted()
|
667 |
+
bbox_data = Bbox.from_bounds(-0.75, 5, 2.5, 1) # replace width and height with the desired values
|
668 |
+
bbox_axes = trans.transform_bbox(bbox_data)
|
669 |
|
670 |
+
table_right_plot_pivot = ax_right.table(cellText=[[format_as_percentage(val) for val in row] for row in pivot_table_r.values],
|
671 |
+
colLabels =pivot_table_r.columns,
|
672 |
+
rowLabels =[' 0 ',' 1 ',' 2 '],
|
673 |
+
loc='center',
|
674 |
+
cellLoc='center',
|
675 |
+
colWidths=[0.3,0.3,0.30,0.3],
|
676 |
+
bbox=bbox_axes.bounds,zorder=100,cellColours =df_colour_right.values)
|
677 |
+
|
678 |
+
|
679 |
+
min_font_size = 11
|
680 |
+
# Set table properties
|
681 |
+
table_right_plot_pivot.auto_set_font_size(False)
|
682 |
+
#table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10)))
|
683 |
+
table_right_plot_pivot.set_fontsize(min_font_size)
|
684 |
+
|
685 |
+
from matplotlib.cm import ScalarMappable
|
686 |
+
from matplotlib.colors import Normalize
|
687 |
+
# Create a ScalarMappable with the same colormap and normalization
|
688 |
+
sm = ScalarMappable(cmap=cmap_sum, norm=Normalize(vmin=0, vmax=1))
|
689 |
+
|
690 |
+
#from mpl_toolkits.axes_grid1.inset_locator import inset_axes
|
691 |
+
#######################
|
692 |
+
# Create a new Subplot object for the colorbar
|
693 |
+
# Create a new Axes object for the colorbar at the bottom middle of the figure
|
694 |
+
cbar = fig.colorbar(sm, ax=axfooter, orientation='horizontal',aspect=100)
|
695 |
+
# cbar.ax.set_aspect(20)
|
696 |
+
|
697 |
+
# cbar = plt.colorbar(batter_plot, cax=ax12, orientation='vertical',shrink=1, cmap=cmap_hue)
|
698 |
+
|
699 |
+
# cbar = plt.colorbar(batter_plot, cax=ax12, orientation='vertical',shrink=1)
|
700 |
+
cbar.set_ticks([])
|
701 |
+
# # Create an inset axes for the colorbar
|
702 |
+
# cax = inset_axes(axfooter,
|
703 |
+
# width="50%", # width = 50% of parent_bbox width
|
704 |
+
# height="100%", # height : 5%
|
705 |
+
# loc='center')
|
706 |
+
|
707 |
+
# # Add the colorbar to the inset axes
|
708 |
+
# cbar = fig.colorbar(sm, cax=cax, orientation='horizontal')
|
709 |
+
# # Set the labels on the low and high ends of the colorbar
|
710 |
+
# # Set the xticks to only include the low and high ends of the colorbar
|
711 |
+
cbar.set_ticks([sm.norm.vmin, sm.norm.vmax])
|
712 |
+
|
713 |
+
# # Set the labels on the low and high ends of the colorbar
|
714 |
+
cbar.ax.set_xticklabels(['Least', 'Most'])
|
715 |
+
# # Place the xticks on top of the colorbar
|
716 |
+
cbar.ax.tick_params(labeltop=True, labelbottom=False, labelsize=14)
|
717 |
+
|
718 |
+
# # Get the labels
|
719 |
+
labels = cbar.ax.get_xticklabels()
|
720 |
+
|
721 |
+
# # Set the alignment of the labels
|
722 |
+
labels[0].set_horizontalalignment('left')
|
723 |
+
labels[-1].set_horizontalalignment('right')
|
724 |
+
# # Get the labels
|
725 |
+
labels = cbar.ax.get_xticklabels()
|
726 |
+
|
727 |
+
# # Set the font size of the labels
|
728 |
+
# for label in labels:
|
729 |
+
# label.set_fontsize(16)
|
730 |
+
|
731 |
+
# # Set the labels
|
732 |
+
cbar.ax.set_xticklabels(labels)
|
733 |
+
# # Remove the tick lines on the colorbar
|
734 |
+
cbar.ax.tick_params(length=0)
|
735 |
+
|
736 |
+
|
737 |
+
|
738 |
+
axfooter.text(x=0.02,y=0.5,s='By: Thomas Nestico\n @TJStats',fontname='Calibri',ha='left',fontsize=18,va='top')
|
739 |
+
axfooter.text(x=1-0.02,y=0.5,s='Data: MLB',ha='right',fontname='Calibri',fontsize=18,va='top')
|
740 |
+
|
741 |
+
axfooter.axis('off')
|
742 |
+
|
743 |
+
|
744 |
+
axheader.text(x=0.5,y=1.2,s=f"{df_plot['pitcher_name'].values[0]} - {df_plot['pitcher_hand'].values[0]}HP\n{season} {df_plot['pitch_description'].values[0]} Pitch Frequency",ha='center',fontsize=24,va='top')
|
745 |
+
axheader.axis('off')
|
746 |
|
747 |
|
|
|
748 |
import urllib
|
749 |
import urllib.request
|
750 |
import urllib.error
|
751 |
from urllib.error import HTTPError
|
752 |
|
753 |
try:
|
754 |
+
url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/{df_plot["pitcher_id"].values[0]}/headshot/67/current.png'
|
755 |
test_mage = plt.imread(url)
|
756 |
except urllib.error.HTTPError as err:
|
757 |
url = f'https://img.mlbstatic.com/mlb-photos/image/upload/d_people:generic:headshot:67:current.png/w_213,q_auto:best/v1/people/1/headshot/67/current.png'
|
758 |
+
imagebox = OffsetImage(test_mage, zoom = 0.4)
|
759 |
+
ab = AnnotationBbox(imagebox, (0.075, 0.4), frameon = False)
|
760 |
+
axheader.add_artist(ab)
|
|
|
761 |
|
762 |
+
player_bio = requests.get(url=f"https://statsapi.mlb.com/api/v1/people?personIds={df_plot['pitcher_id'].values[0]}&hydrate=currentTeam").json()
|
763 |
|
|
|
|
|
|
|
|
|
764 |
|
765 |
+
team_logos = pd.read_csv('team_logos.csv')
|
766 |
+
|
767 |
+
|
768 |
+
mlb_stats = MLB_Scrape()
|
769 |
+
teams_df = mlb_stats.get_teams()
|
770 |
+
team_logo_dict = teams_df.set_index(['team_id'])['parent_org_id'].to_dict()
|
771 |
|
772 |
|
773 |
if 'currentTeam' in player_bio['people'][0]:
|
|
|
779 |
# im = Image.open(BytesIO(response.content))
|
780 |
# im = plt.imread(team_logos[team_logos['id'] == player_bio['people'][0]['currentTeam']['parentOrgId']]['imageLink'].values[0])
|
781 |
# ax = fig.add_axes([0,0,1,0.85], anchor='C', zorder=1)
|
782 |
+
imagebox = OffsetImage(im, zoom = 0.3)
|
783 |
+
ab = AnnotationBbox(imagebox, (0.925, 0.40), frameon = False)
|
784 |
+
axheader.add_artist(ab)
|
785 |
except IndexError:
|
786 |
print()
|
|
|
|
|
|
|
|
|
|
|
|
|
787 |
|
788 |
|
789 |
+
ax_left.text(s='Against LHH',x=-2.95,y=4.65,fontsize=18,fontweight='bold',ha='center')
|
790 |
+
ax_right.text(s='Against RHH',x=2.95,y=4.65,fontsize=18,fontweight='bold',ha='center')
|
791 |
+
# Center the labels
|
|
|
792 |
|
793 |
+
ax_left.text(x=-1.72, y=5.08, s='Strikes', rotation=90,fontweight='bold')
|
794 |
+
ax_right.text(x=-1.72, y=5.08, s='Strikes', rotation=90,fontweight='bold')
|
795 |
|
796 |
+
ax_left.text(x=0, y=6.1, s='Balls',ha='center',fontweight='bold')
|
797 |
+
ax_right.text(x=0, y=6.1, s='Balls',ha='center',fontweight='bold')
|
798 |
+
#cbar.ax.set_xticklabels(cbar.ax.get_xticklabels(), ha='center')
|
799 |
+
fig.subplots_adjust(left=0.01, right=0.99, top=0.95, bottom=0.05)
|
800 |
+
return
|
801 |
|
802 |
app = App(app_ui, server)
|
left.png
ADDED
![]() |
pitcher_update.py
ADDED
@@ -0,0 +1,562 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
import joblib
|
4 |
+
import math
|
5 |
+
import pickle
|
6 |
+
|
7 |
+
# Pre-trained estimators, deserialised once at import time from the local
# ``joblib_model`` directory (paths are relative to the working directory).
# NOTE: joblib files are pickle-based; only load artefacts from a trusted source.

# Batted-ball models: fed launch_speed / launch_angle in df_update.
loaded_model = joblib.load('joblib_model/barrel_model.joblib')
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')

# Location models: fed px / pz / sz_top / sz_bot in df_update.
in_zone_model = joblib.load('joblib_model/in_zone_model_knn_20240410.joblib')
attack_zone_model = joblib.load('joblib_model/model_attack_zone.joblib')

# Coordinate models used to fill in missing px / pz.
# NOTE(review): df_update only calls px_model (for both px and pz);
# pz_model is not referenced there -- confirm whether that is intended.
px_model = joblib.load('joblib_model/linear_reg_model_x.joblib')
pz_model = joblib.load('joblib_model/linear_reg_model_z.joblib')
|
13 |
+
|
14 |
+
|
15 |
+
def percentile(n):
    """Build an aggregation callable that returns the ``n``-th percentile.

    The returned function ignores NaN values (``np.nanpercentile``) and is
    renamed to ``percentile_<n>`` so it carries a distinct ``__name__`` when
    handed to ``DataFrame.agg``.
    """
    def percentile_(values):
        """Return the NaN-ignoring ``n``-th percentile of ``values``."""
        result = np.nanpercentile(values, n)
        return result

    label = 'percentile_%s' % n
    percentile_.__name__ = label
    return percentile_
|
20 |
+
|
21 |
+
|
22 |
+
def df_update(df=pd.DataFrame()):
    """Add per-pitch outcome flags and model-derived columns to ``df``.

    ``df`` is modified in place and also returned.  Columns read:
    ``sz_top``, ``sz_bot``, ``zone``, ``x``, ``y``, ``px``, ``pz``,
    ``event_type``, ``play_description``, ``play_code``, ``is_swing``,
    ``is_pitch``, ``launch_speed``, ``launch_angle`` and ``trajectory``.

    Relies on the module-level estimators ``px_model``, ``in_zone_model``,
    ``loaded_model`` (barrel), ``attack_zone_model`` and ``xwoba_model``.
    Every model call is skipped when no row qualifies, so the estimators
    are never handed an empty input.

    Returns:
        The same DataFrame with the additional columns attached.
    """
    # A strike-zone edge of exactly 0 is treated as missing.
    df.loc[df['sz_top'] == 0, 'sz_top'] = np.nan
    df.loc[df['sz_bot'] == 0, 'sz_bot'] = np.nan

    # zone 1-9 -> True, zone >= 10 -> False, anything not > 0 (incl. NaN) -> NaN.
    df['in_zone'] = [x < 10 if x > 0 else np.nan for x in df['zone']]

    # Fill missing px / pz from the x / y coordinates, each guarded separately.
    missing_px = df['x'].notna() & df['px'].isna()
    if missing_px.any():
        df.loc[missing_px, 'px'] = px_model.predict(df.loc[missing_px, ['x']])
    missing_pz = df['y'].notna() & df['pz'].isna()
    if missing_pz.any():
        # NOTE(review): the x-model plus a 3.2 offset is applied to y here;
        # a separate pz_model is loaded by this module but unused -- confirm
        # this is deliberate before changing it.
        df.loc[missing_pz, 'pz'] = px_model.predict(df.loc[missing_pz, ['y']]) + 3.2

    # Rows still lacking in_zone but with full location data get a model call.
    zone_features = ['px', 'pz', 'sz_top', 'sz_bot']
    needs_zone = df['in_zone'].isna() & df[zone_features].notna().all(axis=1)
    if needs_zone.any():
        print('We found missing data')
        df.loc[needs_zone, 'in_zone'] = in_zone_model.predict(
            df.loc[needs_zone, zone_features].values)

    hit_codes = ['single', 'double', 'home_run', 'triple']

    ab_codes = ['single', 'strikeout', 'field_out',
                'grounded_into_double_play', 'fielders_choice', 'force_out',
                'double', 'field_error', 'home_run', 'triple',
                'double_play',
                'fielders_choice_out', 'strikeout_double_play',
                'other_out', 'triple_play']

    obp_true_codes = ['single', 'walk',
                      'double', 'home_run', 'triple',
                      'hit_by_pitch', 'intent_walk']

    obp_codes = ['single', 'strikeout', 'walk', 'field_out',
                 'grounded_into_double_play', 'fielders_choice', 'force_out',
                 'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                 'hit_by_pitch', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play',
                 'other_out', 'triple_play']

    # Boolean membership flags on the plate-appearance result.
    df['hits'] = df['event_type'].isin(hit_codes)
    df['ab'] = df['event_type'].isin(ab_codes)
    df['on_base'] = df['event_type'].isin(obp_true_codes)
    df['obp'] = df['event_type'].isin(obp_codes)

    bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']
    df['bip'] = df['play_description'].isin(bip_codes)

    # bip_div marks rows with a measured exit velocity; it is the denominator
    # used for the batted-ball rate stats downstream.
    df['bip_div'] = ~df['launch_speed'].isna()
    df['barrel'] = loaded_model.predict(
        df[['launch_speed', 'launch_angle']].fillna(0).values)

    # np.select with default=np.nan: a missing measurement gives 0, a match
    # gives 1, and a measured value outside the range stays NaN.
    df['sweet_spot'] = np.select(
        [df['launch_angle'].isna(),
         (df['launch_angle'] >= 8) & (df['launch_angle'] <= 32)],
        [False, True], default=np.nan)

    df['hard_hit'] = np.select(
        [df['launch_speed'].isna(), df['launch_speed'] >= 94.5],
        [False, True], default=np.nan)

    # Total bases; non-hits stay NaN.
    df['tb'] = np.select(
        [df['event_type'] == 'single',
         df['event_type'] == 'double',
         df['event_type'] == 'triple',
         df['event_type'] == 'home_run'],
        [1, 2, 3, 4], default=np.nan)

    out_events = ['strikeout', 'field_out', 'sac_fly', 'force_out',
                  'grounded_into_double_play', 'fielders_choice', 'field_error',
                  'sac_bunt', 'double_play', 'fielders_choice_out',
                  'strikeout_double_play', 'sac_fly_double_play', 'other_out']

    # Linear weight per event; events not listed stay NaN.
    df['woba'] = np.select(
        [df['event_type'].isin(out_events),
         df['event_type'] == 'walk',
         df['event_type'] == 'hit_by_pitch',
         df['event_type'] == 'single',
         df['event_type'] == 'double',
         df['event_type'] == 'triple',
         df['event_type'] == 'home_run'],
        [0, 0.696, 0.726, 0.883, 1.244, 1.569, 2.004], default=np.nan)

    woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                  'double', 'sac_fly', 'force_out', 'home_run',
                  'grounded_into_double_play', 'fielders_choice', 'field_error',
                  'triple', 'sac_bunt', 'double_play',
                  'fielders_choice_out', 'strikeout_double_play',
                  'sac_fly_double_play', 'other_out']

    # 1 for events that count toward the wOBA denominator, NaN otherwise.
    df['woba_codes'] = np.select(
        [df['event_type'].isin(woba_codes)], [1], default=np.nan)

    # wOBA value restricted to balls in play.
    df['woba_contact'] = df['woba'].where(df['bip'] == 1)

    df['whiffs'] = df['play_code'].isin(['S', 'W', 'T']).astype(int)
    df['csw'] = df['play_code'].isin(['S', 'W', 'T', 'C']).astype(int)
    df['swings'] = [1 if x == True else 0 for x in df.is_swing]

    df['out_zone'] = df.in_zone == False
    df['zone_swing'] = (df.in_zone == True) & (df.swings == 1)
    df['zone_contact'] = (df.in_zone == True) & (df.swings == 1) & (df.whiffs == 0)
    df['ozone_swing'] = (df.in_zone == False) & (df.swings == 1)
    df['ozone_contact'] = (df.in_zone == False) & (df.swings == 1) & (df.whiffs == 0)

    # Any event whose name contains 'strikeout' counts as a strikeout.
    strikeout_events = [event for event in df.event_type.dropna().unique()
                        if 'strikeout' in event]
    df['k'] = df.event_type.isin(strikeout_events)
    df['bb'] = df.event_type.isin(['walk', 'intent_walk'])

    df['k_minus_bb'] = df['k'].astype(np.float32) - df['bb'].astype(np.float32)
    df['bb_minus_k'] = df['bb'].astype(np.float32) - df['k'].astype(np.float32)

    # A row ends a plate appearance when it carries a string event_type.
    df['pa'] = [1 if isinstance(x, str) else 0 for x in df.event_type]
    df['pitches'] = [1 if x else 0 for x in df.is_pitch]

    # The barrel model was fed 0-filled inputs; blank out rows with no exit velocity.
    df.loc[df['launch_speed'].isna(), 'barrel'] = np.nan

    # Constant key so the whole frame can be grouped as a single row.
    df['average'] = 'average'

    # Fold bunt trajectories into the regular categories; '' means missing.
    trajectory_fixes = {
        'bunt_popup': 'popup',
        'bunt_grounder': 'ground_ball',
        '': np.nan,
        'bunt_line_drive': 'line_drive',
    }
    for raw_value, clean_value in trajectory_fixes.items():
        df.loc[df['trajectory'] == raw_value, 'trajectory'] = clean_value

    # reindex guarantees all four indicator columns exist even when a
    # category never occurs in this frame (a plain assignment would raise).
    trajectory_cols = ['trajectory_fly_ball', 'trajectory_ground_ball',
                       'trajectory_line_drive', 'trajectory_popup']
    trajectory_dummies = pd.get_dummies(df['trajectory'], prefix='trajectory')
    df[trajectory_cols] = trajectory_dummies.reindex(
        columns=trajectory_cols, fill_value=0)

    # Attack zone is only predicted for rows with complete location data.
    df['attack_zone'] = np.nan
    located = df[zone_features].notna().all(axis=1)
    if located.any():
        df.loc[located, 'attack_zone'] = attack_zone_model.predict(
            df.loc[located, zone_features])

    # Attack-zone codes as used below: 0 heart, 1 shadow, 2 chase, 3 waste.
    attack_zones = ['heart', 'shadow', 'chase', 'waste']
    for code, zone_name in enumerate(attack_zones):
        df[zone_name] = df['attack_zone'] == code
    for code, zone_name in enumerate(attack_zones):
        df[zone_name + '_swing'] = (df['attack_zone'] == code) & (df['swings'] == 1)
    for code, zone_name in enumerate(attack_zones):
        df[zone_name + '_whiff'] = (df['attack_zone'] == code) & (df['whiffs'] == 1)

    df['woba_pred'] = np.nan
    df['woba_pred_contact'] = np.nan

    # Expected wOBA on contact: class probabilities weighted by the values
    # below.  NOTE(review): assumes predict_proba's five columns are ordered
    # out, single, double, triple, home run -- confirm against the model.
    batted = df[['launch_angle', 'launch_speed']].notna().all(axis=1)
    if batted.any():
        outcome_weights = np.array([0, 0.883, 1.244, 1.569, 2.004])
        probabilities = xwoba_model.predict_proba(
            df.loc[batted, ['launch_angle', 'launch_speed']])
        expected = (probabilities * outcome_weights).sum(axis=1)
        df.loc[batted, 'woba_pred'] = expected
        df.loc[batted, 'woba_pred_contact'] = expected

    # Fixed values for non-contact outcomes; these override any model value
    # and do not depend on batted-ball rows being present.
    df.loc[df['event_type'].isin(['walk']), 'woba_pred'] = 0.696
    df.loc[df['event_type'].isin(['hit_by_pitch']), 'woba_pred'] = 0.726
    df.loc[df['event_type'].isin(['strikeout', 'strikeout_double_play']), 'woba_pred'] = 0

    return df
|
293 |
+
|
294 |
+
def _summary_aggregations():
    """Return the named-aggregation spec shared by the summary tables."""
    return {
        'pa': ('pa', 'sum'),
        'ab': ('ab', 'sum'),
        'obp_pa': ('obp', 'sum'),
        'hits': ('hits', 'sum'),
        'on_base': ('on_base', 'sum'),
        'k': ('k', 'sum'),
        'bb': ('bb', 'sum'),
        'bb_minus_k': ('bb_minus_k', 'sum'),
        'csw': ('csw', 'sum'),
        'bip': ('bip', 'sum'),
        'bip_div': ('bip_div', 'sum'),
        'tb': ('tb', 'sum'),
        'woba': ('woba', 'sum'),
        'woba_contact': ('woba_contact', 'sum'),
        'xwoba': ('woba_pred', 'sum'),
        'xwoba_contact': ('woba_pred_contact', 'sum'),
        'woba_codes': ('woba_codes', 'sum'),
        'hard_hit': ('hard_hit', 'sum'),
        'barrel': ('barrel', 'sum'),
        'sweet_spot': ('sweet_spot', 'sum'),
        'max_launch_speed': ('launch_speed', 'max'),
        'launch_speed_90': ('launch_speed', percentile(90)),
        'launch_speed': ('launch_speed', 'mean'),
        'launch_angle': ('launch_angle', 'mean'),
        'pitches': ('is_pitch', 'sum'),
        'swings': ('swings', 'sum'),
        'in_zone': ('in_zone', 'sum'),
        'out_zone': ('out_zone', 'sum'),
        'whiffs': ('whiffs', 'sum'),
        'zone_swing': ('zone_swing', 'sum'),
        'zone_contact': ('zone_contact', 'sum'),
        'ozone_swing': ('ozone_swing', 'sum'),
        'ozone_contact': ('ozone_contact', 'sum'),
        'ground_ball': ('trajectory_ground_ball', 'sum'),
        'line_drive': ('trajectory_line_drive', 'sum'),
        'fly_ball': ('trajectory_fly_ball', 'sum'),
        'pop_up': ('trajectory_popup', 'sum'),
        # count (not sum): number of pitches that received an attack-zone label
        'attack_zone': ('attack_zone', 'count'),
        'heart': ('heart', 'sum'),
        'shadow': ('shadow', 'sum'),
        'chase': ('chase', 'sum'),
        'waste': ('waste', 'sum'),
        'heart_swing': ('heart_swing', 'sum'),
        'shadow_swing': ('shadow_swing', 'sum'),
        'chase_swing': ('chase_swing', 'sum'),
        'waste_swing': ('waste_swing', 'sum'),
    }


def df_update_summ(df=pd.DataFrame()):
    """Aggregate a pitch-level frame to one row per pitcher.

    Groups by ``pitcher_id`` and ``pitcher_name`` and returns the counting
    and launch statistics listed in ``_summary_aggregations`` with the
    grouping keys restored as ordinary columns.
    """
    df_summ = df.groupby(['pitcher_id', 'pitcher_name']).agg(
        **_summary_aggregations()
    ).reset_index()
    return df_summ


def df_update_summ_avg(df=pd.DataFrame()):
    """Aggregate a pitch-level frame into rows keyed by the ``average`` column.

    Produces the same statistics as ``df_update_summ``.  An ``agg()`` call
    with no aggregations is rejected by pandas with a ``TypeError``, so the
    shared spec is passed explicitly.
    NOTE(review): assumes the average table is meant to mirror the
    per-pitcher columns -- confirm against the callers.
    """
    df_summ_avg = df.groupby(['average']).agg(
        **_summary_aggregations()
    ).reset_index()
    return df_summ_avg
|
350 |
+
|
351 |
+
def _safe_ratio(numerator, denominator, positive_only=False):
    """Divide two Series element-wise, giving NaN where the denominator is unusable.

    The denominator is rejected when it equals 0, or when it is not strictly
    positive if ``positive_only`` is set.
    """
    num = numerator.astype(float)
    den = denominator.astype(float)
    usable = den > 0 if positive_only else den != 0
    return num / den.where(usable)


def df_summ_changes(df_summ=pd.DataFrame()):
    """Derive rate statistics from the summed columns of a summary frame.

    The new columns are written onto ``df_summ`` itself; the returned frame
    is that data with rows whose ``bip`` is NaN dropped.  A ratio is NaN
    whenever its denominator is 0.  All arithmetic is aligned on the frame's
    own index, so any index (filtered, MultiIndex, ...) is supported rather
    than only a default 0..n-1 range.
    """
    for column, num, den in [
        ('avg', 'hits', 'ab'),
        ('obp', 'on_base', 'obp_pa'),
        ('slg', 'tb', 'ab'),
    ]:
        df_summ[column] = _safe_ratio(df_summ[num], df_summ[den])

    df_summ['ops'] = df_summ['obp'] + df_summ['slg']

    # (new column, numerator, denominator, require denominator > 0)
    ratio_specs = [
        ('k_percent', 'k', 'pa', False),
        ('bb_percent', 'bb', 'pa', False),
        ('bb_minus_k_percent', 'bb_minus_k', 'pa', False),
        ('bb_over_k_percent', 'bb', 'k', False),
        ('csw_percent', 'csw', 'pitches', False),
        # Batted-ball quality rates are per measured batted ball (bip_div).
        ('sweet_spot_percent', 'sweet_spot', 'bip_div', False),
        ('woba_percent', 'woba', 'woba_codes', False),
        ('woba_percent_contact', 'woba_contact', 'bip', False),
        ('hard_hit_percent', 'hard_hit', 'bip_div', False),
        ('barrel_percent', 'barrel', 'bip_div', False),
        ('zone_contact_percent', 'zone_contact', 'zone_swing', False),
        ('zone_swing_percent', 'zone_swing', 'in_zone', False),
        ('zone_percent', 'in_zone', 'pitches', True),
    ]
    for column, num, den, positive_only in ratio_specs:
        df_summ[column] = _safe_ratio(df_summ[num], df_summ[den], positive_only)

    # Chase rate: out-of-zone swings over pitches that were not in the zone.
    outside_pitches = (df_summ['pitches'].astype(float)
                       - df_summ['in_zone'].astype(float))
    df_summ['chase_percent'] = _safe_ratio(df_summ['ozone_swing'], outside_pitches)

    ratio_specs = [
        ('chase_contact', 'ozone_contact', 'ozone_swing', False),
        ('swing_percent', 'swings', 'pitches', True),
        ('whiff_rate', 'whiffs', 'swings', False),
        ('swstr_rate', 'whiffs', 'pitches', True),
        ('ground_ball_percent', 'ground_ball', 'bip', False),
        ('line_drive_percent', 'line_drive', 'bip', False),
        ('fly_ball_percent', 'fly_ball', 'bip', False),
        ('pop_up_percent', 'pop_up', 'bip', False),
        ('heart_zone_percent', 'heart', 'attack_zone', False),
        ('shadow_zone_percent', 'shadow', 'attack_zone', False),
        ('chase_zone_percent', 'chase', 'attack_zone', False),
        ('waste_zone_percent', 'waste', 'attack_zone', False),
        ('heart_zone_swing_percent', 'heart_swing', 'heart', False),
        ('shadow_zone_swing_percent', 'shadow_swing', 'shadow', False),
        ('chase_zone_swing_percent', 'chase_swing', 'chase', False),
        ('waste_zone_swing_percent', 'waste_swing', 'waste', False),
        ('xwoba_percent', 'xwoba', 'woba_codes', False),
        ('xwoba_percent_contact', 'xwoba_contact', 'bip', False),
    ]
    for column, num, den, positive_only in ratio_specs:
        df_summ[column] = _safe_ratio(df_summ[num], df_summ[den], positive_only)

    df_summ = df_summ.dropna(subset=['bip'])
    return df_summ
|
431 |
+
|
432 |
+
def df_summ_filter_out(df_summ=pd.DataFrame(), batter_select=0):
    """Filter a summary table to qualified players and rank the selection.

    ``df_summ`` must carry a MultiIndex whose first level holds the player id
    (``xs(..., level=0)`` is used for the lookup).  The plate-appearance
    cut-off is the selected player's ``pa`` rounded down to a multiple of 10,
    capped at 500.

    Args:
        df_summ: summary frame with a ``pa`` column.
        batter_select: first-level index value of the player of interest.

    Returns:
        Tuple ``(qualified, qualified_pct, player, player_pct)``: the rows at
        or above the cut-off, their column-wise percentile ranks, the selected
        player's rows from ``df_summ``, and that player's percentile ranks.

    Raises:
        KeyError: if ``batter_select`` is not present in the index.
        ValueError: if the selection does not resolve to exactly one row.
    """
    df_summ_player = df_summ.xs(batter_select, level=0)

    # Extract the scalar explicitly; implicit float(Series) is deprecated in pandas.
    player_pa = df_summ_player['pa'].item()
    pa_cutoff = min(math.floor(player_pa / 10) * 10, 500)

    df_summ_filter = df_summ[df_summ['pa'] >= pa_cutoff]
    df_summ_filter_pct = df_summ_filter.rank(pct=True, ascending=True)
    df_summ_player_pct = df_summ_filter_pct.xs(batter_select, level=0)
    return df_summ_filter, df_summ_filter_pct, df_summ_player, df_summ_player_pct
|
438 |
+
|
439 |
+
def df_summ_batter_pitch_up(df=pd.DataFrame()):
    """Summarise pitch-level rows per pitcher and pitch type.

    Rows with a missing ``pitch_type`` are dropped, the rest are grouped by
    ``pitcher_id``, ``pitcher_name`` and ``pitch_type``, the counting columns
    are aggregated, and rate columns are derived from those totals.

    Args:
        df: Pitch-level frame containing every source column named in the
            aggregation below. The default empty frame is never mutated, but
            it lacks those columns, so real data must be passed.

    Returns:
        A frame with one row per (pitcher_id, pitcher_name, pitch_type)
        holding the aggregated totals followed by the derived rate columns.
        A rate is NaN wherever its denominator is zero.
    """

    def _safe_ratio(numerator, denominator):
        # Element-wise division that yields NaN where the denominator is 0.
        return numerator.div(denominator).where(denominator != 0)

    summ = (
        df.dropna(subset=['pitch_type'])
        .groupby(['pitcher_id', 'pitcher_name', 'pitch_type'])
        .agg(
            pa=('pa', 'sum'),
            ab=('ab', 'sum'),
            obp_pa=('obp', 'sum'),
            hits=('hits', 'sum'),
            on_base=('on_base', 'sum'),
            k=('k', 'sum'),
            bb=('bb', 'sum'),
            bb_minus_k=('bb_minus_k', 'sum'),
            csw=('csw', 'sum'),
            bip=('bip', 'sum'),
            bip_div=('bip_div', 'sum'),
            tb=('tb', 'sum'),
            woba=('woba', 'sum'),
            # NOTE(review): this sums the *predicted* contact column; confirm
            # whether an actual wOBA-on-contact column should be used instead.
            woba_contact=('woba_pred_contact', 'sum'),
            xwoba=('woba_pred', 'sum'),
            # Previously summed 'woba_pred' (identical to ``xwoba``), which
            # made xwoba_percent_contact divide an all-events total by bip.
            xwoba_contact=('woba_pred_contact', 'sum'),
            woba_codes=('woba_codes', 'sum'),
            hard_hit=('hard_hit', 'sum'),
            barrel=('barrel', 'sum'),
            sweet_spot=('sweet_spot', 'sum'),
            max_launch_speed=('launch_speed', 'max'),
            launch_speed_90=('launch_speed', percentile(90)),
            launch_speed=('launch_speed', 'mean'),
            launch_angle=('launch_angle', 'mean'),
            pitches=('is_pitch', 'sum'),
            swings=('swings', 'sum'),
            in_zone=('in_zone', 'sum'),
            out_zone=('out_zone', 'sum'),
            whiffs=('whiffs', 'sum'),
            zone_swing=('zone_swing', 'sum'),
            zone_contact=('zone_contact', 'sum'),
            ozone_swing=('ozone_swing', 'sum'),
            ozone_contact=('ozone_contact', 'sum'),
            ground_ball=('trajectory_ground_ball', 'sum'),
            line_drive=('trajectory_line_drive', 'sum'),
            fly_ball=('trajectory_fly_ball', 'sum'),
            pop_up=('trajectory_popup', 'sum'),
            # 'count' tallies rows with a non-null attack_zone value.
            attack_zone=('attack_zone', 'count'),
            heart=('heart', 'sum'),
            shadow=('shadow', 'sum'),
            chase=('chase', 'sum'),
            waste=('waste', 'sum'),
            heart_swing=('heart_swing', 'sum'),
            shadow_swing=('shadow_swing', 'sum'),
            chase_swing=('chase_swing', 'sum'),
            waste_swing=('waste_swing', 'sum'),
        )
        .reset_index()
    )

    # Slash line.
    summ['avg'] = _safe_ratio(summ['hits'], summ['ab'])
    summ['obp'] = _safe_ratio(summ['on_base'], summ['obp_pa'])
    summ['slg'] = _safe_ratio(summ['tb'], summ['ab'])
    summ['ops'] = summ['obp'] + summ['slg']

    # Strikeout / walk rates.
    summ['k_percent'] = _safe_ratio(summ['k'], summ['pa'])
    summ['bb_percent'] = _safe_ratio(summ['bb'], summ['pa'])
    summ['bb_minus_k_percent'] = _safe_ratio(summ['bb_minus_k'], summ['pa'])
    summ['bb_over_k_percent'] = _safe_ratio(summ['bb'], summ['k'])

    summ['csw_percent'] = _safe_ratio(summ['csw'], summ['pitches'])

    # Contact quality.
    summ['sweet_spot_percent'] = _safe_ratio(summ['sweet_spot'], summ['bip_div'])
    summ['woba_percent'] = _safe_ratio(summ['woba'], summ['woba_codes'])
    summ['woba_percent_contact'] = _safe_ratio(summ['woba_contact'], summ['bip'])
    summ['hard_hit_percent'] = _safe_ratio(summ['hard_hit'], summ['bip_div'])
    summ['barrel_percent'] = _safe_ratio(summ['barrel'], summ['bip_div'])

    # Plate discipline.
    summ['zone_contact_percent'] = _safe_ratio(summ['zone_contact'], summ['zone_swing'])
    summ['zone_swing_percent'] = _safe_ratio(summ['zone_swing'], summ['in_zone'])
    summ['zone_percent'] = _safe_ratio(summ['in_zone'], summ['pitches'])
    # The denominator is pitches minus in-zone pitches, not the aggregated
    # ``out_zone`` total; kept as in the original computation.
    not_in_zone = summ['pitches'] - summ['in_zone']
    summ['chase_percent'] = _safe_ratio(summ['ozone_swing'], not_in_zone)
    summ['chase_contact'] = _safe_ratio(summ['ozone_contact'], summ['ozone_swing'])
    summ['swing_percent'] = _safe_ratio(summ['swings'], summ['pitches'])
    summ['whiff_rate'] = _safe_ratio(summ['whiffs'], summ['swings'])
    summ['swstr_rate'] = _safe_ratio(summ['whiffs'], summ['pitches'])

    # Attack-zone location shares.
    summ['heart_zone_percent'] = _safe_ratio(summ['heart'], summ['attack_zone'])
    summ['shadow_zone_percent'] = _safe_ratio(summ['shadow'], summ['attack_zone'])
    summ['chase_zone_percent'] = _safe_ratio(summ['chase'], summ['attack_zone'])
    summ['waste_zone_percent'] = _safe_ratio(summ['waste'], summ['attack_zone'])

    # Swing rates within each attack zone.
    summ['heart_zone_swing_percent'] = _safe_ratio(summ['heart_swing'], summ['heart'])
    summ['shadow_zone_swing_percent'] = _safe_ratio(summ['shadow_swing'], summ['shadow'])
    summ['chase_zone_swing_percent'] = _safe_ratio(summ['chase_swing'], summ['chase'])
    summ['waste_zone_swing_percent'] = _safe_ratio(summ['waste_swing'], summ['waste'])

    # Expected wOBA.
    summ['xwoba_percent'] = _safe_ratio(summ['xwoba'], summ['woba_codes'])
    summ['xwoba_percent_contact'] = _safe_ratio(summ['xwoba_contact'], summ['bip'])

    summ['bip'] = summ['bip'].fillna(0)

    return summ
|
right.png
ADDED
![]() |