# 2024_rolling_pitcher / rolling_batter_functions.py
# nesticot's picture
# Upload 2 files
# 0fb0e06 verified
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib
from matplotlib.ticker import MaxNLocator
from matplotlib.gridspec import GridSpec
from scipy.stats import zscore
import math
import matplotlib
from adjustText import adjust_text
import matplotlib.ticker as mtick
import pandas as pd
from matplotlib.pyplot import text
import inflect
# Shared colour cycle. rolling_plot uses index 0 for the player's lines,
# index 1 for the level-average line and indices 2-5 for the percentile guides.
colour_palette = [
    '#FFB000', '#648FFF', '#785EF0', '#DC267F', '#FE6100',
    '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED',
]


def _stat_spec(x_axis, title, x_value, x_range, percentile_label, *,
               percent=True, flip_p=False, percentile=False, avg_adjust=False):
    """Build one ``plot_dict`` entry.

    Every stat uses the same text for its y-axis label and its title, so
    ``title`` fills both keys. Key order matches the original literals.
    """
    return {
        'x_axis': x_axis,                      # x-axis label / unit of the rolling window
        'y_axis': title,                       # y-axis label
        'title': title,                        # stat name used in the plot title
        'x_value': x_value,                    # event-level column that is aggregated
        'x_range': x_range,                    # y-axis tick positions
        'percent': percent,                    # format values and axis as percentages
        'percentile_label': percentile_label,  # summary-table column for this stat
        'flip_p': flip_p,                      # True when the percentile rank is inverted
        'percentile': percentile,              # True: use the 0.9 quantile, not sum/sum
        'avg_adjust': avg_adjust,              # True: print values with 3 decimals
    }


# Per-stat plotting configuration, keyed by stat id.
plot_dict = {
    'k': _stat_spec('Plate Appearances', 'K%', 'k',
                    [0.0, 0.1, 0.2, 0.3, 0.4], 'k_percent', flip_p=True),
    'bb': _stat_spec('Plate Appearances', 'BB%', 'bb',
                     [0.0, 0.1, 0.2, 0.3], 'bb_percent'),
    'bb_minus_k': _stat_spec('Plate Appearances', 'BB-K%', 'bb_minus_k',
                             [-0.3, -0.2, -0.1, 0, 0.1, 0.2], 'bb_minus_k_percent'),
    'csw': _stat_spec('Pitches', 'CSW%', 'csw',
                      [.2, .25, .3, .35, .4], 'csw_percent', flip_p=True),
    'woba': _stat_spec('wOBA PA', 'wOBA', 'woba',
                       [.20, .30, .40, .50], 'woba_percent',
                       percent=False, avg_adjust=True),
    'launch_speed': _stat_spec('Balls In Play', 'Exit Velocity', 'launch_speed',
                               [85, 90, 95, 100], 'launch_speed', percent=False),
    'launch_speed_90': _stat_spec('Balls In Play', '90th Percentile Exit Velocity',
                                  'launch_speed', [95, 100, 105, 110, 115],
                                  'launch_speed_90', percent=False, percentile=True),
    'hard_hit': _stat_spec('Balls In Play', 'HardHit%', 'hard_hit',
                           [0.2, 0.3, 0.4, 0.5, 0.6, 0.7], 'hard_hit_percent'),
    'sweet_spot': _stat_spec('Balls In Play', 'SweetSpot%', 'sweet_spot',
                             [0.2, 0.3, 0.4, 0.5], 'sweet_spot_percent'),
    'launch_angle': _stat_spec('Balls In Play', 'Launch Angle', 'launch_angle',
                               [-20, -10, 0, 10, 20], 'launch_angle', percent=False),
    'barrel': _stat_spec('Balls In Play', 'Barrel%', 'barrel',
                         [0, 0.05, 0.10, .15, .20], 'barrel_percent'),
    'zone_percent': _stat_spec('Pitches', 'Zone%', 'in_zone',
                               [0.3, 0.4, 0.5, 0.6, 0.7], 'zone_percent'),
    'swing_percent': _stat_spec('Pitches', 'Swing%', 'swings',
                                [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], 'swing_percent'),
    'whiff_percent': _stat_spec('Swings', 'Whiff%', 'whiffs',
                                [0.0, 0.1, 0.2, 0.3, 0.4, 0.5], 'whiff_rate',
                                flip_p=True),
    'sw_str': _stat_spec('Pitches', 'SwStr%', 'whiffs',
                         [0.0, 0.05, 0.1, 0.15, 0.2, 0.25], 'swstr_rate',
                         flip_p=True),
    # The original tick list repeated 0.3; the duplicate tick is dropped.
    'zone_swing': _stat_spec('In-Zone Pitches', 'Z-Swing%', 'zone_swing',
                             [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
                             'zone_swing_percent'),
    'zone_contact': _stat_spec('In-Zone Swings', 'Z-Contact%', 'zone_contact',
                               [0.5, 0.6, 0.7, 0.8, 0.9, 1], 'zone_contact_percent'),
    'chase_percent': _stat_spec('Out-of-Zone Pitches', 'O-Swing%', 'ozone_swing',
                                [0.0, 0.1, 0.2, 0.3, 0.4, 0.5], 'chase_percent',
                                flip_p=True),
    'chase_contact': _stat_spec('Out-of-Zone Swings', 'O-Contact%', 'ozone_contact',
                                [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8], 'chase_contact'),
}

# Level code -> display label (currently an identity mapping).
level_dict = {'MLB': 'MLB', 'AAA': 'AAA', 'AA': 'AA', 'A+': 'A+', 'A': 'A'}

# Stat ids grouped by the kind of event they are measured over. rolling_plot
# uses these lists to choose which rows to keep and which column to divide by.
woba_list = ['woba']
pa_list = ['k', 'bb', 'bb_minus_k']
balls_in_play_list = ['hard_hit', 'launch_speed', 'launch_speed_90',
                      'launch_angle', 'barrel', 'sweet_spot']
pitches_list = ['zone_percent', 'swing_percent', 'sw_str', 'csw']
swings_list = ['whiff_percent']
in_zone_pitches_list = ['zone_swing']
in_zone_swings_list = ['zone_contact']
out_zone_pitches_list = ['chase_percent']
out_zone_swings_list = ['chase_contact']

# Stat id -> short display name. Kept as an explicit literal because its key
# order differs from plot_dict ('zone_percent' precedes 'barrel' here).
plot_dict_small = {
    'k': 'K%',
    'bb': 'BB%',
    'bb_minus_k': 'BB-K%',
    'csw': 'CSW%',
    'woba': 'wOBA',
    'launch_speed': 'Exit Velocity',
    'launch_speed_90': '90th Percentile Exit Velocity',
    'hard_hit': 'HardHit%',
    'sweet_spot': 'SweetSpot%',
    'launch_angle': 'Launch Angle',
    'zone_percent': 'Zone%',
    'barrel': 'Barrel%',
    'swing_percent': 'Swing%',
    'whiff_percent': 'Whiff%',
    'sw_str': 'SwStr%',
    'zone_swing': 'Z-Swing%',
    'zone_contact': 'Z-Contact%',
    'chase_percent': 'O-Swing%',
    'chase_contact': 'O-Contact%',
}
# Percentile guide lines: (label, quantile when a higher value ranks higher,
# quantile when the rank is flipped). The position in this tuple picks the
# colour, colour_palette[2 + position], for both the line and its label box.
_PERCENTILE_GUIDES = (
    ('90th %', 0.9, 0.1),
    ('75th %', 0.75, 0.25),
    ('25th %', 0.25, 0.75),
    ('10th %', 0.1, 0.9),
)


def _stat_event_filter(df, stat_id):
    """Return ``(row_mask, divisor_column)`` for the events ``stat_id`` uses.

    ``row_mask`` selects the rows of ``df`` the stat is measured over and
    ``divisor_column`` names the column whose sum is the denominator.

    Raises:
        ValueError: if ``stat_id`` is not in any of the stat-group lists.
    """
    if stat_id in pa_list:
        return df['pa'] == 1, 'pa'
    if stat_id in balls_in_play_list:
        return df['bip'], 'bip'
    if stat_id in pitches_list:
        return df['pitches'] == 1, 'pitches'
    if stat_id in swings_list:
        return df['swings'] == 1, 'swings'
    if stat_id in in_zone_pitches_list:
        return df['in_zone'], 'in_zone'
    if stat_id in in_zone_swings_list:
        return df['zone_swing'], 'zone_swing'
    if stat_id in out_zone_pitches_list:
        # Explicit comparison (not ~) so missing values are excluded, as before.
        return df['in_zone'] == False, 'out_zone'  # noqa: E712
    if stat_id in out_zone_swings_list:
        return df['ozone_swing'], 'ozone_swing'
    if stat_id in woba_list:
        return df['woba_codes'] == 1, 'woba_codes'
    raise ValueError(f'Unsupported stat_id: {stat_id!r}')


def _stat_value(events, value_col, divisor_col, use_quantile):
    """Return the 0.9 quantile of ``value_col`` or ``sum(value)/sum(divisor)``."""
    if use_quantile:
        return events[value_col].quantile(0.9)
    return events[value_col].sum() / events[divisor_col].sum()


def rolling_plot(df,df_summ,player_id,stat_id,batter_dict,window_select,level_id):
    """Draw a rolling-window plot of one stat for one batter at one level.

    The figure shows the batter's rolling value, the batter's overall value,
    the level-wide value, and dotted 10th/25th/75th/90th percentile guides
    computed from the qualified rows of ``df_summ``.

    Args:
        df: Event-level DataFrame. Reads ``game_date`` (string; first four
            characters are used as the season), ``batter_id``, ``level``,
            the stat's ``x_value`` column and the event-flag/divisor column
            for the stat's group.
        df_summ: Summary DataFrame. Reads ``batter_id``, ``level``, the
            divisor column and the stat's ``percentile_label`` column.
            Assumed to hold one row per batter and level; TODO confirm.
        player_id: Batter id, convertible with ``int()``. An empty string
            draws a placeholder message instead of a plot.
        stat_id: Key of ``plot_dict`` naming the stat to plot.
        batter_dict: Mapping from integer batter id to display name.
        window_select: Rolling-window size and qualification minimum;
            converted with ``int()``.
        level_id: Value compared against the ``level`` columns.

    Returns:
        None. The plot is drawn on a newly created matplotlib figure;
        callers presumably read the current pyplot figure. Verify against
        the caller.

    Raises:
        ValueError: if ``stat_id`` belongs to no stat group.
    """
    sns.set_theme(style="whitegrid", palette="pastel")

    if player_id == "":
        fig = plt.figure(figsize=(12, 12))
        fig.text(s='Please Select a Pitcher', x=0.5, y=0.5)
        return

    # Resolve the stat before creating a figure so bad input leaves none behind.
    event_mask, divisor_x = _stat_event_filter(df, stat_id)
    batter_id = int(player_id)
    swing_min = int(window_select)

    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    fig.set_facecolor('white')

    level_events = df[df['level'] == level_id]
    player_events = df[event_mask & (df['batter_id'] == batter_id) & (df['level'] == level_id)]
    n_events = len(player_events)

    # Batters at this level with at least `swing_min` events form the
    # comparison population. The original passed 'batter_name' and 'level'
    # to set_index as `drop`/`append`, which raises TypeError on pandas >= 2.0,
    # and merged whole rank tables; only the stat's percentile rank is needed.
    qualified = df_summ[(df_summ[divisor_x] >= swing_min) & (df_summ['level'] == level_id)].copy()
    qualified['batter_id'] = qualified['batter_id'].astype(int)
    is_player = qualified['batter_id'] == batter_id
    if not is_player.any():
        ax.text(x=0.5, y=0.5, s='Please Select Different Parameters to Produce a plot', fontsize=18, ha='center')
        return

    spec = plot_dict[stat_id]
    value_col = spec['x_value']
    stat_col = spec['percentile_label']
    flipped = spec['flip_p']
    use_quantile = spec['percentile']

    # Percentile rank shown in the legend; flipped stats show 100 - rank.
    # round() rather than int(): 0.29 * 100 is 28.999... and must not print 28.
    player_pct = qualified[stat_col].rank(pct=True)[is_player].iloc[0]
    percentile_rank = int(round(abs(float(flipped) - round(float(player_pct), 2)) * 100))
    ordinal = inflect.engine().ordinal(percentile_rank)

    if spec['avg_adjust']:
        number_format = '.3f'
    elif use_quantile or not spec['percent']:
        number_format = '.1f'
    else:
        number_format = '.1%'
    if spec['percent']:
        ax.yaxis.set_major_formatter(mtick.PercentFormatter(1))

    # Compute each reference value once so the legend text always matches the
    # line it labels (the player's quantile line previously used fillna(0)
    # while its label did not).
    league_value = _stat_value(level_events, value_col, divisor_x, use_quantile)
    player_value = _stat_value(player_events, value_col, divisor_x, use_quantile)
    label_1 = f'{level_id} Average {league_value:{number_format}}'
    label_2 = f'{batter_dict[batter_id]} Average {player_value:{number_format}} ({ordinal} Percentile)'

    # Dotted percentile guides with a label box just right of the left edge.
    label_x = min(swing_min + swing_min / 100, swing_min + 1)
    for position, (guide_text, q_normal, q_flipped) in enumerate(_PERCENTILE_GUIDES):
        guide_y = qualified[stat_col].quantile(q_flipped if flipped else q_normal)
        guide_colour = colour_palette[2 + position]
        ax.hlines(y=guide_y, xmin=swing_min, xmax=n_events, color=guide_colour, linestyle='dotted', alpha=0.5)
        ax.text(label_x, guide_y, guide_text, rotation=0, ha='left',
                bbox=dict(facecolor='white', alpha=0.5, edgecolor=guide_colour, pad=2))

    ax.hlines(y=league_value, xmin=swing_min, xmax=n_events, color=colour_palette[1], linestyle='-.', label=label_1)
    ax.hlines(y=player_value, xmin=swing_min, xmax=n_events, color=colour_palette[0], linestyle='--', label=label_2)

    # Rolling series over the player's events; missing values count as 0.
    windows = player_events[value_col].fillna(0).rolling(window=swing_min)
    rolling_values = windows.quantile(0.9) if use_quantile else windows.sum() / swing_min
    sns.lineplot(x=range(1, n_events + 1), y=rolling_values, color=colour_palette[0], linewidth=3, ax=ax)

    season_title = df['game_date'].str[0:4].values[0]
    ax.set_xlim(swing_min, n_events)
    ax.set_title(f'{batter_dict[batter_id]} - {season_title} - {level_id} - {swing_min} {spec["x_axis"]} Rolling {spec["title"]}', fontsize=16, fontname='Century Gothic')
    ax.set_xlabel(spec['x_axis'], fontsize=16, fontname='Century Gothic')
    ax.set_ylabel(spec['y_axis'], fontsize=16, fontname='Century Gothic')
    ax.set_yticks(spec["x_range"])
    ax.legend(fontsize=16)

    fig.text(x=0.03, y=0.02, s='By: @TJStats', fontname='Century Gothic')
    fig.text(x=1 - 0.03, y=0.02, s='Data: MLB', ha='right', fontname='Century Gothic')
    fig.tight_layout()
    return