import math

import inflect
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
import seaborn as sns
from adjustText import adjust_text
from matplotlib.gridspec import GridSpec
from matplotlib.pyplot import text
from matplotlib.ticker import MaxNLocator
from scipy.stats import gaussian_kde
from scipy.stats import zscore

# Colours are referenced by position: 0 = player lines, 1 = level-average line,
# 2-5 = the four percentile guide lines drawn by rolling_plot.
colour_palette = ['#FFB000', '#648FFF', '#785EF0', '#DC267F', '#FE6100',
                  '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED']

# Per-stat plot configuration, keyed by the stat_id passed to rolling_plot.
#   x_axis / y_axis / title : axis labels and title fragment
#   x_value                 : column of the event frame that is aggregated
#   x_range                 : tick positions applied to the y axis
#   percent                 : format values as percentages
#   percentile_label        : column of the summary frame used for percentiles
#   flip_p                  : report the percentile as (1 - rank)
#   percentile              : aggregate with the 0.9 quantile instead of a mean
#   avg_adjust              : format averages with three decimals
plot_dict = {
    'k': {
        'x_axis': 'Plate Appearances', 'y_axis': 'K%', 'title': 'K%',
        'x_value': 'k', 'x_range': [0.0, 0.1, 0.2, 0.3, 0.4],
        'percent': True, 'percentile_label': 'k_percent',
        'flip_p': True, 'percentile': False, 'avg_adjust': False,
    },
    'bb': {
        'x_axis': 'Plate Appearances', 'y_axis': 'BB%', 'title': 'BB%',
        'x_value': 'bb', 'x_range': [0.0, 0.1, 0.2, 0.3],
        'percent': True, 'percentile_label': 'bb_percent',
        'flip_p': False, 'percentile': False, 'avg_adjust': False,
    },
    'bb_minus_k': {
        'x_axis': 'Plate Appearances', 'y_axis': 'BB-K%', 'title': 'BB-K%',
        'x_value': 'bb_minus_k', 'x_range': [-0.3, -0.2, -0.1, 0, 0.1, 0.2],
        'percent': True, 'percentile_label': 'bb_minus_k_percent',
        'flip_p': False, 'percentile': False, 'avg_adjust': False,
    },
    'csw': {
        'x_axis': 'Pitches', 'y_axis': 'CSW%', 'title': 'CSW%',
        'x_value': 'csw', 'x_range': [.2, .25, .3, .35, .4],
        'percent': True, 'percentile_label': 'csw_percent',
        'flip_p': True, 'percentile': False, 'avg_adjust': False,
    },
    'woba': {
        'x_axis': 'wOBA PA', 'y_axis': 'wOBA', 'title': 'wOBA',
        'x_value': 'woba', 'x_range': [.20, .30, .40, .50],
        'percent': False, 'percentile_label': 'woba_percent',
        'flip_p': False, 'percentile': False, 'avg_adjust': True,
    },
    'launch_speed': {
        'x_axis': 'Balls In Play', 'y_axis': 'Exit Velocity',
        'title': 'Exit Velocity',
        'x_value': 'launch_speed', 'x_range': [85, 90, 95, 100],
        'percent': False, 'percentile_label': 'launch_speed',
        'flip_p': False, 'percentile': False, 'avg_adjust': False,
    },
    'launch_speed_90': {
        'x_axis': 'Balls In Play', 'y_axis': '90th Percentile Exit Velocity',
        'title': '90th Percentile Exit Velocity',
        'x_value': 'launch_speed', 'x_range': [95, 100, 105, 110, 115],
        'percent': False, 'percentile_label': 'launch_speed_90',
        'flip_p': False, 'percentile': True, 'avg_adjust': False,
    },
    'hard_hit': {
        'x_axis': 'Balls In Play', 'y_axis': 'HardHit%', 'title': 'HardHit%',
        'x_value': 'hard_hit', 'x_range': [0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
        'percent': True, 'percentile_label': 'hard_hit_percent',
        'flip_p': False, 'percentile': False, 'avg_adjust': False,
    },
    'sweet_spot': {
        'x_axis': 'Balls In Play', 'y_axis': 'SweetSpot%',
        'title': 'SweetSpot%',
        'x_value': 'sweet_spot', 'x_range': [0.2, 0.3, 0.4, 0.5],
        'percent': True, 'percentile_label': 'sweet_spot_percent',
        'flip_p': False, 'percentile': False, 'avg_adjust': False,
    },
    'launch_angle': {
        'x_axis': 'Balls In Play', 'y_axis': 'Launch Angle',
        'title': 'Launch Angle',
        'x_value': 'launch_angle', 'x_range': [-20, -10, 0, 10, 20],
        'percent': False, 'percentile_label': 'launch_angle',
        'flip_p': False, 'percentile': False, 'avg_adjust': False,
    },
    'barrel': {
        'x_axis': 'Balls In Play', 'y_axis': 'Barrel%', 'title': 'Barrel%',
        'x_value': 'barrel', 'x_range': [0, 0.05, 0.10, .15, .20],
        'percent': True, 'percentile_label': 'barrel_percent',
        'flip_p': False, 'percentile': False, 'avg_adjust': False,
    },
    'zone_percent': {
        'x_axis': 'Pitches', 'y_axis': 'Zone%', 'title': 'Zone%',
        'x_value': 'in_zone', 'x_range': [0.3, 0.4, 0.5, 0.6, 0.7],
        'percent': True, 'percentile_label': 'zone_percent',
        'flip_p': False, 'percentile': False, 'avg_adjust': False,
    },
    'swing_percent': {
        'x_axis': 'Pitches', 'y_axis': 'Swing%', 'title': 'Swing%',
        'x_value': 'swings', 'x_range': [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
        'percent': True, 'percentile_label': 'swing_percent',
        'flip_p': False, 'percentile': False, 'avg_adjust': False,
    },
    'whiff_percent': {
        'x_axis': 'Swings', 'y_axis': 'Whiff%', 'title': 'Whiff%',
        'x_value': 'whiffs', 'x_range': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
        'percent': True, 'percentile_label': 'whiff_rate',
        'flip_p': True, 'percentile': False, 'avg_adjust': False,
    },
    'sw_str': {
        'x_axis': 'Pitches', 'y_axis': 'SwStr%', 'title': 'SwStr%',
        'x_value': 'whiffs', 'x_range': [0.0, 0.05, 0.1, 0.15, 0.2, 0.25],
        'percent': True, 'percentile_label': 'swstr_rate',
        'flip_p': True, 'percentile': False, 'avg_adjust': False,
    },
    'zone_swing': {
        'x_axis': 'In-Zone Pitches', 'y_axis': 'Z-Swing%', 'title': 'Z-Swing%',
        'x_value': 'zone_swing',
        'x_range': [0.3, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1],
        'percent': True, 'percentile_label': 'zone_swing_percent',
        'flip_p': False, 'percentile': False, 'avg_adjust': False,
    },
    'zone_contact': {
        'x_axis': 'In-Zone Swings', 'y_axis': 'Z-Contact%',
        'title': 'Z-Contact%',
        'x_value': 'zone_contact', 'x_range': [0.5, 0.6, 0.7, 0.8, 0.9, 1],
        'percent': True, 'percentile_label': 'zone_contact_percent',
        'flip_p': False, 'percentile': False, 'avg_adjust': False,
    },
    'chase_percent': {
        'x_axis': 'Out-of-Zone Pitches', 'y_axis': 'O-Swing%',
        'title': 'O-Swing%',
        'x_value': 'ozone_swing', 'x_range': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
        'percent': True, 'percentile_label': 'chase_percent',
        'flip_p': True, 'percentile': False, 'avg_adjust': False,
    },
    'chase_contact': {
        'x_axis': 'Out-of-Zone Swings', 'y_axis': 'O-Contact%',
        'title': 'O-Contact%',
        'x_value': 'ozone_contact',
        'x_range': [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
        'percent': True, 'percentile_label': 'chase_contact',
        'flip_p': False, 'percentile': False, 'avg_adjust': False,
    },
}

level_dict = {'MLB': 'MLB', 'AAA': 'AAA', 'AA': 'AA', 'A+': 'A+', 'A': 'A'}

# Stat ids grouped by the kind of event that forms their denominator.
woba_list = ['woba']
pa_list = ['k', 'bb', 'bb_minus_k']
balls_in_play_list = ['hard_hit', 'launch_speed', 'launch_speed_90',
                      'launch_angle', 'barrel', 'sweet_spot']
pitches_list = ['zone_percent', 'swing_percent', 'sw_str', 'csw']
swings_list = ['whiff_percent']
in_zone_pitches_list = ['zone_swing']
in_zone_swings_list = ['zone_contact']
out_zone_pitches_list = ['chase_percent']
out_zone_swings_list = ['chase_contact']

# Display name for each stat id.
plot_dict_small = {
    'k': 'K%',
    'bb': 'BB%',
    'bb_minus_k': 'BB-K%',
    'csw': 'CSW%',
    'woba': 'wOBA',
    'launch_speed': 'Exit Velocity',
    'launch_speed_90': '90th Percentile Exit Velocity',
    'hard_hit': 'HardHit%',
    'sweet_spot': 'SweetSpot%',
    'launch_angle': 'Launch Angle',
    'zone_percent': 'Zone%',
    'barrel': 'Barrel%',
    'swing_percent': 'Swing%',
    'whiff_percent': 'Whiff%',
    'sw_str': 'SwStr%',
    'zone_swing': 'Z-Swing%',
    'zone_contact': 'Z-Contact%',
    'chase_percent': 'O-Swing%',
    'chase_contact': 'O-Contact%',
}

# (stat group, mask column, required value, divisor column).
# A required value of None means the mask column is used directly as a
# boolean mask; otherwise rows where the column equals the value are kept.
_EVENT_FILTERS = (
    (pa_list, 'pa', 1, 'pa'),
    (balls_in_play_list, 'bip', None, 'bip'),
    (pitches_list, 'pitches', 1, 'pitches'),
    (swings_list, 'swings', 1, 'swings'),
    (in_zone_pitches_list, 'in_zone', None, 'in_zone'),
    (in_zone_swings_list, 'zone_swing', None, 'zone_swing'),
    (out_zone_pitches_list, 'in_zone', False, 'out_zone'),
    (out_zone_swings_list, 'ozone_swing', None, 'ozone_swing'),
    (woba_list, 'woba_codes', 1, 'woba_codes'),
)

# Captions for the four guide lines, best to worst, paired with
# colour_palette[2], [3], [4] and [5] respectively.
_GUIDE_CAPTIONS = ('90th %', '75th %', '25th %', '10th %')


def _select_events(df, stat_id, player_id, level_id):
    """Return ``(events, divisor)`` for one batter, level and stat.

    ``events`` holds the rows of ``df`` for the batter at ``level_id`` that
    pass the event filter of the group ``stat_id`` belongs to; ``divisor`` is
    the name of the column whose sum is the denominator for that stat.

    Raises:
        ValueError: if ``stat_id`` is not in any of the stat-group lists.
    """
    for stat_group, mask_column, required, divisor in _EVENT_FILTERS:
        if stat_id not in stat_group:
            continue
        if required is None:
            event_mask = df[mask_column]
        else:
            event_mask = df[mask_column] == required
        player_mask = (df.batter_id == int(player_id)) & (df.level == level_id)
        return df[event_mask & player_mask], divisor
    raise ValueError(f'Unsupported stat_id: {stat_id!r}')


def _rank_summary(df_summ, divisor, min_events, level_id):
    """Return the qualifying summary rows joined with rank/percentile columns.

    Rows are kept when ``divisor`` is at least ``min_events`` and ``level``
    equals ``level_id``. Every remaining column ``c`` (other than
    ``batter_id``) gains a ``c_rank`` and a ``c_percent`` companion.
    """
    # Only 'batter_id' is a key. Extra positional arguments to set_index are
    # the drop/append flags, and pandas 2.x rejects them positionally.
    table = df_summ.set_index('batter_id')
    table = table[table[divisor] >= min_events]
    table = table[table.level == level_id]

    ranks = table.rank(method='max', ascending=False).add_suffix('_rank')
    percents = table.rank(pct=True).add_suffix('_percent')

    table = table.reset_index()
    ranks = ranks.reset_index()
    percents = percents.reset_index()
    for frame in (table, ranks, percents):
        frame['batter_id'] = frame['batter_id'].astype(int)

    table = table.merge(ranks, on='batter_id', how='left',
                        suffixes=('', '_rank'))
    table = table.merge(percents, on='batter_id', how='left',
                        suffixes=('', '_percent'))
    return table


def _percentile_rank(pct, flip):
    """Convert a 0-1 percentile rank to a whole-number percentile.

    When ``flip`` is true the result is ``100 - percentile``. The value is
    rounded to the nearest integer; truncating instead turns 0.29 * 100
    (28.999...) into 28.
    """
    return int(round(abs(int(bool(flip)) - float(pct)) * 100))


def _draw_percentile_guides(ax, values, x_start, x_end, flip):
    """Draw the 10/25/75/90 percentile guide lines with matching labels.

    ``values`` is the summary column the quantiles are taken from. With
    ``flip`` set, the "90th %" caption and colour go to the 0.1 quantile
    (and so on), so each label box always carries its own line's colour.
    """
    quantiles = (0.1, 0.25, 0.75, 0.9) if flip else (0.9, 0.75, 0.25, 0.1)
    # Keep the label just right of the left edge, never more than one unit in.
    label_x = min(x_start + x_start / 100, x_start + 1)
    for offset, (quantile, caption) in enumerate(zip(quantiles, _GUIDE_CAPTIONS)):
        colour = colour_palette[2 + offset]
        y_value = values.quantile(quantile)
        ax.hlines(y=y_value, xmin=x_start, xmax=x_end, color=colour,
                  linestyle='dotted', alpha=0.5)
        ax.text(label_x, y_value, caption, rotation=0, ha='left',
                bbox=dict(facecolor='white', alpha=0.5, edgecolor=colour, pad=2))


def rolling_plot(df, df_summ, player_id, stat_id, batter_dict, window_select, level_id):
    """Plot a batter's rolling value of one stat against level benchmarks.

    A new matplotlib figure is created and drawn on; nothing is returned, so
    the caller is expected to pick the figure up from pyplot's current-figure
    state.

    Args:
        df: Event-level DataFrame. The code reads ``game_date``, ``batter_id``,
            ``level``, the stat's ``x_value`` column and the filter/divisor
            columns named in ``_EVENT_FILTERS`` (schema otherwise not visible
            here -- confirm against the caller).
        df_summ: Per-batter summary DataFrame with ``batter_id``, ``level``,
            the divisor columns and each stat's ``percentile_label`` column.
        player_id: Batter id, convertible with ``int()``. An empty string
            produces a placeholder figure instead of a plot.
        stat_id: Key of ``plot_dict``.
        batter_dict: Mapping of integer batter id to display name.
        window_select: Rolling window size, convertible with ``int()``. It is
            also the minimum ``divisor`` count for a batter to be ranked.
        level_id: Value matched against the ``level`` columns.

    Returns:
        None.

    Raises:
        KeyError: if ``stat_id`` is not a key of ``plot_dict``.
        ValueError: if ``stat_id`` belongs to none of the stat-group lists.
    """
    sns.set_theme(style="whitegrid", palette="pastel")

    if player_id == "":
        fig = plt.figure(figsize=(12, 12))
        fig.text(s='Please Select a Pitcher', x=0.5, y=0.5)
        return

    season_title = df['game_date'].str[0:4].values[0]
    swing_min = int(window_select)
    spec = plot_dict[stat_id]
    x_column = spec['x_value']
    batter_key = int(player_id)

    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    fig.set_facecolor('white')

    elly_zone_df, divisor_x = _select_events(df, stat_id, player_id, level_id)
    event_count = len(elly_zone_df)

    df_summ_new = _rank_summary(df_summ, divisor_x, swing_min, level_id)
    player_row = df_summ_new[df_summ_new.batter_id == batter_key].reset_index(drop=True)
    if len(player_row) < 1:
        ax.text(x=0.5, y=0.5,
                s='Please Select Different Parameters to Produce a plot',
                fontsize=18, ha='center')
        return
    player_row = player_row.loc[:, ~player_row.columns.duplicated(keep='last')].copy()

    ordinal_engine = inflect.engine()
    percentile = _percentile_rank(
        player_row[spec['percentile_label'] + '_percent'].iloc[0],
        spec['flip_p'],
    )

    # Level-wide and player aggregates, computed once for labels and lines.
    level_rows = df[df.level == level_id]
    if spec['percentile']:
        level_value = level_rows[x_column].quantile(0.9)
        player_value = elly_zone_df[x_column].quantile(0.9)
        # NOTE(review): the drawn player line fills NaN with 0 while the
        # legend value does not; kept as in the original -- confirm intent.
        player_line = elly_zone_df[x_column].fillna(0).quantile(0.9)
    else:
        level_value = level_rows[x_column].sum() / level_rows[divisor_x].sum()
        player_value = elly_zone_df[x_column].sum() / elly_zone_df[divisor_x].sum()
        player_line = player_value

    if spec['avg_adjust']:
        number_format = '.3f'
    elif spec['percentile']:
        number_format = '.1f'
    elif spec['percent']:
        number_format = '.1%'
    else:
        number_format = '.1f'
    if spec['percent']:
        ax.yaxis.set_major_formatter(mtick.PercentFormatter(1))

    label_1 = f'{level_id} Average {level_value:{number_format}}'
    label_2 = (f'{batter_dict[batter_key]} Average {player_value:{number_format}} '
               f'({ordinal_engine.ordinal(percentile)} Percentile)')

    _draw_percentile_guides(ax, df_summ_new[spec['percentile_label']],
                            swing_min, event_count, spec['flip_p'])

    ax.hlines(y=level_value, xmin=swing_min, xmax=event_count,
              color=colour_palette[1], linestyle='-.', label=label_1)
    ax.hlines(y=player_line, xmin=swing_min, xmax=event_count,
              color=colour_palette[0], linestyle='--', label=label_2)

    rolling_window = elly_zone_df[x_column].fillna(0).rolling(window=swing_min)
    if spec['percentile']:
        rolling_values = rolling_window.quantile(0.9)
    else:
        rolling_values = rolling_window.sum() / swing_min
    sns.lineplot(x=range(1, event_count + 1), y=rolling_values,
                 color=colour_palette[0], linewidth=3, ax=ax)

    ax.set_xlim(math.floor(swing_min), event_count)
    ax.set_title(
        f'{batter_dict[batter_key]} - {season_title} - {level_id} - '
        f'{swing_min} {spec["x_axis"]} Rolling {spec["title"]}',
        fontsize=16, fontname='Century Gothic',
    )
    ax.set_xlabel(spec['x_axis'], fontsize=16, fontname='Century Gothic')
    ax.set_ylabel(spec['y_axis'], fontsize=16, fontname='Century Gothic')
    ax.set_yticks(spec['x_range'])
    ax.legend(fontsize='16')

    fig.text(x=0.03, y=0.02, s='By: @TJStats', fontname='Century Gothic')
    fig.text(x=1 - 0.03, y=0.02, s='Data: MLB', ha='right',
             fontname='Century Gothic')
    fig.tight_layout()
    return