import pandas as pd import numpy as np import json from matplotlib.ticker import FuncFormatter from matplotlib.ticker import MaxNLocator import math from matplotlib.patches import Ellipse import matplotlib.transforms as transforms import matplotlib.colors import matplotlib.colors as mcolors import seaborn as sns import matplotlib.pyplot as plt import requests font_properties = {'family': 'calibi', 'size': 12} font_properties_titles = {'family': 'calibi', 'size': 20} font_properties_axes = {'family': 'calibi', 'size': 16} colour_palette = ['#FFB000','#648FFF','#785EF0', '#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED'] season_start = '2024-03-20' season_end = '2024-09-29' season_fg=2024 chad_fg = requests.get(f'https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=0&season={season_fg}&season={season_fg}&month=1000&season1={season_fg}&ind=0&pageitems=2000000000&pagenum=1&ind=0&rost=0&players=&type=36&postseason=&sortdir=default&sortstat=sp_pitching').json() cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) chadwick_df_small = pd.DataFrame(data={ 'key_mlbam':[x['xMLBAMID'] for x in chad_fg['data']], 'key_fangraphs':[x['playerid'] for x in chad_fg['data']], 'Name':[x['PlayerName'] for x in chad_fg['data']], }) pitcher_dicts = chadwick_df_small.set_index('key_mlbam')['Name'].sort_values().to_dict() mlb_fg_dicts = chadwick_df_small.set_index('key_mlbam')['key_fangraphs'].sort_values().to_dict() ### DF UPDATE CODE ### def df_update_code(df): print('Starting') #df = pd.read_csv('2024_spring_data.csv',index_col=[0]) print('Starting') df['vy_f'] = -(df['vy0']**2 - (2 * df['ay'] * (df['y0'] - 17/12)))**0.5 df['t'] = (df['vy_f'] - df['vy0']) / df['ay'] df['vz_f'] = (df['vz0']) + (df['az'] * df['t']) df['vaa'] = -np.arctan(df['vz_f'] / df['vy_f']) * (180 / np.pi) #df['vy_f'] = -(df['vy0']**2 - (2 * df['ay'] * (df['y0'] - 17/12)))**0.5 #df['t'] = (df['vy_f'] - df['vy0']) 
/ df['ay'] df['vx_f'] = (df['vx0']) + (df['ax'] * df['t']) df['haa'] = -np.arctan(df['vx_f'] / df['vy_f']) * (180 / np.pi) end_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch', 'double', 'sac_fly', 'force_out', 'home_run', 'grounded_into_double_play', 'fielders_choice', 'field_error', 'triple', 'sac_bunt', 'double_play', 'intent_walk', 'fielders_choice_out', 'strikeout_double_play', 'sac_fly_double_play', 'catcher_interf', 'other_out'] df['pa'] = df.event_type.isin(end_codes) #df['pa'] = 1 df['k'] = df.event_type.isin(list(filter(None, [x if 'strikeout' in x else '' for x in df.event_type.fillna('None').unique()]))) df['bb'] = df.event_type.isin(list(filter(None, [x if 'walk' in x else '' for x in df.event_type.fillna('None').unique()]))) df['k_minus_bb'] = df['k'].astype(np.float32)-df['bb'].astype(np.float32) df = df.drop_duplicates(subset=['play_id']) df = df.dropna(subset=['start_speed']) swing_codes = ['Swinging Strike', 'In play, no out', 'Foul', 'In play, out(s)', 'In play, run(s)', 'Swinging Strike (Blocked)', 'Foul Bunt','Foul Tip', 'Missed Bunt','Foul Pitchout','Swinging Pitchout'] swings_in = ['Swinging Strike', 'In play, no out', 'Foul', 'In play, out(s)', 'In play, run(s)', 'Swinging Strike (Blocked)', 'Foul Bunt','Foul Tip', 'Missed Bunt','Foul Pitchout','Swinging Pitchout'] swing_strike_codes = ['Swinging Strike', 'Swinging Strike (Blocked)','Missed Bunt','Foul Tip','Swinging Pitchout'] contact_codes = ['In play, no out', 'Foul', 'In play, out(s)', 'In play, run(s)', 'Foul Bunt'] codes_in = ['In play, out(s)', 'Swinging Strike', 'Ball', 'Foul', 'In play, no out', 'Called Strike', 'Foul Tip', 'In play, run(s)', 'Hit By Pitch', 'Ball In Dirt', 'Pitchout', 'Swinging Strike (Blocked)', 'Foul Bunt', 'Missed Bunt', 'Foul Pitchout', 'Intent Ball', 'Swinging Pitchout'] df['in_zone'] = df['zone'] < 10 df = df.drop_duplicates(subset=['play_id']) df_codes = df[df.play_description.isin(codes_in)].dropna(subset=['in_zone']) df_codes['bip'] = 
~df_codes.launch_speed.isna() conditions = [ (df_codes['launch_speed'].isna()), (df_codes['launch_speed']*1.5 - df_codes['launch_angle'] >= 117 ) & (df_codes['launch_speed'] + df_codes['launch_angle'] >= 124) & (df_codes['launch_speed'] > 98) & (df_codes['launch_angle'] >= 8) & (df_codes['launch_angle'] <= 50) ] choices = [False,True] df_codes['barrel'] = np.select(conditions, choices, default=np.nan) conditions_ss = [ (df_codes['launch_angle'].isna()), (df_codes['launch_angle'] >= 8 ) * (df_codes['launch_angle'] <= 32 ) ] choices_ss = [False,True] df_codes['sweet_spot'] = np.select(conditions_ss, choices_ss, default=np.nan) conditions_hh = [ (df_codes['launch_speed'].isna()), (df_codes['launch_speed'] >= 94.5 ) ] choices_hh = [False,True] df_codes['hard_hit'] = np.select(conditions_hh, choices_hh, default=np.nan) conditions_tb = [ (df_codes['event_type']=='single'), (df_codes['event_type']=='double'), (df_codes['event_type']=='triple'), (df_codes['event_type']=='home_run'), ] choices_tb = [1,2,3,4] df_codes['tb'] = np.select(conditions_tb, choices_tb, default=np.nan) conditions_woba = [ (df_codes['event_type']=='walk'), (df_codes['event_type']=='hit_by_pitch'), (df_codes['event_type']=='single'), (df_codes['event_type']=='double'), (df_codes['event_type']=='triple'), (df_codes['event_type']=='home_run'), ] choices_woba = [0.705, 0.688, 0.897, 1.233, 1.612, 2.013] df_codes['woba'] = np.select(conditions_woba, choices_woba, default=np.nan) woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch', 'double', 'sac_fly', 'force_out', 'home_run', 'grounded_into_double_play', 'fielders_choice', 'field_error', 'triple', 'sac_bunt', 'double_play', 'fielders_choice_out', 'strikeout_double_play', 'sac_fly_double_play', 'other_out'] conditions_woba_code = [ (df_codes['event_type'].isin(woba_codes)) ] choices_woba_code = [1] df_codes['woba_codes'] = np.select(conditions_woba_code, choices_woba_code, default=np.nan) #df_codes['barrel'] = (df_codes.launch_speed >= 
98) & (df_codes.launch_angle >= (26 - (-98 + df_codes.launch_speed))) & (df_codes.launch_angle <= 30 + (-98 + df_codes.launch_speed)) & (df_codes.launch_angle >= 8) & (df_codes.launch_angle <= 50) #df_codes['barrel'] = (df_codes.launch_speed >= 98) & (df_codes.launch_angle >= (26 - (-98 + df_codes.launch_speed))) & (df_codes.launch_angle <= 30 + (-98 + df_codes.launch_speed)) & (df_codes.launch_angle >= 8) & (df_codes.launch_angle <= 50) df_codes['pitches'] = 1 df_codes['whiffs'] = [1 if ((x == 'S')|(x == 'W')|(x =='T')) else 0 for x in df_codes.play_code] df_codes['csw'] = [1 if ((x == 'S')|(x == 'W')|(x =='T')|(x == 'C')) else 0 for x in df_codes.play_code] df_codes['swings'] = [1 if x in swings_in else 0 for x in df_codes.play_description] df_codes['out_zone'] = df_codes.in_zone == False df_codes['zone_swing'] = (df_codes.in_zone == True)&(df_codes.swings == 1) df_codes['zone_contact'] = (df_codes.in_zone == True)&(df_codes.swings == 1)&(df_codes.whiffs == 0) df_codes['ozone_swing'] = (df_codes.in_zone==False)&(df_codes.swings == 1) df_codes['ozone_contact'] = (df_codes.in_zone==False)&(df_codes.swings == 1)&(df_codes.whiffs == 0) return df_codes ### GET COLOURS## def get_color(value,normalize,cmap_sum): color = cmap_sum(normalize(value)) return mcolors.to_hex(color) ### PERCENTILE ### def percentile(n): def percentile_(x): return x.quantile(n) percentile_.__name__ = 'percentile_{:02.0f}'.format(n*100) return percentile_ ### TJ STUFF+ DF CLEAN ### def df_clean(df): df_copy = df.copy() df_copy.loc[df_copy['pitcher_hand'] == 'L','hb'] *= -1 df_copy.loc[df_copy['pitcher_hand'] == 'L','x0'] *= -1 df_copy.loc[df_copy['pitcher_hand'] == 'L','spin_direction'] = 360 - df_copy.loc[df_copy['pitcher_hand'] == 'L','spin_direction'] df_copy['pitch_l'] = [1 if x == 'L' else 0 for x in df_copy['pitcher_hand']] df_copy['bat_l'] = [1 if x == 'L' else 0 for x in df_copy['batter_hand']] #df_copy = df_copy[~df_copy.pitch_type.isin(["EP", "PO", "KN", "CS", "SC", 
"FA"])].reset_index(drop=True) #df_copy = df_copy[~df_copy.pitch_type.isin(["EP", "PO", "CS", "SC", "FA"])].reset_index(drop=True) df_copy['pitch_type'] = df_copy['pitch_type'].replace({'FT':'SI', #'KC':'CU', 'SV':'SL', 'FO':'FS'}) df_copy_fb_sum = df_copy[df_copy.pitch_type.isin(["FF", "FC", "SI"])].groupby(['pitcher_id']).agg( fb_velo = ('start_speed','mean'), fb_max_ivb = ('ivb',percentile(0.9)), fb_max_x = ('hb',percentile(0.9)), fb_min_x = ('hb',percentile(0.1)), fb_max_velo = ('start_speed',percentile(0.9)), fb_axis = ('spin_direction','mean'), ) df_copy = df_copy.merge(df_copy_fb_sum,left_on='pitcher_id',right_index=True,how='left') df_copy['fb_velo_diff'] = df_copy['start_speed']- df_copy['fb_velo'] df_copy['fb_max_ivb_diff'] = df_copy['ivb']- df_copy['fb_max_ivb'] df_copy['fb_max_hb_diff'] = -abs(df_copy['hb']- df_copy['fb_max_x']) df_copy['fb_min_hb_diff'] = df_copy['hb']- df_copy['fb_min_x'] df_copy['fb_max_velo_diff'] = df_copy['start_speed']- df_copy['fb_max_velo'] df_copy['fb_axis_diff'] = df_copy['spin_direction']- df_copy['fb_axis'] # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_velo_diff'] = 0 # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_max_ivb_diff'] = 0 # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_max_hb_diff'] = 0 # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_min_hb_diff'] = 0 # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_max_velo_diff'] = 0 # df_copy.loc[df_copy.pitch_type.isin(["FF", "FC", "SI"]),'fb_axis_diff'] = 0 df_copy['max_speed'] = df_copy.groupby(['pitcher_id'])['start_speed'].transform('max') df_copy['max_speed_diff'] = df_copy['start_speed'] - df_copy['max_speed'] df_copy['max_ivb'] = df_copy.groupby(['pitcher_id'])['ivb'].transform('max') df_copy['max_ivb_diff'] = df_copy['ivb'] - df_copy['max_ivb'] df_copy['vy_f'] = -(df_copy['vy0']**2 - (2 * df_copy['ay'] * (df_copy['y0'] - 17/12)))**0.5 df_copy['t'] = (df_copy['vy_f'] - df_copy['vy0']) / 
df_copy['ay'] df_copy['vz_f'] = (df_copy['vz0']) + (df_copy['az'] * df_copy['t']) df_copy['vaa'] = -np.arctan(df_copy['vz_f'] / df_copy['vy_f']) * (180 / np.pi) #df_copy['vy_f'] = -(df_copy['vy0']**2 - (2 * df_copy['ay'] * (df_copy['y0'] - 17/12)))**0.5 #df_copy['t'] = (df_copy['vy_f'] - df_copy['vy0']) / df_copy['ay'] df_copy['vx_f'] = (df_copy['vx0']) + (df_copy['ax'] * df_copy['t']) df_copy['haa'] = -np.arctan(df_copy['vx_f'] / df_copy['vy_f']) * (180 / np.pi) # df_copy['x_diff'] = df_copy['x0'] - df_copy['px'] # df_copy['z_diff'] = df_copy['z0'] - df_copy['pz'] # df_copy['vaa'] = np.arctan(df_copy['z_diff'] / df_copy['release_pos_y']) * 360 / np.pi # df_copy['haa'] = np.arctan(-df_copy['x_diff'] / df_copy['release_pos_y']) * 360 / np.pi df_copy = df_copy.dropna(subset=['pitch_type']).fillna(0) return df_copy ### PITCH COLOURS ### pitch_colours = { 'Four-Seam Fastball':'#FF007D',#BC136F 'Sinker':'#98165D',#DC267F 'Cutter':'#BE5FA0', 'Changeup':'#F79E70',#F75233 'Splitter':'#FE6100',#F75233 'Screwball':'#F08223', 'Forkball':'#FFB000', 'Slider':'#67E18D',#1BB999#785EF0 'Sweeper':'#1BB999',#37CD85#904039 'Slurve':'#376748',#785EF0#549C07#BEABD8 'Knuckle Curve':'#311D8B', 'Curveball':'#3025CE', 'Slow Curve':'#274BFC', 'Eephus':'#648FFF', 'Knuckleball':'#867A08', 'Pitch Out':'#472C30', 'Other':'#9C8975', } ### PITCH ELLIPSE ### def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs): """ Create a plot of the covariance confidence ellipse of *x* and *y*. Parameters ---------- x, y : array-like, shape (n, ) Input data. ax : matplotlib.axes.Axes The axes object to draw the ellipse into. n_std : float The number of standard deviations to determine the ellipse's radiuses. 
**kwargs Forwarded to `~matplotlib.patches.Ellipse` Returns ------- matplotlib.patches.Ellipse """ if x.size != y.size: raise ValueError("x and y must be the same size") try: cov = np.cov(x, y) pearson = cov[0, 1]/np.sqrt(cov[0, 0] * cov[1, 1]) # Using a special case to obtain the eigenvalues of this # two-dimensional dataset. ell_radius_x = np.sqrt(1 + pearson) ell_radius_y = np.sqrt(1 - pearson) ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2, facecolor=facecolor,linewidth=2,linestyle='--', **kwargs) # Calculating the standard deviation of x from # the squareroot of the variance and multiplying # with the given number of standard deviations. scale_x = np.sqrt(cov[0, 0]) * n_std mean_x = np.mean(x) # calculating the standard deviation of y ... scale_y = np.sqrt(cov[1, 1]) * n_std mean_y = np.mean(y) transf = transforms.Affine2D() \ .rotate_deg(45) \ .scale(scale_x, scale_y) \ .translate(mean_x, mean_y) ellipse.set_transform(transf + ax.transData) except ValueError: return return ax.add_patch(ellipse) # DEFINE STRIKE ZONE strike_zone = pd.DataFrame({ 'PlateLocSide': [-0.9, -0.9, 0.9, 0.9, -0.9], 'PlateLocHeight': [1.5, 3.5, 3.5, 1.5, 1.5] }) ### STRIKE ZONE ### def draw_line(axis,alpha_spot=1,catcher_p = True): axis.plot(strike_zone['PlateLocSide'], strike_zone['PlateLocHeight'], color='black', linewidth=1.3,zorder=3,alpha=alpha_spot,) # ax.plot([-0.2833333, -0.2833333], [1.6, 3.5], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3) # ax.plot([0.2833333, 0.2833333], [1.6, 3.5], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3) # ax.plot([-0.85, 0.85], [2.2, 2.2], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3) # ax.plot([-0.85, 0.85], [2.9, 2.9], color='black', linestyle='dashed',alpha=alpha_spot,zorder=3) if catcher_p: # Add dashed line # Add home plate axis.plot([-0.708, 0.708], [0.15, 0.15], color='black', linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([-0.708, -0.708], [0.15, 0.3], color='black', 
linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([-0.708, 0], [0.3, 0.5], color='black', linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([0, 0.708], [0.5, 0.3], color='black', linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([0.708, 0.708], [0.3, 0.15], color='black', linewidth=1,alpha=alpha_spot,zorder=1) else: axis.plot([-0.708, 0.708], [0.4, 0.4], color='black', linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([-0.708, -0.9], [0.4, -0.1], color='black', linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([-0.9, 0], [-0.1, -0.35], color='black', linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([0, 0.9], [-.35, -0.1], color='black', linewidth=1,alpha=alpha_spot,zorder=1) axis.plot([0.9, 0.708], [-0.1,0.4], color='black', linewidth=1,alpha=alpha_spot,zorder=1) ### FANGRAPHS STATS DICT ### fangraphs_stats_dict = {'IP':{'table_header':'$\\bf{IP}$','format':'.1f',} , 'TBF':{'table_header':'$\\bf{PA}$','format':'.0f',} , 'AVG':{'table_header':'$\\bf{AVG}$','format':'.3f',} , 'K/9':{'table_header':'$\\bf{K\/9}$','format':'.2f',} , 'BB/9':{'table_header':'$\\bf{BB\/9}$','format':'.2f',} , 'K/BB':{'table_header':'$\\bf{K\/BB}$','format':'.2f',} , 'HR/9':{'table_header':'$\\bf{HR\/9}$','format':'.2f',} , 'K%':{'table_header':'$\\bf{K\%}$','format':'.1%',} , 'BB%':{'table_header':'$\\bf{BB\%}$','format':'.1%',} , 'K-BB%':{'table_header':'$\\bf{K-BB\%}$','format':'.1%',} , 'WHIP':{'table_header':'$\\bf{WHIP}$','format':'.2f',} , 'BABIP':{'table_header':'$\\bf{BABIP}$','format':'.3f',} , 'LOB%':{'table_header':'$\\bf{LOB\%}$','format':'.1%',} , 'xFIP':{'table_header':'$\\bf{xFIP}$','format':'.2f',} , 'FIP':{'table_header':'$\\bf{FIP}$','format':'.2f',} , 'H':{'table_header':'$\\bf{H}$','format':'.0f',} , '2B':{'table_header':'$\\bf{2B}$','format':'.0f',} , '3B':{'table_header':'$\\bf{3B}$','format':'.0f',} , 'R':{'table_header':'$\\bf{R}$','format':'.0f',} , 'ER':{'table_header':'$\\bf{ER}$','format':'.0f',} , 'HR':{'table_header':'$\\bf{HR}$','format':'.0f',} , 
'BB':{'table_header':'$\\bf{BB}$','format':'.0f',} , 'IBB':{'table_header':'$\\bf{IBB}$','format':'.0f',} , 'HBP':{'table_header':'$\\bf{HBP}$','format':'.0f',} , 'SO':{'table_header':'$\\bf{SO}$','format':'.0f',} , 'OBP':{'table_header':'$\\bf{OBP}$','format':'.0f',} , 'SLG':{'table_header':'$\\bf{SLG}$','format':'.0f',} , 'ERA':{'table_header':'$\\bf{ERA}$','format':'.2f',} , 'wOBA':{'table_header':'$\\bf{wOBA}$','format':'.3f',} , 'G':{'table_header':'$\\bf{G}$','format':'.0f',} } ## Fangraphs Table ### FANGRAPHS SPLITS SCRAPE ### split_dict = {'all':[], 'left':['5'], 'right':['6'] } def fangraphs_scrape(pitcher_id=808967, split='all', start_date='2024-03-20', end_date='2024-09-29'): url = "https://www.fangraphs.com/api/leaders/splits/splits-leaders" payload = { "strPlayerId": str(mlb_fg_dicts[pitcher_id]), "strSplitArr": split_dict[split], "strGroup": "season", "strPosition": "P", "strType": "2", "strStartDate": str(pd.to_datetime(start_date).strftime('%Y-%m-%d')), "strEndDate": str(pd.to_datetime(end_date).strftime('%Y-%m-%d')), "strSplitTeams": False, "dctFilters": [], "strStatType": "player", "strAutoPt": False, "arrPlayerId": [], "strSplitArrPitch": [], "arrWxTemperature": None, "arrWxPressure": None, "arrWxAirDensity": None, "arrWxElevation": None, "arrWxWindSpeed": None } json_payload = json.dumps(payload) headers = {'Content-Type': 'application/json'} response = requests.post(url, data=json_payload, headers=headers) data_pull = response.json()['data'][0] payload_advanced = { "strPlayerId": str(mlb_fg_dicts[pitcher_id]), "strSplitArr": split_dict[split], "strGroup": "season", "strPosition": "P", "strType": "1", "strStartDate": str(pd.to_datetime(start_date).strftime('%Y-%m-%d')), "strEndDate": str(pd.to_datetime(end_date).strftime('%Y-%m-%d')), "strSplitTeams": False, "dctFilters": [], "strStatType": "player", "strAutoPt": False, "arrPlayerId": [], "strSplitArrPitch": [], "arrWxTemperature": None, "arrWxPressure": None, "arrWxAirDensity": None, 
"arrWxElevation": None, "arrWxWindSpeed": None } json_payload_advanced = json.dumps(payload_advanced) headers = {'Content-Type': 'application/json'} response_advanced = requests.post(url, data=json_payload_advanced, headers=headers) data_pull_advanced = response_advanced.json()['data'][0] data_pull.update(data_pull_advanced) return data_pull ### FANGRAPHS TABLE PLOT ### def fangraphs_table(data, stats, ax): fg_values = [data[x] if x in data else '---' for x in stats] df_fg = pd.DataFrame(data=dict(zip(stats,fg_values)),index=[0]) df_fg.loc[0] = [format(df_fg[x][0],fangraphs_stats_dict[x]['format']) if df_fg[x][0] != '---' else '---' for x in df_fg] table_fg = ax.table(cellText=df_fg.values, colLabels=df_fg.columns, cellLoc='center', bbox=[0.04, 0.2, 0.92, 0.8]) min_font_size = 20 table_fg.set_fontsize(min_font_size) new_column_names = [fangraphs_stats_dict[x]['table_header'] if x in data else '---' for x in stats] # #new_column_names = ['Pitch Name', 'Pitch%', 'Velocity', 'Spin Rate','Exit Velocity', 'Whiff%', 'CSW%'] for i, col_name in enumerate(new_column_names): table_fg.get_celld()[(0, i)].get_text().set_text(col_name) ax.axis('off') return table_fg ### VELOCITY KDES ### def velocity_kdes(df, ax, gs, gs_list, fig): sorted_value_counts = df['pitch_type'].value_counts().sort_values(ascending=False) # Get the list of items ordered from most to least frequent items_in_order = sorted_value_counts.index.tolist() # Create the inner subplot inside the outer subplot import matplotlib.gridspec as gridspec ax.axis ('off') #ax.set_ylabel('Pitch Velocity Distribution', fontdict=font_properties_axes) ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'calibi', 'size': 20}) inner_grid_1 = gridspec.GridSpecFromSubplotSpec(len(items_in_order),1, subplot_spec=gs[2,gs_list]) ax_top = [] for inner in inner_grid_1: ax_top.append(fig.add_subplot(inner)) ax_number = 0 for i in items_in_order[0:]: if np.unique(df[df['pitch_type']==i]['start_speed']).size == 1: # Check if 
all values are the same print('just') ax_top[ax_number].plot([np.unique(df[df['pitch_type']==i]['start_speed']),np.unique(df[df['pitch_type']==i]['start_speed'])],[0,1], linewidth=4, color=pitch_colours[df[df['pitch_type']==i]['pitch_description'].values[0]],zorder=20) # ax_top[ax_number].plot(np.unique(df_melt[df_melt['Player']==i]['value']), [0.5]*len(np.unique(df_melt[df_melt['Player']==i]['value'])), linewidth=4) else: sns.kdeplot(df[df['pitch_type']==i]['start_speed'],ax=ax_top[ax_number],fill=True, clip=(df[df['pitch_type']==i]['start_speed'].min(),df[df['pitch_type']==i]['start_speed'].max()), color=pitch_colours[df[df['pitch_type']==i]['pitch_description'].values[0]]) ax_top[ax_number].set_xlim(math.floor(df['start_speed'].min()/5)*5,math.ceil(df['start_speed'].max()/5)*5) ax_top[ax_number].set_xlabel('') ax_top[ax_number].set_ylabel('') if ax_number < len(items_in_order)-1: ax_top[ax_number].spines['top'].set_visible(False) ax_top[ax_number].spines['right'].set_visible(False) ax_top[ax_number].spines['left'].set_visible(False) ax_top[ax_number].tick_params(axis='x', colors='none') ax_top[ax_number].set_xticks(range(math.floor(df['start_speed'].min()/5)*5,math.ceil(df['start_speed'].max()/5)*5,5)) ax_top[ax_number].set_yticks([]) ax_top[ax_number].grid(axis='x', linestyle='--') ax_top[ax_number].text(-0.01, 0.5, i, transform=ax_top[ax_number].transAxes, fontsize=14, va='center', ha='right') ax_number = ax_number + 1 ax_top[-1].spines['top'].set_visible(False) ax_top[-1].spines['right'].set_visible(False) ax_top[-1].spines['left'].set_visible(False) ax_top[-1].set_xticks(list(range(math.floor(df['start_speed'].min()/5)*5,math.ceil(df['start_speed'].max()/5)*5,5))) ax_top[-1].set_xlabel('Velocity (mph)') ### TJ STUFF+ ROLLING ### def tj_stuff_roling(df, window, ax): ## Velocity Plot sorted_value_counts = df['pitch_type'].value_counts().sort_values(ascending=False) # Get the list of items ordered from most to least frequent items_in_order = 
sorted_value_counts.index.tolist() for i in items_in_order: if max(df[df['pitch_type']==i]['pitch_type_count_each']) >= window: sns.lineplot(x=range(1,max(df[df['pitch_type']==i]['pitch_type_count_each'])+1), y=df[df['pitch_type']==i]['tj_stuff_plus'].rolling(window).sum()/window, color=pitch_colours[df[df['pitch_type']==i]['pitch_description'].values[0]], ax=ax,linewidth=3) # Adjust x-axis limits to start from 1 ax.set_xlim(window,max(df['pitch_type_count_each'])) ax.set_ylim(70,130) #ax.get_legend().remove() ax.set_xlabel('Pitches', fontdict=font_properties_axes) ax.set_ylabel('tjStuff+', fontdict=font_properties_axes) ax.set_title(f"{window} Pitch Rolling tjStuff+",fontdict=font_properties_titles) # ax.axis('square') # ax.set_xlim(left=1) ax.xaxis.set_major_locator(MaxNLocator(integer=True)) ### BREAK PLOT ### def break_plot(df, ax): label_labels = df.sort_values(by=['prop','pitch_type'],ascending=[False,True]).pitch_description.unique() j = 0 for label in label_labels: subset = df[df['pitch_description'] == label] print(label) if len(subset) > 4: if df['pitcher_hand'].values[0] == 'R': subset['hb'] = subset['hb']*1 if df['pitcher_hand'].values[0] == 'L': subset['hb'] = subset['hb']*1 subset['ivb'] = subset['ivb']*1 try: confidence_ellipse(subset['hb'], subset['ivb'], ax=ax,edgecolor = pitch_colours[label],n_std=2,facecolor= pitch_colours[label],alpha=0.2) except ValueError: return j=j+1 else: j=j+1 if df['pitcher_hand'].values[0] == 'R': sns.scatterplot(ax=ax,x=df.hb*1,y=df.ivb*1,hue=df.pitch_description,palette=pitch_colours,ec='black',alpha=1,zorder=2) if df['pitcher_hand'].values[0] == 'L': sns.scatterplot(ax=ax,x=df.hb*1,y=df.ivb*1,hue=df.pitch_description,palette=pitch_colours,ec='black',alpha=1,zorder=2) ax.set_xlim((-25,25)) ax.set_ylim((-25,25)) ax.hlines(y=0,xmin=-50,xmax=50,color=colour_palette[8],alpha=0.5,linestyles='--',zorder=1) ax.vlines(x=0,ymin=-50,ymax=50,color=colour_palette[8],alpha=0.5,linestyles='--',zorder=1) ax.set_xlabel('Horizontal 
Break (in)', fontdict=font_properties_axes) ax.set_ylabel('Induced Vertical Break (in)', fontdict=font_properties_axes) ax.set_title("Pitch Breaks",fontdict=font_properties_titles) ax.get_legend().remove() # ax1.set_xticklabels(ax1.get_xticks(), fontdict=font_properties) ax.set_xticklabels(ax.get_xticks(), fontdict=font_properties) # ax1.set_yticklabels(ax1.get_yticks(), fontdict=font_properties) ax.set_yticklabels(ax.get_yticks(), fontdict=font_properties) #ax1.set_aspect('equal', adjustable='box') if df['pitcher_hand'].values[0] == 'R': ax.text(-24.5,-24.5,s='← Glove Side',fontstyle='italic',ha='left',va='bottom', bbox=dict(facecolor='white', edgecolor='black'),fontsize=12,zorder=3) ax.text(24.5,-24.5,s='Arm Side →',fontstyle='italic',ha='right',va='bottom', bbox=dict(facecolor='white', edgecolor='black'),fontsize=12,zorder=3) #ax.invert_xaxis() if df['pitcher_hand'].values[0] == 'L': ax.invert_xaxis() ax.text(24.5,-24.5,s='← Arm Side',fontstyle='italic',ha='left',va='bottom', bbox=dict(facecolor='white', edgecolor='black'),fontsize=12,zorder=3) ax.text(-24.5,-24.5,s='Glove Side →',fontstyle='italic',ha='right',va='bottom', bbox=dict(facecolor='white', edgecolor='black'),fontsize=12,zorder=3) ax.set_aspect('equal', adjustable='box') #ax1.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x))) ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x))) ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x))) ### TABLE SUMMARY ### def table_summary(df, pitcher_id, ax, df_group, df_group_all, statcast_pitch_summary): cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) ax.axis('off') df_group['spin_direction_adj'] = [(x + 180) for x in df_group['spin_direction']] #(((df_group.groupby('pitch_description').mean()[['spin_direction_adj']] %360 % 30 / 30 /100 *60).round(2) *10).round(0)//1.5/4 ) clock_time = ((df_group.groupby('pitch_description').mean()['spin_direction_adj']) %360 // 30 )+ 
(((df_group.groupby('pitch_description').mean()['spin_direction_adj'] %360 % 30 / 30 /100 *60).round(2) *10).round(0)//1.5/4 ) # print('Clocks') # print(clock_time) clock_time = (clock_time.astype(int) + clock_time%1*60/100).round(2).astype(str).str.replace('.',':').str.replace(':0',':00').str.replace(':3',':30').str.replace('0:','12:').str.replace('112:','10:').to_frame() df_group = df_group.merge(right=clock_time,left_on='pitch_description',right_index=True,suffixes=['','_clock']) plot_table = df_group[df_group['pitcher_id']==pitcher_id].sort_values( by=['pitches'],ascending=False)[['pitch_description','pitches','start_speed','ivb', 'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release', 'extension','tj_stuff_plus','spin_direction_adj_clock','zone_percent','chase_percent','whiff_rate']] # if df['pitcher_hand'].values[0] == 'L': # plot_table['hb'] = plot_table['hb']*-1 #if df['pitcher_hand'].values[0] == 'R': plot_table['horizontal_release'] = plot_table['horizontal_release']*-1 plot_table['pitch_percent'] = plot_table['pitches'] / plot_table['pitches'].sum() plot_table = plot_table[['pitch_description','pitches','pitch_percent','start_speed','ivb', 'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release', 'extension','spin_direction_adj_clock','tj_stuff_plus','zone_percent','chase_percent','whiff_rate']] plot_table_all = pd.DataFrame(data={'pitch_description': 'All', 'pitches': plot_table['pitches'].sum(), 'pitch_percent': 1.0, 'start_speed': '—', 'ivb': '—', 'hb': '—', 'spin_rate': '—', 'vaa': '—', 'haa': '—', 'vertical_release': '—', 'horizontal_release': '—', 'extension': df['extension'].mean(), 'spin_direction_adj_clock': '—', 'tj_stuff_plus': df[df['pitcher_id']==pitcher_id]['tj_stuff_plus'].mean(), 'zone_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['zone_percent'].values[0], 'chase_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['chase_percent'].values[0], 'whiff_rate': 
df_group_all[df_group_all['pitcher_id']==pitcher_id]['whiff_rate'].values[0], },index=[0] ) plot_table = pd.concat([plot_table,plot_table_all]).fillna('—') plt.rcParams['font.family'] = 'Calibri' table = ax.table(cellText=plot_table.values, colLabels=plot_table.columns, cellLoc='center', colWidths=[2.3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], bbox=[0.04, 0, 0.92, 0.8]) min_font_size = 14 # Set table properties table.auto_set_font_size(False) #table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10))) table.set_fontsize(min_font_size) table.scale(1, 0.5) min_font_size = 18 # Set font size for values # Adjust the font size as needed for i in range(len(plot_table)+1): for j in range(len(plot_table.columns)): if i > 0: # Skip the header row cell = table.get_celld()[i, j] cell.set_fontsize(min_font_size) for i in range(len(plot_table)): if table.get_celld()[(i+1, 0)].get_text().get_text() != 'All': table.get_celld()[(i+1, 0)].set_facecolor(pitch_colours[table.get_celld()[(i+1, 0)].get_text().get_text()]) # Header cell color if table.get_celld()[(i+1, 0)].get_text().get_text() in ['Split-Finger','Slider','Changeup']: table.get_celld()[(i+1, 0)].set_text_props(color='#000000',fontweight='bold') else: table.get_celld()[(i+1, 0)].set_text_props(color='#ffffff',fontweight='bold') if table.get_celld()[(i+1, 0)].get_text().get_text() == 'Four-Seam Fastball': table.get_celld()[(i+1, 0)].get_text().set_text('4-Seam') select_df = statcast_pitch_summary[statcast_pitch_summary['pitch_description'] == plot_table['pitch_description'].values[i]] normalize = mcolors.Normalize(vmin=select_df['start_speed'].mean()-select_df.pitch_velocity_std.mean(), vmax=select_df['start_speed'].mean()+select_df.pitch_velocity_std.mean()) # Define the range of values if table.get_celld()[(i+1, 3)].get_text().get_text() != '—': table.get_celld()[(i+1, 3)].set_facecolor(get_color(float(table.get_celld()[(i+1, 3)].get_text().get_text()),normalize,cmap_sum)) # Header cell color cmap_sum = 
matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) normalize = mcolors.Normalize(vmin=select_df['extension'].mean()*0.9, vmax=select_df['extension'].mean()*1.1) if table.get_celld()[(i+1,11)].get_text().get_text() != '—': table.get_celld()[(i+1,11)].set_facecolor(get_color(float(table.get_celld()[(i+1, 11)].get_text().get_text()),normalize,cmap_sum)) # Header cell color cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) normalize = mcolors.Normalize(vmin=80, vmax=120) print(normalize) if table.get_celld()[(i+1,13)].get_text().get_text() != '—': table.get_celld()[(i+1,13)].set_facecolor(get_color(float(table.get_celld()[(i+1, 13)].get_text().get_text()),normalize,cmap_sum)) # Header cell color cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) normalize = mcolors.Normalize(vmin=select_df['zone_percent'].mean()*0.7, vmax=select_df['zone_percent'].mean()*1.3) if table.get_celld()[(i+1,14)].get_text().get_text() != '—': table.get_celld()[(i+1,14)].set_facecolor(get_color(float(table.get_celld()[(i+1, 14)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) normalize = mcolors.Normalize(vmin=select_df['chase_percent'].mean()*0.7, vmax=select_df['chase_percent'].mean()*1.3) if table.get_celld()[(i+1,15)].get_text().get_text() != '—': table.get_celld()[(i+1,15)].set_facecolor(get_color(float(table.get_celld()[(i+1, 15)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) normalize = mcolors.Normalize(vmin=select_df['whiff_rate'].mean()*0.7, vmax=select_df['whiff_rate'].mean()*1.3) if table.get_celld()[(i+1,16)].get_text().get_text() != '—': 
table.get_celld()[(i+1,16)].set_facecolor(get_color(float(table.get_celld()[(i+1, 16)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color table.get_celld()[(len(plot_table), 0)].set_text_props(color='#000000',fontweight='bold') new_column_names = ['$\\bf{Pitch\ Name}$', '$\\bf{Count}$', '$\\bf{Pitch\%}$', '$\\bf{Velocity}$', '$\\bf{iVB}$', '$\\bf{HB}$', '$\\bf{Spin}$', '$\\bf{VAA}$', '$\\bf{HAA}$', '$\\bf{vRel}$', '$\\bf{hRel}$', '$\\bf{Ext.}$', '$\\bf{Axis}$', '$\\bf{tjStuff+}$', '$\\bf{Zone\%}$', '$\\bf{Chase\%}$', '$\\bf{Whiff\%}$', ] for i, col_name in enumerate(new_column_names): table.get_celld()[(0, i)].get_text().set_text(col_name) float_list = ['start_speed','ivb', 'hb', 'vaa', 'haa', 'vertical_release','horizontal_release', 'extension'] for fl in float_list: # Subset of column names subset_columns = [fl] # Get the list of column indices column_indices = [plot_table.columns.get_loc(col) for col in subset_columns] # # print(column_indices) for row_l in range(1,len(plot_table)+1): # print(row_l) if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—': # print() # print(fl) table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.1f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%')))) percent_list = ['pitch_percent','zone_percent','chase_percent','whiff_rate'] for fl in percent_list: # Subset of column names subset_columns = [fl] # Get the list of column indices column_indices = [plot_table.columns.get_loc(col) for col in subset_columns] # # print(column_indices) for row_l in range(1,len(plot_table)+1): # print(row_l) if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—': # print(fl) table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.1%}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%')))) int_list = ['tj_stuff_plus','spin_rate'] for fl in int_list: # Subset of column names 
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator`` element-wise, NaN where the denominator is 0."""
    # ``where`` blanks the zero denominators to NaN, so the division itself
    # never hits a divide-by-zero and the result is NaN for those rows.
    return numerator / denominator.where(denominator != 0)


def group_ivb_update(df, agg_list=None):
    """Aggregate pitch-level rows into one summary row per group.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level data. The columns read here are ``start_speed``, ``ivb``,
        ``hb``, ``spin_rate``, ``vaa``, ``haa``, ``x0``, ``z0``, ``extension``,
        ``spin_direction``, ``tj_stuff_plus``, ``swings``, ``in_zone``,
        ``out_zone``, ``whiffs``, ``zone_swing``, ``zone_contact``,
        ``ozone_swing`` and ``ozone_contact``, plus every column named in
        ``agg_list``.
    agg_list : list of str, optional
        Columns to group by. Defaults to ``['pitcher_id', 'pitcher_name',
        'pitcher_hand', 'pitch_type', 'pitch_description']``.

    Returns
    -------
    pandas.DataFrame
        One row per group (index reset) holding the pitch count (``pitches``,
        the count of non-null ``start_speed``), the mean of each metric
        column, the sum of each counting column and the derived rate columns
        ``zone_contact_percent``, ``zone_swing_percent``, ``zone_percent``,
        ``chase_percent``, ``chase_contact``, ``swing_percent``,
        ``whiff_rate`` and ``swstr_rate``. A rate is NaN whenever its
        denominator is 0.
    """
    # Build the default inside the call so no list object is shared between calls.
    if agg_list is None:
        agg_list = ['pitcher_id', 'pitcher_name', 'pitcher_hand', 'pitch_type', 'pitch_description']

    grouped_ivb = df.groupby(agg_list).agg(
        pitches=('start_speed', 'count'),
        start_speed=('start_speed', 'mean'),
        ivb=('ivb', 'mean'),
        hb=('hb', 'mean'),
        spin_rate=('spin_rate', 'mean'),
        vaa=('vaa', 'mean'),
        haa=('haa', 'mean'),
        horizontal_release=('x0', 'mean'),
        vertical_release=('z0', 'mean'),
        extension=('extension', 'mean'),
        spin_direction=('spin_direction', 'mean'),
        tj_stuff_plus=('tj_stuff_plus', 'mean'),
        swings=('swings', 'sum'),
        in_zone=('in_zone', 'sum'),
        out_zone=('out_zone', 'sum'),
        whiffs=('whiffs', 'sum'),
        zone_swing=('zone_swing', 'sum'),
        zone_contact=('zone_contact', 'sum'),
        ozone_swing=('ozone_swing', 'sum'),
        ozone_contact=('ozone_contact', 'sum'),
    ).reset_index()

    pitches = grouped_ivb['pitches']
    swings = grouped_ivb['swings']
    in_zone = grouped_ivb['in_zone']
    # NOTE(review): the out-of-zone denominator is derived as pitches - in_zone
    # rather than taken from the aggregated ``out_zone`` column; kept as-is.
    not_in_zone = pitches - in_zone

    grouped_ivb['zone_contact_percent'] = _safe_ratio(grouped_ivb['zone_contact'], grouped_ivb['zone_swing'])
    # Guard on in_zone (the actual denominator); the previous guard tested
    # ``pitches`` and let 0/0 through for groups with no in-zone pitches.
    grouped_ivb['zone_swing_percent'] = _safe_ratio(grouped_ivb['zone_swing'], in_zone)
    grouped_ivb['zone_percent'] = _safe_ratio(in_zone, pitches)
    grouped_ivb['chase_percent'] = _safe_ratio(grouped_ivb['ozone_swing'], not_in_zone)
    grouped_ivb['chase_contact'] = _safe_ratio(grouped_ivb['ozone_contact'], grouped_ivb['ozone_swing'])
    grouped_ivb['swing_percent'] = _safe_ratio(swings, pitches)
    grouped_ivb['whiff_rate'] = _safe_ratio(grouped_ivb['whiffs'], swings)
    grouped_ivb['swstr_rate'] = _safe_ratio(grouped_ivb['whiffs'], pitches)

    return grouped_ivb
def location_plot(df,ax,hand):
    """Draw the pitch-location panel for one batter handedness on ``ax``.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level data. Columns read here: ``prop``, ``pitch_type``,
        ``pitch_description``, ``batter_hand``, ``px``, ``pz`` and
        ``start_speed``.
    ax : matplotlib.axes.Axes
        Axes to draw on; it is modified in place.
    hand : str
        Value matched against ``df['batter_hand']``; it is also interpolated
        into the title as ``"{hand}HB"``.

    Returns
    -------
    None

    Notes
    -----
    Relies on the module-level names ``confidence_ellipse``, ``draw_line``,
    ``pitch_colours``, ``sns`` and ``font_properties_titles``.
    """
    # Labels come from the full frame (all batter hands), ordered by ``prop``
    # descending with ``pitch_type`` as the tie-breaker.
    label_labels = df.sort_values(by=['prop','pitch_type'],ascending=[False,True]).pitch_description.unique()

    # The batter-hand filter does not depend on the label, so apply it once.
    hand_df = df[df['batter_hand'] == hand]

    for label in label_labels:
        subset = hand_df[hand_df['pitch_description'] == label]
        print(label)
        # An ellipse is only drawn for pitch types with at least 5 pitches.
        if len(subset) >= 5:
            confidence_ellipse(subset['px'], subset['pz'], ax=ax,
                               edgecolor=pitch_colours[label], n_std=1.5,
                               facecolor=pitch_colours[label], alpha=0.3)

    # Mean location and pitch count per pitch type against this batter hand.
    pitch_location_group = hand_df.groupby(['pitch_description']).agg(
        pitches=('start_speed','count'),
        px=('px','mean'),
        pz=('pz','mean')).reset_index()

    pitch_location_group['pitch_percent'] = pitch_location_group['pitches']/pitch_location_group['pitches'].sum()

    ## Location Plot
    # Marker area scales with the pitch type's share of pitches vs this hand.
    sns.scatterplot(ax=ax,
                    x=pitch_location_group['px'],
                    y=pitch_location_group['pz'],
                    hue=pitch_location_group['pitch_description'],
                    palette=pitch_colours, ec='black',
                    s=pitch_location_group['pitch_percent']*750,
                    linewidth=2,
                    zorder=2)

    ax.axis('square')
    # presumably draws the strike-zone outline; confirm against draw_line
    draw_line(ax,alpha_spot=0.75,catcher_p=False)
    ax.axis('off')
    ax.set_xlim((-2.75,2.75))
    ax.set_ylim((-0.5,5))

    # Only touch the legend when something was plotted, and tolerate the case
    # where no legend object was created.
    if len(pitch_location_group['px'])>0:
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

    ax.grid(False)
    ax.set_title(f"Pitch Locations vs {hand}HB\n{pitch_location_group['pitches'].sum()} Pitches",fontdict=font_properties_titles)