Spaces:
Running
Running
import pandas as pd | |
import numpy as np | |
import json | |
from matplotlib.ticker import FuncFormatter | |
from matplotlib.ticker import MaxNLocator | |
import math | |
from matplotlib.patches import Ellipse | |
import matplotlib.transforms as transforms | |
import matplotlib.colors | |
import matplotlib.colors as mcolors | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
import requests | |
# Font dictionaries passed as ``fontdict`` to matplotlib text calls.
# NOTE(review): 'calibi' looks like a typo for 'calibri' -- confirm before
# changing, since it alters which font matplotlib resolves.
font_properties = dict(family='calibi', size=12)
font_properties_titles = dict(family='calibi', size=20)
font_properties_axes = dict(family='calibi', size=16)

# General-purpose colour cycle used by the plots in this file.
colour_palette = [
    '#FFB000', '#648FFF', '#785EF0', '#DC267F', '#FE6100',
    '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED',
]

# Season window and the season number sent to Fangraphs.
season_start = '2024-03-20'
season_end = '2024-09-29'
season_fg = 2024

# Fangraphs pitching leaderboard for the season; only the id/name fields are
# used below.
# NOTE(review): this runs at import time with no timeout or status check.
chad_fg = requests.get(
    f'https://www.fangraphs.com/api/leaders/major-league/data?age=&pos=all&stats=pit&lg=all&qual=0&season={season_fg}&season={season_fg}&month=1000&season1={season_fg}&ind=0&pageitems=2000000000&pagenum=1&ind=0&rost=0&players=&type=36&postseason=&sortdir=default&sortstat=sp_pitching'
).json()

# Blue -> white -> gold diverging colormap.
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ['#648FFF', '#FFFFFF', '#FFB000'])

# One row per leaderboard entry: MLBAM id, Fangraphs id and display name.
chadwick_df_small = pd.DataFrame(data={
    'key_mlbam': [row['xMLBAMID'] for row in chad_fg['data']],
    'key_fangraphs': [row['playerid'] for row in chad_fg['data']],
    'Name': [row['PlayerName'] for row in chad_fg['data']],
})

# Lookups keyed by MLBAM id: id -> name, and id -> Fangraphs id.
_by_mlbam = chadwick_df_small.set_index('key_mlbam')
pitcher_dicts = _by_mlbam['Name'].sort_values().to_dict()
mlb_fg_dicts = _by_mlbam['key_fangraphs'].sort_values().to_dict()
### DF UPDATE CODE ### | |
def df_update_code(df):
    """Add approach-angle, plate-appearance and per-pitch outcome columns.

    The frame must carry the columns read below: release kinematics
    (``vx0``/``vy0``/``vz0``, ``ax``/``ay``/``az``, ``y0``), ``event_type``,
    ``play_id``, ``start_speed``, ``zone``, ``play_description``,
    ``play_code``, ``launch_speed`` and ``launch_angle``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per pitch. It is copied first, so the caller's frame is not
        modified.

    Returns
    -------
    pandas.DataFrame
        Rows de-duplicated on ``play_id``, with a non-null ``start_speed`` and
        a ``play_description`` in the recognised pitch-result list, extended
        with the derived columns computed below.
    """
    print('Starting')

    # Work on a copy: the column assignments below must not leak into the
    # caller's DataFrame.
    df = df.copy()

    # Constant-acceleration kinematics evaluated at y = 17/12
    # (presumably the front of home plate in feet -- confirm).
    df['vy_f'] = -(df['vy0']**2 - (2 * df['ay'] * (df['y0'] - 17/12)))**0.5
    df['t'] = (df['vy_f'] - df['vy0']) / df['ay']
    df['vz_f'] = df['vz0'] + df['az'] * df['t']
    df['vaa'] = -np.arctan(df['vz_f'] / df['vy_f']) * (180 / np.pi)
    df['vx_f'] = df['vx0'] + df['ax'] * df['t']
    df['haa'] = -np.arctan(df['vx_f'] / df['vy_f']) * (180 / np.pi)

    # Event types that end a plate appearance.
    end_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                 'double', 'sac_fly', 'force_out', 'home_run',
                 'grounded_into_double_play', 'fielders_choice', 'field_error',
                 'triple', 'sac_bunt', 'double_play', 'intent_walk',
                 'fielders_choice_out', 'strikeout_double_play',
                 'sac_fly_double_play', 'catcher_interf', 'other_out']
    df['pa'] = df['event_type'].isin(end_codes)

    # Any event type whose name contains 'strikeout' / 'walk' counts.
    seen_events = df['event_type'].dropna().unique()
    df['k'] = df['event_type'].isin([e for e in seen_events if 'strikeout' in e])
    df['bb'] = df['event_type'].isin([e for e in seen_events if 'walk' in e])
    df['k_minus_bb'] = df['k'].astype(np.float32) - df['bb'].astype(np.float32)

    df = df.drop_duplicates(subset=['play_id'])
    df = df.dropna(subset=['start_speed'])

    # Pitch results that involve a swing.
    swings_in = ['Swinging Strike', 'In play, no out',
                 'Foul', 'In play, out(s)',
                 'In play, run(s)', 'Swinging Strike (Blocked)',
                 'Foul Bunt', 'Foul Tip', 'Missed Bunt', 'Foul Pitchout',
                 'Swinging Pitchout']
    # Pitch results kept in the output.
    codes_in = ['In play, out(s)', 'Swinging Strike', 'Ball', 'Foul',
                'In play, no out', 'Called Strike', 'Foul Tip',
                'In play, run(s)', 'Hit By Pitch', 'Ball In Dirt', 'Pitchout',
                'Swinging Strike (Blocked)', 'Foul Bunt', 'Missed Bunt',
                'Foul Pitchout', 'Intent Ball', 'Swinging Pitchout']

    df['in_zone'] = df['zone'] < 10
    df_codes = df[df['play_description'].isin(codes_in)].dropna(subset=['in_zone'])

    launch_speed = df_codes['launch_speed']
    launch_angle = df_codes['launch_angle']
    event_type = df_codes['event_type']

    df_codes['bip'] = ~launch_speed.isna()

    # The three flags below are 0.0 when the measurement is missing, 1.0 when
    # the condition holds, and NaN for any other row.
    is_barrel = ((launch_speed * 1.5 - launch_angle >= 117)
                 & (launch_speed + launch_angle >= 124)
                 & (launch_speed > 98)
                 & (launch_angle >= 8)
                 & (launch_angle <= 50))
    df_codes['barrel'] = np.select([launch_speed.isna(), is_barrel],
                                   [False, True], default=np.nan)
    df_codes['sweet_spot'] = np.select(
        [launch_angle.isna(), (launch_angle >= 8) & (launch_angle <= 32)],
        [False, True], default=np.nan)
    df_codes['hard_hit'] = np.select(
        [launch_speed.isna(), launch_speed >= 94.5],
        [False, True], default=np.nan)

    # Total bases per hit type; NaN for every other event.
    total_bases = {'single': 1, 'double': 2, 'triple': 3, 'home_run': 4}
    df_codes['tb'] = np.select([event_type == ev for ev in total_bases],
                               list(total_bases.values()), default=np.nan)

    # Hard-coded wOBA weights per event; NaN for every other event.
    woba_weights = {'walk': 0.705, 'hit_by_pitch': 0.688, 'single': 0.897,
                    'double': 1.233, 'triple': 1.612, 'home_run': 2.013}
    df_codes['woba'] = np.select([event_type == ev for ev in woba_weights],
                                 list(woba_weights.values()), default=np.nan)

    # 1 for events in this list, NaN otherwise.
    woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                  'double', 'sac_fly', 'force_out', 'home_run',
                  'grounded_into_double_play', 'fielders_choice', 'field_error',
                  'triple', 'sac_bunt', 'double_play',
                  'fielders_choice_out', 'strikeout_double_play',
                  'sac_fly_double_play', 'other_out']
    df_codes['woba_codes'] = np.select([event_type.isin(woba_codes)], [1],
                                       default=np.nan)

    # Per-pitch counters (1/0) and zone/swing/contact flags.
    df_codes['pitches'] = 1
    df_codes['whiffs'] = df_codes['play_code'].isin(['S', 'W', 'T']).astype(int)
    df_codes['csw'] = df_codes['play_code'].isin(['S', 'W', 'T', 'C']).astype(int)
    df_codes['swings'] = df_codes['play_description'].isin(swings_in).astype(int)

    in_zone = df_codes['in_zone']
    swung = df_codes['swings'] == 1
    made_contact = swung & (df_codes['whiffs'] == 0)
    df_codes['out_zone'] = ~in_zone
    df_codes['zone_swing'] = in_zone & swung
    df_codes['zone_contact'] = in_zone & made_contact
    df_codes['ozone_swing'] = ~in_zone & swung
    df_codes['ozone_contact'] = ~in_zone & made_contact
    return df_codes
### GET COLOURS## | |
def get_color(value, normalize, cmap_sum):
    """Return the hex colour string for ``value``.

    ``normalize`` is called on ``value`` first and its result is passed to the
    colormap callable ``cmap_sum``; the resulting colour is converted with
    ``matplotlib.colors.to_hex``.
    """
    return mcolors.to_hex(cmap_sum(normalize(value)))
### PERCENTILE ### | |
def percentile(n):
    """Build an aggregation callable returning the ``n`` quantile of its input.

    ``n`` is a fraction (0.9 gives the 90th percentile). The returned function
    calls ``.quantile(n)`` on whatever it receives and is named
    ``percentile_XX`` so that it is identifiable when used as an aggregator.
    """
    def quantile_of(values):
        return values.quantile(n)

    quantile_of.__name__ = f'percentile_{n * 100:02.0f}'
    return quantile_of
### TJ STUFF+ DF CLEAN ### | |
def df_clean(df):
    """Prepare pitch-level data with handedness-normalised, per-pitcher features.

    Steps, in order: keep rows with positive ``spin_rate`` and ``extension``;
    mirror ``hb``, ``x0`` and ``spin_direction`` for left-handed pitchers; add
    0/1 handedness flags; merge pitch-type aliases; attach each pitcher's
    fastball (FF/FC/SI) baselines and the per-pitch differences from them;
    attach per-pitcher maxima of speed and ``ivb``; compute approach angles;
    drop rows without a ``pitch_type``.

    The input frame is copied and never modified. Returns the new frame.
    """
    cleaned = df.copy()
    cleaned = cleaned[(cleaned['spin_rate'] > 0) & (cleaned['extension'] > 0)]

    # Mirror left-handers so movement and release side share one orientation.
    lefty = cleaned['pitcher_hand'] == 'L'
    cleaned.loc[lefty, 'hb'] *= -1
    cleaned.loc[lefty, 'x0'] *= -1
    cleaned.loc[lefty, 'spin_direction'] = 360 - cleaned.loc[lefty, 'spin_direction']

    cleaned['pitch_l'] = [int(hand == 'L') for hand in cleaned['pitcher_hand']]
    cleaned['bat_l'] = [int(hand == 'L') for hand in cleaned['batter_hand']]

    # Collapse alias pitch codes onto the ones used elsewhere.
    pitch_aliases = {'FT': 'SI', 'FA': 'FF', 'SV': 'SL', 'FO': 'FS'}
    cleaned['pitch_type'] = cleaned['pitch_type'].replace(pitch_aliases)

    # Per-pitcher fastball baselines, computed from FF/FC/SI only.
    fastballs = cleaned[cleaned.pitch_type.isin(["FF", "FC", "SI"])]
    fb_baseline = fastballs.groupby(['pitcher_id']).agg(
        fb_velo=('start_speed', 'mean'),
        fb_max_ivb=('ivb', percentile(0.9)),
        fb_max_x=('hb', percentile(0.9)),
        fb_min_x=('hb', percentile(0.1)),
        fb_max_velo=('start_speed', percentile(0.9)),
        fb_axis=('spin_direction', 'mean'),
    )
    cleaned = cleaned.merge(fb_baseline, left_on='pitcher_id',
                            right_index=True, how='left')

    # Differences of every pitch from its pitcher's fastball baseline.
    cleaned['fb_velo_diff'] = cleaned['start_speed'] - cleaned['fb_velo']
    cleaned['fb_max_ivb_diff'] = cleaned['ivb'] - cleaned['fb_max_ivb']
    cleaned['fb_max_hb_diff'] = -abs(cleaned['hb'] - cleaned['fb_max_x'])
    cleaned['fb_min_hb_diff'] = cleaned['hb'] - cleaned['fb_min_x']
    cleaned['fb_max_velo_diff'] = cleaned['start_speed'] - cleaned['fb_max_velo']
    cleaned['fb_axis_diff'] = cleaned['spin_direction'] - cleaned['fb_axis']

    # Per-pitcher maxima over all pitch types.
    peaks = cleaned.groupby(['pitcher_id'])[['start_speed', 'ivb']].transform('max')
    cleaned['max_speed'] = peaks['start_speed']
    cleaned['max_speed_diff'] = cleaned['start_speed'] - cleaned['max_speed']
    cleaned['max_ivb'] = peaks['ivb']
    cleaned['max_ivb_diff'] = cleaned['ivb'] - cleaned['max_ivb']

    # Constant-acceleration kinematics evaluated at y = 17/12
    # (presumably the front of home plate in feet -- confirm).
    rad_to_deg = 180 / np.pi
    cleaned['vy_f'] = -(cleaned['vy0']**2 - (2 * cleaned['ay'] * (cleaned['y0'] - 17/12)))**0.5
    cleaned['t'] = (cleaned['vy_f'] - cleaned['vy0']) / cleaned['ay']
    cleaned['vz_f'] = cleaned['vz0'] + cleaned['az'] * cleaned['t']
    cleaned['vaa'] = -np.arctan(cleaned['vz_f'] / cleaned['vy_f']) * rad_to_deg
    cleaned['vx_f'] = cleaned['vx0'] + cleaned['ax'] * cleaned['t']
    cleaned['haa'] = -np.arctan(cleaned['vx_f'] / cleaned['vy_f']) * rad_to_deg

    return cleaned.dropna(subset=['pitch_type'])
### PITCH COLOURS ### | |
# Hex colour for each pitch description; the plots and tables in this file
# index it by ``pitch_description``. Trailing hex codes in the comments are
# alternative values carried over unchanged.
pitch_colours = {
    'Four-Seam Fastball': '#FF007D',  # BC136F
    'Fastball': '#FF007D',
    'Sinker': '#98165D',  # DC267F
    'Cutter': '#BE5FA0',
    'Changeup': '#F79E70',  # F75233
    'Splitter': '#FE6100',  # F75233
    'Screwball': '#F08223',
    'Forkball': '#FFB000',
    'Slider': '#67E18D',  # 1BB999 #785EF0
    'Sweeper': '#1BB999',  # 37CD85 #904039
    'Slurve': '#376748',  # 785EF0 #549C07 #BEABD8
    'Knuckle Curve': '#311D8B',
    'Curveball': '#3025CE',
    'Slow Curve': '#274BFC',
    'Eephus': '#648FFF',
    'Knuckle Ball': '#867A08',
    'Pitch Out': '#472C30',
    'Other': '#9C8975',
}
### PITCH ELLIPSE ### | |
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Create a plot of the covariance confidence ellipse of *x* and *y*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data. Lists, arrays and pandas Series are all accepted.
    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.
    n_std : float
        The number of standard deviations to determine the ellipse's radiuses.
    facecolor : colour
        Fill colour of the ellipse.
    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`

    Returns
    -------
    matplotlib.patches.Ellipse or None
        The patch added to *ax*. ``None`` when no ellipse can be drawn: fewer
        than two points, zero or non-finite variance in either variable, or a
        ``ValueError`` while building the patch.

    Raises
    ------
    ValueError
        If *x* and *y* differ in size.
    """
    # Honour the documented "array-like" contract; plain lists have no .size.
    x = np.asarray(x)
    y = np.asarray(y)
    if x.size != y.size:
        raise ValueError("x and y must be the same size")

    # A covariance needs at least two observations.
    if x.size < 2:
        return None

    try:
        cov = np.cov(x, y)
        var_x = cov[0, 0]
        var_y = cov[1, 1]
        # Constant or NaN data would give an ellipse with NaN geometry.
        if not np.all(np.isfinite(cov)) or var_x <= 0 or var_y <= 0:
            return None

        # Rounding can push |pearson| marginally past 1, which would make one
        # of the square roots below NaN.
        pearson = float(np.clip(cov[0, 1] / np.sqrt(var_x * var_y), -1.0, 1.0))

        # Using a special case to obtain the eigenvalues of this
        # two-dimensional dataset.
        ell_radius_x = np.sqrt(1 + pearson)
        ell_radius_y = np.sqrt(1 - pearson)
        ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                          facecolor=facecolor, linewidth=2, linestyle='--', **kwargs)

        # Scale the unit ellipse by n_std standard deviations per axis and
        # centre it on the data mean.
        scale_x = np.sqrt(var_x) * n_std
        scale_y = np.sqrt(var_y) * n_std
        transf = (transforms.Affine2D()
                  .rotate_deg(45)
                  .scale(scale_x, scale_y)
                  .translate(np.mean(x), np.mean(y)))
        ellipse.set_transform(transf + ax.transData)
    except ValueError:
        # Best effort: skip the ellipse rather than abort the caller's plot.
        return None
    return ax.add_patch(ellipse)
# DEFINE STRIKE ZONE | |
# Closed outline of the strike-zone rectangle: four corners plus the first
# corner repeated, so that plotting the columns draws the whole box.
strike_zone = pd.DataFrame(
    [(-0.9, 1.5), (-0.9, 3.5), (0.9, 3.5), (0.9, 1.5), (-0.9, 1.5)],
    columns=['PlateLocSide', 'PlateLocHeight'],
)
### STRIKE ZONE ### | |
def draw_line(axis, alpha_spot=1, catcher_p=True):
    """Draw the strike-zone box and a home-plate outline on ``axis``.

    Parameters
    ----------
    axis : matplotlib axes
        Target of every ``plot`` call.
    alpha_spot : float
        Alpha applied to every line drawn.
    catcher_p : bool
        Chooses which of the two plate outlines is drawn (presumably the
        catcher's view when true -- confirm with the callers).
    """
    axis.plot(strike_zone['PlateLocSide'], strike_zone['PlateLocHeight'],
              color='black', linewidth=1.3, zorder=3, alpha=alpha_spot)

    # Each entry is one straight segment: (x endpoints, y endpoints).
    if catcher_p:
        plate_segments = [
            ([-0.708, 0.708], [0.15, 0.15]),
            ([-0.708, -0.708], [0.15, 0.3]),
            ([-0.708, 0], [0.3, 0.5]),
            ([0, 0.708], [0.5, 0.3]),
            ([0.708, 0.708], [0.3, 0.15]),
        ]
    else:
        plate_segments = [
            ([-0.708, 0.708], [0.4, 0.4]),
            ([-0.708, -0.9], [0.4, -0.1]),
            ([-0.9, 0], [-0.1, -0.35]),
            ([0, 0.9], [-.35, -0.1]),
            ([0.9, 0.708], [-0.1, 0.4]),
        ]

    for xs, ys in plate_segments:
        axis.plot(xs, ys, color='black', linewidth=1, alpha=alpha_spot, zorder=1)
### FANGRAPHS STATS DICT ### | |
# Per-stat display settings for the Fangraphs table: the bold mathtext header
# and the ``format()`` spec applied to the value.
# Raw strings keep the backslashes in ``\/`` and ``\%`` without relying on
# invalid escape sequences. OBP and SLG use three decimals like the other
# rate stats; with '.0f' a .315 OBP rendered as "0".
fangraphs_stats_dict = {
    'IP': {'table_header': r'$\bf{IP}$', 'format': '.1f'},
    'TBF': {'table_header': r'$\bf{PA}$', 'format': '.0f'},
    'AVG': {'table_header': r'$\bf{AVG}$', 'format': '.3f'},
    'K/9': {'table_header': r'$\bf{K\/9}$', 'format': '.2f'},
    'BB/9': {'table_header': r'$\bf{BB\/9}$', 'format': '.2f'},
    'K/BB': {'table_header': r'$\bf{K\/BB}$', 'format': '.2f'},
    'HR/9': {'table_header': r'$\bf{HR\/9}$', 'format': '.2f'},
    'K%': {'table_header': r'$\bf{K\%}$', 'format': '.1%'},
    'BB%': {'table_header': r'$\bf{BB\%}$', 'format': '.1%'},
    'K-BB%': {'table_header': r'$\bf{K-BB\%}$', 'format': '.1%'},
    'WHIP': {'table_header': r'$\bf{WHIP}$', 'format': '.2f'},
    'BABIP': {'table_header': r'$\bf{BABIP}$', 'format': '.3f'},
    'LOB%': {'table_header': r'$\bf{LOB\%}$', 'format': '.1%'},
    'xFIP': {'table_header': r'$\bf{xFIP}$', 'format': '.2f'},
    'FIP': {'table_header': r'$\bf{FIP}$', 'format': '.2f'},
    'H': {'table_header': r'$\bf{H}$', 'format': '.0f'},
    '2B': {'table_header': r'$\bf{2B}$', 'format': '.0f'},
    '3B': {'table_header': r'$\bf{3B}$', 'format': '.0f'},
    'R': {'table_header': r'$\bf{R}$', 'format': '.0f'},
    'ER': {'table_header': r'$\bf{ER}$', 'format': '.0f'},
    'HR': {'table_header': r'$\bf{HR}$', 'format': '.0f'},
    'BB': {'table_header': r'$\bf{BB}$', 'format': '.0f'},
    'IBB': {'table_header': r'$\bf{IBB}$', 'format': '.0f'},
    'HBP': {'table_header': r'$\bf{HBP}$', 'format': '.0f'},
    'SO': {'table_header': r'$\bf{SO}$', 'format': '.0f'},
    'OBP': {'table_header': r'$\bf{OBP}$', 'format': '.3f'},
    'SLG': {'table_header': r'$\bf{SLG}$', 'format': '.3f'},
    'ERA': {'table_header': r'$\bf{ERA}$', 'format': '.2f'},
    'wOBA': {'table_header': r'$\bf{wOBA}$', 'format': '.3f'},
    'G': {'table_header': r'$\bf{G}$', 'format': '.0f'},
}
## Fangraphs Table | |
### FANGRAPHS SPLITS SCRAPE ### | |
# Split name -> list of split codes sent to Fangraphs as ``strSplitArr``.
split_dict = dict(
    all=[],
    left=['5'],
    right=['6'],
)
def fangraphs_scrape(pitcher_id=808967,
                     split='all',
                     start_date='2024-03-20',
                     end_date='2024-09-29',
                     timeout=30):
    """Fetch one pitcher's Fangraphs splits row for a date range.

    Two requests go to the splits-leaders endpoint, ``strType`` "2" and then
    "1" (the latter is the "advanced" table, judging by the original variable
    names -- confirm). The first row of each response is taken and the second
    is merged over the first.

    Parameters
    ----------
    pitcher_id : int
        MLBAM id; translated to a Fangraphs id through ``mlb_fg_dicts``.
    split : str
        Key of ``split_dict`` ('all', 'left' or 'right').
    start_date, end_date : str or datetime-like
        Anything ``pandas.to_datetime`` accepts; sent as ``YYYY-MM-DD``.
    timeout : float
        Seconds allowed per request, so a stalled connection cannot hang the
        caller indefinitely.

    Returns
    -------
    dict
        Stat name -> value for the pitcher.

    Raises
    ------
    KeyError
        Unknown ``pitcher_id`` or ``split``.
    requests.HTTPError
        The endpoint answered with an error status.
    IndexError
        The response contained no rows.
    """
    url = "https://www.fangraphs.com/api/leaders/splits/splits-leaders"
    headers = {'Content-Type': 'application/json'}
    player_id = str(mlb_fg_dicts[pitcher_id])
    split_codes = split_dict[split]
    start = pd.to_datetime(start_date).strftime('%Y-%m-%d')
    end = pd.to_datetime(end_date).strftime('%Y-%m-%d')

    def _first_row(str_type):
        # The two requests differ only in strType.
        payload = {
            "strPlayerId": player_id,
            "strSplitArr": split_codes,
            "strGroup": "season",
            "strPosition": "P",
            "strType": str_type,
            "strStartDate": start,
            "strEndDate": end,
            "strSplitTeams": False,
            "dctFilters": [],
            "strStatType": "player",
            "strAutoPt": False,
            "arrPlayerId": [],
            "strSplitArrPitch": [],
            "arrWxTemperature": None,
            "arrWxPressure": None,
            "arrWxAirDensity": None,
            "arrWxElevation": None,
            "arrWxWindSpeed": None
        }
        response = requests.post(url, data=json.dumps(payload),
                                 headers=headers, timeout=timeout)
        # Surface HTTP failures here instead of as a confusing JSON/key error.
        response.raise_for_status()
        return response.json()['data'][0]

    data_pull = _first_row("2")
    data_pull.update(_first_row("1"))
    return data_pull
### FANGRAPHS TABLE PLOT ### | |
def fangraphs_table(data,
                    stats,
                    ax):
    """Render a one-row table of Fangraphs stats on ``ax``.

    Parameters
    ----------
    data : dict
        Stat name -> value, as returned by ``fangraphs_scrape``.
    stats : sequence of str
        Stats to show, in column order. Every stat present in ``data`` must
        also be a key of ``fangraphs_stats_dict``.
    ax : matplotlib axes
        Axes that receives the table; its frame is switched off.

    Returns
    -------
    matplotlib.table.Table

    Notes
    -----
    A stat absent from ``data`` gets '---' as both header and value. A stat
    present with a ``None`` value shows '---' as its value.
    """
    missing = '---'
    headers = []
    cells = []
    for stat in stats:
        if stat not in data:
            headers.append(missing)
            cells.append(missing)
            continue
        spec = fangraphs_stats_dict[stat]
        headers.append(spec['table_header'])
        value = data[stat]
        # Format straight to text; nothing is written back into a numeric
        # DataFrame column, and null values no longer crash format().
        if value is None or value == missing:
            cells.append(missing)
        else:
            cells.append(format(value, spec['format']))

    table_fg = ax.table(cellText=[cells], colLabels=headers, cellLoc='center',
                        bbox=[0.04, 0.2, 0.92, 0.8])
    min_font_size = 20
    table_fg.set_fontsize(min_font_size)
    ax.axis('off')
    return table_fg
### VELOCITY KDES ### | |
def velocity_kdes(df,
                  ax,
                  gs,
                  gs_list,
                  fig):
    """Draw one velocity-distribution strip per pitch type, stacked vertically.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``pitch_type``, ``start_speed`` and ``pitch_description``.
    ax : matplotlib axes
        Outer axes; only its title is used, its frame is switched off.
    gs : gridspec
        Parent grid; the strips are placed inside ``gs[2, gs_list]``.
    gs_list : index or slice
        Column selector into row 2 of ``gs``.
    fig : matplotlib figure
        Figure the strip axes are added to.
    """
    import matplotlib.gridspec as gridspec

    # Pitch types, most frequent first.
    pitch_types = df['pitch_type'].value_counts().sort_values(ascending=False).index.tolist()

    ax.axis('off')
    ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'calibi', 'size': 20})

    inner_grid = gridspec.GridSpecFromSubplotSpec(len(pitch_types), 1,
                                                  subplot_spec=gs[2, gs_list])
    panels = [fig.add_subplot(cell) for cell in inner_grid]

    # Shared x-range: overall speed range widened to multiples of 5.
    x_low = math.floor(df['start_speed'].min() / 5) * 5
    x_high = math.ceil(df['start_speed'].max() / 5) * 5

    for row, pitch_type in enumerate(pitch_types):
        panel = panels[row]
        pitch_rows = df[df['pitch_type'] == pitch_type]
        speeds = pitch_rows['start_speed']
        colour = pitch_colours[pitch_rows['pitch_description'].values[0]]
        distinct_speeds = np.unique(speeds)

        if distinct_speeds.size == 1:
            # A single distinct speed gets a vertical line at that speed.
            print('just')
            panel.plot([distinct_speeds, distinct_speeds], [0, 1], linewidth=4,
                       color=colour, zorder=20)
        else:
            sns.kdeplot(speeds, ax=panel, fill=True,
                        clip=(speeds.min(), speeds.max()),
                        color=colour)

        panel.set_xlim(x_low, x_high)
        panel.set_xlabel('')
        panel.set_ylabel('')

        # NOTE(review): the original indentation was lost; the spine and tick
        # hiding is assumed to be the only code under this condition.
        if row < len(pitch_types) - 1:
            for side in ('top', 'right', 'left'):
                panel.spines[side].set_visible(False)
            panel.tick_params(axis='x', colors='none')

        panel.set_xticks(range(x_low, x_high, 5))
        panel.set_yticks([])
        panel.grid(axis='x', linestyle='--')
        panel.text(-0.01, 0.5, pitch_type, transform=panel.transAxes,
                   fontsize=14, va='center', ha='right')

    # The bottom strip keeps its tick labels and carries the x-axis label.
    bottom = panels[-1]
    for side in ('top', 'right', 'left'):
        bottom.spines[side].set_visible(False)
    bottom.set_xticks(list(range(x_low, x_high, 5)))
    bottom.set_xlabel('Velocity (mph)')
### TJ STUFF+ ROLLING ### | |
def tj_stuff_roling(df,
                    window,
                    ax):
    """Plot a rolling tjStuff+ average per pitch type on ``ax``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``pitch_type``, ``pitch_type_count_each``, ``tj_stuff_plus`` and
        ``pitch_description``.
    window : int
        Rolling window length in pitches. Pitch types whose largest
        ``pitch_type_count_each`` is below ``window`` are not drawn.
    ax : matplotlib axes
        Axes to draw on.
    """
    # Pitch types, most frequent first.
    pitch_types = df['pitch_type'].value_counts().sort_values(ascending=False).index.tolist()

    for pitch_type in pitch_types:
        pitch_rows = df[df['pitch_type'] == pitch_type]
        n_thrown = max(pitch_rows['pitch_type_count_each'])
        if n_thrown >= window:
            rolling_mean = pitch_rows['tj_stuff_plus'].rolling(window).sum() / window
            sns.lineplot(x=range(1, n_thrown + 1),
                         y=rolling_mean,
                         color=pitch_colours[pitch_rows['pitch_description'].values[0]],
                         ax=ax, linewidth=3)

    # The first full window ends at pitch number ``window``.
    ax.set_xlim(window, max(df['pitch_type_count_each']))
    ax.set_ylim(70, 130)

    ax.set_xlabel('Pitches', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)

    # Reference line at 100.
    ax.axhline(y=100, linestyle='--', alpha=0.7, color='k')
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
### BREAK PLOT ### | |
def break_plot(df,
               ax):
    """Scatter horizontal vs induced vertical break, with one ellipse per pitch.

    Parameters
    ----------
    df : pandas.DataFrame
        One pitcher's pitches. Needs ``hb``, ``ivb``, ``pitch_description``,
        ``pitch_type``, ``prop`` and ``pitcher_hand``; the hand is read from
        the first row.
    ax : matplotlib axes
        Axes to draw on.

    Notes
    -----
    Pitch descriptions with more than four pitches get a 2-standard-deviation
    ellipse. For a left-handed pitcher the x-axis is inverted.
    """
    pitcher_hand = df['pitcher_hand'].values[0]

    labels = df.sort_values(by=['prop', 'pitch_type'],
                            ascending=[False, True]).pitch_description.unique()
    for label in labels:
        subset = df[df['pitch_description'] == label]
        if len(subset) <= 4:
            continue
        try:
            confidence_ellipse(subset['hb'], subset['ivb'], ax=ax,
                               edgecolor=pitch_colours[label], n_std=2,
                               facecolor=pitch_colours[label], alpha=0.2)
        except ValueError:
            # One failed ellipse must not abandon the rest of the plot.
            continue

    # Drawn for any hand value, so a legend always exists to remove below.
    sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df['pitch_description'],
                    palette=pitch_colours, ec='black', alpha=1, zorder=2)

    ax.set_xlim((-25, 25))
    ax.set_ylim((-25, 25))

    # Dashed crosshair through the origin.
    ax.hlines(y=0, xmin=-50, xmax=50, color=colour_palette[8], alpha=0.5,
              linestyles='--', zorder=1)
    ax.vlines(x=0, ymin=-50, ymax=50, color=colour_palette[8], alpha=0.5,
              linestyles='--', zorder=1)

    ax.set_xlabel('Horizontal Break (in)', fontdict=font_properties_axes)
    ax.set_ylabel('Induced Vertical Break (in)', fontdict=font_properties_axes)
    ax.set_title("Pitch Breaks", fontdict=font_properties_titles)

    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    ax.set_xticklabels(ax.get_xticks(), fontdict=font_properties)
    ax.set_yticklabels(ax.get_yticks(), fontdict=font_properties)

    label_box = dict(facecolor='white', edgecolor='black')
    if pitcher_hand == 'R':
        ax.text(-24.5, -24.5, s='← Glove Side', fontstyle='italic', ha='left', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)
        ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)
    elif pitcher_hand == 'L':
        # With the axis inverted, x = 24.5 is drawn on the left of the plot.
        ax.invert_xaxis()
        ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)
        ax.text(-24.5, -24.5, s='Glove Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)

    ax.set_aspect('equal', adjustable='box')
    # Whole-number tick labels on both axes.
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
    ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
### TABLE SUMMARY ### | |
def table_summary(df, | |
pitcher_id, | |
ax, | |
df_group, | |
df_group_all, | |
statcast_pitch_summary): | |
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) | |
ax.axis('off') | |
df_group['spin_direction_adj'] = [(x + 180) for x in df_group['spin_direction']] | |
#(((df_group.groupby('pitch_description').mean()[['spin_direction_adj']] %360 % 30 / 30 /100 *60).round(2) *10).round(0)//1.5/4 ) | |
clock_time = ((df_group.groupby('pitch_description').mean()['spin_direction_adj']) %360 // 30 )+ (((df_group.groupby('pitch_description').mean()['spin_direction_adj'] %360 % 30 / 30 /100 *60).round(2) *10).round(0)//1.5/4 ) | |
# print('Clocks') | |
# print(clock_time) | |
clock_time = (clock_time.astype(int) + clock_time%1*60/100).round(2).astype(str).str.replace('.',':').str.replace(':0',':00').str.replace(':3',':30').str.replace('0:','12:').str.replace('112:','10:').to_frame() | |
df_group = df_group.merge(right=clock_time,left_on='pitch_description',right_index=True,suffixes=['','_clock']) | |
plot_table = df_group[df_group['pitcher_id']==pitcher_id].sort_values( | |
by=['pitches'],ascending=False)[['pitch_description','pitches','start_speed','ivb', | |
'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release', | |
'extension','tj_stuff_plus','spin_direction_adj_clock','zone_percent','chase_percent','whiff_rate','xwobacon']] | |
# if df['pitcher_hand'].values[0] == 'L': | |
# plot_table['hb'] = plot_table['hb']*-1 | |
#if df['pitcher_hand'].values[0] == 'R': | |
plot_table['horizontal_release'] = plot_table['horizontal_release']*-1 | |
plot_table['pitch_percent'] = plot_table['pitches'] / plot_table['pitches'].sum() | |
plot_table = plot_table[['pitch_description','pitches','pitch_percent','start_speed','ivb', | |
'hb', 'spin_rate','vaa', 'haa', 'vertical_release','horizontal_release', | |
'extension','spin_direction_adj_clock','tj_stuff_plus','zone_percent','chase_percent','whiff_rate','xwobacon']] | |
plot_table_all = pd.DataFrame(data={'pitch_description': 'All', | |
'pitches': plot_table['pitches'].sum(), | |
'pitch_percent': 1.0, | |
'start_speed': '—', | |
'ivb': '—', | |
'hb': '—', | |
'spin_rate': '—', | |
'vaa': '—', | |
'haa': '—', | |
'vertical_release': '—', | |
'horizontal_release': '—', | |
'extension': df['extension'].mean(), | |
'spin_direction_adj_clock': '—', | |
'tj_stuff_plus': df[df['pitcher_id']==pitcher_id]['tj_stuff_plus'].mean(), | |
'zone_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['zone_percent'].values[0], | |
'chase_percent': df_group_all[df_group_all['pitcher_id']==pitcher_id]['chase_percent'].values[0], | |
'whiff_rate': df_group_all[df_group_all['pitcher_id']==pitcher_id]['whiff_rate'].values[0], | |
'xwobacon': df_group_all[df_group_all['pitcher_id']==pitcher_id]['xwobacon'].values[0], | |
},index=[0] | |
) | |
print('LOOK HERE') | |
print(plot_table) | |
plot_table = pd.concat([plot_table,plot_table_all]).fillna('—') | |
plt.rcParams['font.family'] = 'Calibri' | |
table = ax.table(cellText=plot_table.values, colLabels=plot_table.columns, cellLoc='center', | |
colWidths=[2.3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], bbox=[0.04, 0, 0.92, 0.8]) | |
min_font_size = 14 | |
# Set table properties | |
table.auto_set_font_size(False) | |
#table.set_fontsize(min(min_font_size,max(min_font_size/((len(label_labels)/4)),10))) | |
table.set_fontsize(min_font_size) | |
table.scale(1, 0.5) | |
min_font_size = 18 | |
# Set font size for values | |
# Adjust the font size as needed | |
for i in range(len(plot_table)+1): | |
for j in range(len(plot_table.columns)): | |
if i > 0: # Skip the header row | |
cell = table.get_celld()[i, j] | |
cell.set_fontsize(min_font_size) | |
for i in range(len(plot_table)): | |
if table.get_celld()[(i+1, 0)].get_text().get_text() != 'All': | |
table.get_celld()[(i+1, 0)].set_facecolor(pitch_colours[table.get_celld()[(i+1, 0)].get_text().get_text()]) # Header cell color | |
if table.get_celld()[(i+1, 0)].get_text().get_text() in ['Split-Finger','Slider','Changeup']: | |
table.get_celld()[(i+1, 0)].set_text_props(color='#000000',fontweight='bold') | |
else: | |
table.get_celld()[(i+1, 0)].set_text_props(color='#ffffff',fontweight='bold') | |
if table.get_celld()[(i+1, 0)].get_text().get_text() == 'Four-Seam Fastball': | |
table.get_celld()[(i+1, 0)].get_text().set_text('4-Seam') | |
print('LOOK HERE 3') | |
print(statcast_pitch_summary) | |
select_df = statcast_pitch_summary[statcast_pitch_summary['pitch_description'] == plot_table['pitch_description'].values[i]] | |
normalize = mcolors.Normalize(vmin=select_df['start_speed'].mean()-select_df.pitch_velocity_std.mean(), | |
vmax=select_df['start_speed'].mean()+select_df.pitch_velocity_std.mean()) # Define the range of values | |
if table.get_celld()[(i+1, 3)].get_text().get_text() != '—': | |
table.get_celld()[(i+1, 3)].set_facecolor(get_color(float(table.get_celld()[(i+1, 3)].get_text().get_text()),normalize,cmap_sum)) # Header cell color | |
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) | |
normalize = mcolors.Normalize(vmin=select_df['extension'].mean()*0.9, vmax=select_df['extension'].mean()*1.1) | |
if table.get_celld()[(i+1,11)].get_text().get_text() != '—': | |
table.get_celld()[(i+1,11)].set_facecolor(get_color(float(table.get_celld()[(i+1, 11)].get_text().get_text()),normalize,cmap_sum)) # Header cell color | |
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) | |
normalize = mcolors.Normalize(vmin=80, vmax=120) | |
print(normalize) | |
if table.get_celld()[(i+1,13)].get_text().get_text() != '—': | |
table.get_celld()[(i+1,13)].set_facecolor(get_color(float(table.get_celld()[(i+1, 13)].get_text().get_text()),normalize,cmap_sum)) # Header cell color | |
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) | |
normalize = mcolors.Normalize(vmin=select_df['zone_percent'].mean()*0.7, vmax=select_df['zone_percent'].mean()*1.3) | |
if table.get_celld()[(i+1,14)].get_text().get_text() != '—': | |
table.get_celld()[(i+1,14)].set_facecolor(get_color(float(table.get_celld()[(i+1, 14)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color | |
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) | |
normalize = mcolors.Normalize(vmin=select_df['chase_percent'].mean()*0.7, vmax=select_df['chase_percent'].mean()*1.3) | |
if table.get_celld()[(i+1,15)].get_text().get_text() != '—': | |
table.get_celld()[(i+1,15)].set_facecolor(get_color(float(table.get_celld()[(i+1, 15)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color | |
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',]) | |
normalize = mcolors.Normalize(vmin=select_df['whiff_rate'].mean()*0.7, vmax=select_df['whiff_rate'].mean()*1.3) | |
if table.get_celld()[(i+1,16)].get_text().get_text() != '—': | |
table.get_celld()[(i+1,16)].set_facecolor(get_color(float(table.get_celld()[(i+1, 16)].get_text().get_text().strip('%')),normalize,cmap_sum)) # Header cell color | |
print("LOOK HERE") | |
print(select_df) | |
cmap_sum_r = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#FFB000','#FFFFFF','#648FFF',]) | |
normalize = mcolors.Normalize(vmin=select_df['xwobacon'].mean()*0.7, vmax=select_df['xwobacon'].mean()*1.3) | |
if table.get_celld()[(i+1,17)].get_text().get_text() != '—': | |
table.get_celld()[(i+1,17)].set_facecolor(get_color(float(table.get_celld()[(i+1, 17)].get_text().get_text().strip('%')),normalize,cmap_sum_r)) # Header cell color | |
table.get_celld()[(len(plot_table), 0)].set_text_props(color='#000000',fontweight='bold') | |
new_column_names = ['$\\bf{Pitch\ Name}$', | |
'$\\bf{Count}$', | |
'$\\bf{Pitch\%}$', | |
'$\\bf{Velocity}$', | |
'$\\bf{iVB}$', | |
'$\\bf{HB}$', | |
'$\\bf{Spin}$', | |
'$\\bf{VAA}$', | |
'$\\bf{HAA}$', | |
'$\\bf{vRel}$', | |
'$\\bf{hRel}$', | |
'$\\bf{Ext.}$', | |
'$\\bf{Axis}$', | |
'$\\bf{tjStuff+}$', | |
'$\\bf{Zone\%}$', | |
'$\\bf{Chase\%}$', | |
'$\\bf{Whiff\%}$', | |
'$\\bf{xwOBA}$\n$\\bf{Contact}$', | |
] | |
for i, col_name in enumerate(new_column_names): | |
table.get_celld()[(0, i)].get_text().set_text(col_name) | |
float_list = ['start_speed','ivb', | |
'hb', 'vaa', 'haa', 'vertical_release','horizontal_release', 'extension'] | |
for fl in float_list: | |
# Subset of column names | |
subset_columns = [fl] | |
# Get the list of column indices | |
column_indices = [plot_table.columns.get_loc(col) for col in subset_columns] | |
# # print(column_indices) | |
for row_l in range(1,len(plot_table)+1): | |
# print(row_l) | |
if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—': | |
# print() | |
# print(fl) | |
table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.1f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%')))) | |
float_3_list = ['xwobacon'] | |
for fl in float_3_list: | |
# Subset of column names | |
subset_columns = [fl] | |
# Get the list of column indices | |
column_indices = [plot_table.columns.get_loc(col) for col in subset_columns] | |
# # print(column_indices) | |
for row_l in range(1,len(plot_table)+1): | |
# print(row_l) | |
if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—': | |
# print() | |
# print(fl) | |
table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.3f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%')))) | |
percent_list = ['pitch_percent','zone_percent','chase_percent','whiff_rate'] | |
for fl in percent_list: | |
# Subset of column names | |
subset_columns = [fl] | |
# Get the list of column indices | |
column_indices = [plot_table.columns.get_loc(col) for col in subset_columns] | |
# # print(column_indices) | |
for row_l in range(1,len(plot_table)+1): | |
# print(row_l) | |
if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—': | |
# print(fl) | |
table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.1%}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%')))) | |
int_list = ['tj_stuff_plus','spin_rate'] | |
for fl in int_list: | |
# Subset of column names | |
subset_columns = [fl] | |
# Get the list of column indices | |
column_indices = [plot_table.columns.get_loc(col) for col in subset_columns] | |
# # print(column_indices) | |
for row_l in range(1,len(plot_table)+1): | |
# print(row_l) | |
if table.get_celld()[(row_l,column_indices[0])].get_text().get_text() != '—': | |
# print(fl) | |
table.get_celld()[(row_l,column_indices[0])].get_text().set_text('{:,.0f}'.format(float(table.get_celld()[(row_l,column_indices[0])].get_text().get_text().strip('%')))) | |
return table | |
### GROUPED IVB CREATION ###
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator`` element-wise, NaN where the denominator is 0."""
    # Masking zero denominators to NaN makes the division itself yield NaN, so
    # no divide-by-zero warning or +/-inf can leak into the rate columns.
    return numerator / denominator.where(denominator != 0)


def group_ivb_update(df, agg_list=None):
    """Aggregate pitch-level rows into one summary row per group.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level data. Must contain the grouping columns plus
        ``start_speed``, ``ivb``, ``hb``, ``spin_rate``, ``vaa``, ``haa``,
        ``x0``, ``z0``, ``extension``, ``spin_direction``, ``tj_stuff_plus``,
        ``swings``, ``in_zone``, ``out_zone``, ``whiffs``, ``zone_swing``,
        ``zone_contact``, ``ozone_swing``, ``ozone_contact``, ``woba_pred``
        and ``launch_speed``.
    agg_list : list of str, optional
        Columns to group by. Defaults to ``['pitcher_id', 'pitcher_name',
        'pitcher_hand', 'pitch_type', 'pitch_description']``.

    Returns
    -------
    pandas.DataFrame
        One row per group (grouping keys restored as columns) holding:

        * ``pitches`` -- count of non-null ``start_speed`` values;
        * means of the pitch-shape columns (``x0``/``z0`` are exposed as
          ``horizontal_release``/``vertical_release``);
        * sums of the swing/zone/contact flag columns and of ``woba_pred``;
        * ``bip`` -- count of non-null ``launch_speed`` values;
        * the derived rates ``zone_contact_percent``, ``zone_swing_percent``,
          ``zone_percent``, ``chase_percent``, ``chase_contact``,
          ``swing_percent``, ``whiff_rate``, ``swstr_rate`` and ``xwobacon``.
          Every rate is NaN when its denominator is 0.
    """
    # A ``None`` sentinel avoids sharing one mutable default list across calls.
    if agg_list is None:
        agg_list = ['pitcher_id', 'pitcher_name', 'pitcher_hand',
                    'pitch_type', 'pitch_description']

    grouped_ivb = df.groupby(agg_list).agg(
        pitches=('start_speed', 'count'),
        start_speed=('start_speed', 'mean'),
        ivb=('ivb', 'mean'),
        hb=('hb', 'mean'),
        spin_rate=('spin_rate', 'mean'),
        vaa=('vaa', 'mean'),
        haa=('haa', 'mean'),
        horizontal_release=('x0', 'mean'),
        vertical_release=('z0', 'mean'),
        extension=('extension', 'mean'),
        spin_direction=('spin_direction', 'mean'),
        tj_stuff_plus=('tj_stuff_plus', 'mean'),
        swings=('swings', 'sum'),
        in_zone=('in_zone', 'sum'),
        out_zone=('out_zone', 'sum'),
        whiffs=('whiffs', 'sum'),
        zone_swing=('zone_swing', 'sum'),
        zone_contact=('zone_contact', 'sum'),
        ozone_swing=('ozone_swing', 'sum'),
        ozone_contact=('ozone_contact', 'sum'),
        woba_pred=('woba_pred', 'sum'),
        bip=('launch_speed', 'count'),
    ).reset_index()

    pitches = grouped_ivb['pitches']
    swings = grouped_ivb['swings']
    in_zone = grouped_ivb['in_zone']
    zone_swing = grouped_ivb['zone_swing']
    ozone_swing = grouped_ivb['ozone_swing']
    whiffs = grouped_ivb['whiffs']

    grouped_ivb['zone_contact_percent'] = _safe_ratio(grouped_ivb['zone_contact'], zone_swing)
    # Guard on the actual denominator (in-zone pitches); the previous check on
    # ``pitches`` let groups with no in-zone pitches divide by zero.
    grouped_ivb['zone_swing_percent'] = _safe_ratio(zone_swing, in_zone)
    grouped_ivb['zone_percent'] = _safe_ratio(in_zone, pitches)
    # Chase rate uses pitches that were not in the zone as its denominator.
    grouped_ivb['chase_percent'] = _safe_ratio(ozone_swing, pitches - in_zone)
    grouped_ivb['chase_contact'] = _safe_ratio(grouped_ivb['ozone_contact'], ozone_swing)
    grouped_ivb['swing_percent'] = _safe_ratio(swings, pitches)
    grouped_ivb['whiff_rate'] = _safe_ratio(whiffs, swings)
    grouped_ivb['swstr_rate'] = _safe_ratio(whiffs, pitches)
    grouped_ivb['xwobacon'] = _safe_ratio(grouped_ivb['woba_pred'], grouped_ivb['bip'])
    return grouped_ivb
####LHH | |
def location_plot(df,ax,hand):
    """Plot pitch locations against one batter hand on ``ax``.

    For every pitch description in ``df`` (ordered by ``prop`` descending,
    then ``pitch_type`` ascending) with at least five pitches to the given
    batter hand, a shaded ellipse is drawn via ``confidence_ellipse`` with
    ``n_std=1.5``. The mean ``px``/``pz`` of each pitch description is then
    drawn as a marker whose size is proportional to that pitch's share of
    the pitches thrown to this batter hand.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level data with ``prop``, ``pitch_type``, ``pitch_description``,
        ``batter_hand``, ``px``, ``pz`` and ``start_speed`` columns.
    ax : matplotlib.axes.Axes
        Axes to draw on.
    hand : str
        Value matched against ``batter_hand``; also interpolated into the
        title as ``"{hand}HB"``.
    """
    # Filter to the requested batter hand once instead of once per label.
    df_hand = df[df['batter_hand'] == hand]

    label_labels = df.sort_values(by=['prop','pitch_type'],ascending=[False,True]).pitch_description.unique()
    for label in label_labels:
        subset = df_hand[df_hand['pitch_description'] == label]
        print(label)
        # Fewer than five pitches is skipped rather than drawn as an ellipse.
        if len(subset) >= 5:
            confidence_ellipse(subset['px'], subset['pz'], ax=ax,
                               edgecolor=pitch_colours[label], n_std=1.5,
                               facecolor=pitch_colours[label], alpha=0.3)

    pitch_location_group = df_hand.groupby(['pitch_description']).agg(
        pitches=('start_speed','count'),
        px=('px','mean'),
        pz=('pz','mean')).reset_index()
    pitch_location_group['pitch_percent'] = pitch_location_group['pitches']/pitch_location_group['pitches'].sum()

    ## Location Plot
    sns.scatterplot(ax=ax,
                    x=pitch_location_group['px'],
                    y=pitch_location_group['pz'],
                    hue=pitch_location_group['pitch_description'],
                    palette=pitch_colours,
                    ec='black',
                    s=pitch_location_group['pitch_percent']*750,
                    linewidth=2,
                    zorder=2)
    ax.axis('square')
    # draw_line is defined elsewhere in this file -- presumably it draws the
    # strike zone / plate outline; confirm against its definition.
    draw_line(ax,alpha_spot=0.75,catcher_p=False)
    ax.axis('off')
    ax.set_xlim((-2.75,2.75))
    ax.set_ylim((-0.5,5))
    if len(pitch_location_group['px'])>0:
        # get_legend() returns None when no legend was created; guard so a
        # missing legend cannot raise AttributeError.
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()
    ax.grid(False)
    ax.set_title(f"Pitch Locations vs {hand}HB\n{pitch_location_group['pitches'].sum()} Pitches",fontdict=font_properties_titles)