# Hugging Face Space (status: Running)
from shiny import App, Inputs, Outputs, Session, reactive, render, req, ui | |
import datasets | |
from datasets import load_dataset | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import numpy as np | |
from scipy.stats import gaussian_kde | |
import matplotlib | |
from matplotlib.ticker import MaxNLocator | |
from matplotlib.gridspec import GridSpec | |
from scipy.stats import zscore | |
import math | |
import matplotlib | |
from adjustText import adjust_text | |
import matplotlib.ticker as mtick | |
from shinywidgets import output_widget, render_widget | |
import pandas as pd | |
from configure import base_url | |
import shinyswatch | |
from datetime import datetime, timedelta | |
# Season shown in the rolling-plot titles.
year_input = 2024

### Import Datasets
# Pitch-level CSVs are pulled from the `nesticot/mlb_data` dataset. For each
# file the first feature column is discarded, the rows get a fresh positional
# index, and a `level` column tags where the pitches came from.
#
# NOTE: a disabled experiment that appended same-day games through a local
# `api_scraper.MLB_Scrape` helper used to live here as commented-out code.

## MLB
dataset = load_dataset('nesticot/mlb_data', data_files=['mlb_pitch_data_2024.csv'])
dataset_train = dataset['train']
df_2023_mlb = (
    dataset_train.to_pandas()
    .drop(columns=[next(iter(dataset_train.features))])
    .reset_index(drop=True)
    .assign(level='MLB')
)

## AAA
dataset = load_dataset('nesticot/mlb_data', data_files=['aaa_pitch_data_2024.csv'])
dataset_train = dataset['train']
df_2023_aaa = (
    dataset_train.to_pandas()
    .drop(columns=[next(iter(dataset_train.features))])
    .reset_index(drop=True)
    .assign(level='AAA')
)

## Both levels stacked (MLB rows first, each keeping its own 0..n-1 index)
df_2023 = pd.concat([df_2023_mlb, df_2023_aaa])
import joblib
import xgboost as xgb

### Load the fitted models
# One model scores pitches that were swung at, the other scores pitches that
# were taken. joblib.load unpickles the file, so both artifacts must come from
# a trusted source.
swing_model = joblib.load('swing.joblib')
no_swing_model = joblib.load('no_swing.joblib')

# batter_id -> batter_name lookup (also used as the choices of the batter
# selectors in the UI).
batter_dict = (
    df_2023.sort_values('batter_name')
    .set_index('batter_id')['batter_name']
    .to_dict()
)

### Make Predictions
## Model inputs: pitch location plus the count
features = ['px', 'pz', 'strikes', 'balls']

## Takes: every pitch not flagged as a swing, with complete features
df_model_2023_no_swing = df_2023[df_2023.is_swing != 1].dropna(subset=features)
df_model_2023_no_swing = df_model_2023_no_swing.assign(
    y_pred=no_swing_model.predict(xgb.DMatrix(df_model_2023_no_swing[features]))
)

## Swings: every pitch flagged as a swing, with complete features
df_model_2023_swing = df_2023[df_2023.is_swing == 1].dropna(subset=features)
df_model_2023_swing = df_model_2023_swing.assign(
    y_pred=swing_model.predict(xgb.DMatrix(df_model_2023_swing[features]))
)

## Scored pitches, takes first and swings second
df_model_2023 = pd.concat([df_model_2023_no_swing, df_model_2023_swing])
import joblib | |
def _to_20_80(values):
    """Rescale *values* to 50 plus 10 per standard deviation from their mean."""
    return 50 + zscore(values) * 10


def _summarise_by_batter(pitch_frame):
    """Count pitches and average `y_pred` per (batter_id, batter_name, level)."""
    return pitch_frame.groupby(['batter_id', 'batter_name', 'level']).agg(
        pitches=('start_speed', 'count'),
        y_pred=('y_pred', 'mean'),
    )


## All scored pitches -> decision value
# NOTE: a minimum-pitch cut on this frame was previously sketched here but is
# disabled; the only pitch minimum applied in this section is the >= 50 cut
# near the end.
df_model_2023_group = _summarise_by_batter(df_model_2023)
df_model_2023_group['decision_value'] = _to_20_80(df_model_2023_group['y_pred'])

## Pitches taken -> 'iz_awareness'
df_model_2023_group_no_swing = _summarise_by_batter(df_model_2023[df_model_2023.is_swing != 1])
# Keep only batter names that appear in the overall summary. With no pitch
# minimum applied to that summary, every name is present and nothing is dropped.
# The copy avoids adding a column to a filtered view.
df_model_2023_group_no_swing = df_model_2023_group_no_swing[
    df_model_2023_group_no_swing.index.get_level_values(1).isin(
        df_model_2023_group.index.get_level_values(1))
].copy()
df_model_2023_group_no_swing['iz_awareness'] = _to_20_80(df_model_2023_group_no_swing['y_pred'])

## Pitches swung at -> 'oz_awareness'
df_model_2023_group_swing = _summarise_by_batter(df_model_2023[df_model_2023.is_swing == 1])
df_model_2023_group_swing = df_model_2023_group_swing[
    df_model_2023_group_swing.index.get_level_values(1).isin(
        df_model_2023_group.index.get_level_values(1))
].copy()
df_model_2023_group_swing['oz_awareness'] = _to_20_80(df_model_2023_group_swing['y_pred'])

## Create df for plotting
# Swing and take summaries side by side; only batters present in both survive
# the (inner) index merge.
df_model_2023_group_swing_plus_no = df_model_2023_group_swing.merge(
    df_model_2023_group_no_swing,
    left_index=True,
    right_index=True,
    suffixes=('_swing', '_no_swing'),
)
df_model_2023_group_swing_plus_no['pitches'] = (
    df_model_2023_group_swing_plus_no.pitches_swing
    + df_model_2023_group_swing_plus_no.pitches_no_swing
)

# Overall y_pred as the pitch-weighted mean of the swing and take means
df_model_2023_group_swing_plus_no['y_pred'] = (
    df_model_2023_group_swing_plus_no.y_pred_swing * df_model_2023_group_swing_plus_no.pitches_swing
    + df_model_2023_group_swing_plus_no.y_pred_no_swing * df_model_2023_group_swing_plus_no.pitches_no_swing
) / df_model_2023_group_swing_plus_no.pitches

# Bring in the all-pitches summary; its clashing columns get a '_y' suffix
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no.merge(
    right=df_model_2023_group,
    left_index=True,
    right_index=True,
    suffixes=('', '_y'),
)
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no.reset_index()

# Team of each batter, taken from that batter's last row in df_2023. Keying on
# batter_id (rather than taking the tail of each batter_name group) keeps
# different players who share a name from crowding each other out.
team_dict = (
    df_2023.drop_duplicates(subset='batter_id', keep='last')
    .set_index('batter_id')['batter_team']
    .to_dict()
)
df_model_2023_group_swing_plus_no['team'] = df_model_2023_group_swing_plus_no['batter_id'].map(team_dict)
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no.set_index(
    ['batter_id', 'batter_name', 'level', 'team'])

# Hard floor of 50 pitches; the app can raise this minimum but not lower it
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no[
    df_model_2023_group_swing_plus_no['pitches'] >= 50]
df_model_2023_group_swing_plus_no_copy = df_model_2023_group_swing_plus_no.copy()
import matplotlib
from matplotlib.pyplot import text
import inflect
from scipy.stats import percentileofscore

# Shared colours for every chart in the app
colour_palette = [
    '#FFB000', '#648FFF', '#785EF0',
    '#DC267F', '#FE6100', '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED',
]

# Diverging map: palette[1] -> white -> palette[0]
cmap_hue = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", [colour_palette[1], '#ffffff', colour_palette[0]])
# Sequential map: white -> palette[0]
cmap_hue2 = matplotlib.colors.LinearSegmentedColormap.from_list(
    "", ['#ffffff', colour_palette[0]])

# inflect engine, used to turn percentile numbers into ordinals ("90th")
p = inflect.engine()
def server(input, output, session):
    """Register the four plot outputs declared in the UI.

    Outputs: `scatter_plot`, `dv_plot`, `iz_plot` and `oz_plot`. Each one is
    re-rendered when the "go" action button fires and reads the other inputs
    (`batter_id`, `level_list`, `pitch_min`, `name_list`, `rolling_window`)
    at that moment.

    Args:
        input: Shiny inputs object.
        output: Shiny outputs object used to register the plots.
        session: Shiny session (unused).
    """

    def _message_figure(message):
        """Return an otherwise blank 12x12 figure showing *message*."""
        fig = plt.figure(figsize=(12, 12))
        fig.text(s=message, x=0.5, y=0.5)
        return fig

    def _level_summary():
        """Return the per-batter summary rows for the level selected in the UI."""
        summary = df_model_2023_group_swing_plus_no_copy
        # Index level 2 is 'level' ('MLB' / 'AAA').
        return summary[summary.index.get_level_values(2) == input.level_list()]

    def _rolling_figure(league_col, metric_title, x_label, y_ticks, swings=None):
        """Draw a rolling-mean chart of `y_pred` for the selected batter.

        Args:
            league_col: Column of the per-batter summary that supplies the
                level average, percentile rank and percentile guide lines.
            metric_title: Metric name inserted into the chart title.
            x_label: Label of the x axis.
            y_ticks: Tick positions of the y axis.
            swings: ``True`` keeps only swings, ``False`` only takes,
                ``None`` keeps every pitch.

        Returns:
            The matplotlib figure, or a message figure when no batter is
            selected or the selection has no data.
        """
        if not input.batter_id():
            return _message_figure('Please Select a Batter')

        player_select = int(input.batter_id())
        player_select_full = batter_dict[player_select]
        level = input.level_list()
        # An emptied numeric input falls back to a window of one pitch.
        win = max(1, int(input.rolling_window() or 1))

        df_will = df_model_2023[df_model_2023.batter_id == player_select].sort_values(
            by=['game_date', 'start_time'])
        df_will = df_will[df_will['level'] == level]
        if swings is True:
            df_will = df_will[df_will['is_swing'] == 1]
        elif swings is False:
            df_will = df_will[df_will['is_swing'] != 1]

        # Compare against batters of the selected level only, so the
        # "{level} Average" label and the percentile describe the same group.
        league = _level_summary()[league_col]
        if df_will.empty or league.empty:
            return _message_figure('No Data Available for This Selection')

        n_pitches = len(df_will)
        player_mean = df_will.y_pred.mean()
        rolling = df_will.y_pred.rolling(window=win).mean().dropna() * 100

        sns.set_theme(style="whitegrid", palette="pastel")
        fig = plt.figure(figsize=(12, 12))
        gs = GridSpec(3, 3, height_ratios=[0.3, 10, 0.2], width_ratios=[0.01, 2, 0.01])
        axheader = fig.add_subplot(gs[0, :])
        ax10 = fig.add_subplot(gs[1, 0])
        ax = fig.add_subplot(gs[1, 1])  # main chart
        ax12 = fig.add_subplot(gs[1, 2])
        axfooter1 = fig.add_subplot(gs[-1, :])
        for spare_ax in (axheader, ax10, ax12, axfooter1):
            spare_ax.axis('off')

        # The first complete window ends on pitch number `win`.
        sns.lineplot(x=np.arange(win, win + len(rolling)),
                     y=rolling.to_numpy(),
                     color=colour_palette[0], linewidth=2, ax=ax, zorder=100)

        percentile = int(np.around(percentileofscore(league, player_mean, kind="strict")))
        ax.hlines(y=player_mean * 100, xmin=win, xmax=n_pitches,
                  color=colour_palette[0], linestyle='--',
                  label=f'{player_select_full} Average: {player_mean * 100:.2f} xRV/100 '
                        f'({p.ordinal(percentile)} Percentile)')
        league_mean = league.mean() * 100
        ax.hlines(y=league_mean, xmin=win, xmax=n_pitches,
                  color=colour_palette[1], linestyle='-.', alpha=1,
                  label=f'{level} Average: {league_mean:.2f} xRV/100')
        ax.legend()

        # Dotted guide lines at fixed percentiles of the level's batters.
        guides = [(0.9, '90th %'), (0.75, '75th %'), (0.25, '25th %'), (0.1, '10th %')]
        for i, (quantile, guide_text) in enumerate(guides):
            guide_colour = colour_palette[2 + i]
            guide_y = league.quantile(quantile) * 100
            ax.hlines(y=guide_y, xmin=win, xmax=n_pitches, color=guide_colour,
                      linestyle='dotted', alpha=0.5, zorder=1)
            ax.text(win + win / 1000, guide_y, guide_text, rotation=0, va='center', ha='left',
                    bbox=dict(facecolor='white', alpha=0.7, edgecolor=guide_colour, pad=2),
                    zorder=111)

        ax.set_xlim(win, n_pitches)
        ax.set_yticks(y_ticks)
        ax.set_xlabel(x_label)
        ax.set_ylabel('Expected Run Value Added per 100 Pitches (xRV/100)')

        axheader.text(s=f'{player_select_full} - {win} Pitch Rolling {metric_title} '
                        f'Expected Run Value Added\n{level} - {year_input}',
                      x=0.5, y=-0.8, ha='center', va='bottom', fontsize=14)
        axfooter1.text(.05, 0.2, "By: Thomas Nestico", ha='left', va='bottom', fontsize=12)
        axfooter1.text(0.95, 0.2, "Data: MLB", ha='right', va='bottom', fontsize=12)

        fig.subplots_adjust(left=0.01, right=0.99, top=0.98, bottom=0.02)
        return fig

    # NOTE(review): ignore_none=False renders the plots once on load, before
    # the first click of "Generate" -- confirm this is the intended behaviour.
    @output
    @render.plot(alt="Scatter plot of swing value against take value by batter")
    @reactive.event(input.go, ignore_none=False)
    def scatter_plot():
        """Scatter of per-batter swing value (x) against take value (y)."""
        if not input.batter_id():
            return _message_figure('Please Select a Batter')

        # The UI minimum is 50; an emptied input falls back to that floor.
        plot_min = max(50, int(input.pitch_min() or 0))
        summary = _level_summary()
        summary = summary[summary.pitches >= plot_min]
        if summary.empty:
            return _message_figure('No Data Available for This Selection')

        swing_rv = summary['y_pred_swing'] * 100
        take_rv = summary['y_pred_no_swing'] * 100
        total_rv = summary['y_pred'] * 100

        ## Plot In-Zone vs Out-of-Zone Awareness
        sns.set_theme(style="whitegrid", palette="pastel")
        fig = plt.figure(figsize=(12, 12))
        gs = GridSpec(3, 3, height_ratios=[0.6, 10, 0.2], width_ratios=[0.25, 0.50, 0.25])
        axheader = fig.add_subplot(gs[0, :])
        ax = fig.add_subplot(gs[1, :])  # main chart
        axfooter1 = fig.add_subplot(gs[-1, 0])
        axfooter2 = fig.add_subplot(gs[-1, 1])  # holds the colour bar
        axfooter3 = fig.add_subplot(gs[-1, 2])

        point_cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
            "", [colour_palette[1], colour_palette[3], colour_palette[0]])
        norm = plt.Normalize(total_rv.min(), total_rv.max())

        sns.scatterplot(
            x=swing_rv,
            y=take_rv,
            hue=total_rv,
            size=summary['pitches_swing'] / summary['pitches'],
            palette=point_cmap, ax=ax)

        sm = plt.cm.ScalarMappable(cmap=point_cmap, norm=norm)
        cbar = plt.colorbar(sm, cax=axfooter2, orientation='horizontal', shrink=1)
        cbar.set_label('Decision Value xRV/100 Pitches', fontsize=12)

        # Cross-hairs at the mean of each axis
        ax.axhline(y=take_rv.mean(), color='gray', linewidth=3, linestyle='dotted', alpha=0.4)
        ax.axvline(x=swing_rv.mean(), color='gray', linewidth=3, linestyle='dotted', alpha=0.4)

        # Axis limits rounded outward to the nearest 0.05
        x_lim_min = math.floor(swing_rv.min() * 100 / 5) * 5 / 100
        x_lim_max = math.ceil(swing_rv.max() * 100 / 5) * 5 / 100
        y_lim_min = math.floor(take_rv.min() * 100 / 5) * 5 / 100
        y_lim_max = math.ceil(take_rv.max() * 100 / 5) * 5 / 100
        ax.set_xlim(x_lim_min, x_lim_max)
        ax.set_ylim(y_lim_min, y_lim_max)

        ax.tick_params(axis='both', which='major', labelsize=12)
        ax.set_xlabel('Out-of-Zone Awareness Value xRV/100 Swings', fontsize=16)
        ax.set_ylabel('In-Zone Awareness Value xRV/100 Takes', fontsize=16)
        ax.get_legend().remove()

        def _extreme(values):
            """Flag values at or beyond the 2nd / 98th percentile of *values*."""
            return ((values >= values.quantile(0.98))
                    | (values <= values.quantile(0.02))).to_numpy()

        # Label the outliers on any of the three metrics plus every batter
        # picked in the "name_list" selector (whose values are id strings).
        requested_ids = set(input.name_list() or ())
        batter_ids = summary.index.get_level_values(0)
        show_label = (
            _extreme(summary['y_pred_swing'])
            | _extreme(summary['y_pred_no_swing'])
            | _extreme(summary['y_pred'])
            | np.array([str(batter) in requested_ids for batter in batter_ids], dtype=bool)
        )
        names = summary.index.get_level_values(1).to_numpy()
        ts = [
            ax.text(x=x_pos, y=y_pos, s=name, fontsize=8)
            for x_pos, y_pos, name in zip(swing_rv.to_numpy()[show_label],
                                          take_rv.to_numpy()[show_label],
                                          names[show_label])
        ]

        # Boxed notes in the top-left corner of the chart
        note_x = x_lim_min + abs(x_lim_min) * 0.02
        y_span = abs(y_lim_max - y_lim_min)
        ax.text(x=note_x, y=y_lim_max - y_span * 0.02, s=f'Min. {plot_min} Pitches',
                fontsize=10, fontstyle='oblique', va='top',
                bbox=dict(facecolor='white', edgecolor='black'))
        ax.text(x=note_x, y=y_lim_max - y_span * 0.06, s='Point Size Represents Swing%',
                fontsize=10, fontstyle='oblique', va='top',
                bbox=dict(facecolor='white', edgecolor='black'))

        # Nudge the labels apart and connect them to their points
        adjust_text(ts,
                    arrowprops=dict(arrowstyle="-", color=colour_palette[4], lw=1), ax=ax)

        for spare_ax in (axfooter1, axfooter3, axheader):
            spare_ax.axis('off')
        axheader.text(s=f'{input.level_list()} In-Zone vs Out-of-Zone Awareness Value',
                      fontsize=24, x=0.5, y=0, va='top', ha='center')
        axfooter1.text(0.05, -0.5, "By: Thomas Nestico\n @TJStats", ha='left', va='bottom', fontsize=12)
        axfooter3.text(0.95, -0.5, "Data: MLB", ha='right', va='bottom', fontsize=12)

        fig.subplots_adjust(left=0.01, right=0.99, top=0.975, bottom=0.025)
        return fig

    @output
    @render.plot(alt="Rolling swing decision value for the selected batter")
    @reactive.event(input.go, ignore_none=False)
    def dv_plot():
        """Rolling `y_pred` over every pitch seen by the selected batter."""
        return _rolling_figure('y_pred', 'Swing Decision', 'Pitch',
                               [-1.5, -1, -0.5, 0, 0.5, 1, 1.5])

    @output
    @render.plot(alt="Rolling in-zone awareness value for the selected batter")
    @reactive.event(input.go, ignore_none=False)
    def iz_plot():
        """Rolling `y_pred` over the pitches the selected batter took."""
        return _rolling_figure('y_pred_no_swing', 'In-Zone Awareness', 'Takes',
                               [1.0, 1.5, 2.0, 2.5, 3.0], swings=False)

    @output
    @render.plot(alt="Rolling out-of-zone awareness value for the selected batter")
    @reactive.event(input.go, ignore_none=False)
    def oz_plot():
        """Rolling `y_pred` over the pitches the selected batter swung at."""
        return _rolling_figure('y_pred_swing', 'Out of Zone Awareness', 'Swing',
                               [-3.25, -2.75, -2.25, -1.75, -1.25], swings=True)
# Page-level CSS overrides for the two heading sizes used by the layout.
_PAGE_CSS = """
h4 {
margin-top: 1em;font-size:35px;
}
h2{
font-size:25px;
}
"""

# (tab title, output id) for each plot registered by `server`.
_PLOT_TABS = [
    ("Scatter Plot", 'scatter_plot'),
    ("Rolling DV", 'dv_plot'),
    ("Rolling In-Zone", 'iz_plot'),
    ("Rolling Out-of-Zone", 'oz_plot'),
]

# Sidebar: every control read by the plots, plus the button that triggers them.
_sidebar = ui.panel_sidebar(
    ui.input_numeric("pitch_min",
                     "Select Pitch Minimum [min. 50] (Scatter)",
                     value=100,
                     min=50),
    ui.input_select("name_list",
                    "Select Players to List (Scatter)",
                    batter_dict,
                    selectize=True,
                    multiple=True),
    ui.input_select("batter_id",
                    "Select Batter (Rolling)",
                    batter_dict,
                    width=1,
                    size=1,
                    selectize=True),
    ui.input_numeric("rolling_window",
                     "Select Rolling Window (Rolling)",
                     value=100,
                     min=1),
    ui.input_select("level_list",
                    "Select Level",
                    ['MLB', 'AAA'],
                    selected='MLB'),
    ui.input_action_button("go", "Generate", class_="btn-primary"),
)

# Main panel: one 1000px-square plot per tab.
_plot_tabs = ui.navset_tab(*[
    ui.nav(tab_title,
           ui.output_plot(output_id,
                          width='1000px',
                          height='1000px'))
    for tab_title, output_id in _PLOT_TABS
])

# NOTE: two alternative site-navigation menus (links to the sibling TJStats
# batter and pitcher apps) and a Patreon link used to sit between the tagline
# and the row below as commented-out code.
app = App(
    ui.page_fluid(
        ui.tags.base(href=base_url),
        ui.tags.div(
            {"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
            ui.tags.style(_PAGE_CSS),
            shinyswatch.theme.simplex(),
            ui.tags.h4("TJStats"),
            ui.tags.i("Baseball Analytics and Visualizations"),
            ui.row(
                ui.layout_sidebar(
                    _sidebar,
                    ui.panel_main(_plot_tabs),
                )
            ),
        ),
    ),
    server,
)