"""Shiny app plotting batter swing-decision value (xRV/100 pitches).

At import time the module downloads the 2024 MLB and AAA pitch-level CSVs,
scores every pitch with two pre-trained models (one for swings, one for
takes), and aggregates the predictions per batter and level.  The Shiny
``server`` then renders:

* a scatter plot of per-batter swing value vs. take value, and
* three rolling-average plots (all pitches, takes only, swings only) for a
  single selected batter.
"""

import math
from datetime import datetime, timedelta

import datasets
import inflect
import joblib
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
import seaborn as sns
import shinyswatch
import xgboost as xgb
from adjustText import adjust_text
from datasets import load_dataset
from matplotlib.gridspec import GridSpec
from matplotlib.pyplot import text
from matplotlib.ticker import MaxNLocator
from scipy.stats import gaussian_kde
from scipy.stats import percentileofscore
from scipy.stats import zscore
from shiny import App, Inputs, Outputs, Session, reactive, render, req, ui
from shinywidgets import output_widget, render_widget

from configure import base_url

year_input = 2024


### Import Datasets
def _load_pitch_data(file_name, level):
    """Load one pitch-level CSV from the ``nesticot/mlb_data`` dataset.

    The first column of the CSV is discarded (it is moved to the index and
    the index is then reset with ``drop=True``), and a ``level`` column with
    the given label is added.
    """
    split = load_dataset('nesticot/mlb_data', data_files=[file_name])['train']
    first_column = list(split.features.keys())[0]
    frame = split.to_pandas().set_index(first_column).reset_index(drop=True)
    frame['level'] = level
    return frame


df_2023_mlb = _load_pitch_data('mlb_pitch_data_2024.csv', 'MLB')
df_2023_aaa = _load_pitch_data('aaa_pitch_data_2024.csv', 'AAA')
df_2023 = pd.concat([df_2023_mlb, df_2023_aaa])

### Load the pre-trained models (separate models for swings and takes)
swing_model = joblib.load('swing.joblib')
no_swing_model = joblib.load('no_swing.joblib')

# batter_id -> batter_name; also used as the choices of the select inputs.
batter_dict = df_2023.sort_values('batter_name').set_index('batter_id')['batter_name'].to_dict()

## Make Predictions
## Define Features
features = ['px', 'pz', 'strikes', 'balls']

## Score takes and swings with their respective models
df_model_2023_no_swing = df_2023[df_2023.is_swing != 1].dropna(subset=features)
df_model_2023_swing = df_2023[df_2023.is_swing == 1].dropna(subset=features)

df_model_2023_no_swing['y_pred'] = no_swing_model.predict(xgb.DMatrix(df_model_2023_no_swing[features]))
df_model_2023_swing['y_pred'] = swing_model.predict(xgb.DMatrix(df_model_2023_swing[features]))

df_model_2023 = pd.concat([df_model_2023_no_swing, df_model_2023_swing])


def _summarise_by_batter(pitches):
    """Return pitch count and mean ``y_pred`` per (batter_id, batter_name, level)."""
    return pitches.groupby(['batter_id', 'batter_name', 'level']).agg(
        pitches=('start_speed', 'count'),
        y_pred=('y_pred', 'mean'),
    )


def _to_20_80(values):
    """Map values to a scale with mean 50 and 10 points per standard deviation."""
    return 50 + zscore(values) * 10


## All pitches
df_model_2023_group = _summarise_by_batter(df_model_2023)
df_model_2023_group['decision_value'] = _to_20_80(df_model_2023_group['y_pred'])

# Names present in the overall summary; the swing/take summaries are
# restricted to these names.
_summary_names = df_model_2023_group.index.get_level_values(1)

## Pitches taken
df_model_2023_group_no_swing = _summarise_by_batter(df_model_2023[df_model_2023.is_swing != 1])
df_model_2023_group_no_swing = df_model_2023_group_no_swing[
    df_model_2023_group_no_swing.index.get_level_values(1).isin(_summary_names)
]
df_model_2023_group_no_swing['iz_awareness'] = _to_20_80(df_model_2023_group_no_swing['y_pred'])

## Pitches swung at
df_model_2023_group_swing = _summarise_by_batter(df_model_2023[df_model_2023.is_swing == 1])
df_model_2023_group_swing = df_model_2023_group_swing[
    df_model_2023_group_swing.index.get_level_values(1).isin(_summary_names)
]
df_model_2023_group_swing['oz_awareness'] = _to_20_80(df_model_2023_group_swing['y_pred'])

## Create df for plotting
# Merge swing and take summaries (inner join on the shared index).
df_model_2023_group_swing_plus_no = df_model_2023_group_swing.merge(
    df_model_2023_group_no_swing,
    left_index=True,
    right_index=True,
    suffixes=['_swing', '_no_swing'],
)
df_model_2023_group_swing_plus_no['pitches'] = (
    df_model_2023_group_swing_plus_no.pitches_swing
    + df_model_2023_group_swing_plus_no.pitches_no_swing
)

# Pitch-weighted mean of the swing and take predictions.
df_model_2023_group_swing_plus_no['y_pred'] = (
    df_model_2023_group_swing_plus_no.y_pred_swing * df_model_2023_group_swing_plus_no.pitches_swing
    + df_model_2023_group_swing_plus_no.y_pred_no_swing * df_model_2023_group_swing_plus_no.pitches_no_swing
) / df_model_2023_group_swing_plus_no.pitches

df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no.merge(
    right=df_model_2023_group,
    left_index=True,
    right_index=True,
    suffixes=['', '_y'],
)

# Attach each batter's team and make it part of the index.
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no.reset_index()
team_dict = df_2023.groupby(['batter_name'])[['batter_id', 'batter_team']].tail().set_index('batter_id')['batter_team'].to_dict()
df_model_2023_group_swing_plus_no['team'] = df_model_2023_group_swing_plus_no['batter_id'].map(team_dict)
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no.set_index(['batter_id', 'batter_name', 'level', 'team'])

# Batters with fewer than 50 scored pitches are never plotted.
df_model_2023_group_swing_plus_no = df_model_2023_group_swing_plus_no[df_model_2023_group_swing_plus_no['pitches'] >= 50]
df_model_2023_group_swing_plus_no_copy = df_model_2023_group_swing_plus_no.copy()

colour_palette = ['#FFB000', '#648FFF', '#785EF0', '#DC267F', '#FE6100', '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED']

cmap_hue = matplotlib.colors.LinearSegmentedColormap.from_list("", [colour_palette[1], '#ffffff', colour_palette[0]])
cmap_hue2 = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#ffffff', colour_palette[0]])

p = inflect.engine()

# Per-plot settings for the three rolling charts.  ``swing`` selects which
# pitches are kept (None = all, True = swings, False = takes) and
# ``reference_column`` is the batter-summary column the player is compared to.
_ROLLING_SPECS = {
    'dv': dict(
        swing=None,
        reference_column='y_pred',
        yticks=[-1.5, -1, -0.5, 0, 0.5, 1, 1.5],
        xlabel='Pitch',
        metric_title='Swing Decision',
        label_zorder=1100,
    ),
    'iz': dict(
        swing=False,
        reference_column='y_pred_no_swing',
        yticks=[1.0, 1.5, 2.0, 2.5, 3.0],
        xlabel='Takes',
        metric_title='In-Zone Awareness',
        label_zorder=111,
    ),
    'oz': dict(
        swing=True,
        reference_column='y_pred_swing',
        yticks=[-3.25, -2.75, -2.25, -1.75, -1.25],
        xlabel='Swing',
        metric_title='Out of Zone Awareness',
        label_zorder=111,
    ),
}


def _message_figure(message):
    """Return an otherwise empty 12x12 figure showing ``message``."""
    fig = plt.figure(figsize=(12, 12))
    fig.text(s=message, x=0.5, y=0.5)
    return fig


def _level_summary(level):
    """Return the per-batter summary rows (>= 50 pitches) for one level."""
    summary = df_model_2023_group_swing_plus_no_copy
    return summary[summary.index.get_level_values('level') == level]


def _label_mask(summary, highlighted_ids):
    """Return a boolean array marking the batters to label on the scatter.

    A batter is labelled when any of ``y_pred_swing``, ``y_pred_no_swing`` or
    ``y_pred`` is at or beyond the 2nd/98th percentile of ``summary``, or when
    the batter's id (as a string) is in ``highlighted_ids``.
    """
    mask = np.zeros(len(summary), dtype=bool)
    for column in ('y_pred_swing', 'y_pred_no_swing', 'y_pred'):
        values = summary[column].to_numpy()
        low = summary[column].quantile(0.02)
        high = summary[column].quantile(0.98)
        mask |= (values <= low) | (values >= high)
    batter_ids = summary.index.get_level_values('batter_id').astype(str)
    mask |= batter_ids.isin(list(highlighted_ids or ()))
    return mask


def _build_scatter_figure(level, pitch_min, highlighted_ids):
    """Build the swing-value vs. take-value scatter for one level.

    Args:
        level: Level label to plot ('MLB' or 'AAA').
        pitch_min: Requested minimum pitch count; values below 50 (or a blank
            input) are raised to 50.
        highlighted_ids: Batter ids (strings) that are always labelled.

    Returns:
        The matplotlib figure, or a message figure when no batter qualifies.
    """
    plot_min = max(50, int(pitch_min or 50))
    summary = _level_summary(level)
    summary = summary[summary.pitches >= plot_min]
    if summary.empty:
        # min()/max() of an empty table are NaN and would break the axis maths.
        return _message_figure(f'No {level} Batters with at Least {plot_min} Pitches')

    ## Plot In-Zone vs Out-of-Zone Awareness
    sns.set_theme(style="whitegrid", palette="pastel")
    fig = plt.figure(figsize=(12, 12))
    gs = GridSpec(3, 3, height_ratios=[0.6, 10, 0.2], width_ratios=[0.25, 0.50, 0.25])
    axheader = fig.add_subplot(gs[0, :])
    ax = fig.add_subplot(gs[1, :])
    axfooter1 = fig.add_subplot(gs[-1, 0])
    axfooter2 = fig.add_subplot(gs[-1, 1])
    axfooter3 = fig.add_subplot(gs[-1, 2])

    swing_x100 = summary['y_pred_swing'] * 100
    take_x100 = summary['y_pred_no_swing'] * 100
    overall_x100 = summary['y_pred'] * 100

    scatter_cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
        "", [colour_palette[1], colour_palette[3], colour_palette[0]]
    )
    norm = plt.Normalize(summary['y_pred'].min() * 100, summary['y_pred'].max() * 100)

    sns.scatterplot(
        x=swing_x100,
        y=take_x100,
        hue=overall_x100,
        size=summary['pitches_swing'] / summary['pitches'],
        palette=scatter_cmap,
        ax=ax,
    )
    sm = plt.cm.ScalarMappable(cmap=scatter_cmap, norm=norm)
    cbar = plt.colorbar(sm, cax=axfooter2, orientation='horizontal', shrink=1)
    cbar.set_label('Decision Value xRV/100 Pitches', fontsize=12)

    # Dotted guides at the mean of each axis.
    ax.axhline(y=summary['y_pred_no_swing'].mean() * 100, color='gray', linewidth=3, linestyle='dotted', alpha=0.4)
    ax.axvline(x=summary['y_pred_swing'].mean() * 100, color='gray', linewidth=3, linestyle='dotted', alpha=0.4)

    # Axis limits snapped outward to the nearest 0.05.
    x_lim_min = math.floor(summary['y_pred_swing'].min() * 100 * 100 / 5) * 5 / 100
    x_lim_max = math.ceil(summary['y_pred_swing'].max() * 100 * 100 / 5) * 5 / 100
    y_lim_min = math.floor(summary['y_pred_no_swing'].min() * 100 * 100 / 5) * 5 / 100
    y_lim_max = math.ceil(summary['y_pred_no_swing'].max() * 100 * 100 / 5) * 5 / 100

    ax.set_xlim(x_lim_min, x_lim_max)
    ax.set_ylim(y_lim_min, y_lim_max)
    ax.tick_params(axis='both', which='major', labelsize=12)

    ax.set_xlabel('Out-of-Zone Awareness Value xRV/100 Swings', fontsize=16)
    ax.set_ylabel('In-Zone Awareness Value xRV/100 Takes', fontsize=16)
    ax.get_legend().remove()

    # Label outliers and user-selected batters; quantiles are computed once
    # in _label_mask rather than once per row.
    mask = _label_mask(summary, highlighted_ids)
    names = summary.index.get_level_values('batter_name')
    labels = [
        ax.text(x=x, y=y, s=name, fontsize=8)
        for x, y, name in zip(swing_x100.to_numpy()[mask], take_x100.to_numpy()[mask], names[mask])
    ]

    note_x = x_lim_min + abs(x_lim_min) * 0.02
    y_span = abs(y_lim_max - y_lim_min)
    note_box = dict(facecolor='white', edgecolor='black')
    ax.text(x=note_x, y=y_lim_max - y_span * 0.02, s=f'Min. {plot_min} Pitches',
            fontsize=10, fontstyle='oblique', va='top', bbox=note_box)
    ax.text(x=note_x, y=y_lim_max - y_span * 0.06, s='Point Size Represents Swing%',
            fontsize=10, fontstyle='oblique', va='top', bbox=note_box)

    adjust_text(labels, arrowprops=dict(arrowstyle="-", color=colour_palette[4], lw=1), ax=ax)

    axfooter1.axis('off')
    axfooter3.axis('off')
    axheader.axis('off')
    axheader.text(s=f'{level} In-Zone vs Out-of-Zone Awareness Value', fontsize=24, x=0.5, y=0, va='top', ha='center')
    axfooter1.text(0.05, -0.5, "By: Thomas Nestico\n @TJStats", ha='left', va='bottom', fontsize=12)
    axfooter3.text(0.95, -0.5, "Data: MLB", ha='right', va='bottom', fontsize=12)

    fig.subplots_adjust(left=0.01, right=0.99, top=0.975, bottom=0.025)
    return fig


def _build_rolling_figure(batter_id, level, window, *, swing, reference_column,
                          yticks, xlabel, metric_title, label_zorder):
    """Build a rolling-mean xRV/100 chart for one batter at one level.

    Args:
        batter_id: Integer batter id (a key of ``batter_dict``).
        level: Level label ('MLB' or 'AAA'); filters both the batter's
            pitches and the reference population.
        window: Requested rolling window; clamped to at least 1.
        swing: None to use all pitches, True for swings only, False for
            takes only.
        reference_column: Batter-summary column used for the level average,
            the percentile and the 10/25/75/90th percentile guides.
        yticks: Y-axis tick positions.
        xlabel: X-axis label.
        metric_title: Metric name inserted into the chart title.
        label_zorder: z-order of the percentile text boxes.

    Returns:
        The matplotlib figure, or a message figure when the batter has no
        matching pitches (or the level has no reference batters).
    """
    win = max(1, int(window or 1))
    player_name = batter_dict[batter_id]

    player_pitches = df_model_2023[df_model_2023.batter_id == batter_id].sort_values(by=['game_date', 'start_time'])
    player_pitches = player_pitches[player_pitches['level'] == level]
    if swing is True:
        player_pitches = player_pitches[player_pitches['is_swing'] == 1]
    elif swing is False:
        player_pitches = player_pitches[player_pitches['is_swing'] != 1]

    # Compare against batters at the selected level only, so the
    # "{level} Average" label matches the data behind it.
    reference = _level_summary(level)[reference_column]

    if player_pitches.empty or reference.empty:
        # Means/percentiles would be NaN and int(NaN) raises.
        return _message_figure(f'No {level} Data for {player_name}')

    sns.set_theme(style="whitegrid", palette="pastel")
    fig = plt.figure(figsize=(12, 12))
    gs = GridSpec(3, 3, height_ratios=[0.3, 10, 0.2], width_ratios=[0.01, 2, 0.01])
    axheader = fig.add_subplot(gs[0, :])
    ax10 = fig.add_subplot(gs[1, 0])
    ax = fig.add_subplot(gs[1, 1])
    ax12 = fig.add_subplot(gs[1, 2])
    axfooter1 = fig.add_subplot(gs[-1, :])
    for blank_axis in (axheader, ax10, ax12, axfooter1):
        blank_axis.axis('off')

    n_pitches = len(player_pitches)
    rolling_x100 = player_pitches.y_pred.rolling(window=win).mean().dropna().to_numpy() * 100
    # The first complete window ends at pitch number ``win``.
    sns.lineplot(x=range(win, n_pitches + 1), y=rolling_x100,
                 color=colour_palette[0], linewidth=2, ax=ax, zorder=100)

    player_mean = player_pitches.y_pred.mean()
    percentile = int(np.around(percentileofscore(reference, player_mean, kind="strict")))
    ax.hlines(y=player_mean * 100, xmin=win, xmax=n_pitches, color=colour_palette[0], linestyle='--',
              label=f'{player_name} Average: {player_mean * 100:.2f} xRV/100 ({p.ordinal(percentile)} Percentile)')
    ax.hlines(y=reference.mean() * 100, xmin=win, xmax=n_pitches, color=colour_palette[1], linestyle='-.', alpha=1,
              label=f'{level} Average: {reference.mean() * 100:.2f} xRV/100')
    ax.legend()

    # Dotted percentile guides with a small text box at the left edge.
    guides = [(0.9, '90th %'), (0.75, '75th %'), (0.25, '25th %'), (0.1, '10th %')]
    for offset, (quantile, guide_label) in enumerate(guides):
        guide_colour = colour_palette[2 + offset]
        guide_y = reference.quantile(quantile) * 100
        ax.hlines(y=guide_y, xmin=win, xmax=n_pitches, color=guide_colour, linestyle='dotted', alpha=0.5, zorder=1)
        ax.text(win + win / 1000, guide_y, guide_label, rotation=0, va='center', ha='left',
                bbox=dict(facecolor='white', alpha=0.7, edgecolor=guide_colour, pad=2), zorder=label_zorder)

    ax.set_xlim(win, n_pitches)
    ax.set_yticks(yticks)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Expected Run Value Added per 100 Pitches (xRV/100)')

    axheader.text(s=f'{player_name} - {win} Pitch Rolling {metric_title} Expected Run Value Added\n{level} - {year_input}',
                  x=0.5, y=-0.8, ha='center', va='bottom', fontsize=14)
    axfooter1.text(.05, 0.2, "By: Thomas Nestico", ha='left', va='bottom', fontsize=12)
    axfooter1.text(0.95, 0.2, "Data: MLB", ha='right', va='bottom', fontsize=12)

    fig.subplots_adjust(left=0.01, right=0.99, top=0.98, bottom=0.02)
    return fig


def server(input, output, session):
    """Register the four plot outputs; each re-renders when "Generate" is clicked."""

    def _rolling_output(kind):
        """Render the rolling chart ``kind`` ('dv', 'iz' or 'oz') from current inputs."""
        selected = input.batter_id()
        if not selected:
            return _message_figure('Please Select a Batter')
        return _build_rolling_figure(
            int(selected),
            input.level_list(),
            input.rolling_window(),
            **_ROLLING_SPECS[kind],
        )

    @output
    @render.plot(alt="hex_plot")
    @reactive.event(input.go, ignore_none=False)
    def scatter_plot():
        if not input.batter_id():
            return _message_figure('Please Select a Batter')
        return _build_scatter_figure(input.level_list(), input.pitch_min(), input.name_list())

    @output
    @render.plot(alt="hex_plot")
    @reactive.event(input.go, ignore_none=False)
    def dv_plot():
        return _rolling_output('dv')

    @output
    @render.plot(alt="hex_plot")
    @reactive.event(input.go, ignore_none=False)
    def iz_plot():
        return _rolling_output('iz')

    @output
    @render.plot(alt="hex_plot")
    @reactive.event(input.go, ignore_none=False)
    def oz_plot():
        return _rolling_output('oz')


app = App(
    ui.page_fluid(
        ui.tags.base(href=base_url),
        ui.tags.div(
            {"style": "width:90%;margin: 0 auto;max-width: 1600px;"},
            ui.tags.style(
                """
                h4 {
                    margin-top: 1em;font-size:35px;
                }
                h2{
                    font-size:25px;
                }
                """
            ),
            shinyswatch.theme.simplex(),
            ui.tags.h4("TJStats"),
            ui.tags.i("Baseball Analytics and Visualizations"),
            ui.row(
                ui.layout_sidebar(
                    ui.panel_sidebar(
                        ui.input_numeric("pitch_min", "Select Pitch Minimum [min. 50] (Scatter)", value=100, min=50),
                        ui.input_select("name_list", "Select Players to List (Scatter)", batter_dict, selectize=True, multiple=True),
                        ui.input_select("batter_id", "Select Batter (Rolling)", batter_dict, width=1, size=1, selectize=True),
                        ui.input_numeric("rolling_window", "Select Rolling Window (Rolling)", value=100, min=1),
                        ui.input_select("level_list", "Select Level", ['MLB', 'AAA'], selected='MLB'),
                        ui.input_action_button("go", "Generate", class_="btn-primary"),
                    ),
                    ui.panel_main(
                        ui.navset_tab(
                            ui.nav("Scatter Plot", ui.output_plot('scatter_plot', width='1000px', height='1000px')),
                            ui.nav("Rolling DV", ui.output_plot('dv_plot', width='1000px', height='1000px')),
                            ui.nav("Rolling In-Zone", ui.output_plot('iz_plot', width='1000px', height='1000px')),
                            ui.nav("Rolling Out-of-Zone", ui.output_plot('oz_plot', width='1000px', height='1000px')),
                        )
                    ),
                )
            ),
        ),
    ),
    server,
)