nesticot committed on
Commit
08f1132
·
verified ·
1 Parent(s): 0db14a4

Upload 22 files

Browse files
functions/__pycache__/df_update.cpython-39.pyc ADDED
Binary file (14.1 kB). View file
 
functions/__pycache__/pitch_summary_functions.cpython-39.pyc ADDED
Binary file (33.8 kB). View file
 
functions/df_update.py ADDED
@@ -0,0 +1,472 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import polars as pl
2
+ import numpy as np
3
+ import joblib
4
+
5
# Pre-trained estimators, deserialised once when this module is imported.
# The paths are relative, so they resolve against the current working directory.

# Batted-ball models.
loaded_model = joblib.load('joblib_model/barrel_model.joblib')
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')

# Pitch-location models.
in_zone_model = joblib.load('joblib_model/in_zone_model_knn_20240410.joblib')
attack_zone_model = joblib.load('joblib_model/model_attack_zone.joblib')
px_model = joblib.load('joblib_model/linear_reg_model_x.joblib')
pz_model = joblib.load('joblib_model/linear_reg_model_z.joblib')
11
+
12
+
13
class df_update:
    """Derive per-pitch features and aggregate them into summary statistics.

    ``update`` adds indicator, model-predicted and expected-value columns to a
    pitch-level frame. ``update_summary`` and ``update_summary_select`` group
    that enriched frame and turn the summed indicators into rate statistics.
    """

    def __init__(self):
        pass

    def update(self, df_clone: pl.DataFrame) -> pl.DataFrame:
        """
        Return a copy of ``df_clone`` enriched with per-pitch feature columns.

        The input is cloned first, so the caller's frame is never modified.
        The method reads the columns ``type_ab``, ``is_pitch``, ``sz_top``,
        ``sz_bot``, ``zone``, ``x``, ``y``, ``px``, ``pz``, ``event_type``,
        ``play_description``, ``launch_speed``, ``launch_angle``,
        ``play_code``, ``is_swing`` and ``trajectory``, and uses the
        module-level models ``px_model``, ``pz_model``, ``in_zone_model``,
        ``attack_zone_model`` and ``xwoba_model``.

        Parameters:
        df_clone (pl.DataFrame): Pitch-level data containing the columns above.

        Returns:
        pl.DataFrame: The enriched frame, including one-hot ``trajectory_*`` columns.
        """
        df = df_clone.clone()

        hit_codes = ['single', 'double', 'home_run', 'triple']

        ab_codes = ['single', 'strikeout', 'field_out',
                    'grounded_into_double_play', 'fielders_choice', 'force_out',
                    'double', 'field_error', 'home_run', 'triple',
                    'double_play',
                    'fielders_choice_out', 'strikeout_double_play',
                    'other_out', 'triple_play']

        obp_true_codes = ['single', 'walk',
                          'double', 'home_run', 'triple',
                          'hit_by_pitch', 'intent_walk']

        obp_codes = ['single', 'strikeout', 'walk', 'field_out',
                     'grounded_into_double_play', 'fielders_choice', 'force_out',
                     'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                     'hit_by_pitch', 'double_play', 'intent_walk',
                     'fielders_choice_out', 'strikeout_double_play',
                     'sac_fly_double_play',
                     'other_out', 'triple_play']

        bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

        # Events that count towards wOBA with a value of zero.
        woba_out_codes = ['strikeout', 'field_out', 'sac_fly', 'force_out',
                          'grounded_into_double_play', 'fielders_choice',
                          'field_error', 'sac_bunt', 'double_play',
                          'fielders_choice_out', 'strikeout_double_play',
                          'sac_fly_double_play', 'other_out']

        # Every event that contributes to the wOBA denominator.
        woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                      'double', 'sac_fly', 'force_out', 'home_run',
                      'grounded_into_double_play', 'fielders_choice',
                      'field_error', 'triple', 'sac_bunt', 'double_play',
                      'fielders_choice_out', 'strikeout_double_play',
                      'sac_fly_double_play', 'other_out']

        # Linear weights, shared by the observed and the expected wOBA columns.
        woba_weights = {'walk': 0.689, 'hit_by_pitch': 0.720, 'single': 0.881,
                        'double': 1.254, 'triple': 1.589, 'home_run': 2.048}

        event = pl.col('event_type')
        launch_speed = pl.col('launch_speed')
        launch_angle = pl.col('launch_angle')
        play_code = pl.col('play_code')

        is_barrel = (
            (launch_speed * 1.5 - launch_angle >= 117)
            & (launch_speed + launch_angle >= 124)
            & (launch_speed >= 98)
            & (launch_angle >= 4)
            & (launch_angle <= 50)
        )

        # Model inputs come from the frame as it is *before* the clean-up below,
        # so the zone models see raw px/pz/sz_top/sz_bot with nulls replaced by 0.
        location_features = df[['px', 'pz', 'sz_top', 'sz_bot']].fill_null(0).to_numpy()

        df = df.with_columns([
            pl.when(pl.col('type_ab').is_not_null()).then(1).otherwise(0).alias('pa'),
            pl.when(pl.col('is_pitch')).then(1).otherwise(0).alias('pitches'),
            # A strike-zone edge of 0 is treated as missing.
            pl.when(pl.col('sz_top') == 0).then(None).otherwise(pl.col('sz_top')).alias('sz_top'),
            pl.when(pl.col('sz_bot') == 0).then(None).otherwise(pl.col('sz_bot')).alias('sz_bot'),
            pl.when(pl.col('zone') > 0).then(pl.col('zone') < 10).otherwise(None).alias('in_zone'),
            pl.Series(px_model.predict(df[['x']].fill_null(0).to_numpy())[:, 0]).alias('px_predict'),
            pl.Series(pz_model.predict(df[['y']].fill_null(0).to_numpy())[:, 0] + 3.2).alias('pz_predict'),
            pl.Series(in_zone_model.predict(location_features)).alias('in_zone_predict'),
            pl.Series(attack_zone_model.predict(location_features)).alias('attack_zone_predict'),
            pl.when(event.is_in(hit_codes)).then(True).otherwise(False).alias('hits'),
            pl.when(event.is_in(ab_codes)).then(True).otherwise(False).alias('ab'),
            pl.when(event.is_in(obp_true_codes)).then(True).otherwise(False).alias('on_base'),
            pl.when(event.is_in(obp_codes)).then(True).otherwise(False).alias('obp'),
            pl.when(pl.col('play_description').is_in(bip_codes)).then(True).otherwise(False).alias('bip'),
            pl.when(launch_speed.is_null()).then(False)
            .when(is_barrel).then(True)
            .otherwise(None).alias('barrel'),
            pl.when(launch_angle.is_null()).then(False)
            .when((launch_angle >= 8) & (launch_angle <= 32)).then(True)
            .otherwise(None).alias('sweet_spot'),
            pl.when(launch_speed.is_null()).then(False)
            .when(launch_speed >= 94.5).then(True)
            .otherwise(None).alias('hard_hit'),
            pl.when(event == 'single').then(1)
            .when(event == 'double').then(2)
            .when(event == 'triple').then(3)
            .when(event == 'home_run').then(4)
            .otherwise(None).alias('tb'),
            pl.when(event.is_in(woba_out_codes)).then(0)
            .when(event == 'walk').then(woba_weights['walk'])
            .when(event == 'hit_by_pitch').then(woba_weights['hit_by_pitch'])
            .when(event == 'single').then(woba_weights['single'])
            .when(event == 'double').then(woba_weights['double'])
            .when(event == 'triple').then(woba_weights['triple'])
            .when(event == 'home_run').then(woba_weights['home_run'])
            .otherwise(None).alias('woba'),
            pl.when(play_code.is_in(['S', 'W', 'T'])).then(1).otherwise(0).alias('whiffs'),
            pl.when(play_code.is_in(['S', 'W', 'T', 'C'])).then(1).otherwise(0).alias('csw'),
            pl.when(pl.col('is_swing').cast(pl.Boolean)).then(1).otherwise(0).alias('swings'),
            pl.when(event.str.contains('strikeout')).then(True).otherwise(False).alias('k'),
            pl.when(event.is_in(['walk', 'intent_walk'])).then(True).otherwise(False).alias('bb'),
            # Placeholders: these columns are filled in by later steps.
            pl.lit(None).alias('attack_zone'),
            pl.lit(None).alias('woba_pred'),
            pl.lit(None).alias('woba_pred_contact'),
        ])

        # NOTE(review): every expression in a single with_columns call is
        # evaluated against the incoming frame, so the zone flags below read
        # 'in_zone' *before* it is back-filled from 'in_zone_predict' in this
        # same step. Confirm that is intended.
        in_zone = pl.col('in_zone')
        swung = pl.col('swings') == 1
        made_contact = pl.col('whiffs') == 0

        df = df.with_columns([
            pl.when(event.is_in(woba_codes)).then(1).otherwise(None).alias('woba_codes'),
            pl.when(event.is_in(woba_codes)).then(1).otherwise(None).alias('xwoba_codes'),
            pl.when(pl.col('tb') >= 0).then(pl.col('woba')).otherwise(None).alias('woba_contact'),
            pl.when(pl.col('px').is_null()).then(pl.col('px_predict')).otherwise(pl.col('px')).alias('px'),
            pl.when(pl.col('pz').is_null()).then(pl.col('pz_predict')).otherwise(pl.col('pz')).alias('pz'),
            pl.when(in_zone.is_null()).then(pl.col('in_zone_predict')).otherwise(in_zone).alias('in_zone'),
            pl.when(launch_speed.is_null()).then(None).otherwise(pl.col('barrel')).alias('barrel'),
            pl.lit('average').alias('average'),
            pl.when(in_zone == False).then(True).otherwise(False).alias('out_zone'),
            pl.when((in_zone == True) & swung).then(True).otherwise(False).alias('zone_swing'),
            pl.when((in_zone == True) & swung & made_contact).then(True).otherwise(False).alias('zone_contact'),
            pl.when((in_zone == False) & swung).then(True).otherwise(False).alias('ozone_swing'),
            pl.when((in_zone == False) & swung & made_contact).then(True).otherwise(False).alias('ozone_contact'),
            pl.when(pl.col('attack_zone').is_null())
            .then(pl.col('attack_zone_predict'))
            .otherwise(pl.col('attack_zone')).alias('attack_zone'),
        ])

        attack_zone = pl.col('attack_zone')
        whiffed = pl.col('whiffs') == 1
        strikeout = pl.col('k').cast(pl.Float32)
        walk = pl.col('bb').cast(pl.Float32)

        df = df.with_columns([
            (strikeout - walk).alias('k_minus_bb'),
            (walk - strikeout).alias('bb_minus_k'),
            (launch_speed > 0).alias('bip_div'),
            (attack_zone == 0).alias('heart'),
            (attack_zone == 1).alias('shadow'),
            (attack_zone == 2).alias('chase'),
            (attack_zone == 3).alias('waste'),
            ((attack_zone == 0) & swung).alias('heart_swing'),
            ((attack_zone == 1) & swung).alias('shadow_swing'),
            ((attack_zone == 2) & swung).alias('chase_swing'),
            ((attack_zone == 3) & swung).alias('waste_swing'),
            ((attack_zone == 0) & whiffed).alias('heart_whiff'),
            ((attack_zone == 1) & whiffed).alias('shadow_whiff'),
            ((attack_zone == 2) & whiffed).alias('chase_whiff'),
            ((attack_zone == 3) & whiffed).alias('waste_whiff'),
        ])

        # Expected wOBA on contact: class probabilities weighted by the value of
        # each outcome, summed per row in one vectorised pass.
        batted_ball = df[['launch_angle', 'launch_speed']].fill_null(0).to_numpy()
        outcome_values = np.array([0,
                                   woba_weights['single'],
                                   woba_weights['double'],
                                   woba_weights['triple'],
                                   woba_weights['home_run']])
        expected_woba = (xwoba_model.predict_proba(batted_ball) * outcome_values).sum(axis=1)

        df = df.with_columns([
            pl.Series(expected_woba).alias('woba_pred_predict'),
        ])

        # Walks, hit-by-pitches and strikeouts have a fixed value regardless of
        # what the batted-ball model predicts.
        df = df.with_columns([
            pl.when(event.is_in(['walk'])).then(woba_weights['walk'])
            .when(event.is_in(['hit_by_pitch'])).then(woba_weights['hit_by_pitch'])
            .when(event.is_in(['strikeout', 'strikeout_double_play'])).then(0)
            .otherwise(pl.col('woba_pred_predict')).alias('woba_pred_predict'),
        ])

        df = df.with_columns([
            pl.when(pl.col('woba_codes').is_null()).then(None)
            .otherwise(pl.col('woba_pred_predict')).alias('woba_pred'),
            pl.when(pl.col('bip') != 1).then(None)
            .otherwise(pl.col('woba_pred_predict')).alias('woba_pred_contact'),
        ])

        # Fold bunt trajectories into the regular categories; '' becomes null.
        trajectory = pl.col('trajectory')
        df = df.with_columns([
            pl.when(trajectory.is_in(['bunt_popup'])).then(pl.lit('popup'))
            .when(trajectory.is_in(['bunt_grounder'])).then(pl.lit('ground_ball'))
            .when(trajectory.is_in(['bunt_line_drive'])).then(pl.lit('line_drive'))
            .when(trajectory.is_in([''])).then(pl.lit(None))
            .otherwise(trajectory).alias('trajectory'),
        ])

        # One-hot encode the trajectory column.
        dummy_df = df.select(pl.col('trajectory')).to_dummies()

        # A category that never occurs in this frame gets no dummy column, so add
        # an all-zero one. (The identity rename that used to run first raised on
        # exactly those frames, before this fallback could take effect.)
        for col in ['trajectory_fly_ball', 'trajectory_ground_ball',
                    'trajectory_line_drive', 'trajectory_popup']:
            if col not in dummy_df.columns:
                dummy_df = dummy_df.with_columns(pl.lit(0).alias(col))

        df = df.hstack(dummy_df)

        # to_dummies emits a 'trajectory_null' column when nulls are present.
        if 'trajectory_null' in df.columns:
            df = df.drop('trajectory_null')

        return df

    def update_summary(self, df: pl.DataFrame, pitcher: bool = True) -> pl.DataFrame:
        """
        Summarise an enriched frame per pitcher or per batter.

        Parameters:
        df (pl.DataFrame): A frame produced by ``update``.
        pitcher (bool): Group by ``pitcher_id``/``pitcher_name`` when True,
            by ``batter_id``/``batter_name`` when False.

        Returns:
        pl.DataFrame: One row per player with summed counts and rate statistics.
        """
        position = 'pitcher' if pitcher else 'batter'
        return self.update_summary_select(df, [f'{position}_id', f'{position}_name'])

    def update_summary_select(self, df: pl.DataFrame, selection: list) -> pl.DataFrame:
        """
        Summarise an enriched frame over arbitrary grouping columns.

        Parameters:
        df (pl.DataFrame): A frame produced by ``update``.
        selection (list): Column names to group by.

        Returns:
        pl.DataFrame: One row per group with summed counts and rate statistics.
        """

        def total(column, alias=None):
            # Sum of `column`, stored under `alias` (defaults to the column name).
            return pl.col(column).sum().alias(alias or column)

        def rate(alias, numerator, denominator):
            # Ratio of two aggregated columns.
            return (pl.col(numerator) / pl.col(denominator)).alias(alias)

        df_summ = df.group_by(selection).agg([
            total('pa'),
            total('ab'),
            total('obp', 'obp_pa'),
            total('hits'),
            total('on_base'),
            total('k'),
            total('bb'),
            total('bb_minus_k'),
            total('csw'),
            total('bip'),
            total('bip_div'),
            total('tb'),
            total('woba'),
            total('woba_contact'),
            total('woba_pred', 'xwoba'),
            total('woba_pred_contact', 'xwoba_contact'),
            total('woba_codes'),
            total('xwoba_codes'),
            total('hard_hit'),
            total('barrel'),
            total('sweet_spot'),
            pl.col('launch_speed').max().alias('max_launch_speed'),
            pl.col('launch_speed').quantile(0.90).alias('launch_speed_90'),
            pl.col('launch_speed').mean().alias('launch_speed'),
            pl.col('launch_angle').mean().alias('launch_angle'),
            total('is_pitch', 'pitches'),
            total('swings'),
            total('in_zone'),
            total('out_zone'),
            total('whiffs'),
            total('zone_swing'),
            total('zone_contact'),
            total('ozone_swing'),
            total('ozone_contact'),
            total('trajectory_ground_ball', 'ground_ball'),
            total('trajectory_line_drive', 'line_drive'),
            total('trajectory_fly_ball', 'fly_ball'),
            total('trajectory_popup', 'pop_up'),
            # Number of non-null attack-zone labels: the zone-share denominator.
            pl.col('attack_zone').count().alias('attack_zone'),
            total('heart'),
            total('shadow'),
            total('chase'),
            total('waste'),
            total('heart_swing'),
            total('shadow_swing'),
            total('chase_swing'),
            total('waste_swing'),
            total('heart_whiff'),
            total('shadow_whiff'),
            total('chase_whiff'),
            total('waste_whiff'),
        ])

        df_summ = df_summ.with_columns([
            rate('avg', 'hits', 'ab'),
            rate('obp', 'on_base', 'obp_pa'),
            rate('slg', 'tb', 'ab'),
            (pl.col('on_base') / pl.col('obp_pa') + pl.col('tb') / pl.col('ab')).alias('ops'),
            rate('k_percent', 'k', 'pa'),
            rate('bb_percent', 'bb', 'pa'),
            rate('bb_minus_k_percent', 'bb_minus_k', 'pa'),
            rate('bb_over_k_percent', 'bb', 'k'),
            rate('csw_percent', 'csw', 'pitches'),
            rate('sweet_spot_percent', 'sweet_spot', 'bip_div'),
            rate('woba_percent', 'woba', 'woba_codes'),
            rate('woba_percent_contact', 'woba_contact', 'bip'),
            rate('hard_hit_percent', 'hard_hit', 'bip_div'),
            rate('barrel_percent', 'barrel', 'bip_div'),
            rate('zone_contact_percent', 'zone_contact', 'zone_swing'),
            rate('zone_swing_percent', 'zone_swing', 'in_zone'),
            rate('zone_percent', 'in_zone', 'pitches'),
            (pl.col('ozone_swing') / (pl.col('pitches') - pl.col('in_zone'))).alias('chase_percent'),
            rate('chase_contact', 'ozone_contact', 'ozone_swing'),
            rate('swing_percent', 'swings', 'pitches'),
            rate('whiff_rate', 'whiffs', 'swings'),
            rate('swstr_rate', 'whiffs', 'pitches'),
            rate('ground_ball_percent', 'ground_ball', 'bip'),
            rate('line_drive_percent', 'line_drive', 'bip'),
            rate('fly_ball_percent', 'fly_ball', 'bip'),
            rate('pop_up_percent', 'pop_up', 'bip'),
            rate('heart_zone_percent', 'heart', 'attack_zone'),
            rate('shadow_zone_percent', 'shadow', 'attack_zone'),
            rate('chase_zone_percent', 'chase', 'attack_zone'),
            rate('waste_zone_percent', 'waste', 'attack_zone'),
            rate('heart_zone_swing_percent', 'heart_swing', 'heart'),
            rate('shadow_zone_swing_percent', 'shadow_swing', 'shadow'),
            rate('chase_zone_swing_percent', 'chase_swing', 'chase'),
            rate('waste_zone_swing_percent', 'waste_swing', 'waste'),
            rate('heart_zone_whiff_percent', 'heart_whiff', 'heart_swing'),
            rate('shadow_zone_whiff_percent', 'shadow_whiff', 'shadow_swing'),
            rate('chase_zone_whiff_percent', 'chase_whiff', 'chase_swing'),
            rate('waste_zone_whiff_percent', 'waste_whiff', 'waste_swing'),
            rate('xwoba_percent', 'xwoba', 'xwoba_codes'),
            rate('xwoba_percent_contact', 'xwoba_contact', 'bip'),
        ])

        return df_summ
functions/pitch_summary_functions.py ADDED
@@ -0,0 +1,1029 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import json
4
+ from matplotlib.ticker import FuncFormatter
5
+ from matplotlib.ticker import MaxNLocator
6
+ import math
7
+ from matplotlib.patches import Ellipse
8
+ import matplotlib.transforms as transforms
9
+ import matplotlib.colors
10
+ import matplotlib.colors as mcolors
11
+ import seaborn as sns
12
+ import matplotlib.pyplot as plt
13
+ import requests
14
+ import polars as pl
15
+ from PIL import Image
16
+ import requests
17
+ from io import BytesIO
18
+ from matplotlib.offsetbox import OffsetImage, AnnotationBbox
19
+ import matplotlib.pyplot as plt
20
+ import matplotlib.gridspec as gridspec
21
+ import PIL
22
+
23
+
24
### PITCH COLOURS ###

# Pitch-type code -> display colour (hex) and human-readable name.
pitch_colours = {
    ## Fastballs ##
    'FF': {'colour': '#FF007D', 'name': '4-Seam Fastball'},
    'FA': {'colour': '#FF007D', 'name': 'Fastball'},
    'SI': {'colour': '#98165D', 'name': 'Sinker'},
    'FC': {'colour': '#BE5FA0', 'name': 'Cutter'},

    ## Offspeed ##
    'CH': {'colour': '#F79E70', 'name': 'Changeup'},
    'FS': {'colour': '#FE6100', 'name': 'Splitter'},
    'SC': {'colour': '#F08223', 'name': 'Screwball'},
    'FO': {'colour': '#FFB000', 'name': 'Forkball'},

    ## Sliders ##
    'SL': {'colour': '#67E18D', 'name': 'Slider'},
    'ST': {'colour': '#1BB999', 'name': 'Sweeper'},
    'SV': {'colour': '#376748', 'name': 'Slurve'},

    ## Curveballs ##
    'KC': {'colour': '#311D8B', 'name': 'Knuckle Curve'},
    'CU': {'colour': '#3025CE', 'name': 'Curveball'},
    'CS': {'colour': '#274BFC', 'name': 'Slow Curve'},
    'EP': {'colour': '#648FFF', 'name': 'Eephus'},

    ## Others ##
    'KN': {'colour': '#867A08', 'name': 'Knuckleball'},
    'PO': {'colour': '#472C30', 'name': 'Pitch Out'},
    'UN': {'colour': '#9C8975', 'name': 'Unknown'},
}

# Lookups derived from the table above.
dict_colour = {code: info['colour'] for code, info in pitch_colours.items()}   # code -> colour
dict_pitch = {code: info['name'] for code, info in pitch_colours.items()}      # code -> name

# name -> code, plus an alternative spelling and the 'All' pseudo-type.
dict_pitch_desc_type = {info['name']: code for code, info in pitch_colours.items()}
dict_pitch_desc_type['Four-Seam Fastball'] = 'FF'
dict_pitch_desc_type['All'] = 'All'

# name -> colour, plus the alternative four-seam spelling.
dict_pitch_name = {info['name']: info['colour'] for info in pitch_colours.values()}
dict_pitch_name['Four-Seam Fastball'] = '#FF007D'
65
+
66
# Font settings for body text, titles and axis labels.
# The family was previously misspelled 'calibi', which is not a font name.
font_properties = {'family': 'calibri', 'size': 12}
font_properties_titles = {'family': 'calibri', 'size': 20}
font_properties_axes = {'family': 'calibri', 'size': 16}
69
+
70
# Three-stop colour gradient: '#648FFF' -> '#FFFFFF' -> '#FFB000'.
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list(
    "",
    ['#648FFF', '#FFFFFF', '#FFB000'],
)
71
+
72
### FANGRAPHS STATS DICT ###

# Stat key -> bold mathtext table header and the format spec used to render it.
# Headers are raw strings so that sequences such as `\/` and `\%` are kept
# verbatim without relying on invalid escape sequences.
fangraphs_stats_dict = {
    'IP': {'table_header': r'$\bf{IP}$', 'format': '.1f'},
    'TBF': {'table_header': r'$\bf{PA}$', 'format': '.0f'},
    'AVG': {'table_header': r'$\bf{AVG}$', 'format': '.3f'},
    'K/9': {'table_header': r'$\bf{K\/9}$', 'format': '.2f'},
    'BB/9': {'table_header': r'$\bf{BB\/9}$', 'format': '.2f'},
    'K/BB': {'table_header': r'$\bf{K\/BB}$', 'format': '.2f'},
    'HR/9': {'table_header': r'$\bf{HR\/9}$', 'format': '.2f'},
    'K%': {'table_header': r'$\bf{K\%}$', 'format': '.1%'},
    'BB%': {'table_header': r'$\bf{BB\%}$', 'format': '.1%'},
    'K-BB%': {'table_header': r'$\bf{K-BB\%}$', 'format': '.1%'},
    'WHIP': {'table_header': r'$\bf{WHIP}$', 'format': '.2f'},
    'BABIP': {'table_header': r'$\bf{BABIP}$', 'format': '.3f'},
    'LOB%': {'table_header': r'$\bf{LOB\%}$', 'format': '.1%'},
    'xFIP': {'table_header': r'$\bf{xFIP}$', 'format': '.2f'},
    'FIP': {'table_header': r'$\bf{FIP}$', 'format': '.2f'},
    'H': {'table_header': r'$\bf{H}$', 'format': '.0f'},
    '2B': {'table_header': r'$\bf{2B}$', 'format': '.0f'},
    '3B': {'table_header': r'$\bf{3B}$', 'format': '.0f'},
    'R': {'table_header': r'$\bf{R}$', 'format': '.0f'},
    'ER': {'table_header': r'$\bf{ER}$', 'format': '.0f'},
    'HR': {'table_header': r'$\bf{HR}$', 'format': '.0f'},
    'BB': {'table_header': r'$\bf{BB}$', 'format': '.0f'},
    'IBB': {'table_header': r'$\bf{IBB}$', 'format': '.0f'},
    'HBP': {'table_header': r'$\bf{HBP}$', 'format': '.0f'},
    'SO': {'table_header': r'$\bf{SO}$', 'format': '.0f'},
    # OBP and SLG are rates; '.0f' would render them as 0 or 1.
    'OBP': {'table_header': r'$\bf{OBP}$', 'format': '.3f'},
    'SLG': {'table_header': r'$\bf{SLG}$', 'format': '.3f'},
    'ERA': {'table_header': r'$\bf{ERA}$', 'format': '.2f'},
    'wOBA': {'table_header': r'$\bf{wOBA}$', 'format': '.3f'},
    'G': {'table_header': r'$\bf{G}$', 'format': '.0f'},
    'strikePercentage': {'table_header': r'$\bf{Strike\%}$', 'format': '.1%'},
}
104
+
105
# General-purpose palette; entries are referenced by index elsewhere in the file.
colour_palette = [
    '#FFB000',
    '#648FFF',
    '#785EF0',
    '#DC267F',
    '#FE6100',
    '#3D1EB2',
    '#894D80',
    '#16AA02',
    '#B5592B',
    '#A3C1ED',
]
107
+
108
+ ### GET COLOURS ###
109
def get_color(value, normalize, cmap_sum):
    """
    Map a numeric value to a hex colour string.

    Parameters
    ----------
    value : float
        The value to colour.
    normalize : matplotlib.colors.Normalize
        Callable that scales ``value`` before it is passed to the colormap.
    cmap_sum : matplotlib.colors.Colormap
        Colormap called with the normalised value.

    Returns
    -------
    str
        Hexadecimal colour code produced by ``mcolors.to_hex``.
    """
    scaled = normalize(value)
    rgba = cmap_sum(scaled)
    return mcolors.to_hex(rgba)
129
+
130
+ ### PITCH ELLIPSE ###
131
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Create a plot of the covariance confidence ellipse of *x* and *y*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data. Converted with ``np.asarray(..., dtype=float)``, so plain
        lists work as well as objects exposing ``.mean()``.

    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.

    n_std : float
        The number of standard deviations to determine the ellipse's radiuses.

    facecolor : colour
        Fill colour of the ellipse (default ``'none'``).

    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`. ``linewidth`` and
        ``linestyle`` default to ``2`` and ``'--'`` but may be overridden.

    Returns
    -------
    matplotlib.patches.Ellipse or None
        The patch added to *ax*, or ``None`` when no ellipse can be drawn
        (fewer than two points, zero/non-finite variance, or a ``ValueError``
        while building the patch).

    Raises
    ------
    ValueError
        If *x* and *y* have different lengths.
    """
    if len(x) != len(y):
        raise ValueError("x and y must be the same size")

    x_vals = np.asarray(x, dtype=float)
    y_vals = np.asarray(y, dtype=float)

    # A covariance needs at least two observations.
    if x_vals.size < 2:
        return None

    try:
        cov = np.cov(x_vals, y_vals)
        var_x, var_y = cov[0, 0], cov[1, 1]

        # Zero or NaN variance would make the correlation undefined and the
        # resulting patch degenerate, so draw nothing instead.
        if not (np.isfinite(var_x) and np.isfinite(var_y) and var_x > 0 and var_y > 0):
            return None

        # Clip guards against rounding pushing |r| marginally above 1, which
        # would put a negative number under the square roots below.
        pearson = float(np.clip(cov[0, 1] / np.sqrt(var_x * var_y), -1.0, 1.0))

        # Using a special case to obtain the eigenvalues of this
        # two-dimensional dataset.
        ell_radius_x = np.sqrt(1 + pearson)
        ell_radius_y = np.sqrt(1 - pearson)

        # Defaults first so caller-supplied values win instead of raising
        # "got multiple values for keyword argument".
        patch_style = {'linewidth': 2, 'linestyle': '--', **kwargs}
        ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                          facecolor=facecolor, **patch_style)

        # Standard deviation along each axis times the requested number of
        # standard deviations gives the final scale; the means give the centre.
        scale_x = np.sqrt(var_x) * n_std
        scale_y = np.sqrt(var_y) * n_std
        mean_x = x_vals.mean()
        mean_y = y_vals.mean()

        transf = (
            transforms.Affine2D()
            .rotate_deg(45)
            .scale(scale_x, scale_y)
            .translate(mean_x, mean_y)
        )
        ellipse.set_transform(transf + ax.transData)
    except ValueError:
        # Best effort: the ellipse is decorative, so skip it rather than fail.
        return None

    return ax.add_patch(ellipse)
191
+ ### VELOCITY KDES ###
192
def velocity_kdes(df: pl.DataFrame,
                  ax: plt.Axes,
                  gs: gridspec.GridSpec,
                  gs_x: list,
                  gs_y: list,
                  fig: plt.Figure):
    """
    Plot the velocity KDEs for different pitch types.

    One small panel is stacked per pitch type (ordered by ``pitch_count``,
    descending). Each panel shows the ``start_speed`` distribution, a dashed
    line at the mean of ``df`` for that pitch type and a dotted line at the
    mean ``release_speed`` for the same pitch type read from
    ``functions/statcast_2024_grouped.csv``.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads the ``pitch_count``,
        ``pitch_type`` and ``start_speed`` columns.
    ax : plt.Axes
        The outer axis; it is switched off and only carries the title.
    gs : GridSpec
        The GridSpec for the subplot layout.
    gs_x : list
        Row bounds into ``gs``; the first and last entries form the slice.
    gs_y : list
        Column bounds into ``gs``; the first and last entries form the slice.
    fig : plt.Figure
        The figure the per-pitch panels are added to.
    """
    # Local import kept so ``gridspec`` resolves inside the function regardless
    # of how the module-level import is spelled.
    import matplotlib.gridspec as gridspec

    # Pitch types ordered by how often they were thrown
    items_in_order = (
        df.sort("pitch_count", descending=True)['pitch_type']
        .unique(maintain_order=True)
        .to_numpy()
    )

    ax.axis('off')
    ax.set_title('Pitch Velocity Distribution', fontdict=font_properties_titles)

    # Nothing to draw: a zero-row inner grid cannot be created.
    if len(items_in_order) == 0:
        return

    # Create the inner subplots inside the outer subplot
    inner_grid_1 = gridspec.GridSpecFromSubplotSpec(
        len(items_in_order), 1,
        subplot_spec=gs[gs_x[0]:gs_x[-1], gs_y[0]:gs_y[-1]])
    ax_top = [fig.add_subplot(inner) for inner in inner_grid_1]

    # Loop-invariant work: shared x range (rounded outwards to multiples of 5)
    # and the league reference table, read once instead of once per pitch type.
    x_low = math.floor(df['start_speed'].min() / 5) * 5
    x_high = math.ceil(df['start_speed'].max() / 5) * 5
    x_ticks = list(range(x_low, x_high, 5))
    df_statcast_group = pl.read_csv('functions/statcast_2024_grouped.csv')

    last_idx = len(items_in_order) - 1
    for idx, pitch in enumerate(items_in_order):
        panel = ax_top[idx]
        colour = dict_colour[pitch]
        pitch_data = df.filter(pl.col('pitch_type') == pitch)['start_speed']

        distinct_speeds = np.unique(pitch_data)
        if distinct_speeds.size == 1:
            # A KDE is undefined for a single distinct value; draw a bar.
            panel.plot([distinct_speeds, distinct_speeds], [0, 1], linewidth=4,
                       color=colour, zorder=20)
        else:
            sns.kdeplot(pitch_data, ax=panel, fill=True,
                        clip=(pitch_data.min(), pitch_data.max()),
                        color=colour)

        # Mean speed in the supplied data. The y-limits are re-read before
        # each line because drawing a line can itself rescale the axis.
        own_mean = pitch_data.mean()
        panel.plot([own_mean, own_mean],
                   [panel.get_ylim()[0], panel.get_ylim()[1]],
                   color=colour,
                   linestyle='--')

        # Mean release speed for the same pitch type in the reference table
        league_speeds = df_statcast_group.filter(
            df_statcast_group['pitch_type'] == pitch)['release_speed']
        league_mean = league_speeds.mean()
        panel.plot([league_mean, league_mean],
                   [panel.get_ylim()[0], panel.get_ylim()[1]],
                   color=colour,
                   linestyle=':')

        panel.set_xlim(x_low, x_high)
        panel.set_xlabel('')
        panel.set_ylabel('')
        if idx < last_idx:
            # Only the bottom panel keeps visible x tick labels.
            panel.spines['top'].set_visible(False)
            panel.spines['right'].set_visible(False)
            panel.spines['left'].set_visible(False)
            panel.tick_params(axis='x', colors='none')

        panel.set_xticks(x_ticks)
        panel.set_yticks([])
        panel.grid(axis='x', linestyle='--')
        panel.text(-0.01, 0.5, pitch, transform=panel.transAxes,
                   fontsize=14, va='center', ha='right')

    bottom = ax_top[-1]
    bottom.spines['top'].set_visible(False)
    bottom.spines['right'].set_visible(False)
    bottom.spines['left'].set_visible(False)
    bottom.set_xticks(x_ticks)
    bottom.set_xlabel('Velocity (mph)')
277
+
278
+
279
+ ### TJ STUFF+ ROLLING ###
280
def tj_stuff_roling(df: pl.DataFrame, window: int, ax: plt.Axes):
    """
    Plot the rolling average of tjStuff+ for different pitch types.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads the ``pitch_count``,
        ``pitch_type`` and ``tj_stuff_plus`` columns.
    window : int
        The window size for calculating the rolling average.
    ax : plt.Axes
        The axis to plot on.
    """
    # Pitch types ordered by pitch count, most frequent first
    items_in_order = (
        df.sort("pitch_count", descending=True)['pitch_type']
        .unique(maintain_order=True)
        .to_numpy()
    )

    # Plot the rolling average for each pitch type
    for pitch in items_in_order:
        # Filter once per pitch type instead of once per use.
        pitch_rows = df.filter(pl.col('pitch_type') == pitch)
        n_pitches = max(pitch_rows['pitch_count'])

        # Skip pitch types whose count is below the window size.
        if n_pitches < window:
            continue

        sns.lineplot(
            x=range(1, n_pitches + 1),
            y=pitch_rows['tj_stuff_plus'].rolling_mean(window),
            color=dict_colour[pitch],
            ax=ax,
            linewidth=3
        )

    # Start the x-axis at the window size (the first complete rolling value)
    ax.set_xlim(window, max(df['pitch_count']))
    ax.set_ylim(70, 130)
    ax.set_xlabel('Pitches', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
321
+
322
+
323
+ ### TJ STUFF+ ROLLING ###
324
def tj_stuff_roling_game(df: pl.DataFrame, window: int, ax: plt.Axes):
    """
    Plot the rolling average of tjStuff+ for different pitch types over games.

    Games are numbered 1..N in order of first appearance of ``game_id``. For
    each pitch type the per-game mean ``tj_stuff_plus`` is computed; games in
    which the pitch type does not appear are filled from neighbouring games
    and marked with a white point.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads ``game_id``, ``game_date``,
        ``pitch_type``, ``pitch_description`` and ``tj_stuff_plus``.
    window : int
        The window size for calculating the rolling average.
    ax : plt.Axes
        The axis to plot on.
    """
    # Map game_id to sequential numbers
    date_to_number = {date: i + 1 for i, date in enumerate(df['game_id'].unique(maintain_order=True))}
    n_games = len(date_to_number)

    # Add a column with the sequential game numbers
    df_plot = df.with_columns(
        pl.col("game_id").map_elements(lambda x: date_to_number.get(x, x)).alias("start_number")
    )

    # Group by relevant columns and calculate mean tj_stuff_plus
    plot_game_roll = df_plot.group_by(['start_number', 'game_id', 'game_date', 'pitch_type', 'pitch_description']).agg(
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus')
    ).sort('start_number', descending=False)

    # Get the list of pitch types ordered by frequency
    sorted_value_counts = df['pitch_type'].value_counts().sort('count', descending=True)
    items_in_order = sorted_value_counts['pitch_type'].to_list()

    # Every game number, used to expose games where a pitch type is missing
    all_games = pl.DataFrame({"start_number": list(date_to_number.values())})

    # Plot the rolling average for each pitch type
    for i in items_in_order:
        df_item = plot_game_roll.filter(pl.col('pitch_type') == i)
        # NOTE(review): relies on the non-coalescing join producing a
        # ``start_number_right`` column; newer polars spells this how="full".
        df_item = df_item.with_columns(
            pl.col("start_number").cast(pl.Int64)
        ).join(
            all_games,
            on="start_number",
            how="outer"
        ).sort("start_number_right").with_columns([
            pl.col("start_number").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("tj_stuff_plus").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("pitch_type").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("pitch_description").fill_null(strategy="forward").fill_null(strategy="backward")
        ])

        sns.lineplot(x=range(1, max(df_item['start_number_right']) + 1),
                     y=df_item.filter(pl.col('pitch_type') == i)['tj_stuff_plus'].rolling_mean(window),
                     color=dict_colour[i],
                     ax=ax, linewidth=3)

        # Highlight games with no data for this pitch type. ``game_id`` is not
        # filled above, so it is still null exactly on the filled-in rows.
        missing_games = df_item.filter(pl.col('game_id').is_null())
        if len(missing_games) > 0:
            sns.scatterplot(x=missing_games['start_number_right'].to_list(),
                            y=missing_games['tj_stuff_plus'].to_list(),
                            color='white',
                            ec='black',
                            ax=ax,
                            zorder=100)

    # Span every game. Previously the upper limit came from whichever pitch
    # type was processed last (and raised NameError when there were none).
    ax.set_xlim(window, n_games)
    ax.set_ylim(70, 130)
    ax.set_xlabel('Games', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Game Rolling tjStuff+", fontdict=font_properties_titles)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
392
+
393
+
394
def break_plot(df: pl.DataFrame, ax: plt.Axes):
    """
    Plot the pitch breaks for different pitch types.

    Draws one confidence ellipse per pitch type that has more than four
    pitches, then a scatter of every pitch (``hb`` against ``ivb``) coloured
    by pitch type. For a left-handed pitcher the x-axis is inverted.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads ``pitch_count``,
        ``pitch_type``, ``hb``, ``ivb`` and ``pitcher_hand`` (first row only).
    ax : plt.Axes
        The axis to plot on.
    """
    # Get unique pitch types sorted by pitch count
    label_labels = df.sort(by=['pitch_count', 'pitch_type'], descending=[False, True])['pitch_type'].unique(maintain_order=True).to_numpy()

    # Plot confidence ellipses for each pitch type
    for label in label_labels:
        subset = df.filter(pl.col('pitch_type') == label)
        if len(subset) > 4:
            try:
                confidence_ellipse(subset['hb'], subset['ivb'], ax=ax, edgecolor=dict_colour[label], n_std=2, facecolor=dict_colour[label], alpha=0.2)
            except ValueError:
                # Skip only this ellipse; the rest of the chart is still drawn.
                continue

    pitcher_hand = df['pitcher_hand'][0]

    # Scatter of every pitch. The call was identical for both hands, so it is
    # issued once; this also covers a hand value other than 'R'/'L'.
    sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df['pitch_type'], palette=dict_colour, ec='black', alpha=1, zorder=2)

    # Set axis limits
    ax.set_xlim((-25, 25))
    ax.set_ylim((-25, 25))

    # Add horizontal and vertical lines through the origin
    ax.hlines(y=0, xmin=-50, xmax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.vlines(x=0, ymin=-50, ymax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)

    # Set axis labels and title
    ax.set_xlabel('Horizontal Break (in)', fontdict=font_properties_axes)
    ax.set_ylabel('Induced Vertical Break (in)', fontdict=font_properties_axes)
    ax.set_title("Pitch Breaks", fontdict=font_properties_titles)

    # Remove legend (there is none when nothing was plotted with a hue)
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    # Set tick labels
    ax.set_xticklabels(ax.get_xticks(), fontdict=font_properties)
    ax.set_yticklabels(ax.get_yticks(), fontdict=font_properties)

    # Add text annotations for glove side and arm side
    if pitcher_hand == 'R':
        ax.text(-24.5, -24.5, s='← Glove Side', fontstyle='italic', ha='left', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
        ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
    elif pitcher_hand == 'L':
        # The axis is flipped for left-handers, so the data coordinates of the
        # two labels swap sides relative to the right-handed case.
        ax.invert_xaxis()
        ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)
        ax.text(-24.5, -24.5, s='Glove Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=dict(facecolor='white', edgecolor='black'), fontsize=12, zorder=3)

    # Set aspect ratio and format axis ticks as integers
    ax.set_aspect('equal', adjustable='box')
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
    ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
460
+
461
+ # DEFINE STRIKE ZONE
462
# Closed outline of the strike zone: five points, the last repeating the first
# so that plotting the columns in order draws a full rectangle.
strike_zone = pl.DataFrame(
    dict(
        PlateLocSide=[-0.9, -0.9, 0.9, 0.9, -0.9],
        PlateLocHeight=[1.5, 3.5, 3.5, 1.5, 1.5],
    )
)
466
+
467
+ ### STRIKE ZONE ###
468
def draw_line(axis, alpha_spot=1, catcher_p=True):
    """
    Draw the strike zone outline and a home-plate outline on ``axis``.

    Parameters
    ----------
    axis : matplotlib.axes.Axes
        The axis to draw on.
    alpha_spot : float, optional
        Transparency applied to every line (default is 1).
    catcher_p : bool, optional
        When true the plate is drawn from the catcher's perspective,
        otherwise from the pitcher's perspective (default is True).
    """
    # Strike zone rectangle from the module-level ``strike_zone`` frame
    zone_x = strike_zone['PlateLocSide'].to_list()
    zone_y = strike_zone['PlateLocHeight'].to_list()
    axis.plot(zone_x, zone_y, color='black', linewidth=1.3, zorder=3, alpha=alpha_spot)

    # Home plate as five straight edges, each given as (x pair, y pair)
    if catcher_p:
        plate_edges = (
            ([-0.708, 0.708], [0.15, 0.15]),
            ([-0.708, -0.708], [0.15, 0.3]),
            ([-0.708, 0], [0.3, 0.5]),
            ([0, 0.708], [0.5, 0.3]),
            ([0.708, 0.708], [0.3, 0.15]),
        )
    else:
        plate_edges = (
            ([-0.708, 0.708], [0.4, 0.4]),
            ([-0.708, -0.9], [0.4, -0.1]),
            ([-0.9, 0], [-0.1, -0.35]),
            ([0, 0.9], [-0.35, -0.1]),
            ([0.9, 0.708], [-0.1, 0.4]),
        )

    for edge_x, edge_y in plate_edges:
        axis.plot(edge_x, edge_y, color='black', linewidth=1, alpha=alpha_spot, zorder=1)
499
+
500
def location_plot(df: pl.DataFrame, ax: plt.Axes, hand: str):
    """
    Plot average pitch locations per pitch type against one batter hand.

    For every pitch type with at least five pitches against ``hand`` a
    confidence ellipse of ``px``/``pz`` is drawn; the mean location of each
    pitch type is then plotted with a marker sized by its usage share.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads ``pitch_count``,
        ``pitch_type``, ``batter_hand``, ``start_speed``, ``px`` and ``pz``.
    ax : plt.Axes
        The axis to plot on.
    hand : str
        The batter hand ('L' for left-handed, 'R' for right-handed).
    """
    # Unique pitch types, ordered by pitch count
    pitch_order = (
        df.sort(by=['pitch_count', 'pitch_type'], descending=[False, True])['pitch_type']
        .unique(maintain_order=True)
        .to_numpy()
    )

    # One ellipse per pitch type, provided there are enough pitches
    for pitch in pitch_order:
        pitches_vs_hand = df.filter((pl.col('pitch_type') == pitch) & (pl.col('batter_hand') == hand))
        if len(pitches_vs_hand) < 5:
            continue
        confidence_ellipse(pitches_vs_hand['px'], pitches_vs_hand['pz'], ax=ax,
                           edgecolor=dict_colour[pitch], n_std=1.5,
                           facecolor=dict_colour[pitch], alpha=0.3)

    # Mean location and pitch count per pitch type against this hand
    facing_hand = df.filter(pl.col("batter_hand") == hand)
    pitch_location_group = facing_hand.group_by("pitch_type").agg([
        pl.col("start_speed").count().alias("pitches"),
        pl.col("px").mean().alias("px"),
        pl.col("pz").mean().alias("pz")
    ])

    # Usage share of each pitch type
    total_pitches = pitch_location_group['pitches'].sum()
    usage_share = (pl.col("pitches") / total_pitches).alias("pitch_percent")
    pitch_location_group = pitch_location_group.with_columns(usage_share)

    # Mean locations, marker area proportional to usage share
    sns.scatterplot(ax=ax,
                    x=pitch_location_group['px'],
                    y=pitch_location_group['pz'],
                    hue=pitch_location_group['pitch_type'],
                    palette=dict_colour,
                    ec='black',
                    s=pitch_location_group['pitch_percent'] * 750,
                    linewidth=2,
                    zorder=2)

    # Frame the plot: square axes, strike zone overlay, fixed limits
    ax.axis('square')
    draw_line(ax, alpha_spot=0.75, catcher_p=False)
    ax.axis('off')
    ax.set_xlim((-2.75, 2.75))
    ax.set_ylim((-0.5, 5))

    # A legend only exists when at least one point was plotted
    if len(pitch_location_group['px']) > 0:
        ax.get_legend().remove()
    ax.grid(False)

    pitches_shown = pitch_location_group['pitches'].sum()
    ax.set_title(f"Pitch Locations vs {hand}HB\n{pitches_shown} Pitches", fontdict=font_properties_titles)
554
+
555
+
556
def summary_table(df: pl.DataFrame, ax: plt.Axes):
    """
    Create a summary table of pitch data.

    Builds one row per ``pitch_description`` plus a final "All" row, renders
    them with ``ax.table``, shades selected cells against the reference values
    in ``functions/statcast_2024_grouped.csv``, rewrites the header labels,
    formats the numeric cells and adds a pitch-type legend above the table.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Columns read here:
        ``pitch_description``, ``pitch_type``, ``pitch_count``, ``is_pitch``,
        ``start_speed``, ``ivb``, ``hb``, ``spin_rate``, ``vaa``, ``haa``,
        ``z0``, ``x0``, ``extension``, ``spin_direction``, ``tj_stuff_plus``,
        ``pitch_grade``, ``in_zone``, ``out_zone``, ``ozone_swing``,
        ``whiffs``, ``swings``, ``woba_pred_contact`` and ``bip``.
    ax : plt.Axes
        The axis to plot the table on; the axis itself is switched off.
    """
    # Aggregate pitch data by pitch description.
    # NOTE(review): ``count_percent`` divides an expression by the result of
    # ``df.select(...)`` (a DataFrame, not a scalar) -- confirm this behaves
    # as intended on the installed polars version.
    df_agg = df.group_by("pitch_description").agg(
        pl.col('is_pitch').sum().alias('count'),
        (pl.col('is_pitch').sum() / df.select(pl.col('is_pitch').sum())).alias('count_percent'),
        pl.col('start_speed').mean().alias('start_speed'),
        pl.col('ivb').mean().alias('ivb'),
        pl.col('hb').mean().alias('hb'),
        pl.col('spin_rate').mean().alias('spin_rate'),
        pl.col('vaa').mean().alias('vaa'),
        pl.col('haa').mean().alias('haa'),
        pl.col('z0').mean().alias('z0'),
        pl.col('x0').mean().alias('x0'),
        pl.col('extension').mean().alias('extension'),
        # Mean spin direction, offset by 180 and taken modulo 360, is split
        # into 30-unit steps plus a fractional part and formatted as "H:MM".
        (((pl.col('spin_direction').mean() + 180) % 360 // 30) +
         (((pl.col('spin_direction').mean() + 180) % 360 % 30 / 30 / 100 * 60).round(2) * 10).round(0) // 1.5 / 4)
        .cast(pl.Float64).map_elements(lambda x: f"{int(x)}:{int((x % 1) * 60):02d}", return_dtype=pl.Utf8).alias('clock_time'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        pl.col('pitch_grade').mean().alias('pitch_grade'),
        (pl.col('in_zone').sum() / pl.col('is_pitch').sum()).alias('zone_percent'),
        (pl.col('ozone_swing').sum() / pl.col('out_zone').sum()).alias('chase_percent'),
        (pl.col('whiffs').sum() / pl.col('swings').sum()).alias('whiff_percent'),
        (pl.col('woba_pred_contact').sum() / pl.col('bip').sum()).alias('xwobacon')
    ).sort("count", descending=True)

    # Aggregate all pitch data into a single "All" row. Per-pitch physical
    # columns are left null; the column order mirrors ``df_agg`` so the two
    # frames can be concatenated.
    df_agg_all = df.group_by(pl.lit("All").alias("pitch_description")).agg(
        pl.col('is_pitch').sum().alias('count'),
        (pl.col('is_pitch').sum() / df.select(pl.col('is_pitch').sum())).alias('count_percent'),
        pl.lit(None).alias('start_speed'),
        pl.lit(None).alias('ivb'),
        pl.lit(None).alias('hb'),
        pl.lit(None).alias('spin_rate'),
        pl.lit(None).alias('vaa'),
        pl.lit(None).alias('haa'),
        pl.lit(None).alias('z0'),
        pl.lit(None).alias('x0'),
        pl.col('extension').mean().alias('extension'),
        pl.lit(None).alias('clock_time'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        pl.lit(None).alias('pitch_grade'),
        (pl.col('in_zone').sum() / pl.col('is_pitch').sum()).alias('zone_percent'),
        (pl.col('ozone_swing').sum() / pl.col('out_zone').sum()).alias('chase_percent'),
        (pl.col('whiffs').sum() / pl.col('swings').sum()).alias('whiff_percent'),
        (pl.col('woba_pred_contact').sum() / pl.col('bip').sum()).alias('xwobacon')
    )

    # Concatenate aggregated data (per-pitch rows first, "All" row last)
    df_agg = pl.concat([df_agg, df_agg_all]).fill_nan(None)

    # Load statcast pitch summary data (reference values used for shading)
    statcast_pitch_summary = pl.read_csv('functions/statcast_2024_grouped.csv')

    # Create table. Missing values are meant to display as an em dash; the
    # first column is wider than the other 18.
    # NOTE(review): verify that ``fill_null('—')`` also fills the numeric
    # columns on the installed polars version; the checks against '—' below
    # depend on it.
    table = ax.table(cellText=df_agg.fill_nan('—').fill_null('—').to_numpy(), colLabels=df_agg.columns, cellLoc='center',
                     colWidths=[2.3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], bbox=[0.0, 0, 1, 0.8])

    # Set table properties
    min_font_size = 14
    table.auto_set_font_size(False)
    table.set_fontsize(min_font_size)
    table.scale(1, 0.5)

    # Set font size for values (every row except the header row 0)
    min_font_size = 18
    for i in range(len(df_agg) + 1):
        for j in range(len(df_agg.columns)):
            if i > 0:  # Skip the header row
                cell = table.get_celld()[i, j]
                cell.set_fontsize(min_font_size)

    # Define color maps (local names; ``cmap_sum_r`` is the reversed order)
    cmap_sum = mcolors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000'])
    cmap_sum_r = mcolors.LinearSegmentedColormap.from_list("", ['#FFB000', '#FFFFFF', '#648FFF'])

    # Update table cells with colors and text properties.
    # Table row ``i + 1`` corresponds to ``df_agg`` row ``i`` (row 0 is the header).
    for i in range(len(df_agg)):
        pitch_check = dict_pitch_desc_type[df_agg['pitch_description'][i]]
        cell_text = table.get_celld()[(i + 1, 0)].get_text().get_text()

        if cell_text != 'All':
            # Colour the name cell with the pitch colour; three pitch names
            # get black text, all others white.
            table.get_celld()[(i + 1, 0)].set_facecolor(dict_pitch_name[cell_text])
            text_props = {'color': '#000000', 'fontweight': 'bold'} if cell_text in ['Split-Finger', 'Slider', 'Changeup'] else {'color': '#ffffff', 'fontweight': 'bold'}
            table.get_celld()[(i + 1, 0)].set_text_props(**text_props)
            if cell_text == 'Four-Seam Fastball':
                table.get_celld()[(i + 1, 0)].get_text().set_text('4-Seam')

        # Reference rows for this pitch type
        select_df = statcast_pitch_summary.filter(statcast_pitch_summary['pitch_type'] == pitch_check)

        # Apply color to specific columns based on normalized values.
        # Each tuple is (table column index, reference column or None, low, high):
        # with a reference column, low/high are multipliers of its mean;
        # with None, they are used as absolute bounds.
        columns_to_color = [(3, 'release_speed', 0.95, 1.05), (11, 'release_extension', 0.9, 1.1), (13, None, 80, 120),
                            (14, None, 30, 70), (15, 'in_zone_rate', 0.7, 1.3), (16, 'chase_rate', 0.7, 1.3),
                            (17, 'whiff_rate', 0.7, 1.3), (18, 'xwoba', 0.7, 1.3)]

        for col, stat, vmin_factor, vmax_factor in columns_to_color:
            cell_value = table.get_celld()[(i + 1, col)].get_text().get_text()
            if cell_value != '—':
                vmin = select_df[stat].mean() * vmin_factor if stat else vmin_factor
                vmax = select_df[stat].mean() * vmax_factor if stat else vmax_factor
                normalize = mcolors.Normalize(vmin=vmin, vmax=vmax)
                # Column 18 uses the reversed colour map
                cmap = cmap_sum if col != 18 else cmap_sum_r
                table.get_celld()[(i + 1, col)].set_facecolor(get_color(float(cell_value.strip('%')), normalize, cmap))

    # Bold black text for the first cell of the last table row (the "All"
    # row appended by the concat above)
    table.get_celld()[(len(df_agg), 0)].set_text_props(color='#000000', fontweight='bold')

    # Update column names (mathtext bold headers, in table column order)
    new_column_names = ['$\\bf{Pitch\\ Name}$', '$\\bf{Count}$', '$\\bf{Pitch\\%}$', '$\\bf{Velocity}$', '$\\bf{iVB}$',
                        '$\\bf{HB}$', '$\\bf{Spin}$', '$\\bf{VAA}$', '$\\bf{HAA}$', '$\\bf{vRel}$', '$\\bf{hRel}$',
                        '$\\bf{Ext.}$', '$\\bf{Axis}$', '$\\bf{tjStuff+}$', '$\\bf{Grade}$', '$\\bf{Zone\\%}$',
                        '$\\bf{Chase\\%}$', '$\\bf{Whiff\\%}$', '$\\bf{xwOBA}$\n$\\bf{Contact}$']

    for i, col_name in enumerate(new_column_names):
        table.get_celld()[(0, i)].get_text().set_text(col_name)

    # Format cell values
    def format_cells(columns, fmt):
        # Re-render every non-placeholder cell of the given columns with ``fmt``
        for col in columns:
            col_idx = df_agg.columns.index(col)
            for row in range(1, len(df_agg) + 1):
                cell_value = table.get_celld()[(row, col_idx)].get_text().get_text()
                if cell_value != '—':
                    table.get_celld()[(row, col_idx)].get_text().set_text(fmt.format(float(cell_value.strip('%'))))

    format_cells(['start_speed', 'ivb', 'hb', 'vaa', 'haa', 'z0', 'x0', 'extension'], '{:,.1f}')
    format_cells(['xwobacon'], '{:,.3f}')
    format_cells(['count_percent', 'zone_percent', 'chase_percent', 'whiff_percent'], '{:,.1%}')
    format_cells(['tj_stuff_plus', 'pitch_grade', 'spin_rate'], '{:,.0f}')

    # Create legend for pitch types (ordered by pitch count, descending)
    items_in_order = (df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy())
    colour_pitches = [dict_colour[x] for x in items_in_order]
    label = [dict_pitch[x] for x in items_in_order]
    # Empty scatters serve only as legend handles
    handles = [plt.scatter([], [], color=color, marker='o', s=100) for color in colour_pitches]
    # Smaller text and markers when more than five pitch types are listed
    if len(label) > 5:
        ax.legend(handles, label, bbox_to_anchor=(0.1, 0.81, 0.8, 0.14), ncol=5,
                  fancybox=True, loc='lower center', fontsize=16, framealpha=1.0, markerscale=1.7, prop={'family': 'calibi', 'size': 16})
    else:
        ax.legend(handles, label, bbox_to_anchor=(0.1, 0.81, 0.8, 0.14), ncol=5,
                  fancybox=True, loc='lower center', fontsize=20, framealpha=1.0, markerscale=2, prop={'family': 'calibi', 'size': 20})
    ax.axis('off')
707
+
708
def plot_footer(ax:plt.Axes):
    """
    Write the footer onto ``ax``: author credit (top left), an explanation of
    the colour coding and tjStuff+ (centre) and data credits (top right).
    The axis itself is hidden afterwards.
    """
    # NOTE(review): confirm the per-line indentation inside this literal
    # against the original file; leading whitespace is part of the text.
    explanation = '''
Colour Coding Compares to League Average By Pitch
tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type
tjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10
Pitch Grade scales tjStuff+ to the traditional 20-80 Scouting Scale for a given pitch type
'''
    credit_left = 'By: @TJStats'
    credit_right = 'Data: MLB, Fangraphs\nImages: MLB, ESPN'

    # Add footer text
    ax.text(0, 1, credit_left, ha='left', va='top', fontsize=24)
    ax.text(0.5, 0.25, explanation, ha='center', va='bottom', fontsize=16)
    ax.text(1, 1, credit_right, ha='right', va='top', fontsize=24)
    ax.axis('off')
721
+
722
+
723
+ # Function to get an image from a URL and display it on the given axis
724
def player_headshot(player_input: str, ax: plt.Axes, sport_id: int,season: int):
    """
    Download a player's headshot and draw it on ``ax``.

    The axis is always switched off. If the image cannot be downloaded or
    decoded, the axis is left empty rather than raising.

    Parameters
    ----------
    player_input : str
        Player id interpolated into the image URL.
    ax : plt.Axes
        The axis to draw the image on.
    sport_id : int
        Sport id; ``1`` selects the "silo" headshot URL, any other value the
        "milb" headshot URL. Coerced with ``int()``.
    season : int
        Not used by this function; kept for interface compatibility.
    """
    # Pick the image URL and where the image sits inside the axis.
    if int(sport_id) == 1:
        url = ('https://img.mlbstatic.com/mlb-photos/image/'
               'upload/d_people:generic:headshot:67:current.png'
               f'/w_640,q_auto:best/v1/people/{player_input}/headshot/silo/current.png')
        extent = [0, 1, 0, 1]
    else:
        url = f'https://img.mlbstatic.com/mlb-photos/image/upload/c_fill,g_auto/w_640/v1/people/{player_input}/headshot/milb/current.png'
        extent = [1/6, 5/6, 0, 1]

    try:
        # A timeout keeps a stalled connection from hanging the whole figure.
        response = requests.get(url, timeout=10)
        # Open the image from the response content
        img = Image.open(BytesIO(response.content))
    except (requests.RequestException, PIL.UnidentifiedImageError):
        # The headshot is decorative: leave the panel blank on failure.
        ax.axis('off')
        return

    # Display the image on the axis; the x range is wider than the image so
    # empty space remains to its right.
    ax.set_xlim(0, 1.3)
    ax.set_ylim(0, 1)
    ax.imshow(img, extent=extent, origin='upper')

    # Turn off the axis
    ax.axis('off')
757
+
758
+
759
def player_bio(pitcher_id: str, ax: plt.Axes,sport_id: int,year_input: int):
    """
    Fetch a pitcher's bio from the MLB Stats API and write it onto ``ax``.

    Draws the player's name, a line with throwing hand / age / height /
    weight, a fixed "Season Pitching Summary" heading and a
    "<year> <sport abbreviation> Season" line, then switches the axis off.

    Parameters
    ----------
    pitcher_id : str
        Player id passed as ``personIds`` to the people endpoint.
    ax : plt.Axes
        The axis to write the text on.
    sport_id : int
        Sport id used to look up the abbreviation; coerced with ``int()`` so
        a string id matches the numeric ``id`` field.
    year_input : int
        Season shown in the last text line.
    """
    # Construct the URL to fetch player data
    url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"

    # Send a GET request to the URL and parse the JSON response. The timeout
    # keeps a stalled connection from blocking indefinitely.
    data = requests.get(url, timeout=10).json()

    # Extract player information from the JSON data
    person = data['people'][0]
    player_name = person['fullName']
    pitcher_hand = person['pitchHand']['code']
    age = person['currentAge']
    height = person['height']
    weight = person['weight']

    # Display the player's name, handedness, age, height, and weight on the axis
    ax.text(0.5, 1, f'{player_name}', va='top', ha='center', fontsize=56)
    ax.text(0.5, 0.7, f'{pitcher_hand}HP, Age:{age}, {height}/{weight}', va='top', ha='center', fontsize=30)
    ax.text(0.5, 0.45, 'Season Pitching Summary', va='top', ha='center', fontsize=40)

    # Make API call to retrieve sports information
    response = requests.get(url='https://statsapi.mlb.com/api/v1/sports', timeout=10).json()

    # Convert the JSON response into a Polars DataFrame and pick the
    # abbreviation of the requested sport
    df_sport_id = pl.DataFrame(response['sports'])
    abb = df_sport_id.filter(pl.col('id') == int(sport_id))['abbreviation'][0]

    ax.text(0.5, 0.20, f'{year_input} {abb} Season', va='top', ha='center', fontsize=30, fontstyle='italic')

    # Turn off the axis
    ax.axis('off')
789
+
790
+
791
def plot_logo(pitcher_id: str, ax: plt.Axes, df_team: pl.DataFrame, df_players: pl.DataFrame):
    """
    Draw the logo of the pitcher's MLB parent organization on an axis.

    Parameters
    ----------
    pitcher_id : str
        Value matched against the ``player_id`` column of ``df_players``.
    ax : plt.Axes
        The axis to draw the logo on.
    df_team : pl.DataFrame
        Team lookup table. The columns ``team_id``, ``parent_org_id`` and
        ``parent_org_abbreviation`` are read.
    df_players : pl.DataFrame
        Player lookup table. The columns ``player_id`` and ``team`` are read;
        ``team`` is used as the team id in the MLB Stats API request.

    Notes
    -----
    When the player, the team or the team abbreviation cannot be resolved the
    axis is turned off and left blank instead of raising.
    """
    # Team abbreviation -> code used in the ESPN logo URL. Most codes are the
    # lower-cased abbreviation; AZ, CWS and ATH are the exceptions.
    espn_codes = {
        "AZ": "ari", "ATL": "atl", "BAL": "bal", "BOS": "bos", "CHC": "chc",
        "CWS": "chw", "CIN": "cin", "CLE": "cle", "COL": "col", "DET": "det",
        "HOU": "hou", "KC": "kc", "LAA": "laa", "LAD": "lad", "MIA": "mia",
        "MIL": "mil", "MIN": "min", "NYM": "nym", "NYY": "nyy", "OAK": "oak",
        "PHI": "phi", "PIT": "pit", "SD": "sd", "SF": "sf", "SEA": "sea",
        "STL": "stl", "TB": "tb", "TEX": "tex", "TOR": "tor", "WSH": "wsh",
        "ATH": "oak",
    }
    logo_url_template = (
        "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/{code}.png&h=500&w=500"
    )

    try:
        team_id = df_players.filter(pl.col('player_id') == pitcher_id)['team'][0]

        # Fetch the team record from the MLB Stats API
        data_team = requests.get(f'https://statsapi.mlb.com/api/v1/teams/{team_id}').json()
        team_info = data_team['teams'][0]

        # A team whose id appears in parent_org_id is looked up by team_id;
        # any other team is resolved through its parentOrgId.
        if team_info['id'] in df_team['parent_org_id']:
            team_abb = df_team.filter(pl.col('team_id') == team_info['id'])['parent_org_abbreviation'][0]
        else:
            team_abb = df_team.filter(pl.col('parent_org_id') == team_info['parentOrgId'])['parent_org_abbreviation'][0]

        logo_url = logo_url_template.format(code=espn_codes[team_abb])
    except (KeyError, IndexError):
        # KeyError: missing JSON field or unknown abbreviation.
        # IndexError: a filter matched no rows, so ``[0]`` had nothing to return.
        ax.axis('off')
        return

    # Download the logo and open it as an image
    response = requests.get(logo_url)
    img = Image.open(BytesIO(response.content))

    # Display the image on the right-hand part of the axis
    ax.set_xlim(0, 1.3)
    ax.set_ylim(0, 1)
    ax.imshow(img, extent=[0.3, 1.3, 0, 1], origin='upper')

    # Turn off the axis
    ax.axis('off')
865
+
866
# Value sent as the ``month`` query parameter of the Fangraphs leaderboard
# request for each supported batter-handedness split.
splits = {'All': 0, 'LHH': 13, 'RHH': 14}

# Suffix appended to the table header text for each split.
splits_title = {'All': '', 'LHH': ' vs LHH', 'RHH': ' vs RHH'}
879
+
880
+
881
def fangraphs_pitching_leaderboards(season: int,
                                    split: str,
                                    start_date: str = '2024-01-01',
                                    end_date: str = '2024-12-31'):
    """
    Fetch pitching leaderboards data from Fangraphs.

    Parameters
    ----------
    season : int
        The season year.
    split : str
        The split type; must be a key of ``splits`` ('All', 'LHH' or 'RHH').
    start_date : str, optional
        The start date for the data (default is '2024-01-01').
    end_date : str, optional
        The end date for the data (default is '2024-12-31').

    Returns
    -------
    pl.DataFrame
        The DataFrame containing the pitching leaderboards data.

    Raises
    ------
    KeyError
        If ``split`` is not a key of ``splits``.
    requests.HTTPError
        If Fangraphs answers with an error status code.
    """
    # Pass the query as ``params`` so requests encodes it. Building the URL
    # from a multi-line f-string would embed newlines and indentation in the
    # query values.
    query = {
        'age': '',
        'pos': 'all',
        'stats': 'pit',
        'lg': 'all',
        'season': season,
        'season1': season,
        'startdate': start_date,
        'enddate': end_date,
        'ind': 0,
        'qual': 0,
        'type': 8,
        'month': splits[split],
        'pageitems': 500000,
    }

    response = requests.get('https://www.fangraphs.com/api/leaders/major-league/data',
                            params=query)
    response.raise_for_status()

    data = response.json()
    return pl.DataFrame(data=data['data'], infer_schema_length=1000)
912
+
913
+
914
def fangraphs_table(df: pl.DataFrame,
                    ax: plt.Axes,
                    player_input: str,
                    season: int,
                    split: str):
    """
    Create a table of Fangraphs pitching leaderboards data for a specific player.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch data for the player. The first and last values of its
        ``game_date`` column are used as the date range of the request.
    ax : plt.Axes
        The axis to plot the table on.
    player_input : str
        Player id compared for equality with the ``xMLBAMID`` column of the
        Fangraphs data (``stat_summary_table`` forwards its integer id here).
    season : int
        The season year.
    split : str
        The split type (e.g., 'All', 'LHH', 'RHH').

    Notes
    -----
    If the player has no row in the Fangraphs data for the requested range,
    the axis is turned off and nothing is drawn.
    """
    start_date = df['game_date'][0]
    end_date = df['game_date'][-1]

    # Fetch Fangraphs pitching leaderboards data and keep only this player
    df_fangraphs = fangraphs_pitching_leaderboards(season=season,
                                                   split=split,
                                                   start_date=start_date,
                                                   end_date=end_date).filter(pl.col('xMLBAMID') == player_input)

    # Without a matching row there is nothing to tabulate; indexing [0] below
    # would raise IndexError.
    if df_fangraphs.is_empty():
        ax.axis('off')
        return

    df_fangraphs = df_fangraphs.with_columns(
        (pl.col('Strikes') / pl.col('Pitches')).alias('strikePercentage'),
    )

    # Select relevant columns for the table
    plot_table = df_fangraphs.select(['IP', 'WHIP', 'ERA', 'TBF', 'FIP', 'K%', 'BB%', 'K-BB%', 'strikePercentage'])

    # Format table values; missing values are shown with the '---' placeholder
    plot_table_values = []
    for column in plot_table.columns:
        value = plot_table[column][0]
        if value is None or value == '---':
            plot_table_values.append('---')
        else:
            plot_table_values.append(format(value, fangraphs_stats_dict[column]['format']))

    # Create the table
    table_fg = ax.table(cellText=[plot_table_values], colLabels=plot_table.columns, cellLoc='center',
                        bbox=[0.0, 0.1, 1, 0.7])

    # Set font size for the table
    min_font_size = 20
    table_fg.set_fontsize(min_font_size)

    # Replace the raw column names in the header row with formatted headers
    for i, column in enumerate(plot_table.columns):
        table_fg.get_celld()[(0, i)].get_text().set_text(fangraphs_stats_dict[column]['table_header'])

    # Title above the table: date range plus the split suffix
    ax.text(0.5, 0.9, f'{start_date} to {end_date}{splits_title[split]}', va='bottom', ha='center',
            fontsize=36, fontstyle='italic')
    ax.axis('off')
969
+
970
+
971
def stat_summary_table(df: pl.DataFrame,
                       player_input: int,
                       sport_id: int,
                       ax: plt.Axes,
                       split: str = 'All'):
    """
    Draw a one-row pitching stat summary table for a player on an axis.

    Three layouts are produced:

    * all pitches in ``df`` come from one game (first and last ``game_id``
      are equal): a box-score style line from the MLB Stats API;
    * several games and ``sport_id != 1``: rate stats from the MLB Stats API;
    * several games and ``sport_id == 1``: delegated to ``fangraphs_table``.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch data for the player. The columns ``game_date``, ``game_id``,
        ``is_whiff`` and ``batter_team`` are read.
    player_input : int
        Player id used in the MLB Stats API request and forwarded to
        ``fangraphs_table``.
    sport_id : int
        MLB Stats API sport id; 1 selects the ``majorLeague`` app context,
        anything else ``minorLeague``.
    ax : plt.Axes
        The axis to plot the table on.
    split : str, optional
        Split forwarded to ``fangraphs_table`` (default is 'All'). It is not
        used by the MLB Stats API layouts.
    """
    first_date = df['game_date'][0]
    last_date = df['game_date'][-1]
    single_game = df['game_id'][0] == df['game_id'][-1]

    # Multi-game MLB summaries come from Fangraphs, so return before making
    # the Stats API request. The season is taken from the data rather than
    # being hard-coded.
    if not single_game and sport_id == 1:
        fangraphs_table(df=df,
                        ax=ax,
                        player_input=player_input,
                        season=pd.to_datetime(first_date).year,
                        split=split)
        return

    start_date_format = str(pd.to_datetime(first_date).strftime('%m/%d/%Y'))
    end_date_format = str(pd.to_datetime(last_date).strftime('%m/%d/%Y'))

    appContext = 'majorLeague' if sport_id == 1 else 'minorLeague'

    pitcher_stats_call = requests.get(f'https://statsapi.mlb.com/api/v1/people/{player_input}?appContext={appContext}&hydrate=stats(group=[pitching],type=[byDateRange],sportId={sport_id},startDate={start_date_format},endDate={end_date_format})').json()

    # The last split of the first stats group holds the stat line that is shown
    stat_line = pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat']
    pitcher_stats_call_df = pl.DataFrame(data=dict(stat_line))

    def _percent(ratio: pl.Expr) -> pl.Expr:
        # Render a ratio as a percentage string with one decimal, e.g. "25.0%".
        # concat_str appends horizontally; Expr.str.concat joins a column
        # vertically and would not add the '%' sign to a single row.
        return pl.concat_str([(ratio * 100).round(1).cast(pl.Utf8), pl.lit('%')])

    pitcher_stats_call_df = pitcher_stats_call_df.with_columns(
        pl.lit(df['is_whiff'].sum()).alias('whiffs'),
        _percent(pl.col('strikeOuts') / pl.col('battersFaced')).alias('k_percent'),
        _percent(pl.col('baseOnBalls') / pl.col('battersFaced')).alias('bb_percent'),
        _percent((pl.col('strikeOuts') - pl.col('baseOnBalls')) / pl.col('battersFaced')).alias('k_bb_percent'),
        # FIP = (13*HR + 3*(BB + HBP) - 2*K) / IP + constant; the constant is fixed at 3.15 here
        (((pl.col('homeRuns') * 13 + 3 * (pl.col('baseOnBalls') + pl.col('hitByPitch')) - 2 * pl.col('strikeOuts')))
         / (pl.col('outs') / 3) + 3.15).round(2).map_elements(lambda x: f"{x:.2f}", return_dtype=pl.Utf8).alias('fip'),
        _percent(pl.col('strikes') / pl.col('numberOfPitches')).alias('strikePercentage'),
    )

    if single_game:
        table_columns = ['inningsPitched', 'battersFaced', 'earnedRuns', 'hits', 'strikeOuts',
                         'baseOnBalls', 'hitByPitch', 'homeRuns', 'strikePercentage', 'whiffs']
        new_column_names = ['$\\bf{IP}$', '$\\bf{PA}$', '$\\bf{ER}$', '$\\bf{H}$', '$\\bf{K}$',
                            '$\\bf{BB}$', '$\\bf{HBP}$', '$\\bf{HR}$', '$\\bf{Strike\\%}$', '$\\bf{Whiffs}$']
        title = f'{df["game_date"][0]} vs {df["batter_team"][0]}'
    else:
        table_columns = ['inningsPitched', 'battersFaced', 'whip', 'era', 'fip',
                         'k_percent', 'bb_percent', 'k_bb_percent', 'strikePercentage']
        new_column_names = ['$\\bf{IP}$', '$\\bf{PA}$', '$\\bf{WHIP}$', '$\\bf{ERA}$', '$\\bf{FIP}$',
                            '$\\bf{K\\%}$', '$\\bf{BB\\%}$', '$\\bf{K-BB\\%}$', '$\\bf{Strike\\%}$']
        title = f'{df["game_date"][0]} to {df["game_date"][-1]}'

    pitcher_stats_call_df_small = pitcher_stats_call_df.select(table_columns)

    table_fg = ax.table(cellText=pitcher_stats_call_df_small.to_numpy(),
                        colLabels=pitcher_stats_call_df_small.columns,
                        cellLoc='center',
                        bbox=[0.0, 0.1, 1, 0.7])

    min_font_size = 20
    table_fg.set_fontsize(min_font_size)

    # Replace the raw column names in the header row with bold mathtext labels
    for i, col_name in enumerate(new_column_names):
        table_fg.get_celld()[(0, i)].get_text().set_text(col_name)

    ax.text(0.5, 0.9, title, va='bottom', ha='center',
            fontsize=36, fontstyle='italic')

    ax.axis('off')
functions/statcast_2024_grouped.csv ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pitch_type,pitch,release_speed,pfx_z,pfx_x,release_spin_rate,release_pos_x,release_pos_z,release_extension,delta_run_exp,swing,whiff,in_zone,out_zone,chase,xwoba,pitch_usage,whiff_rate,in_zone_rate,chase_rate,delta_run_exp_per_100,all
2
+ CH,74155,85.46226725895522,5.247514143364433,-3.9745011679246045,1803.342540762527,-0.5077629855663421,5.740925968432281,6.449406057002311,204.631,37385,11538,28912,45151,15250,0.28973564881286695,0.10218846333521206,0.30862645446034503,0.38988604949093114,0.3377555314389493,-0.27595037421616886,
3
+ CS,22,66.38181818181819,-7.232727272727273,5.176363636363637,2039.2727272727273,-1.7981818181818183,6.5177272727272735,6.0636363636363635,-0.6290000000000001,9,2,10,12,2,0.13466666666666668,3.0316852449257168e-05,0.2222222222222222,0.45454545454545453,0.16666666666666666,2.85909090909091,
4
+ CU,47579,79.40938533133989,-9.345106445703216,4.516206279348902,2568.8591051473077,-0.6765712059634863,5.9438438375202685,6.401792908519479,93.57199999999999,19910,6150,20751,26738,7749,0.28049767649520974,0.0655657055765094,0.3088900050226017,0.4361377918829736,0.28981225222529733,-0.1966665966077471,
5
+ EP,576,50.51909722222222,16.357291666666665,-3.8287500000000003,1256.7152777777778,-0.9668749999999999,6.647100694444444,4.442013888888889,23.643,252,7,207,369,106,0.3971430703517588,0.0007937503186714604,0.027777777777777776,0.359375,0.2872628726287263,-4.104687500000001,
6
+ FA,635,67.81354330708662,15.865511811023623,-3.7226456692913388,1674.0144694533763,-1.1163779527559055,6.317716535433071,4.92488188976378,15.495,284,29,296,339,73,0.43393490999999995,0.0008750546047853774,0.10211267605633803,0.46614173228346456,0.2153392330383481,-2.4401574803149604,
7
+ FC,58379,89.56435813713696,8.08895396195288,1.5509243697478992,2389.231715947733,-0.9745362684951281,5.8461769002079365,6.403954996645393,-20.390000000000015,28753,6674,30002,28189,7757,0.34077822947428493,0.08044852405159929,0.23211490974854798,0.5139176758765994,0.2751782610238036,0.034926942907552404,
8
+ FF,230412,94.27369496062718,15.720274827472318,-3.1074418968484365,2296.591789895323,-0.7685432927147252,5.821400777026439,6.524392110813926,-80.28400000000002,113157,24741,127386,102722,24808,0.3401256910065045,0.3175166639335565,0.21864312415493517,0.5528618301130149,0.2415062012032476,0.03484367133656234,
9
+ FO,168,82.07916666666667,1.7357142857142858,0.1378571428571428,946.8154761904761,-0.5333333333333333,5.8914285714285715,6.666666666666667,2.539,89,29,60,108,43,0.27798747368421056,0.0002315105096125093,0.3258426966292135,0.35714285714285715,0.39814814814814814,-1.511309523809524,
10
+ FS,21727,86.31228885718231,2.979608781700189,-8.76550651263405,1302.3992981808108,-1.4640824780227366,5.742066553136651,6.508958525345622,-16.641000000000005,11333,3906,7982,13745,4946,0.2548785060302361,0.02994064787113684,0.34465719579987647,0.3673769963639711,0.3598399417970171,0.07659133796658538,
11
+ KC,11916,81.79965592480698,-9.370896273917422,4.895297079556898,2444.1642796967144,-0.8788083249412554,5.940037764350453,6.434007553503986,-12.997000000000003,5312,1860,4858,7058,2316,0.25845137325418993,0.016420709717515837,0.3501506024096386,0.40768714333669015,0.32813828279965995,0.10907183618663985,
12
+ KN,971,76.94819773429454,-2.9453759011328526,-5.356498455200824,263.56326987681973,-1.2303398558187437,5.542131822863028,6.45653964984552,12.681,426,113,428,543,130,0.2870389181034483,0.0013380756240103959,0.2652582159624413,0.4407826982492276,0.23941068139963168,-1.3059732234809474,
13
+ PO,55,91.24909090909091,13.11709090909091,-6.399272727272727,2195.3818181818183,-1.494181818181818,5.861272727272727,6.305454545454546,0.0,0,0,1,54,0,,7.579213112314292e-05,,0.01818181818181818,0.0,-0.0,
14
+ SC,159,81.02264150943397,-3.1056603773584905,-8.001509433962264,2050.5974842767296,-1.0535849056603774,6.110377358490566,6.064150943396227,4.623,58,13,63,96,20,0.35349463636363637,0.0002191081608832677,0.22413793103448276,0.39622641509433965,0.20833333333333334,-2.9075471698113207,
15
+ SI,116002,93.34805382235511,7.567078832293412,-6.148476070311284,2147.3631502060834,-0.7671983511070397,5.622119363257688,6.435364206296976,-32.837000000000025,53318,7390,65492,50222,12474,0.3501967420378125,0.15985525080994228,0.13860234817510034,0.5645764728194341,0.2483772052088726,0.028307270564300636,
16
+ SL,116390,85.60138786052518,1.5759858803271631,2.7325110632802407,2435.5705519351436,-0.9811034007748601,5.761407576409815,6.433055359327349,-167.41500000000002,56606,19101,52478,63672,20396,0.2818607008786495,0.16038992984404735,0.337437727449387,0.45088065985050263,0.3203291870838045,0.14383967694819144,
17
+ ST,43821,81.8580155633144,1.4796932977339632,7.821825152324228,2575.3661920073496,-1.080187124894457,5.4607240820611125,6.40352674793587,-52.96800000000001,20035,6276,19349,24472,7531,0.25978070794500324,0.0603870359626772,0.3132518093336661,0.44154629059126904,0.30773945733899966,0.12087355377558708,
18
+ SV,2702,81.67483345669874,-4.788941524796447,7.356861584011844,2470.624858757062,-0.5779570688378979,5.420762398223538,6.227296392711045,0.19299999999999926,1117,339,1138,1564,479,0.2907683709923664,0.0037234606962678577,0.3034914950760967,0.42116950407105846,0.3062659846547315,-0.007142857142857115,
19
+ All,725669,89.1521052747817,7.058379139422499,-1.2140087540219224,2255.6768252515376,-0.8282529777063689,5.758824349487279,6.456550518555369,-20.178000000000118,352163,89742,359413,365054,104080,0.3147037524825,1.0,0.25483085957354973,0.4952850404247667,0.28510850449522535,0.002780606585095976,all
joblib_model/__pycache__/feature_engineering.cpython-39.pyc ADDED
Binary file (2.14 kB). View file
 
joblib_model/barrel_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9428e89f2a408148377efb3cd169dc8790bcc89df9495cb895b9db5a955e8fb7
3
+ size 11447
joblib_model/in_zone.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5300b15a6ccfb1dd1e79c85bd9ea478a1945c454845e6be31cd8815e4063a3e
3
+ size 54459064
joblib_model/in_zone_model_knn_20240410.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82d6d95be88b006bea7efd4bbf0464a0a50f261f6f65f060bf022114300721ed
3
+ size 46782024
joblib_model/linear_reg_model_x.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:179663ae0fa65c626b9a941b6934bda1ce58bdf02a69c0daefc28abd28154201
3
+ size 579
joblib_model/linear_reg_model_z.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ceabc302949cdbe5515b428f900bce98d6f6bedf99153c8d8a645cb0240ef8b
3
+ size 579
joblib_model/model_attack_zone.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2671d4db2606cfee299dcffba2a94138fce77c1b7ef6ad14695a972a38dda3c8
3
+ size 50570139
joblib_model/no_swing.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3da3e7ab2b513b87d05e90ae30c788ac819dfcaa7cc1cd9943fc13d2958a00f
3
+ size 279409
joblib_model/swing.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fef4a66363e5f3fdc70ae45c5382bd986c800ff8bf9296a1f9b334461e70fd4
3
+ size 262137
joblib_model/xwoba_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05bade9c0420657d3f0dfe35f0b1adbd2d5ae25c87a07bdf6629987f29926438
3
+ size 10684246
stuff_model/__pycache__/feature_engineering.cpython-39.pyc ADDED
Binary file (2.17 kB). View file
 
stuff_model/__pycache__/stuff_apply.cpython-39.pyc ADDED
Binary file (1.33 kB). View file
 
stuff_model/feature_engineering.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import polars as pl
2
+ import numpy as np
3
+
4
def feature_engineering(df: pl.DataFrame) -> pl.DataFrame:
    """
    Add derived pitch features to a pitch-level DataFrame.

    Columns read: ``game_date`` (string, year in the first four characters),
    ``vx0``, ``vy0``, ``vz0``, ``ax``, ``ay``, ``az``, ``x0``, ``y0``, ``hb``,
    ``pitcher_hand``, ``pitcher_id``, ``pitch_type`` and ``start_speed``.

    Columns added: ``year``, ``vy_f``, ``t``, ``vz_f``, ``vx_f``, ``vaa``,
    ``haa``, ``avg_fastball_speed``, ``avg_fastball_az``, ``avg_fastball_ax``,
    ``speed_diff``, ``az_diff``, ``ax_diff`` and ``all``, plus
    ``pitch_type_right`` and ``count`` from the joined primary-fastball table.
    ``ax``, ``hb`` and ``x0`` are overwritten with handedness-adjusted values.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch-level data.

    Returns
    -------
    pl.DataFrame
        The input rows with the additional feature columns. Pitchers without
        any SI/FF/FC pitch are kept and receive fallback fastball values.
    """
    # Extract the year from the game_date column
    df = df.with_columns(
        pl.col('game_date').str.slice(0, 4).alias('year')
    )

    # Velocity in y when the ball reaches y = 17/12, from v_f^2 = v_0^2 + 2*a*d
    df = df.with_columns(
        (-(pl.col('vy0') ** 2 - (2 * pl.col('ay') * (pl.col('y0') - 17 / 12))) ** 0.5).alias('vy_f'),
    )

    # Time needed to reach that point
    df = df.with_columns(
        ((pl.col('vy_f') - pl.col('vy0')) / pl.col('ay')).alias('t'),
    )

    # Final vertical and horizontal velocities (uses ax before it is mirrored)
    df = df.with_columns(
        (pl.col('vz0') + (pl.col('az') * pl.col('t'))).alias('vz_f'),
        (pl.col('vx0') + (pl.col('ax') * pl.col('t'))).alias('vx_f'),
    )

    # Vertical and horizontal approach angles in degrees
    df = df.with_columns(
        (-np.arctan(pl.col('vz_f') / pl.col('vy_f')) * (180 / np.pi)).alias('vaa'),
        (-np.arctan(pl.col('vx_f') / pl.col('vy_f')) * (180 / np.pi)).alias('haa'),
    )

    is_lefty = pl.col('pitcher_hand') == 'L'
    df = df.with_columns(
        # Mirror horizontal acceleration for left-handed pitchers
        pl.when(is_lefty).then(-pl.col('ax')).otherwise(pl.col('ax')).alias('ax'),
        # Mirror horizontal break for left-handed pitchers
        pl.when(is_lefty).then(-pl.col('hb')).otherwise(pl.col('hb')).alias('hb'),
        # Horizontal release point: unlike ax/hb, the sign is flipped for
        # everyone except left-handers. Kept as-is.
        # NOTE(review): presumably the model was trained with this convention - confirm.
        pl.when(is_lefty).then(pl.col('x0')).otherwise(-pl.col('x0')).alias('x0'),
    )

    # Pitch types treated as fastballs
    pitch_types = ['SI', 'FF', 'FC']
    df_filtered = df.filter(pl.col('pitch_type').is_in(pitch_types))

    # Per pitcher, year and fastball type: average speed/acceleration and pitch count
    df_agg = df_filtered.group_by(['pitcher_id', 'year', 'pitch_type']).agg([
        pl.col('start_speed').mean().alias('avg_fastball_speed'),
        pl.col('az').mean().alias('avg_fastball_az'),
        pl.col('ax').mean().alias('avg_fastball_ax'),
        pl.len().alias('count'),
    ])

    # Keep one row per pitcher-year: the most-thrown fastball type, ties
    # broken by higher average speed. maintain_order keeps the sorted order
    # so that 'first' refers to it.
    df_agg = df_agg.sort(['count', 'avg_fastball_speed'], descending=[True, True])
    df_agg = df_agg.unique(subset=['pitcher_id', 'year'], keep='first', maintain_order=True)

    # Left join so pitchers with no fastball keep their rows (with nulls) and
    # reach the fallbacks below; an inner join would drop them.
    df = df.join(df_agg, on=['pitcher_id', 'year'], how='left')

    # If a pitcher has no fastball, fall back to the pitcher's maximum value
    df = df.with_columns(
        pl.when(pl.col('avg_fastball_speed').is_null())
        .then(pl.col('start_speed').max().over('pitcher_id'))
        .otherwise(pl.col('avg_fastball_speed'))
        .alias('avg_fastball_speed'),
        pl.when(pl.col('avg_fastball_az').is_null())
        .then(pl.col('az').max().over('pitcher_id'))
        .otherwise(pl.col('avg_fastball_az'))
        .alias('avg_fastball_az'),
        # Partition by pitcher like the two fallbacks above; partitioning by
        # 'ax' would return each row's own ax and make ax_diff zero.
        pl.when(pl.col('avg_fastball_ax').is_null())
        .then(pl.col('ax').max().over('pitcher_id'))
        .otherwise(pl.col('avg_fastball_ax'))
        .alias('avg_fastball_ax'),
    )

    # Calculate pitch differentials relative to the primary fastball
    df = df.with_columns(
        (pl.col('start_speed') - pl.col('avg_fastball_speed')).alias('speed_diff'),
        (pl.col('az') - pl.col('avg_fastball_az')).alias('az_diff'),
        (pl.col('ax') - pl.col('avg_fastball_ax')).abs().alias('ax_diff'),
    )

    # Cast the year column to integer type
    df = df.with_columns(
        pl.col('year').cast(pl.Int64)
    )

    # Constant column holding the literal 'All'
    df = df.with_columns(
        pl.lit('All').alias('all')
    )

    return df
stuff_model/lgbm_model_2020_2023.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41001a1acf6ce7dbe247f1b8b7e68a1bb1b112f39d080b7e95a83479e56cb7c1
3
+ size 3092328
stuff_model/stuff_apply.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import polars as pl
2
+ import joblib
3
+
4
# Model used by stuff_apply to predict the target value for each pitch
model = joblib.load('stuff_model/lgbm_model_2020_2023.joblib')

# target_stats.txt holds one number per line: the first is used as the mean
# and the second as the standard deviation when standardizing predictions.
with open('stuff_model/target_stats.txt', 'r') as file:
    lines = file.readlines()
target_mean, target_std = (float(lines[index].strip()) for index in (0, 1))

# Columns passed to the model, in this order
features = [
    'start_speed', 'spin_rate', 'extension',
    'az', 'ax',
    'x0', 'z0',
    'speed_diff', 'az_diff', 'ax_diff',
]
22
+
23
+
24
def stuff_apply(df: pl.DataFrame) -> pl.DataFrame:
    """
    Score every pitch with the stuff model and grade it against its pitch type.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch-level data containing every column listed in ``features`` and a
        ``pitch_type`` column.

    Returns
    -------
    pl.DataFrame
        The input rows whose ``pitch_type`` appears in
        ``stuff_model/tj_stuff_plus_pitch.csv`` (inner join), with these
        columns added: ``target`` (raw model prediction), ``target_zscore``,
        ``tj_stuff_plus`` (100 minus 10 per standard deviation of the
        target), the per-pitch-type statistics from the CSV, and
        ``pitch_grade`` (20-80 scale).
    """
    df_test = df.clone()

    # Predict the target value for every pitch using the loaded model
    df_test = df_test.with_columns(
        pl.Series(name="target", values=model.predict(df_test[features].to_numpy()))
    )

    # Standardize the prediction with the stored mean and standard deviation
    df_test = df_test.with_columns(
        ((pl.col('target') - target_mean) / target_std).alias('target_zscore')
    )

    # Convert the z-score to tj_stuff_plus; the sign is inverted so a lower
    # target gives a higher score
    df_test = df_test.with_columns(
        (100 - (pl.col('target_zscore') * 10)).alias('tj_stuff_plus')
    )

    # Per-pitch-type mean and standard deviation of tj_stuff_plus
    df_pitch_types = pl.read_csv('stuff_model/tj_stuff_plus_pitch.csv')

    # Join the pitch type statistics with the main DataFrame based on pitch_type
    df_pitch_all = df_test.join(df_pitch_types, left_on='pitch_type', right_on='pitch_type')

    # z-score of tj_stuff_plus within its pitch type. Some pitch types in the
    # CSV have std == 0; dividing would give inf/NaN, so they get a z-score of
    # 0 and therefore the neutral grade of 50.
    pitch_z = (
        pl.when(pl.col('std') > 0)
        .then((pl.col('tj_stuff_plus') - pl.col('mean')) / pl.col('std'))
        .otherwise(0.0)
    )

    # Map the z-score to a grade: 50 is average, 10 points per standard
    # deviation, limited to the range 20-80
    df_pitch_all = df_pitch_all.with_columns(
        (pitch_z * 10 + 50).clip(20, 80).alias('pitch_grade')
    )
    return df_pitch_all
stuff_model/target_stats.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ 0.0034732498406374636
2
+ 0.006846752748626548
stuff_model/tj_stuff_plus_pitch.csv ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pitch_type,mean,std,median,min,max,percentile_1,percentile_99
2
+ ST,106.44784631565936,5.593943599731136,106.24878922952112,91.18894850636659,125.29541262167034,91.69322149368426,125.25688309207108
3
+ SV,103.73183202363764,3.001226780758946,103.50047554089315,93.3173875900245,111.34757479687066,93.32953434698274,111.33689503153641
4
+ SL,103.49296290610897,5.265572779780409,103.19144262214559,88.84957017284297,121.88798777026031,89.76670287371176,121.36013955239422
5
+ KC,101.8993919341341,4.271694896723436,100.79211889194949,93.69754063161618,119.4933202093256,93.75149298057133,119.38166236091195
6
+ All,99.9275100894791,5.01699442232884,99.65265124489378,84.73033633038408,116.94934527087541,86.65905811630736,116.7610246502804
7
+ CU,99.88832068607897,4.615228571103906,99.08993373693156,89.84495168337246,119.90089262632986,90.20429983334718,117.89567125997061
8
+ FC,98.83449547008738,5.811964883678063,98.54483029899575,83.20928731685326,119.78700324933075,83.34007602984008,118.21186533190846
9
+ FS,98.25541635267653,6.898952096824192,98.46204303842217,72.25450024197754,114.88400714657823,73.39595959354874,114.78967217449389
10
+ FO,98.15224613640243,1.081819065809178,99.94816563615653,94.0023252668585,100.50624750619224,94.0142169475971,100.50513134245217
11
+ FF,97.29024735737988,6.078459125845886,97.09670890504734,81.2230917971995,118.10419744965911,81.32311771953398,117.7938724746093
12
+ SC,97.27958020025409,1.2452898498180456,97.27958020025409,93.536223938276,101.02293646223218,93.54371065079995,101.01544974970822
13
+ CH,96.35866365133434,6.178939251378385,95.80884625564597,81.28802319264824,121.14136334013493,82.02275793969746,119.09639344796777
14
+ SI,95.14161603816645,4.9734372581529955,95.11657827702109,82.5850956341191,112.99618112461533,82.8856383780296,112.72626192694757
15
+ CS,93.97853627048322,0.0,93.97853627048322,93.97853627048322,93.97853627048322,93.97853627048322,93.97853627048322
16
+ KN,93.41890096234394,0.0,93.41890096234394,93.41890096234394,93.41890096234394,93.41890096234394,93.41890096234394