Upload 22 files
Browse files
- functions/__pycache__/df_update.cpython-39.pyc +0 -0
- functions/__pycache__/pitch_summary_functions.cpython-39.pyc +0 -0
- functions/df_update.py +472 -0
- functions/pitch_summary_functions.py +1029 -0
- functions/statcast_2024_grouped.csv +19 -0
- joblib_model/__pycache__/feature_engineering.cpython-39.pyc +0 -0
- joblib_model/barrel_model.joblib +3 -0
- joblib_model/in_zone.joblib +3 -0
- joblib_model/in_zone_model_knn_20240410.joblib +3 -0
- joblib_model/linear_reg_model_x.joblib +3 -0
- joblib_model/linear_reg_model_z.joblib +3 -0
- joblib_model/model_attack_zone.joblib +3 -0
- joblib_model/no_swing.joblib +3 -0
- joblib_model/swing.joblib +3 -0
- joblib_model/xwoba_model.joblib +3 -0
- stuff_model/__pycache__/feature_engineering.cpython-39.pyc +0 -0
- stuff_model/__pycache__/stuff_apply.cpython-39.pyc +0 -0
- stuff_model/feature_engineering.py +118 -0
- stuff_model/lgbm_model_2020_2023.joblib +3 -0
- stuff_model/stuff_apply.py +57 -0
- stuff_model/target_stats.txt +2 -0
- stuff_model/tj_stuff_plus_pitch.csv +16 -0
functions/__pycache__/df_update.cpython-39.pyc
ADDED
Binary file (14.1 kB). View file
|
|
functions/__pycache__/pitch_summary_functions.cpython-39.pyc
ADDED
Binary file (33.8 kB). View file
|
|
functions/df_update.py
ADDED
@@ -0,0 +1,472 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import polars as pl
|
2 |
+
import numpy as np
|
3 |
+
import joblib
|
4 |
+
|
5 |
+
# Deserialize the fitted estimators once, at import time. The paths are
# relative, so they resolve against the process's working directory.
(
    loaded_model,
    in_zone_model,
    attack_zone_model,
    xwoba_model,
    px_model,
    pz_model,
) = (
    joblib.load(model_path)
    for model_path in (
        'joblib_model/barrel_model.joblib',
        'joblib_model/in_zone_model_knn_20240410.joblib',
        'joblib_model/model_attack_zone.joblib',
        'joblib_model/xwoba_model.joblib',
        'joblib_model/linear_reg_model_x.joblib',
        'joblib_model/linear_reg_model_z.joblib',
    )
)
|
11 |
+
|
12 |
+
|
13 |
+
class df_update:
    """Add derived per-pitch columns to a pitch-level frame and summarise them.

    ``update`` relies on the module-level estimators ``px_model``,
    ``pz_model``, ``in_zone_model``, ``attack_zone_model`` and ``xwoba_model``.
    The two summary methods only need the columns that ``update`` produces.
    """

    def __init__(self):
        pass

    @staticmethod
    def _case_when(conditions, choices):
        """Chain ``when(c).then(v)`` pairs in order; unmatched rows become null."""
        expr = pl.when(conditions[0]).then(choices[0])
        for condition, choice in zip(conditions[1:], choices[1:]):
            expr = expr.when(condition).then(choice)
        return expr.otherwise(None)

    def update(self, df_clone: pl.DataFrame):
        """Return a copy of ``df_clone`` with indicator and model-based columns.

        Parameters:
        df_clone (pl.DataFrame): Pitch-level data. The columns read here are
            ``type_ab``, ``is_pitch``, ``sz_top``, ``sz_bot``, ``zone``, ``x``,
            ``y``, ``px``, ``pz``, ``event_type``, ``play_description``,
            ``launch_speed``, ``launch_angle``, ``play_code``, ``is_swing``
            and ``trajectory``.

        Returns:
        pl.DataFrame: A new frame (the input is cloned, not modified) with the
            derived columns appended, including one-hot ``trajectory_*``
            columns for fly_ball / ground_ball / line_drive / popup.
        """
        df = df_clone.clone()

        # Event-type groups used by the indicator columns below.
        hit_codes = ['single', 'double', 'home_run', 'triple']

        ab_codes = ['single', 'strikeout', 'field_out',
                    'grounded_into_double_play', 'fielders_choice', 'force_out',
                    'double', 'field_error', 'home_run', 'triple',
                    'double_play',
                    'fielders_choice_out', 'strikeout_double_play',
                    'other_out', 'triple_play']

        obp_true_codes = ['single', 'walk',
                          'double', 'home_run', 'triple',
                          'hit_by_pitch', 'intent_walk']

        obp_codes = ['single', 'strikeout', 'walk', 'field_out',
                     'grounded_into_double_play', 'fielders_choice', 'force_out',
                     'double', 'sac_fly', 'field_error', 'home_run', 'triple',
                     'hit_by_pitch', 'double_play', 'intent_walk',
                     'fielders_choice_out', 'strikeout_double_play',
                     'sac_fly_double_play',
                     'other_out', 'triple_play']

        bip_codes = ['In play, no out', 'In play, run(s)', 'In play, out(s)']

        # Events that receive a wOBA value of 0.
        woba_zero_codes = ['strikeout', 'field_out', 'sac_fly', 'force_out',
                           'grounded_into_double_play', 'fielders_choice',
                           'field_error', 'sac_bunt', 'double_play',
                           'fielders_choice_out', 'strikeout_double_play',
                           'sac_fly_double_play', 'other_out']

        # Events that count toward the wOBA / xwOBA denominators.
        woba_codes = ['strikeout', 'field_out', 'single', 'walk', 'hit_by_pitch',
                      'double', 'sac_fly', 'force_out', 'home_run',
                      'grounded_into_double_play', 'fielders_choice',
                      'field_error', 'triple', 'sac_bunt', 'double_play',
                      'fielders_choice_out', 'strikeout_double_play',
                      'sac_fly_double_play', 'other_out']

        event = pl.col('event_type')
        play_code = pl.col('play_code')
        launch_speed = pl.col('launch_speed')
        launch_angle = pl.col('launch_angle')

        def flag(predicate, name):
            # True where the predicate holds; False otherwise (including null).
            return pl.when(predicate).then(True).otherwise(False).alias(name)

        is_barrel = (
            (launch_speed * 1.5 - launch_angle >= 117)
            & (launch_speed + launch_angle >= 124)
            & (launch_speed >= 98)
            & (launch_angle >= 4)
            & (launch_angle <= 50)
        )

        tb_conditions = [event == 'single', event == 'double',
                         event == 'triple', event == 'home_run']
        tb_choices = [1, 2, 3, 4]

        woba_conditions = [
            event.is_in(woba_zero_codes),
            event == 'walk',
            event == 'hit_by_pitch',
            event == 'single',
            event == 'double',
            event == 'triple',
            event == 'home_run',
        ]
        woba_choices = [0, 0.689, 0.720, 0.881, 1.254, 1.589, 2.048]

        # NOTE(review): these features are taken from px/pz *before* the
        # px_predict/pz_predict back-fill below, so rows with a null px/pz are
        # scored at 0 rather than at their predicted location - confirm intent.
        location_features = df[['px', 'pz', 'sz_top', 'sz_bot']].fill_null(0).to_numpy()

        df = df.with_columns([
            pl.when(pl.col('type_ab').is_not_null()).then(1).otherwise(0).alias('pa'),
            pl.when(pl.col('is_pitch')).then(1).otherwise(0).alias('pitches'),
            # A strike-zone edge of exactly 0 is treated as missing.
            pl.when(pl.col('sz_top') == 0).then(None).otherwise(pl.col('sz_top')).alias('sz_top'),
            pl.when(pl.col('sz_bot') == 0).then(None).otherwise(pl.col('sz_bot')).alias('sz_bot'),
            pl.when(pl.col('zone') > 0).then(pl.col('zone') < 10).otherwise(None).alias('in_zone'),
            pl.Series(px_model.predict(df[['x']].fill_null(0).to_numpy())[:, 0]).alias('px_predict'),
            pl.Series(pz_model.predict(df[['y']].fill_null(0).to_numpy())[:, 0] + 3.2).alias('pz_predict'),
            pl.Series(in_zone_model.predict(location_features)).alias('in_zone_predict'),
            pl.Series(attack_zone_model.predict(location_features)).alias('attack_zone_predict'),
            flag(event.is_in(hit_codes), 'hits'),
            flag(event.is_in(ab_codes), 'ab'),
            flag(event.is_in(obp_true_codes), 'on_base'),
            flag(event.is_in(obp_codes), 'obp'),
            flag(pl.col('play_description').is_in(bip_codes), 'bip'),
            self._case_when([launch_speed.is_null(), is_barrel], [False, True]).alias('barrel'),
            self._case_when(
                [launch_angle.is_null(), (launch_angle >= 8) & (launch_angle <= 32)],
                [False, True],
            ).alias('sweet_spot'),
            self._case_when(
                [launch_speed.is_null(), launch_speed >= 94.5],
                [False, True],
            ).alias('hard_hit'),
            self._case_when(tb_conditions, tb_choices).alias('tb'),
            self._case_when(woba_conditions, woba_choices).alias('woba'),
            pl.when(play_code.is_in(['S', 'W', 'T'])).then(1).otherwise(0).alias('whiffs'),
            pl.when(play_code.is_in(['S', 'W', 'T', 'C'])).then(1).otherwise(0).alias('csw'),
            pl.when(pl.col('is_swing').cast(pl.Boolean)).then(1).otherwise(0).alias('swings'),
            # k / bb are computed once here, with the semantics the original
            # code ended up with after its second pass overwrote the first.
            flag(event.str.contains('strikeout'), 'k'),
            flag(event.is_in(['walk', 'intent_walk']), 'bb'),
            # Placeholders; filled in by later steps.
            pl.lit(None).alias('attack_zone'),
            pl.lit(None).alias('woba_pred'),
            pl.lit(None).alias('woba_pred_contact'),
        ])

        # NOTE(review): expressions inside a single with_columns call all read
        # the frame as it was before the call, so the zone flags below use the
        # in_zone value *before* it is back-filled from in_zone_predict in this
        # same call - confirm that is intended.
        zone_hit = pl.col('in_zone') == True  # noqa: E712 - null-aware comparison
        zone_miss = pl.col('in_zone') == False  # noqa: E712 - null-aware comparison
        swung = pl.col('swings') == 1
        made_contact = pl.col('whiffs') == 0

        df = df.with_columns([
            pl.when(event.is_in(woba_codes)).then(1).otherwise(None).alias('woba_codes'),
            pl.when(event.is_in(woba_codes)).then(1).otherwise(None).alias('xwoba_codes'),
            # tb is only non-null for hits, so this keeps wOBA for hits only.
            pl.when(pl.col('tb') >= 0).then(pl.col('woba')).otherwise(None).alias('woba_contact'),
            pl.when(pl.col('px').is_null()).then(pl.col('px_predict')).otherwise(pl.col('px')).alias('px'),
            pl.when(pl.col('pz').is_null()).then(pl.col('pz_predict')).otherwise(pl.col('pz')).alias('pz'),
            pl.when(pl.col('in_zone').is_null()).then(pl.col('in_zone_predict')).otherwise(pl.col('in_zone')).alias('in_zone'),
            pl.when(launch_speed.is_null()).then(None).otherwise(pl.col('barrel')).alias('barrel'),
            pl.lit('average').alias('average'),
            flag(zone_miss, 'out_zone'),
            flag(zone_hit & swung, 'zone_swing'),
            flag(zone_hit & swung & made_contact, 'zone_contact'),
            flag(zone_miss & swung, 'ozone_swing'),
            flag(zone_miss & swung & made_contact, 'ozone_contact'),
            pl.when(pl.col('attack_zone').is_null()).then(pl.col('attack_zone_predict')).otherwise(pl.col('attack_zone')).alias('attack_zone'),
        ])

        # Attack-zone codes 0..3 map, in order, onto these column prefixes.
        attack_zones = ('heart', 'shadow', 'chase', 'waste')
        attack_zone = pl.col('attack_zone')
        k_float = pl.col('k').cast(pl.Float32)
        bb_float = pl.col('bb').cast(pl.Float32)

        df = df.with_columns(
            [
                (k_float - bb_float).alias('k_minus_bb'),
                (bb_float - k_float).alias('bb_minus_k'),
                (launch_speed > 0).alias('bip_div'),
            ]
            + [(attack_zone == code).alias(name)
               for code, name in enumerate(attack_zones)]
            + [((attack_zone == code) & (pl.col('swings') == 1)).alias(f'{name}_swing')
               for code, name in enumerate(attack_zones)]
            + [((attack_zone == code) & (pl.col('whiffs') == 1)).alias(f'{name}_whiff')
               for code, name in enumerate(attack_zones)]
        )

        # Expected wOBA per row: class probabilities weighted by these values
        # and summed across classes.
        # NOTE(review): assumes predict_proba yields five columns lined up with
        # these weights - confirm against how xwoba_model was trained.
        class_weights = np.array([0, 0.881, 1.254, 1.589, 2.048])
        batted_ball = df[['launch_angle', 'launch_speed']].fill_null(0).to_numpy()
        expected_woba = (xwoba_model.predict_proba(batted_ball) * class_weights).sum(axis=1)
        df = df.with_columns([
            pl.Series(expected_woba).alias('woba_pred_predict')
        ])

        # Walks, hit-by-pitches and strikeouts get fixed values instead of the
        # model output.
        df = df.with_columns([
            pl.when(event.is_in(['walk'])).then(0.689)
            .when(event.is_in(['hit_by_pitch'])).then(0.720)
            .when(event.is_in(['strikeout', 'strikeout_double_play'])).then(0)
            .otherwise(pl.col('woba_pred_predict')).alias('woba_pred_predict')
        ])

        df = df.with_columns([
            pl.when(pl.col('woba_codes').is_null()).then(None).otherwise(pl.col('woba_pred_predict')).alias('woba_pred'),
            pl.when(pl.col('bip') != 1).then(None).otherwise(pl.col('woba_pred_predict')).alias('woba_pred_contact'),
        ])

        # Fold bunt trajectories into the regular categories; '' becomes null.
        trajectory = pl.col('trajectory')
        df = df.with_columns([
            pl.when(trajectory.is_in(['bunt_popup'])).then(pl.lit('popup'))
            .when(trajectory.is_in(['bunt_grounder'])).then(pl.lit('ground_ball'))
            .when(trajectory.is_in(['bunt_line_drive'])).then(pl.lit('line_drive'))
            .when(trajectory.is_in([''])).then(pl.lit(None))
            .otherwise(trajectory).alias('trajectory')
        ])

        # One-hot encode the trajectory column. The former identity rename of
        # these columns was dropped: it changed nothing when every category was
        # present and raised when one was missing, before the back-fill below
        # had a chance to add it.
        dummy_df = df.select(pl.col('trajectory')).to_dummies()

        # Guarantee the four columns the summaries aggregate always exist.
        for col in ['trajectory_fly_ball', 'trajectory_ground_ball',
                    'trajectory_line_drive', 'trajectory_popup']:
            if col not in dummy_df.columns:
                dummy_df = dummy_df.with_columns(pl.lit(0).alias(col))

        df = df.hstack(dummy_df)

        # to_dummies emits a column for null trajectories; it is not wanted.
        if 'trajectory_null' in df.columns:
            df = df.drop('trajectory_null')

        return df

    @staticmethod
    def _summary_aggregations() -> list:
        """Per-group aggregation expressions shared by both summary methods."""

        def total(column, alias=None):
            return pl.col(column).sum().alias(alias or column)

        return [
            total('pa'),
            total('ab'),
            total('obp', 'obp_pa'),
            total('hits'),
            total('on_base'),
            total('k'),
            total('bb'),
            total('bb_minus_k'),
            total('csw'),
            total('bip'),
            total('bip_div'),
            total('tb'),
            total('woba'),
            total('woba_contact'),
            total('woba_pred', 'xwoba'),
            total('woba_pred_contact', 'xwoba_contact'),
            total('woba_codes'),
            total('xwoba_codes'),
            total('hard_hit'),
            total('barrel'),
            total('sweet_spot'),
            pl.col('launch_speed').max().alias('max_launch_speed'),
            pl.col('launch_speed').quantile(0.90).alias('launch_speed_90'),
            pl.col('launch_speed').mean().alias('launch_speed'),
            pl.col('launch_angle').mean().alias('launch_angle'),
            total('is_pitch', 'pitches'),
            total('swings'),
            total('in_zone'),
            total('out_zone'),
            total('whiffs'),
            total('zone_swing'),
            total('zone_contact'),
            total('ozone_swing'),
            total('ozone_contact'),
            total('trajectory_ground_ball', 'ground_ball'),
            total('trajectory_line_drive', 'line_drive'),
            total('trajectory_fly_ball', 'fly_ball'),
            total('trajectory_popup', 'pop_up'),
            # Count of non-null attack_zone values: denominator for zone shares.
            pl.col('attack_zone').count().alias('attack_zone'),
            total('heart'),
            total('shadow'),
            total('chase'),
            total('waste'),
            total('heart_swing'),
            total('shadow_swing'),
            total('chase_swing'),
            total('waste_swing'),
            total('heart_whiff'),
            total('shadow_whiff'),
            total('chase_whiff'),
            total('waste_whiff'),
        ]

    @staticmethod
    def _summary_rates() -> list:
        """Rate expressions computed from the columns of ``_summary_aggregations``."""

        def ratio(numerator, denominator, alias):
            return (pl.col(numerator) / pl.col(denominator)).alias(alias)

        return [
            ratio('hits', 'ab', 'avg'),
            ratio('on_base', 'obp_pa', 'obp'),
            ratio('tb', 'ab', 'slg'),
            (pl.col('on_base') / pl.col('obp_pa') + pl.col('tb') / pl.col('ab')).alias('ops'),
            ratio('k', 'pa', 'k_percent'),
            ratio('bb', 'pa', 'bb_percent'),
            ratio('bb_minus_k', 'pa', 'bb_minus_k_percent'),
            ratio('bb', 'k', 'bb_over_k_percent'),
            ratio('csw', 'pitches', 'csw_percent'),
            ratio('sweet_spot', 'bip_div', 'sweet_spot_percent'),
            ratio('woba', 'woba_codes', 'woba_percent'),
            ratio('woba_contact', 'bip', 'woba_percent_contact'),
            ratio('hard_hit', 'bip_div', 'hard_hit_percent'),
            ratio('barrel', 'bip_div', 'barrel_percent'),
            ratio('zone_contact', 'zone_swing', 'zone_contact_percent'),
            ratio('zone_swing', 'in_zone', 'zone_swing_percent'),
            ratio('in_zone', 'pitches', 'zone_percent'),
            (pl.col('ozone_swing') / (pl.col('pitches') - pl.col('in_zone'))).alias('chase_percent'),
            ratio('ozone_contact', 'ozone_swing', 'chase_contact'),
            ratio('swings', 'pitches', 'swing_percent'),
            ratio('whiffs', 'swings', 'whiff_rate'),
            ratio('whiffs', 'pitches', 'swstr_rate'),
            ratio('ground_ball', 'bip', 'ground_ball_percent'),
            ratio('line_drive', 'bip', 'line_drive_percent'),
            ratio('fly_ball', 'bip', 'fly_ball_percent'),
            ratio('pop_up', 'bip', 'pop_up_percent'),
            ratio('heart', 'attack_zone', 'heart_zone_percent'),
            ratio('shadow', 'attack_zone', 'shadow_zone_percent'),
            ratio('chase', 'attack_zone', 'chase_zone_percent'),
            ratio('waste', 'attack_zone', 'waste_zone_percent'),
            ratio('heart_swing', 'heart', 'heart_zone_swing_percent'),
            ratio('shadow_swing', 'shadow', 'shadow_zone_swing_percent'),
            ratio('chase_swing', 'chase', 'chase_zone_swing_percent'),
            ratio('waste_swing', 'waste', 'waste_zone_swing_percent'),
            ratio('heart_whiff', 'heart_swing', 'heart_zone_whiff_percent'),
            ratio('shadow_whiff', 'shadow_swing', 'shadow_zone_whiff_percent'),
            ratio('chase_whiff', 'chase_swing', 'chase_zone_whiff_percent'),
            ratio('waste_whiff', 'waste_swing', 'waste_zone_whiff_percent'),
            ratio('xwoba', 'xwoba_codes', 'xwoba_percent'),
            ratio('xwoba_contact', 'bip', 'xwoba_percent_contact'),
        ]

    def update_summary(self, df: pl.DataFrame, pitcher: bool = True) -> pl.DataFrame:
        """
        Update summary statistics for pitchers or batters.

        Parameters:
        df (pl.DataFrame): The input Polars DataFrame containing the per-pitch columns produced by ``update``.
        pitcher (bool): Group by ``pitcher_id``/``pitcher_name`` when True, by ``batter_id``/``batter_name`` when False.

        Returns:
        pl.DataFrame: A Polars DataFrame with aggregated and calculated summary statistics.
        """
        position = 'pitcher' if pitcher else 'batter'
        return self.update_summary_select(df, [f'{position}_id', f'{position}_name'])

    def update_summary_select(self, df: pl.DataFrame, selection: list) -> pl.DataFrame:
        """
        Update summary statistics grouped by an arbitrary set of columns.

        Parameters:
        df (pl.DataFrame): The input Polars DataFrame containing the per-pitch columns produced by ``update``.
        selection (list): Column names to group by.

        Returns:
        pl.DataFrame: A Polars DataFrame with aggregated and calculated summary statistics.
        """
        # Aggregate the counting columns per group, then derive the rates.
        df_summ = df.group_by(selection).agg(self._summary_aggregations())
        df_summ = df_summ.with_columns(self._summary_rates())

        return df_summ
|
functions/pitch_summary_functions.py
ADDED
@@ -0,0 +1,1029 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
import json
|
4 |
+
from matplotlib.ticker import FuncFormatter
|
5 |
+
from matplotlib.ticker import MaxNLocator
|
6 |
+
import math
|
7 |
+
from matplotlib.patches import Ellipse
|
8 |
+
import matplotlib.transforms as transforms
|
9 |
+
import matplotlib.colors
|
10 |
+
import matplotlib.colors as mcolors
|
11 |
+
import seaborn as sns
|
12 |
+
import matplotlib.pyplot as plt
|
13 |
+
import requests
|
14 |
+
import polars as pl
|
15 |
+
from PIL import Image
|
16 |
+
import requests
|
17 |
+
from io import BytesIO
|
18 |
+
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
|
19 |
+
import matplotlib.pyplot as plt
|
20 |
+
import matplotlib.gridspec as gridspec
|
21 |
+
import PIL
|
22 |
+
|
23 |
+
|
24 |
+
### PITCH COLOURS ###

# Pitch-type code -> display colour (hex string) and human-readable name.
pitch_colours = {
    ## Fastballs ##
    'FF': {'colour': '#FF007D', 'name': '4-Seam Fastball'},
    'FA': {'colour': '#FF007D', 'name': 'Fastball'},
    'SI': {'colour': '#98165D', 'name': 'Sinker'},
    'FC': {'colour': '#BE5FA0', 'name': 'Cutter'},

    ## Offspeed ##
    'CH': {'colour': '#F79E70', 'name': 'Changeup'},
    'FS': {'colour': '#FE6100', 'name': 'Splitter'},
    'SC': {'colour': '#F08223', 'name': 'Screwball'},
    'FO': {'colour': '#FFB000', 'name': 'Forkball'},

    ## Sliders ##
    'SL': {'colour': '#67E18D', 'name': 'Slider'},
    'ST': {'colour': '#1BB999', 'name': 'Sweeper'},
    'SV': {'colour': '#376748', 'name': 'Slurve'},

    ## Curveballs ##
    'KC': {'colour': '#311D8B', 'name': 'Knuckle Curve'},
    'CU': {'colour': '#3025CE', 'name': 'Curveball'},
    'CS': {'colour': '#274BFC', 'name': 'Slow Curve'},
    'EP': {'colour': '#648FFF', 'name': 'Eephus'},

    ## Others ##
    'KN': {'colour': '#867A08', 'name': 'Knuckleball'},
    'PO': {'colour': '#472C30', 'name': 'Pitch Out'},
    'UN': {'colour': '#9C8975', 'name': 'Unknown'},
}

# Lookups derived from ``pitch_colours`` (insertion order is preserved).
# code -> colour
dict_colour = {code: info['colour'] for code, info in pitch_colours.items()}
# code -> name
dict_pitch = {code: info['name'] for code, info in pitch_colours.items()}
# name -> code, plus the alternate 'Four-Seam Fastball' spelling and the
# pseudo-entry 'All'.
dict_pitch_desc_type = {
    **{info['name']: code for code, info in pitch_colours.items()},
    'Four-Seam Fastball': 'FF',
    'All': 'All',
}
# name -> colour, plus the alternate 'Four-Seam Fastball' spelling.
dict_pitch_name = {
    **{info['name']: info['colour'] for info in pitch_colours.values()},
    'Four-Seam Fastball': '#FF007D',
}

# Font settings passed as ``fontdict`` to matplotlib text calls.
# NOTE(review): 'calibi' looks like a typo for 'calibri' — confirm before changing,
# since it alters which font matplotlib resolves.
font_properties = dict(family='calibi', size=12)
font_properties_titles = dict(family='calibi', size=20)
font_properties_axes = dict(family='calibi', size=16)
|
69 |
+
|
70 |
+
# Module-level colormap interpolating '#648FFF' -> '#FFFFFF' -> '#FFB000'.
cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#648FFF','#FFFFFF','#FFB000',])
|
71 |
+
|
72 |
+
### FANGRAPHS STATS DICT ###
# Stat key -> bold mathtext table header and the format spec for its value.
# Backslashes are written as explicit '\\' so no invalid escape sequences
# ('\/' and '\%') remain; the resulting strings are unchanged.
fangraphs_stats_dict = {
    'IP': {'table_header': '$\\bf{IP}$', 'format': '.1f'},
    'TBF': {'table_header': '$\\bf{PA}$', 'format': '.0f'},
    'AVG': {'table_header': '$\\bf{AVG}$', 'format': '.3f'},
    'K/9': {'table_header': '$\\bf{K\\/9}$', 'format': '.2f'},
    'BB/9': {'table_header': '$\\bf{BB\\/9}$', 'format': '.2f'},
    'K/BB': {'table_header': '$\\bf{K\\/BB}$', 'format': '.2f'},
    'HR/9': {'table_header': '$\\bf{HR\\/9}$', 'format': '.2f'},
    'K%': {'table_header': '$\\bf{K\\%}$', 'format': '.1%'},
    'BB%': {'table_header': '$\\bf{BB\\%}$', 'format': '.1%'},
    'K-BB%': {'table_header': '$\\bf{K-BB\\%}$', 'format': '.1%'},
    'WHIP': {'table_header': '$\\bf{WHIP}$', 'format': '.2f'},
    'BABIP': {'table_header': '$\\bf{BABIP}$', 'format': '.3f'},
    'LOB%': {'table_header': '$\\bf{LOB\\%}$', 'format': '.1%'},
    'xFIP': {'table_header': '$\\bf{xFIP}$', 'format': '.2f'},
    'FIP': {'table_header': '$\\bf{FIP}$', 'format': '.2f'},
    'H': {'table_header': '$\\bf{H}$', 'format': '.0f'},
    '2B': {'table_header': '$\\bf{2B}$', 'format': '.0f'},
    '3B': {'table_header': '$\\bf{3B}$', 'format': '.0f'},
    'R': {'table_header': '$\\bf{R}$', 'format': '.0f'},
    'ER': {'table_header': '$\\bf{ER}$', 'format': '.0f'},
    'HR': {'table_header': '$\\bf{HR}$', 'format': '.0f'},
    'BB': {'table_header': '$\\bf{BB}$', 'format': '.0f'},
    'IBB': {'table_header': '$\\bf{IBB}$', 'format': '.0f'},
    'HBP': {'table_header': '$\\bf{HBP}$', 'format': '.0f'},
    'SO': {'table_header': '$\\bf{SO}$', 'format': '.0f'},
    # OBP and SLG are rate stats like AVG/wOBA; '.0f' would round them to 0 or 1.
    'OBP': {'table_header': '$\\bf{OBP}$', 'format': '.3f'},
    'SLG': {'table_header': '$\\bf{SLG}$', 'format': '.3f'},
    'ERA': {'table_header': '$\\bf{ERA}$', 'format': '.2f'},
    'wOBA': {'table_header': '$\\bf{wOBA}$', 'format': '.3f'},
    'G': {'table_header': '$\\bf{G}$', 'format': '.0f'},
    'strikePercentage': {'table_header': '$\\bf{Strike\\%}$', 'format': '.1%'},
}
|
104 |
+
|
105 |
+
# General-purpose list of hex colours, addressed by index elsewhere in the file.
colour_palette = [
    '#FFB000',
    '#648FFF',
    '#785EF0',
    '#DC267F',
    '#FE6100',
    '#3D1EB2',
    '#894D80',
    '#16AA02',
    '#B5592B',
    '#A3C1ED',
]
|
107 |
+
|
108 |
+
### GET COLOURS ###
|
109 |
+
def get_color(value, normalize, cmap_sum):
    """
    Map a value to a hex colour through a normalisation and a colormap.

    Parameters
    ----------
    value : float
        The value to convert to a colour.
    normalize : matplotlib.colors.Normalize
        Callable applied to ``value`` before the colormap lookup.
    cmap_sum : matplotlib.colors.Colormap
        Callable applied to the normalised value to obtain a colour.

    Returns
    -------
    str
        The colour as a hexadecimal string, produced by ``mcolors.to_hex``.
    """
    scaled = normalize(value)
    rgba = cmap_sum(scaled)
    return mcolors.to_hex(rgba)
|
129 |
+
|
130 |
+
### PITCH ELLIPSE ###
|
131 |
+
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Create a plot of the covariance confidence ellipse of *x* and *y*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data. Converted with ``np.asarray`` so lists, NumPy arrays and
        other array-likes are all accepted.

    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.

    n_std : float
        The number of standard deviations to determine the ellipse's radiuses.

    facecolor : str
        Fill colour forwarded to the ellipse patch.

    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`

    Returns
    -------
    matplotlib.patches.Ellipse or None
        The patch added to ``ax``. ``None`` when no ellipse can be drawn:
        fewer than two points, a zero or non-finite variance on either axis,
        or a ``ValueError`` raised while building the patch.

    Raises
    ------
    ValueError
        If *x* and *y* do not have the same length.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    if x.size != y.size:
        raise ValueError("x and y must be the same size")

    # A covariance matrix needs at least two observations.
    if x.size < 2:
        return None

    try:
        cov = np.cov(x, y)
        var_x, var_y = cov[0, 0], cov[1, 1]

        # A degenerate spread would make the correlation 0/0 and give the
        # ellipse NaN geometry, so skip drawing instead.
        if not (np.isfinite(var_x) and np.isfinite(var_y)) or var_x <= 0 or var_y <= 0:
            return None

        # Clip guards against rounding pushing the value just outside
        # [-1, 1], which would turn a square root below into NaN.
        pearson = np.clip(cov[0, 1] / np.sqrt(var_x * var_y), -1.0, 1.0)

        # Using a special case to obtain the eigenvalues of this
        # two-dimensional dataset.
        ell_radius_x = np.sqrt(1 + pearson)
        ell_radius_y = np.sqrt(1 - pearson)
        ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                          facecolor=facecolor, linewidth=2, linestyle='--', **kwargs)

        # Standard deviation along each axis (square root of the variance)
        # multiplied by the requested number of standard deviations.
        scale_x = np.sqrt(var_x) * n_std
        scale_y = np.sqrt(var_y) * n_std
        mean_x = x.mean()
        mean_y = y.mean()

        # Unit ellipse -> rotate 45 degrees -> stretch to the data's spread ->
        # move to the data's mean.
        transf = transforms.Affine2D() \
            .rotate_deg(45) \
            .scale(scale_x, scale_y) \
            .translate(mean_x, mean_y)

        ellipse.set_transform(transf + ax.transData)
    except ValueError:
        # Best-effort: an ellipse that cannot be built is simply not drawn.
        return None

    return ax.add_patch(ellipse)
|
191 |
+
### VELOCITY KDES ###
|
192 |
+
def velocity_kdes(df: pl.DataFrame,
                  ax: plt.Axes,
                  gs: gridspec.GridSpec,
                  gs_x: list,
                  gs_y: list,
                  fig: plt.Figure):
    """
    Plot the velocity KDEs for different pitch types.

    One stacked sub-axis is created per pitch type. Each shows the
    distribution of ``start_speed``, a dashed line at the mean of the supplied
    data, and a dotted line at the mean ``release_speed`` for that pitch type
    read from ``functions/statcast_2024_grouped.csv``.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads the columns
        ``pitch_count``, ``pitch_type`` and ``start_speed``.
    ax : plt.Axes
        The outer axis; it is switched off and only carries the title.
    gs : GridSpec
        The GridSpec for the subplot layout.
    gs_x : list
        Row bounds; ``gs_x[0]:gs_x[-1]`` selects the GridSpec rows.
    gs_y : list
        Column bounds; ``gs_y[0]:gs_y[-1]`` selects the GridSpec columns.
    fig : plt.Figure
        The figure to plot on.
    """
    # Pitch types ordered by descending pitch_count.
    items_in_order = (df
                      .sort("pitch_count", descending=True)['pitch_type']
                      .unique(maintain_order=True)
                      .to_numpy()
                      )

    ax.axis('off')
    ax.set_title('Pitch Velocity Distribution', fontdict={'family': 'calibi', 'size': 20})

    # Nothing to draw; a zero-row inner grid cannot be built.
    if len(items_in_order) == 0:
        return

    # Create the inner subplots inside the outer subplot
    inner_grid_1 = gridspec.GridSpecFromSubplotSpec(len(items_in_order), 1,
                                                    subplot_spec=gs[gs_x[0]:gs_x[-1], gs_y[0]:gs_y[-1]])
    ax_top = [fig.add_subplot(inner) for inner in inner_grid_1]

    # Loop-invariant work: the reference file is read once and the shared
    # x-range (rounded outwards to multiples of 5) is computed once.
    df_statcast_group = pl.read_csv('functions/statcast_2024_grouped.csv')
    x_min = math.floor(df['start_speed'].min() / 5) * 5
    x_max = math.ceil(df['start_speed'].max() / 5) * 5
    x_ticks = list(range(x_min, x_max, 5))
    last_idx = len(items_in_order) - 1

    for idx, i in enumerate(items_in_order):
        axis = ax_top[idx]
        pitch_data = df.filter(pl.col('pitch_type') == i)['start_speed']
        unique_speeds = np.unique(pitch_data)

        if unique_speeds.size == 1:
            # A KDE is undefined for a single distinct value; draw a vertical bar.
            axis.plot([unique_speeds[0], unique_speeds[0]], [0, 1], linewidth=4,
                      color=dict_colour[i], zorder=20)
        else:
            sns.kdeplot(pitch_data, ax=axis, fill=True,
                        clip=(pitch_data.min(), pitch_data.max()),
                        color=dict_colour[i])

        # Dashed line: mean start speed in the supplied data.
        pitch_mean = pitch_data.mean()
        axis.plot([pitch_mean, pitch_mean],
                  list(axis.get_ylim()),
                  color=dict_colour[i],
                  linestyle='--')

        # Dotted line: mean release speed from the reference file. Skipped
        # when the file has no row for this pitch type.
        league_mean = df_statcast_group.filter(pl.col('pitch_type') == i)['release_speed'].mean()
        if league_mean is not None:
            axis.plot([league_mean, league_mean],
                      list(axis.get_ylim()),
                      color=dict_colour[i],
                      linestyle=':')

        axis.set_xlim(x_min, x_max)
        axis.set_xlabel('')
        axis.set_ylabel('')
        axis.spines['top'].set_visible(False)
        axis.spines['right'].set_visible(False)
        axis.spines['left'].set_visible(False)
        if idx < last_idx:
            # Only the bottom axis shows x tick labels.
            axis.tick_params(axis='x', colors='none')

        axis.set_xticks(x_ticks)
        axis.set_yticks([])
        axis.grid(axis='x', linestyle='--')
        axis.text(-0.01, 0.5, i, transform=axis.transAxes,
                  fontsize=14, va='center', ha='right')

    ax_top[-1].set_xlabel('Velocity (mph)')
|
277 |
+
|
278 |
+
|
279 |
+
### TJ STUFF+ ROLLING ###
|
280 |
+
def tj_stuff_roling(df: pl.DataFrame, window: int, ax: plt.Axes):
    """
    Plot the rolling average of tjStuff+ for different pitch types.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads the columns
        ``pitch_count``, ``pitch_type`` and ``tj_stuff_plus``.
    window : int
        The window size for calculating the rolling average.
    ax : plt.Axes
        The axis to plot on.
    """
    # Pitch types ordered by descending pitch_count.
    items_in_order = (
        df.sort("pitch_count", descending=True)['pitch_type']
        .unique(maintain_order=True)
        .to_numpy()
    )

    # Plot the rolling average for each pitch type
    for i in items_in_order:
        pitch_df = df.filter(pl.col('pitch_type') == i)

        # Skip pitch types whose count is below the window size.
        if pitch_df['pitch_count'].max() < window:
            continue

        rolling = pitch_df['tj_stuff_plus'].rolling_mean(window)
        sns.lineplot(
            # x is derived from the y series so the two always have equal length.
            x=range(1, len(rolling) + 1),
            y=rolling,
            color=dict_colour[i],
            ax=ax,
            linewidth=3
        )

    # The first `window - 1` rolling values are null, so the axis starts at `window`.
    ax.set_xlim(window, df['pitch_count'].max())
    ax.set_ylim(70, 130)
    ax.set_xlabel('Pitches', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Pitch Rolling tjStuff+", fontdict=font_properties_titles)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
|
321 |
+
|
322 |
+
|
323 |
+
### TJ STUFF+ ROLLING ###
|
324 |
+
def tj_stuff_roling_game(df: pl.DataFrame, window: int, ax: plt.Axes):
    """
    Plot the rolling average of tjStuff+ for different pitch types over games.

    Games are numbered 1..N in order of first appearance of ``game_id``. For
    each pitch type the per-game mean of ``tj_stuff_plus`` is computed; games
    in which the pitch type has no rows are filled forward (then backward)
    and marked with a white dot.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads the columns ``game_id``,
        ``game_date``, ``pitch_type``, ``pitch_description`` and
        ``tj_stuff_plus``.
    window : int
        The window size for calculating the rolling average.
    ax : plt.Axes
        The axis to plot on.
    """
    # Map game_id to sequential numbers
    date_to_number = {date: i + 1 for i, date in enumerate(df['game_id'].unique(maintain_order=True))}
    n_games = len(date_to_number)

    # Add a column with the sequential game numbers
    df_plot = df.with_columns(
        pl.col("game_id").map_elements(lambda x: date_to_number.get(x, x)).alias("start_number")
    )

    # Group by relevant columns and calculate mean tj_stuff_plus
    plot_game_roll = df_plot.group_by(['start_number', 'game_id', 'game_date', 'pitch_type', 'pitch_description']).agg(
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus')
    ).sort('start_number', descending=False)

    # Get the list of pitch types ordered by frequency
    sorted_value_counts = df['pitch_type'].value_counts().sort('count', descending=True)
    items_in_order = sorted_value_counts['pitch_type'].to_list()

    # One row per game number; joined against each pitch type to expose gaps.
    all_games = pl.DataFrame({"start_number": list(date_to_number.values())})

    # Plot the rolling average for each pitch type
    for i in items_in_order:
        df_item = plot_game_roll.filter(pl.col('pitch_type') == i)
        df_item = df_item.with_columns(
            pl.col("start_number").cast(pl.Int64)
        ).join(
            all_games,
            on="start_number",
            how="outer"
        ).sort("start_number_right").with_columns([
            pl.col("start_number").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("tj_stuff_plus").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("pitch_type").fill_null(strategy="forward").fill_null(strategy="backward"),
            pl.col("pitch_description").fill_null(strategy="forward").fill_null(strategy="backward")
        ])

        sns.lineplot(x=range(1, n_games + 1),
                     y=df_item['tj_stuff_plus'].rolling_mean(window),
                     color=dict_colour[i],
                     ax=ax, linewidth=3)

        # Highlight games that had no rows for this pitch type: after the join
        # their game_id is null. Drawn in one call rather than row by row.
        missing = df_item.filter(pl.col('game_id').is_null())
        if len(missing) > 0:
            sns.scatterplot(x=missing['start_number_right'].to_list(),
                            y=missing['tj_stuff_plus'].to_list(),
                            color='white',
                            ec='black',
                            ax=ax,
                            zorder=100)

    # The first `window - 1` rolling values are null, so the axis starts at
    # `window`. The upper limit is the total number of games, independent of
    # which pitch type was plotted last.
    ax.set_xlim(window, n_games)
    ax.set_ylim(70, 130)
    ax.set_xlabel('Games', fontdict=font_properties_axes)
    ax.set_ylabel('tjStuff+', fontdict=font_properties_axes)
    ax.set_title(f"{window} Game Rolling tjStuff+", fontdict=font_properties_titles)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
|
392 |
+
|
393 |
+
|
394 |
+
def break_plot(df: pl.DataFrame, ax: plt.Axes):
    """
    Plot the pitch breaks for different pitch types.

    Draws a shaded 2-standard-deviation ellipse per pitch type (for types
    with more than four rows) and a scatter of ``hb`` against ``ivb``. For a
    left-handed pitcher the x-axis is inverted.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads the columns
        ``pitch_count``, ``pitch_type``, ``hb``, ``ivb`` and ``pitcher_hand``
        (only the first row's ``pitcher_hand`` is used).
    ax : plt.Axes
        The axis to plot on.
    """
    # Get unique pitch types sorted by pitch count
    label_labels = df.sort(by=['pitch_count', 'pitch_type'], descending=[False, True])['pitch_type'].unique(maintain_order=True).to_numpy()

    # Plot confidence ellipses for each pitch type
    for label in label_labels:
        subset = df.filter(pl.col('pitch_type') == label)
        if len(subset) <= 4:
            continue
        try:
            confidence_ellipse(subset['hb'], subset['ivb'], ax=ax, edgecolor=dict_colour[label],
                               n_std=2, facecolor=dict_colour[label], alpha=0.2)
        except ValueError:
            # One failed ellipse must not abort the rest of the plot.
            continue

    pitcher_hand = df['pitcher_hand'][0]

    # Plot scatter plot for pitch breaks (the same call for either hand)
    if pitcher_hand in ('R', 'L'):
        sns.scatterplot(ax=ax, x=df['hb'], y=df['ivb'], hue=df['pitch_type'],
                        palette=dict_colour, ec='black', alpha=1, zorder=2)

    # Set axis limits
    ax.set_xlim((-25, 25))
    ax.set_ylim((-25, 25))

    # Add horizontal and vertical lines
    ax.hlines(y=0, xmin=-50, xmax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)
    ax.vlines(x=0, ymin=-50, ymax=50, color=colour_palette[8], alpha=0.5, linestyles='--', zorder=1)

    # Set axis labels and title
    ax.set_xlabel('Horizontal Break (in)', fontdict=font_properties_axes)
    ax.set_ylabel('Induced Vertical Break (in)', fontdict=font_properties_axes)
    ax.set_title("Pitch Breaks", fontdict=font_properties_titles)

    # Remove legend (absent when no scatter was drawn)
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    # Set tick labels
    ax.set_xticklabels(ax.get_xticks(), fontdict=font_properties)
    ax.set_yticklabels(ax.get_yticks(), fontdict=font_properties)

    # Add text annotations for glove side and arm side
    label_box = dict(facecolor='white', edgecolor='black')
    if pitcher_hand == 'R':
        ax.text(-24.5, -24.5, s='← Glove Side', fontstyle='italic', ha='left', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)
        ax.text(24.5, -24.5, s='Arm Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)
    if pitcher_hand == 'L':
        # The axis is mirrored, so data x=24.5 is drawn on the left.
        ax.invert_xaxis()
        ax.text(24.5, -24.5, s='← Arm Side', fontstyle='italic', ha='left', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)
        ax.text(-24.5, -24.5, s='Glove Side →', fontstyle='italic', ha='right', va='bottom',
                bbox=label_box, fontsize=12, zorder=3)

    # Set aspect ratio and format axis ticks as whole numbers
    ax.set_aspect('equal', adjustable='box')
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: f"{int(x)}"))
    ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: f"{int(x)}"))
|
460 |
+
|
461 |
+
# DEFINE STRIKE ZONE
# Corners of the strike-zone rectangle as a closed path: the first corner is
# repeated at the end so a single line plot draws all four sides.
strike_zone = pl.DataFrame(
    dict(
        PlateLocSide=[-0.9, -0.9, 0.9, 0.9, -0.9],
        PlateLocHeight=[1.5, 3.5, 3.5, 1.5, 1.5],
    )
)
|
466 |
+
|
467 |
+
### STRIKE ZONE ###
|
468 |
+
def draw_line(axis, alpha_spot=1, catcher_p=True):
    """
    Draw the strike zone outline and a home-plate outline on an axis.

    Parameters
    ----------
    axis : matplotlib.axes.Axes
        The axis that receives the lines.
    alpha_spot : float, optional
        Transparency applied to every line (default is 1).
    catcher_p : bool, optional
        Selects which of the two home-plate outlines is drawn: the
        catcher's-perspective one when True (default), otherwise the
        pitcher's-perspective one.
    """
    # Strike zone rectangle, taken from the module-level ``strike_zone`` frame.
    zone_x = strike_zone['PlateLocSide'].to_list()
    zone_y = strike_zone['PlateLocHeight'].to_list()
    axis.plot(zone_x, zone_y, color='black', linewidth=1.3, zorder=3, alpha=alpha_spot)

    # Home plate as five straight segments, each given as (xs, ys).
    if catcher_p:
        plate_segments = (
            ([-0.708, 0.708], [0.15, 0.15]),
            ([-0.708, -0.708], [0.15, 0.3]),
            ([-0.708, 0], [0.3, 0.5]),
            ([0, 0.708], [0.5, 0.3]),
            ([0.708, 0.708], [0.3, 0.15]),
        )
    else:
        plate_segments = (
            ([-0.708, 0.708], [0.4, 0.4]),
            ([-0.708, -0.9], [0.4, -0.1]),
            ([-0.9, 0], [-0.1, -0.35]),
            ([0, 0.9], [-0.35, -0.1]),
            ([0.9, 0.708], [-0.1, 0.4]),
        )

    for seg_x, seg_y in plate_segments:
        axis.plot(seg_x, seg_y, color='black', linewidth=1, alpha=alpha_spot, zorder=1)
|
499 |
+
|
500 |
+
def location_plot(df: pl.DataFrame, ax: plt.Axes, hand: str):
    """
    Plot average pitch locations per pitch type against one batter hand.

    For each pitch type with at least five rows against ``hand`` a shaded
    1.5-standard-deviation ellipse of ``px``/``pz`` is drawn. A marker is then
    placed at each pitch type's mean location, sized by that type's share of
    the pitches thrown to ``hand``.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data. Reads the columns
        ``pitch_count``, ``pitch_type``, ``batter_hand``, ``px``, ``pz`` and
        ``start_speed``.
    ax : plt.Axes
        The axis to plot on.
    hand : str
        The batter hand ('L' for left-handed, 'R' for right-handed).
    """
    # Unique pitch types, ordered by pitch count
    ordered = df.sort(by=['pitch_count', 'pitch_type'], descending=[False, True])
    label_labels = ordered['pitch_type'].unique(maintain_order=True).to_numpy()

    # Confidence ellipse per pitch type; small samples are skipped
    for label in label_labels:
        subset = df.filter((pl.col('pitch_type') == label) & (pl.col('batter_hand') == hand))
        if len(subset) < 5:
            continue
        confidence_ellipse(subset['px'], subset['pz'], ax=ax, edgecolor=dict_colour[label],
                           n_std=1.5, facecolor=dict_colour[label], alpha=0.3)

    # Mean location and pitch count per pitch type against this hand
    versus_hand = df.filter(pl.col("batter_hand") == hand)
    per_pitch = versus_hand.group_by("pitch_type").agg([
        pl.col("start_speed").count().alias("pitches"),
        pl.col("px").mean().alias("px"),
        pl.col("pz").mean().alias("pz")
    ])

    # Share of all pitches thrown to this hand
    total_pitches = per_pitch['pitches'].sum()
    pitch_location_group = per_pitch.with_columns(
        (pl.col("pitches") / total_pitches).alias("pitch_percent")
    )

    # Markers at the mean locations, area scaled by usage share
    sns.scatterplot(ax=ax, x=pitch_location_group['px'], y=pitch_location_group['pz'],
                    hue=pitch_location_group['pitch_type'], palette=dict_colour, ec='black',
                    s=pitch_location_group['pitch_percent'] * 750, linewidth=2, zorder=2)

    # Plot cosmetics
    ax.axis('square')
    draw_line(ax, alpha_spot=0.75, catcher_p=False)
    ax.axis('off')
    ax.set_xlim((-2.75, 2.75))
    ax.set_ylim((-0.5, 5))
    if len(pitch_location_group['px']) > 0:
        ax.get_legend().remove()
    ax.grid(False)
    ax.set_title(f"Pitch Locations vs {hand}HB\n{total_pitches} Pitches", fontdict=font_properties_titles)
|
554 |
+
|
555 |
+
|
556 |
+
def summary_table(df: pl.DataFrame, ax: plt.Axes):
    """
    Create a summary table of pitch data.

    Builds one row per ``pitch_description`` plus a final "All" row, renders
    them as a matplotlib table on ``ax``, colours selected cells against the
    per-pitch-type values in ``functions/statcast_2024_grouped.csv``, and adds
    a pitch-type legend above the table.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame containing pitch data.
    ax : plt.Axes
        The axis to plot the table on.

    Raises
    ------
    KeyError
        If a ``pitch_description`` value is not a key of
        ``dict_pitch_desc_type``.
    """
    # Total pitch count as a plain scalar, used as the usage-share divisor.
    total_pitches = df['is_pitch'].sum()

    # Aggregate pitch data by pitch description
    df_agg = df.group_by("pitch_description").agg(
        pl.col('is_pitch').sum().alias('count'),
        (pl.col('is_pitch').sum() / total_pitches).alias('count_percent'),
        pl.col('start_speed').mean().alias('start_speed'),
        pl.col('ivb').mean().alias('ivb'),
        pl.col('hb').mean().alias('hb'),
        pl.col('spin_rate').mean().alias('spin_rate'),
        pl.col('vaa').mean().alias('vaa'),
        pl.col('haa').mean().alias('haa'),
        pl.col('z0').mean().alias('z0'),
        pl.col('x0').mean().alias('x0'),
        pl.col('extension').mean().alias('extension'),
        # Mean spin direction converted to an "H:MM" clock-face string.
        (((pl.col('spin_direction').mean() + 180) % 360 // 30) +
         (((pl.col('spin_direction').mean() + 180) % 360 % 30 / 30 / 100 * 60).round(2) * 10).round(0) // 1.5 / 4)
        .cast(pl.Float64).map_elements(lambda x: f"{int(x)}:{int((x % 1) * 60):02d}", return_dtype=pl.Utf8).alias('clock_time'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        pl.col('pitch_grade').mean().alias('pitch_grade'),
        (pl.col('in_zone').sum() / pl.col('is_pitch').sum()).alias('zone_percent'),
        (pl.col('ozone_swing').sum() / pl.col('out_zone').sum()).alias('chase_percent'),
        (pl.col('whiffs').sum() / pl.col('swings').sum()).alias('whiff_percent'),
        (pl.col('woba_pred_contact').sum() / pl.col('bip').sum()).alias('xwobacon')
    ).sort("count", descending=True)

    # Aggregate all pitch data into a single "All" row; per-pitch-only
    # columns are left null.
    df_agg_all = df.group_by(pl.lit("All").alias("pitch_description")).agg(
        pl.col('is_pitch').sum().alias('count'),
        (pl.col('is_pitch').sum() / total_pitches).alias('count_percent'),
        pl.lit(None).alias('start_speed'),
        pl.lit(None).alias('ivb'),
        pl.lit(None).alias('hb'),
        pl.lit(None).alias('spin_rate'),
        pl.lit(None).alias('vaa'),
        pl.lit(None).alias('haa'),
        pl.lit(None).alias('z0'),
        pl.lit(None).alias('x0'),
        pl.col('extension').mean().alias('extension'),
        pl.lit(None).alias('clock_time'),
        pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
        pl.lit(None).alias('pitch_grade'),
        (pl.col('in_zone').sum() / pl.col('is_pitch').sum()).alias('zone_percent'),
        (pl.col('ozone_swing').sum() / pl.col('out_zone').sum()).alias('chase_percent'),
        (pl.col('whiffs').sum() / pl.col('swings').sum()).alias('whiff_percent'),
        (pl.col('woba_pred_contact').sum() / pl.col('bip').sum()).alias('xwobacon')
    )

    # Concatenate aggregated data
    df_agg = pl.concat([df_agg, df_agg_all]).fill_nan(None)
    n_rows = len(df_agg)
    n_cols = len(df_agg.columns)

    # Load statcast pitch summary data
    statcast_pitch_summary = pl.read_csv('functions/statcast_2024_grouped.csv')

    # Create table; the first (pitch name) column is wider than the rest.
    table = ax.table(cellText=df_agg.fill_nan('—').fill_null('—').to_numpy(), colLabels=df_agg.columns, cellLoc='center',
                     colWidths=[2.3] + [1] * (n_cols - 1), bbox=[0.0, 0, 1, 0.8])
    cells = table.get_celld()

    # Set table properties: size 14 for the header, overridden to 18 for values.
    table.auto_set_font_size(False)
    table.set_fontsize(14)
    table.scale(1, 0.5)
    for row in range(1, n_rows + 1):
        for col in range(n_cols):
            cells[row, col].set_fontsize(18)

    # Define color maps
    cmap_sum = mcolors.LinearSegmentedColormap.from_list("", ['#648FFF', '#FFFFFF', '#FFB000'])
    cmap_sum_r = mcolors.LinearSegmentedColormap.from_list("", ['#FFB000', '#FFFFFF', '#648FFF'])

    # (table column, reference-file column or None, low, high).
    # With a reference column, low/high are multipliers of that column's mean
    # for the pitch type; with None they are absolute colour-scale bounds.
    columns_to_color = [(3, 'release_speed', 0.95, 1.05), (11, 'release_extension', 0.9, 1.1), (13, None, 80, 120),
                        (14, None, 30, 70), (15, 'in_zone_rate', 0.7, 1.3), (16, 'chase_rate', 0.7, 1.3),
                        (17, 'whiff_rate', 0.7, 1.3), (18, 'xwoba', 0.7, 1.3)]

    # Update table cells with colors and text properties
    for i in range(n_rows):
        pitch_check = dict_pitch_desc_type[df_agg['pitch_description'][i]]
        name_cell = cells[(i + 1, 0)]
        cell_text = name_cell.get_text().get_text()

        if cell_text != 'All':
            name_cell.set_facecolor(dict_pitch_name[cell_text])
            # NOTE(review): 'Split-Finger' is not a key of dict_pitch_desc_type,
            # so it cannot reach this point; confirm whether 'Splitter' was meant.
            text_props = {'color': '#000000', 'fontweight': 'bold'} if cell_text in ['Split-Finger', 'Slider', 'Changeup'] else {'color': '#ffffff', 'fontweight': 'bold'}
            name_cell.set_text_props(**text_props)
            if cell_text == 'Four-Seam Fastball':
                name_cell.get_text().set_text('4-Seam')

        select_df = statcast_pitch_summary.filter(pl.col('pitch_type') == pitch_check)

        # Apply color to specific columns based on normalized values
        for col, stat, low, high in columns_to_color:
            cell = cells[(i + 1, col)]
            cell_value = cell.get_text().get_text()
            if cell_value == '—':
                continue

            if stat is None:
                vmin, vmax = low, high
            else:
                baseline = select_df[stat].mean()
                if baseline is None:
                    # No reference value for this pitch type: leave the cell uncoloured.
                    continue
                vmin, vmax = baseline * low, baseline * high

            normalize = mcolors.Normalize(vmin=vmin, vmax=vmax)
            # Column 18 (xwobacon) uses the reversed colormap.
            cmap = cmap_sum if col != 18 else cmap_sum_r
            cell.set_facecolor(get_color(float(cell_value.strip('%')), normalize, cmap))

    # Style the label of the last data row (the "All" row)
    cells[(n_rows, 0)].set_text_props(color='#000000', fontweight='bold')

    # Update column names
    new_column_names = ['$\\bf{Pitch\\ Name}$', '$\\bf{Count}$', '$\\bf{Pitch\\%}$', '$\\bf{Velocity}$', '$\\bf{iVB}$',
                        '$\\bf{HB}$', '$\\bf{Spin}$', '$\\bf{VAA}$', '$\\bf{HAA}$', '$\\bf{vRel}$', '$\\bf{hRel}$',
                        '$\\bf{Ext.}$', '$\\bf{Axis}$', '$\\bf{tjStuff+}$', '$\\bf{Grade}$', '$\\bf{Zone\\%}$',
                        '$\\bf{Chase\\%}$', '$\\bf{Whiff\\%}$', '$\\bf{xwOBA}$\n$\\bf{Contact}$']

    for i, col_name in enumerate(new_column_names):
        cells[(0, i)].get_text().set_text(col_name)

    # Format cell values
    def format_cells(columns, fmt):
        """Re-render every non-placeholder cell of ``columns`` with ``fmt``."""
        for col in columns:
            col_idx = df_agg.columns.index(col)
            for row in range(1, n_rows + 1):
                text = cells[(row, col_idx)].get_text()
                cell_value = text.get_text()
                if cell_value != '—':
                    text.set_text(fmt.format(float(cell_value.strip('%'))))

    format_cells(['start_speed', 'ivb', 'hb', 'vaa', 'haa', 'z0', 'x0', 'extension'], '{:,.1f}')
    format_cells(['xwobacon'], '{:,.3f}')
    format_cells(['count_percent', 'zone_percent', 'chase_percent', 'whiff_percent'], '{:,.1%}')
    format_cells(['tj_stuff_plus', 'pitch_grade', 'spin_rate'], '{:,.0f}')

    # Create legend for pitch types
    items_in_order = (df.sort("pitch_count", descending=True)['pitch_type'].unique(maintain_order=True).to_numpy())
    colour_pitches = [dict_colour[x] for x in items_in_order]
    label = [dict_pitch[x] for x in items_in_order]
    handles = [plt.scatter([], [], color=color, marker='o', s=100) for color in colour_pitches]

    # More than five entries: use the smaller font and marker scale.
    legend_size, legend_scale = (16, 1.7) if len(label) > 5 else (20, 2)
    ax.legend(handles, label, bbox_to_anchor=(0.1, 0.81, 0.8, 0.14), ncol=5,
              fancybox=True, loc='lower center', fontsize=legend_size, framealpha=1.0,
              markerscale=legend_scale, prop={'family': 'calibi', 'size': legend_size})
    ax.axis('off')
|
707 |
+
|
708 |
+
def plot_footer(ax:plt.Axes):
    """Write the credit line, explanatory notes and data sources on ``ax``.

    Parameters
    ----------
    ax : plt.Axes
        Axis used purely as a text canvas; its frame and ticks are hidden.
    """
    # Notes shown in the middle of the footer. They are joined with a leading
    # and a trailing newline so the rendered text is wrapped in blank lines.
    note_lines = (
        'Colour Coding Compares to League Average By Pitch',
        'tjStuff+ calculates the Expected Run Value (xRV) of a pitch regardless of type',
        'tjStuff+ is normally distributed, where 100 is the mean and Standard Deviation is 10',
        'Pitch Grade scales tjStuff+ to the traditional 20-80 Scouting Scale for a given pitch type',
    )
    notes = '\n' + '\n'.join(note_lines) + '\n'

    # Left: author credit. Centre: notes. Right: data and image sources.
    ax.text(0, 1, 'By: @TJStats', ha='left', va='top', fontsize=24)
    ax.text(0.5, 0.25, notes, ha='center', va='bottom', fontsize=16)
    ax.text(1, 1, 'Data: MLB, Fangraphs\nImages: MLB, ESPN', ha='right', va='top', fontsize=24)

    ax.axis('off')
|
721 |
+
|
722 |
+
|
723 |
+
# Function to get an image from a URL and display it on the given axis
|
724 |
+
def player_headshot(player_input: str, ax: plt.Axes, sport_id: int,season: int):
    """Download a player's headshot from img.mlbstatic.com and draw it on ``ax``.

    Parameters
    ----------
    player_input : str
        Player id interpolated into the headshot URL.
    ax : plt.Axes
        Axis that receives the image. Its frame is hidden in every case.
    sport_id : int
        ``1`` selects the ``silo`` headshot URL; any other value selects the
        ``milb`` headshot URL, which is drawn in a narrower extent.
    season : int
        Not used by this function; kept so existing callers keep working.

    Notes
    -----
    When the request fails or the response is not a readable image, the axis
    is simply left blank.
    """
    base_url = 'https://img.mlbstatic.com/mlb-photos/image/upload/'

    # The two sources differ only in URL and in where the image is placed.
    if int(sport_id) == 1:
        url = (f'{base_url}d_people:generic:headshot:67:current.png'
               f'/w_640,q_auto:best/v1/people/{player_input}/headshot/silo/current.png')
        extent = [0, 1, 0, 1]
    else:
        url = f'{base_url}c_fill,g_auto/w_640/v1/people/{player_input}/headshot/milb/current.png'
        extent = [1/6, 5/6, 0, 1]

    try:
        # A timeout keeps a stalled image host from hanging the whole figure.
        response = requests.get(url, timeout=10)
        img = Image.open(BytesIO(response.content))
    except (requests.RequestException, PIL.UnidentifiedImageError):
        # No usable image: leave an empty, frameless axis.
        ax.axis('off')
        return

    # Display the image on the axis
    ax.set_xlim(0, 1.3)
    ax.set_ylim(0, 1)
    ax.imshow(img, extent=extent, origin='upper')

    # Turn off the axis
    ax.axis('off')
|
757 |
+
|
758 |
+
|
759 |
+
def player_bio(pitcher_id: str, ax: plt.Axes,sport_id: int,year_input: int):
    """Write the player's name, bio line and season label on ``ax``.

    Parameters
    ----------
    pitcher_id : str
        Player id passed to the statsapi.mlb.com ``people`` endpoint.
    ax : plt.Axes
        Axis used as a text canvas; its frame is hidden.
    sport_id : int
        Sport id looked up in the statsapi.mlb.com ``sports`` endpoint to get
        the abbreviation shown in the season label.
    year_input : int
        Season year shown in the season label.
    """
    request_timeout = 10  # seconds; avoids hanging forever on a stalled API

    # Fetch the player's bio record.
    url = f"https://statsapi.mlb.com/api/v1/people?personIds={pitcher_id}&hydrate=currentTeam"
    person = requests.get(url, timeout=request_timeout).json()['people'][0]

    player_name = person['fullName']
    pitcher_hand = person['pitchHand']['code']
    age = person['currentAge']
    height = person['height']
    weight = person['weight']

    # Display the player's name, handedness, age, height, and weight on the axis
    ax.text(0.5, 1, f'{player_name}', va='top', ha='center', fontsize=56)
    ax.text(0.5, 0.7, f'{pitcher_hand}HP, Age:{age}, {height}/{weight}', va='top', ha='center', fontsize=30)
    ax.text(0.5, 0.45, 'Season Pitching Summary', va='top', ha='center', fontsize=40)

    # Look up the sport abbreviation. The id is coerced with int(), as
    # player_headshot does, so a string id still matches the numeric ids in
    # the response.
    sports = requests.get(url='https://statsapi.mlb.com/api/v1/sports',
                          timeout=request_timeout).json()['sports']
    wanted_id = int(sport_id)
    abb = next((sport.get('abbreviation') for sport in sports if sport.get('id') == wanted_id), None)

    # An unknown sport id drops the abbreviation instead of raising.
    season_label = f'{year_input} {abb} Season' if abb else f'{year_input} Season'
    ax.text(0.5, 0.20, season_label, va='top', ha='center', fontsize=30, fontstyle='italic')

    # Turn off the axis
    ax.axis('off')
|
789 |
+
|
790 |
+
|
791 |
+
def plot_logo(pitcher_id: str, ax: plt.Axes,df_team: pl.DataFrame,df_players : pl.DataFrame):
    """Draw the ESPN logo of the pitcher's parent MLB organisation on ``ax``.

    Parameters
    ----------
    pitcher_id : str
        Value matched against ``df_players['player_id']``.
    ax : plt.Axes
        Axis that receives the logo. Its frame is hidden in every case.
    df_team : pl.DataFrame
        Team lookup with ``team_id``, ``parent_org_id`` and
        ``parent_org_abbreviation`` columns.
    df_players : pl.DataFrame
        Player lookup with ``player_id`` and ``team`` columns.

    Notes
    -----
    The axis is left blank when the player or team cannot be resolved, the
    abbreviation has no logo, a request fails, or the logo is not a readable
    image.
    """
    logo_template = 'https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/{}.png&h=500&w=500'

    # Abbreviations used by the team table. ESPN's file name is the lower-cased
    # abbreviation except for the overrides below.
    team_abbreviations = (
        'AZ', 'ATL', 'BAL', 'BOS', 'CHC', 'CWS', 'CIN', 'CLE', 'COL', 'DET',
        'HOU', 'KC', 'LAA', 'LAD', 'MIA', 'MIL', 'MIN', 'NYM', 'NYY', 'OAK',
        'PHI', 'PIT', 'SD', 'SF', 'SEA', 'STL', 'TB', 'TEX', 'TOR', 'WSH',
        'ATH',
    )
    espn_name_overrides = {'AZ': 'ari', 'CWS': 'chw', 'ATH': 'oak'}
    image_dict = {
        abb: logo_template.format(espn_name_overrides.get(abb, abb.lower()))
        for abb in team_abbreviations
    }

    request_timeout = 10  # seconds

    try:
        # Indexing a plain list raises IndexError when no row matched, which
        # is handled below together with missing keys.
        team_id = df_players.filter(pl.col('player_id') == pitcher_id)['team'].to_list()[0]

        # Fetch the team record for the player's team.
        url_team = f'https://statsapi.mlb.com/api/v1/teams/{team_id}'
        team = requests.get(url_team, timeout=request_timeout).json()['teams'][0]

        # A team that is itself a parent organisation is looked up by its own
        # id; any other team is resolved through its parentOrgId.
        if team['id'] in df_team['parent_org_id']:
            matches = df_team.filter(pl.col('team_id') == team['id'])
        else:
            matches = df_team.filter(pl.col('parent_org_id') == team['parentOrgId'])
        team_abb = matches['parent_org_abbreviation'].to_list()[0]

        # Download and decode the logo.
        response = requests.get(image_dict[team_abb], timeout=request_timeout)
        img = Image.open(BytesIO(response.content))
    except (KeyError, IndexError, requests.RequestException, PIL.UnidentifiedImageError):
        ax.axis('off')
        return

    # Display the image on the axis
    ax.set_xlim(0, 1.3)
    ax.set_ylim(0, 1)
    ax.imshow(img, extent=[0.3, 1.3, 0, 1], origin='upper')

    # Turn off the axis
    ax.axis('off')
|
865 |
+
|
866 |
+
# Value sent as the ``month`` query parameter of the Fangraphs leaderboard
# request for each split name.
splits = dict(All=0, LHH=13, RHH=14)

# Suffix appended to the table title for each split name.
splits_title = dict(All='', LHH=' vs LHH', RHH=' vs RHH')
|
879 |
+
|
880 |
+
|
881 |
+
def fangraphs_pitching_leaderboards(season: int,
                                    split: str,
                                    start_date: str = '2024-01-01',
                                    end_date: str = '2024-12-31'):
    """
    Fetch pitching leaderboards data from Fangraphs.

    Parameters
    ----------
    season : int
        The season year.
    split : str
        The split type (e.g., 'All', 'LHH', 'RHH'). Must be a key of ``splits``.
    start_date : str, optional
        The start date for the data (default is '2024-01-01').
    end_date : str, optional
        The end date for the data (default is '2024-12-31').

    Returns
    -------
    pl.DataFrame
        The DataFrame containing the pitching leaderboards data.

    Raises
    ------
    KeyError
        If ``split`` is not a key of ``splits``.
    """
    # Pass the query as ``params`` so requests builds the URL. The previous
    # triple-quoted f-string put its line breaks and indentation inside the
    # query string.
    params = {
        'age': '',
        'pos': 'all',
        'stats': 'pit',
        'lg': 'all',
        'season': season,
        'season1': season,
        'startdate': start_date,
        'enddate': end_date,
        'ind': 0,
        'qual': 0,
        'type': 8,
        'month': splits[split],
        'pageitems': 500000,
    }

    data = requests.get('https://www.fangraphs.com/api/leaders/major-league/data',
                        params=params,
                        timeout=30).json()
    df = pl.DataFrame(data=data['data'], infer_schema_length=1000)
    return df
|
912 |
+
|
913 |
+
|
914 |
+
def fangraphs_table(df: pl.DataFrame,
                    ax: plt.Axes,
                    player_input: str,
                    season: int,
                    split: str):
    """
    Create a table of Fangraphs pitching leaderboards data for a specific player.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch-level data; the first and last ``game_date`` values are used as
        the query date range and in the title.
    ax : plt.Axes
        The axis to plot the table on.
    player_input : str
        Player id matched against the leaderboard's ``xMLBAMID`` column.
        NOTE(review): annotated ``str`` here, but ``stat_summary_table``
        forwards its ``int``-annotated id - confirm the expected type.
    season : int
        The season year.
    split : str
        The split type (e.g., 'All', 'LHH', 'RHH').
    """
    start_date = df['game_date'][0]
    end_date = df['game_date'][-1]
    title = f'{start_date} to {end_date}{splits_title[split]}'

    # Fetch Fangraphs pitching leaderboards data
    df_fangraphs = fangraphs_pitching_leaderboards(season=season,
                                                   split=split,
                                                   start_date=start_date,
                                                   end_date=end_date).filter(pl.col('xMLBAMID') == player_input)

    if df_fangraphs.height == 0:
        # The player has no leaderboard row for this range/split: show the
        # header only instead of failing on the row lookup below.
        ax.text(0.5, 0.9, title, va='bottom', ha='center',
                fontsize=36, fontstyle='italic')
        ax.axis('off')
        return

    df_fangraphs = df_fangraphs.with_columns(
        ((pl.col('Strikes')/pl.col('Pitches'))).alias('strikePercentage'),
    )

    # Select relevant columns for the table
    plot_table = df_fangraphs.select(['IP', 'WHIP', 'ERA', 'TBF', 'FIP', 'K%', 'BB%', 'K-BB%','strikePercentage'])

    # Format table values; missing (null) and placeholder values are shown as '---'.
    plot_table_values = []
    for stat_name in plot_table.columns:
        value = plot_table[stat_name][0]
        if value is None or value == '---':
            plot_table_values.append('---')
        else:
            plot_table_values.append(format(value, fangraphs_stats_dict[stat_name]['format']))

    # Create the table
    table_fg = ax.table(cellText=[plot_table_values], colLabels=plot_table.columns, cellLoc='center',
                        bbox=[0.0, 0.1, 1, 0.7])

    # Set font size for the table
    min_font_size = 20
    table_fg.set_fontsize(min_font_size)

    # Update column names with formatted headers
    new_column_names = [fangraphs_stats_dict[col]['table_header'] for col in plot_table.columns]
    for i, col_name in enumerate(new_column_names):
        table_fg.get_celld()[(0, i)].get_text().set_text(col_name)

    # Title above the table: date range plus split suffix.
    ax.text(0.5, 0.9, title, va='bottom', ha='center',
            fontsize=36, fontstyle='italic')
    ax.axis('off')
|
969 |
+
|
970 |
+
|
971 |
+
def stat_summary_table(df: pl.DataFrame,
                       player_input: int,
                       sport_id: int,
                       ax: plt.Axes,
                       split: str = 'All'):
    """Draw a one-row pitching stat table for the date range covered by ``df``.

    Three layouts are possible:

    * first and last ``game_id`` are equal - single-game box-score line;
    * otherwise, ``sport_id`` other than 1 - rate stats from the MLB Stats API;
    * otherwise (``sport_id`` 1, several games) - delegated to
      ``fangraphs_table``.

    Parameters
    ----------
    df : pl.DataFrame
        Pitch-level data. Reads ``game_date``, ``game_id``, ``is_whiff`` and
        (single-game layout) ``batter_team``.
    player_input : int
        Player id used in the API request.
    sport_id : int
        Sport id; ``1`` selects the ``majorLeague`` app context.
    ax : plt.Axes
        Axis that receives the table; its frame is hidden.
    split : str, optional
        Split name forwarded to ``fangraphs_table`` (default 'All').
    """
    # Coerce once, as player_headshot does, so a string id such as '1' takes
    # the same path as the integer 1.
    is_mlb = int(sport_id) == 1
    single_game = df['game_id'][0] == df['game_id'][-1]

    if is_mlb and not single_game:
        # Multi-game MLB summaries come from Fangraphs. Branch before calling
        # the Stats API so that request is not made for nothing, and take the
        # season from the data rather than a fixed year.
        fangraphs_table(df=df,
                        ax=ax,
                        player_input=player_input,
                        season=int(pd.to_datetime(df['game_date'][0]).year),
                        split=split)
        return

    start_date_format = str(pd.to_datetime(df['game_date'][0]).strftime('%m/%d/%Y'))
    end_date_format = str(pd.to_datetime(df['game_date'][-1]).strftime('%m/%d/%Y'))

    appContext = 'majorLeague' if is_mlb else 'minorLeague'

    stats_url = (f'https://statsapi.mlb.com/api/v1/people/{player_input}?appContext={appContext}'
                 f'&hydrate=stats(group=[pitching],type=[byDateRange],sportId={sport_id},'
                 f'startDate={start_date_format},endDate={end_date_format})')
    pitcher_stats_call = requests.get(stats_url, timeout=10).json()

    # The last split of the first stats group holds the stat line for the range.
    stat_line = pitcher_stats_call['people'][0]['stats'][0]['splits'][-1]['stat']
    pitcher_stats_call_df = pl.DataFrame(data=dict(stat_line))

    def as_percent(ratio):
        # Render a ratio as text such as '25.3%'. String addition is used
        # because ``str.concat`` joins the column's rows with the delimiter
        # rather than appending it to each value.
        return (ratio * 100).round(1).cast(pl.Utf8) + pl.lit('%')

    pitcher_stats_call_df = pitcher_stats_call_df.with_columns(
        pl.lit(df['is_whiff'].sum()).alias('whiffs'),
        as_percent(pl.col('strikeOuts')/pl.col('battersFaced')).alias('k_percent'),
        as_percent(pl.col('baseOnBalls')/pl.col('battersFaced')).alias('bb_percent'),
        as_percent((pl.col('strikeOuts') - pl.col('baseOnBalls'))/pl.col('battersFaced')).alias('k_bb_percent'),
        # (13*HR + 3*(BB + HBP) - 2*K) / innings + 3.15, shown with two decimals.
        (((pl.col('homeRuns')*13 + 3*((pl.col('baseOnBalls'))+(pl.col('hitByPitch')))-2*(pl.col('strikeOuts'))))/((pl.col('outs'))/3)+3.15).round(2).map_elements(lambda x: f"{x:.2f}").alias('fip'),
        as_percent(pl.col('strikes')/pl.col('numberOfPitches')).alias('strikePercentage'),
    )

    if single_game:
        pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched','battersFaced','earnedRuns','hits','strikeOuts','baseOnBalls','hitByPitch','homeRuns','strikePercentage','whiffs'])
        new_column_names = ['$\\bf{IP}$','$\\bf{PA}$','$\\bf{ER}$','$\\bf{H}$','$\\bf{K}$','$\\bf{BB}$','$\\bf{HBP}$','$\\bf{HR}$','$\\bf{Strike\\%}$','$\\bf{Whiffs}$']
        title = f'{df["game_date"][0]} vs {df["batter_team"][0]}'
    else:
        pitcher_stats_call_df_small = pitcher_stats_call_df.select(['inningsPitched','battersFaced','whip','era','fip','k_percent','bb_percent','k_bb_percent','strikePercentage'])
        new_column_names = ['$\\bf{IP}$','$\\bf{PA}$','$\\bf{WHIP}$','$\\bf{ERA}$','$\\bf{FIP}$','$\\bf{K\\%}$','$\\bf{BB\\%}$','$\\bf{K-BB\\%}$','$\\bf{Strike\\%}$']
        title = f'{df["game_date"][0]} to {df["game_date"][-1]}'

    table_fg = ax.table(cellText=pitcher_stats_call_df_small.to_numpy(), colLabels=pitcher_stats_call_df_small.columns, cellLoc='center',
                        bbox=[0.0, 0.1, 1, 0.7])

    min_font_size = 20
    table_fg.set_fontsize(min_font_size)

    # Replace the raw column labels with the formatted headers.
    for i, col_name in enumerate(new_column_names):
        table_fg.get_celld()[(0, i)].get_text().set_text(col_name)

    ax.text(0.5, 0.9, title, va='bottom', ha='center',
            fontsize=36, fontstyle='italic')

    ax.axis('off')
|
functions/statcast_2024_grouped.csv
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pitch_type,pitch,release_speed,pfx_z,pfx_x,release_spin_rate,release_pos_x,release_pos_z,release_extension,delta_run_exp,swing,whiff,in_zone,out_zone,chase,xwoba,pitch_usage,whiff_rate,in_zone_rate,chase_rate,delta_run_exp_per_100,all
|
2 |
+
CH,74155,85.46226725895522,5.247514143364433,-3.9745011679246045,1803.342540762527,-0.5077629855663421,5.740925968432281,6.449406057002311,204.631,37385,11538,28912,45151,15250,0.28973564881286695,0.10218846333521206,0.30862645446034503,0.38988604949093114,0.3377555314389493,-0.27595037421616886,
|
3 |
+
CS,22,66.38181818181819,-7.232727272727273,5.176363636363637,2039.2727272727273,-1.7981818181818183,6.5177272727272735,6.0636363636363635,-0.6290000000000001,9,2,10,12,2,0.13466666666666668,3.0316852449257168e-05,0.2222222222222222,0.45454545454545453,0.16666666666666666,2.85909090909091,
|
4 |
+
CU,47579,79.40938533133989,-9.345106445703216,4.516206279348902,2568.8591051473077,-0.6765712059634863,5.9438438375202685,6.401792908519479,93.57199999999999,19910,6150,20751,26738,7749,0.28049767649520974,0.0655657055765094,0.3088900050226017,0.4361377918829736,0.28981225222529733,-0.1966665966077471,
|
5 |
+
EP,576,50.51909722222222,16.357291666666665,-3.8287500000000003,1256.7152777777778,-0.9668749999999999,6.647100694444444,4.442013888888889,23.643,252,7,207,369,106,0.3971430703517588,0.0007937503186714604,0.027777777777777776,0.359375,0.2872628726287263,-4.104687500000001,
|
6 |
+
FA,635,67.81354330708662,15.865511811023623,-3.7226456692913388,1674.0144694533763,-1.1163779527559055,6.317716535433071,4.92488188976378,15.495,284,29,296,339,73,0.43393490999999995,0.0008750546047853774,0.10211267605633803,0.46614173228346456,0.2153392330383481,-2.4401574803149604,
|
7 |
+
FC,58379,89.56435813713696,8.08895396195288,1.5509243697478992,2389.231715947733,-0.9745362684951281,5.8461769002079365,6.403954996645393,-20.390000000000015,28753,6674,30002,28189,7757,0.34077822947428493,0.08044852405159929,0.23211490974854798,0.5139176758765994,0.2751782610238036,0.034926942907552404,
|
8 |
+
FF,230412,94.27369496062718,15.720274827472318,-3.1074418968484365,2296.591789895323,-0.7685432927147252,5.821400777026439,6.524392110813926,-80.28400000000002,113157,24741,127386,102722,24808,0.3401256910065045,0.3175166639335565,0.21864312415493517,0.5528618301130149,0.2415062012032476,0.03484367133656234,
|
9 |
+
FO,168,82.07916666666667,1.7357142857142858,0.1378571428571428,946.8154761904761,-0.5333333333333333,5.8914285714285715,6.666666666666667,2.539,89,29,60,108,43,0.27798747368421056,0.0002315105096125093,0.3258426966292135,0.35714285714285715,0.39814814814814814,-1.511309523809524,
|
10 |
+
FS,21727,86.31228885718231,2.979608781700189,-8.76550651263405,1302.3992981808108,-1.4640824780227366,5.742066553136651,6.508958525345622,-16.641000000000005,11333,3906,7982,13745,4946,0.2548785060302361,0.02994064787113684,0.34465719579987647,0.3673769963639711,0.3598399417970171,0.07659133796658538,
|
11 |
+
KC,11916,81.79965592480698,-9.370896273917422,4.895297079556898,2444.1642796967144,-0.8788083249412554,5.940037764350453,6.434007553503986,-12.997000000000003,5312,1860,4858,7058,2316,0.25845137325418993,0.016420709717515837,0.3501506024096386,0.40768714333669015,0.32813828279965995,0.10907183618663985,
|
12 |
+
KN,971,76.94819773429454,-2.9453759011328526,-5.356498455200824,263.56326987681973,-1.2303398558187437,5.542131822863028,6.45653964984552,12.681,426,113,428,543,130,0.2870389181034483,0.0013380756240103959,0.2652582159624413,0.4407826982492276,0.23941068139963168,-1.3059732234809474,
|
13 |
+
PO,55,91.24909090909091,13.11709090909091,-6.399272727272727,2195.3818181818183,-1.494181818181818,5.861272727272727,6.305454545454546,0.0,0,0,1,54,0,,7.579213112314292e-05,,0.01818181818181818,0.0,-0.0,
|
14 |
+
SC,159,81.02264150943397,-3.1056603773584905,-8.001509433962264,2050.5974842767296,-1.0535849056603774,6.110377358490566,6.064150943396227,4.623,58,13,63,96,20,0.35349463636363637,0.0002191081608832677,0.22413793103448276,0.39622641509433965,0.20833333333333334,-2.9075471698113207,
|
15 |
+
SI,116002,93.34805382235511,7.567078832293412,-6.148476070311284,2147.3631502060834,-0.7671983511070397,5.622119363257688,6.435364206296976,-32.837000000000025,53318,7390,65492,50222,12474,0.3501967420378125,0.15985525080994228,0.13860234817510034,0.5645764728194341,0.2483772052088726,0.028307270564300636,
|
16 |
+
SL,116390,85.60138786052518,1.5759858803271631,2.7325110632802407,2435.5705519351436,-0.9811034007748601,5.761407576409815,6.433055359327349,-167.41500000000002,56606,19101,52478,63672,20396,0.2818607008786495,0.16038992984404735,0.337437727449387,0.45088065985050263,0.3203291870838045,0.14383967694819144,
|
17 |
+
ST,43821,81.8580155633144,1.4796932977339632,7.821825152324228,2575.3661920073496,-1.080187124894457,5.4607240820611125,6.40352674793587,-52.96800000000001,20035,6276,19349,24472,7531,0.25978070794500324,0.0603870359626772,0.3132518093336661,0.44154629059126904,0.30773945733899966,0.12087355377558708,
|
18 |
+
SV,2702,81.67483345669874,-4.788941524796447,7.356861584011844,2470.624858757062,-0.5779570688378979,5.420762398223538,6.227296392711045,0.19299999999999926,1117,339,1138,1564,479,0.2907683709923664,0.0037234606962678577,0.3034914950760967,0.42116950407105846,0.3062659846547315,-0.007142857142857115,
|
19 |
+
All,725669,89.1521052747817,7.058379139422499,-1.2140087540219224,2255.6768252515376,-0.8282529777063689,5.758824349487279,6.456550518555369,-20.178000000000118,352163,89742,359413,365054,104080,0.3147037524825,1.0,0.25483085957354973,0.4952850404247667,0.28510850449522535,0.002780606585095976,all
|
joblib_model/__pycache__/feature_engineering.cpython-39.pyc
ADDED
Binary file (2.14 kB). View file
|
|
joblib_model/barrel_model.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9428e89f2a408148377efb3cd169dc8790bcc89df9495cb895b9db5a955e8fb7
|
3 |
+
size 11447
|
joblib_model/in_zone.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5300b15a6ccfb1dd1e79c85bd9ea478a1945c454845e6be31cd8815e4063a3e
|
3 |
+
size 54459064
|
joblib_model/in_zone_model_knn_20240410.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82d6d95be88b006bea7efd4bbf0464a0a50f261f6f65f060bf022114300721ed
|
3 |
+
size 46782024
|
joblib_model/linear_reg_model_x.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:179663ae0fa65c626b9a941b6934bda1ce58bdf02a69c0daefc28abd28154201
|
3 |
+
size 579
|
joblib_model/linear_reg_model_z.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ceabc302949cdbe5515b428f900bce98d6f6bedf99153c8d8a645cb0240ef8b
|
3 |
+
size 579
|
joblib_model/model_attack_zone.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2671d4db2606cfee299dcffba2a94138fce77c1b7ef6ad14695a972a38dda3c8
|
3 |
+
size 50570139
|
joblib_model/no_swing.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3da3e7ab2b513b87d05e90ae30c788ac819dfcaa7cc1cd9943fc13d2958a00f
|
3 |
+
size 279409
|
joblib_model/swing.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fef4a66363e5f3fdc70ae45c5382bd986c800ff8bf9296a1f9b334461e70fd4
|
3 |
+
size 262137
|
joblib_model/xwoba_model.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05bade9c0420657d3f0dfe35f0b1adbd2d5ae25c87a07bdf6629987f29926438
|
3 |
+
size 10684246
|
stuff_model/__pycache__/feature_engineering.cpython-39.pyc
ADDED
Binary file (2.17 kB). View file
|
|
stuff_model/__pycache__/stuff_apply.cpython-39.pyc
ADDED
Binary file (1.33 kB). View file
|
|
stuff_model/feature_engineering.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import polars as pl
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
def feature_engineering(df: pl.DataFrame) -> pl.DataFrame:
    """Add approach-angle and fastball-differential features to pitch-level data.

    Reads ``game_date``, ``vy0``, ``ay``, ``y0``, ``vz0``, ``az``, ``vx0``,
    ``ax``, ``pitcher_hand``, ``hb``, ``x0``, ``pitch_type``, ``pitcher_id``
    and ``start_speed``.

    Adds ``year``, ``vy_f``, ``t``, ``vz_f``, ``vx_f``, ``vaa``, ``haa``,
    ``avg_fastball_speed``, ``avg_fastball_az``, ``avg_fastball_ax``,
    ``speed_diff``, ``az_diff``, ``ax_diff`` and ``all``, plus the primary
    fastball's pitch type and count carried over from the join. ``ax``,
    ``hb`` and ``x0`` are overwritten with handedness-adjusted values.

    Parameters
    ----------
    df : pl.DataFrame
        One row per pitch.

    Returns
    -------
    pl.DataFrame
        The input rows with the feature columns added. Pitchers with no
        SI/FF/FC pitch in a year are kept and use the fallbacks below.
    """
    # Extract the year from the game_date column
    df = df.with_columns(
        pl.col('game_date').str.slice(0, 4).alias('year')
    )

    # Final y-velocity from constant acceleration between y0 and y = 17/12
    # (presumably the front of home plate, 17 in expressed in feet - confirm).
    df = df.with_columns([
        (-(pl.col('vy0')**2 - (2 * pl.col('ay') * (pl.col('y0') - 17/12)))**0.5).alias('vy_f'),
    ])

    # Time needed to go from vy0 to vy_f.
    df = df.with_columns([
        ((pl.col('vy_f') - pl.col('vy0')) / pl.col('ay')).alias('t'),
    ])

    # Final z and x velocities after that time.
    df = df.with_columns([
        (pl.col('vz0') + (pl.col('az') * pl.col('t'))).alias('vz_f'),
        (pl.col('vx0') + (pl.col('ax') * pl.col('t'))).alias('vx_f')
    ])

    # Approach angles in degrees.
    df = df.with_columns([
        (-np.arctan(pl.col('vz_f') / pl.col('vy_f')) * (180 / np.pi)).alias('vaa'),
        (-np.arctan(pl.col('vx_f') / pl.col('vy_f')) * (180 / np.pi)).alias('haa')
    ])

    # Handedness adjustments (applied after the angles, which use the raw ax).
    # NOTE(review): ax and hb are negated for left-handers, while x0 is negated
    # for everyone else. Kept exactly as-is because the saved model's inputs
    # were presumably built with this convention - confirm before changing.
    is_lefty = pl.col('pitcher_hand') == 'L'
    df = df.with_columns([
        pl.when(is_lefty).then(-pl.col('ax')).otherwise(pl.col('ax')).alias('ax'),
        pl.when(is_lefty).then(-pl.col('hb')).otherwise(pl.col('hb')).alias('hb'),
        pl.when(is_lefty).then(pl.col('x0')).otherwise(-pl.col('x0')).alias('x0'),
    ])

    # Define the pitch types to be considered
    pitch_types = ['SI', 'FF', 'FC']

    # Filter the DataFrame to include only the specified pitch types
    df_filtered = df.filter(pl.col('pitch_type').is_in(pitch_types))

    # Per pitcher, year and fastball type: average speed/az/ax and pitch count
    df_agg = df_filtered.group_by(['pitcher_id', 'year', 'pitch_type']).agg([
        pl.col('start_speed').mean().alias('avg_fastball_speed'),
        pl.col('az').mean().alias('avg_fastball_az'),
        pl.col('ax').mean().alias('avg_fastball_ax'),
        pl.len().alias('count')
    ])

    # Keep one row per pitcher-year: most thrown fastball type, ties broken by speed
    df_agg = df_agg.sort(['count', 'avg_fastball_speed'], descending=[True, True])
    df_agg = df_agg.unique(subset=['pitcher_id', 'year'], keep='first')

    # Left join so pitchers without any SI/FF/FC pitch keep their rows; an
    # inner join would drop them and the null fallbacks below could never run.
    df = df.join(df_agg, on=['pitcher_id', 'year'], how='left')

    # If no fastball, fall back to the pitcher's maximum of the matching raw
    # column (start_speed, az, ax), each taken over that pitcher's rows.
    fallback_source = {
        'avg_fastball_speed': 'start_speed',
        'avg_fastball_az': 'az',
        'avg_fastball_ax': 'ax',
    }
    df = df.with_columns([
        pl.when(pl.col(avg_col).is_null())
        .then(pl.col(raw_col).max().over('pitcher_id'))
        .otherwise(pl.col(avg_col))
        .alias(avg_col)
        for avg_col, raw_col in fallback_source.items()
    ])

    # Calculate pitch differentials
    df = df.with_columns(
        (pl.col('start_speed') - pl.col('avg_fastball_speed')).alias('speed_diff'),
        (pl.col('az') - pl.col('avg_fastball_az')).alias('az_diff'),
        (pl.col('ax') - pl.col('avg_fastball_ax')).abs().alias('ax_diff')
    )

    # Cast the year column to integer type
    df = df.with_columns(
        pl.col('year').cast(pl.Int64)
    )

    # Constant column holding the literal 'All' on every row.
    df = df.with_columns([
        pl.lit('All').alias('all')
    ])

    return df
|
stuff_model/lgbm_model_2020_2023.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41001a1acf6ce7dbe247f1b8b7e68a1bb1b112f39d080b7e95a83479e56cb7c1
|
3 |
+
size 3092328
|
stuff_model/stuff_apply.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import polars as pl
|
2 |
+
import joblib
|
3 |
+
|
4 |
+
# Pre-trained model used by stuff_apply; loaded once when the module is imported.
# The path is relative to the process working directory.
model = joblib.load('stuff_model/lgbm_model_2020_2023.joblib')

# target_stats.txt stores two floats, one per line:
# line 1 is the target mean, line 2 is the target standard deviation.
with open('stuff_model/target_stats.txt', 'r') as stats_file:
    stat_lines = stats_file.readlines()

target_mean, target_std = (float(stat_lines[i].strip()) for i in (0, 1))

# Model input columns. The order matters: stuff_apply selects the columns in
# this order before converting them to the array passed to model.predict.
features = [
    'start_speed', 'spin_rate', 'extension',
    'az', 'ax',
    'x0', 'z0',
    'speed_diff', 'az_diff', 'ax_diff',
]
|
22 |
+
|
23 |
+
|
24 |
+
def stuff_apply(df:pl.DataFrame) -> pl.DataFrame:
    """Score every pitch in ``df`` with the stuff model and grade it by pitch type.

    Columns added to the returned frame:

    * ``target`` - raw prediction of the module-level ``model`` on ``features``.
    * ``target_zscore`` - ``target`` standardized with ``target_mean`` and
      ``target_std`` read from ``stuff_model/target_stats.txt``.
    * ``tj_stuff_plus`` - ``100 - 10 * target_zscore`` (a lower predicted
      target gives a higher value).
    * ``pitch_grade`` - ``tj_stuff_plus`` standardized against the per-pitch-type
      ``mean``/``std`` from ``stuff_model/tj_stuff_plus_pitch.csv``, rescaled to
      ``z * 10 + 50`` and clipped to [20, 80].

    All columns of the reference CSV are also present in the result because of
    the join.

    Args:
        df: Pitch-level data containing every column named in ``features``
            plus ``pitch_type``. No rows are filtered beforehand, so nulls in
            the feature columns are passed to the model as-is.

    Returns:
        A new DataFrame; ``df`` itself is not modified. The join on
        ``pitch_type`` is an inner join, so rows whose pitch type is absent
        from the reference CSV are dropped. ``pitch_grade`` is null where the
        reference ``std`` is 0 (see below).
    """
    # with_columns returns a new frame, so no explicit copy of df is needed.
    predictions = model.predict(df[features].to_numpy())
    df_test = df.with_columns(
        pl.Series(name="target", values=predictions)
    )

    # Standardize the prediction to a z-score using the stored target stats.
    df_test = df_test.with_columns(
        ((pl.col('target') - target_mean) / target_std).alias('target_zscore')
    )

    # Convert the z-score to tj_stuff_plus: centered at 100, 10 points per
    # standard deviation, sign flipped so a lower target scores higher.
    df_test = df_test.with_columns(
        (100 - (pl.col('target_zscore') * 10)).alias('tj_stuff_plus')
    )

    # Per-pitch-type reference distribution of tj_stuff_plus.
    df_pitch_types = pl.read_csv('stuff_model/tj_stuff_plus_pitch.csv')

    # Attach the reference statistics to each pitch (inner join on pitch_type).
    df_pitch_all = df_test.join(df_pitch_types, left_on='pitch_type', right_on='pitch_type')

    # z-score of tj_stuff_plus within the pitch type. Some reference rows have
    # std == 0 (e.g. CS and KN in the shipped CSV); dividing by it would give
    # NaN/inf, which the clip below would turn into a bogus 20/80 or leave as
    # NaN. The grade is undefined there, so emit null instead.
    spread = pl.col('std')
    df_pitch_all = df_pitch_all.with_columns(
        pl.when(spread != 0)
        .then((pl.col('tj_stuff_plus') - pl.col('mean')) / spread)
        .otherwise(None)
        .alias('pitch_grade')
    )

    # Map the z-score onto the 20-80 scale (50 = average, 10 per std) and
    # clamp to the ends of the scale.
    df_pitch_all = df_pitch_all.with_columns(
        (pl.col('pitch_grade') * 10 + 50).clip(20, 80).alias('pitch_grade')
    )
    return df_pitch_all
|
stuff_model/target_stats.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
0.0034732498406374636
|
2 |
+
0.006846752748626548
|
stuff_model/tj_stuff_plus_pitch.csv
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pitch_type,mean,std,median,min,max,percentile_1,percentile_99
|
2 |
+
ST,106.44784631565936,5.593943599731136,106.24878922952112,91.18894850636659,125.29541262167034,91.69322149368426,125.25688309207108
|
3 |
+
SV,103.73183202363764,3.001226780758946,103.50047554089315,93.3173875900245,111.34757479687066,93.32953434698274,111.33689503153641
|
4 |
+
SL,103.49296290610897,5.265572779780409,103.19144262214559,88.84957017284297,121.88798777026031,89.76670287371176,121.36013955239422
|
5 |
+
KC,101.8993919341341,4.271694896723436,100.79211889194949,93.69754063161618,119.4933202093256,93.75149298057133,119.38166236091195
|
6 |
+
All,99.9275100894791,5.01699442232884,99.65265124489378,84.73033633038408,116.94934527087541,86.65905811630736,116.7610246502804
|
7 |
+
CU,99.88832068607897,4.615228571103906,99.08993373693156,89.84495168337246,119.90089262632986,90.20429983334718,117.89567125997061
|
8 |
+
FC,98.83449547008738,5.811964883678063,98.54483029899575,83.20928731685326,119.78700324933075,83.34007602984008,118.21186533190846
|
9 |
+
FS,98.25541635267653,6.898952096824192,98.46204303842217,72.25450024197754,114.88400714657823,73.39595959354874,114.78967217449389
|
10 |
+
FO,98.15224613640243,1.081819065809178,99.94816563615653,94.0023252668585,100.50624750619224,94.0142169475971,100.50513134245217
|
11 |
+
FF,97.29024735737988,6.078459125845886,97.09670890504734,81.2230917971995,118.10419744965911,81.32311771953398,117.7938724746093
|
12 |
+
SC,97.27958020025409,1.2452898498180456,97.27958020025409,93.536223938276,101.02293646223218,93.54371065079995,101.01544974970822
|
13 |
+
CH,96.35866365133434,6.178939251378385,95.80884625564597,81.28802319264824,121.14136334013493,82.02275793969746,119.09639344796777
|
14 |
+
SI,95.14161603816645,4.9734372581529955,95.11657827702109,82.5850956341191,112.99618112461533,82.8856383780296,112.72626192694757
|
15 |
+
CS,93.97853627048322,0.0,93.97853627048322,93.97853627048322,93.97853627048322,93.97853627048322,93.97853627048322
|
16 |
+
KN,93.41890096234394,0.0,93.41890096234394,93.41890096234394,93.41890096234394,93.41890096234394,93.41890096234394
|