nesticot committed on
Commit
17a9bac
·
verified ·
1 Parent(s): b4afa01

Delete test_data_mlb.ipynb

Browse files
Files changed (1) hide show
  1. test_data_mlb.ipynb +0 -238
test_data_mlb.ipynb DELETED
@@ -1,238 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import time\n",
10
- "import requests\n",
11
- "import pandas as pd\n",
12
- "import seaborn as sns\n",
13
- "import matplotlib.pyplot as plt\n",
14
- "from matplotlib.pyplot import figure\n",
15
- "from matplotlib.offsetbox import OffsetImage, AnnotationBbox\n",
16
- "from scipy import stats\n",
17
- "import matplotlib.lines as mlines\n",
18
- "import matplotlib.transforms as mtransforms\n",
19
- "import numpy as np\n",
20
- "import time\n",
21
- "#import plotly.express as px\n",
22
- "#!pip install chart_studio\n",
23
- "#import chart_studio.tools as tls\n",
24
- "from bs4 import BeautifulSoup\n",
25
- "import matplotlib.pyplot as plt\n",
26
- "import numpy as np\n",
27
- "import matplotlib.font_manager as font_manager\n",
28
- "from datetime import datetime\n",
29
- "import pytz\n",
30
- "from matplotlib.ticker import MaxNLocator\n",
31
- "from matplotlib.patches import Ellipse\n",
32
- "import matplotlib.transforms as transforms\n",
33
- "from matplotlib.gridspec import GridSpec\n",
34
- "from datasets import load_dataset"
35
- ]
36
- },
37
- {
38
- "cell_type": "code",
39
- "execution_count": 2,
40
- "metadata": {},
41
- "outputs": [
42
- {
43
- "name": "stdout",
44
- "output_type": "stream",
45
- "text": [
46
- "Starting Everything:\n"
47
- ]
48
- },
49
- {
50
- "name": "stderr",
51
- "output_type": "stream",
52
- "text": [
53
- "Found cached dataset csv (C:/Users/thoma/.cache/huggingface/datasets/nesticot___csv/nesticot--mlb_data-a391519415fcbccf/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)\n",
54
- "100%|██████████| 1/1 [00:00<00:00, 2.02it/s]\n"
55
- ]
56
- }
57
- ],
58
- "source": [
59
- "\n",
60
- "colour_palette = ['#FFB000','#648FFF','#785EF0',\n",
61
- " '#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']\n",
62
- "\n",
63
- "print('Starting Everything:')\n",
64
- "# exit_velo_df = milb_a_ev_df.append([triple_a_ev_df,double_a_ev_df,a_high_a_ev_df,single_a_ev_df]).reset_index(drop=True)\n",
65
- "# player_df_all = mlb_a_player_df.append([triple_a_player_df,double_a_player_df,a_high_a_player_df,single_a_player_df]).reset_index(drop=True)\n",
66
- "# exit_velo_df = pd.read_csv('exit_velo_df_all.csv',index_col=[0])\n",
67
- "# player_df_all = pd.read_csv('player_df_all.csv',index_col=[0])\n",
68
- "\n",
69
- "# pa_df = pd.read_csv('pa_df_all.csv',index_col=[0])\n",
70
- "# pa_df_full_na = pa_df.dropna()\n",
71
- "\n",
72
- "### Import Datasets\n",
73
- "dataset = load_dataset('nesticot/mlb_data', data_files=['a_pitch_data_2023.csv',\n",
74
- " ])\n",
75
- "dataset_train = dataset['train']\n",
76
- "exit_velo_df = dataset_train.to_pandas().set_index(list(dataset_train.features.keys())[0]).reset_index(drop=True)\n",
77
- "colour_palette = ['#FFB000','#648FFF','#785EF0',\n",
78
- " '#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']\n",
79
- "\n",
80
- "\n"
81
- ]
82
- },
83
- {
84
- "cell_type": "code",
85
- "execution_count": 9,
86
- "metadata": {},
87
- "outputs": [
88
- {
89
- "data": {
90
- "text/plain": [
91
- "0 True\n",
92
- "1 True\n",
93
- "2 True\n",
94
- "3 True\n",
95
- "4 True\n",
96
- " ... \n",
97
- "575260 True\n",
98
- "575261 True\n",
99
- "575262 True\n",
100
- "575263 True\n",
101
- "575264 True\n",
102
- "Name: is_pitch, Length: 575265, dtype: bool"
103
- ]
104
- },
105
- "execution_count": 9,
106
- "metadata": {},
107
- "output_type": "execute_result"
108
- }
109
- ],
110
- "source": [
111
- "exit_velo_df['is_pitch']"
112
- ]
113
- },
114
- {
115
- "cell_type": "code",
116
- "execution_count": 18,
117
- "metadata": {},
118
- "outputs": [],
119
- "source": [
120
- "tl_df = exit_velo_df[exit_velo_df['batter_id'] == 699073].groupby(['batter_id','batter_name','batter_hand']).agg(\n",
121
- " pitches = ('is_pitch','sum'),\n",
122
- " swings = ('is_swing','sum'),\n",
123
- " whiffs = ('is_whiff','sum')\n",
124
- ")"
125
- ]
126
- },
127
- {
128
- "cell_type": "code",
129
- "execution_count": 19,
130
- "metadata": {},
131
- "outputs": [],
132
- "source": [
133
- "tl_df['whiff_rate'] = tl_df['whiffs'] / tl_df['swings']"
134
- ]
135
- },
136
- {
137
- "cell_type": "code",
138
- "execution_count": 20,
139
- "metadata": {},
140
- "outputs": [
141
- {
142
- "data": {
143
- "text/html": [
144
- "<div>\n",
145
- "<style scoped>\n",
146
- " .dataframe tbody tr th:only-of-type {\n",
147
- " vertical-align: middle;\n",
148
- " }\n",
149
- "\n",
150
- " .dataframe tbody tr th {\n",
151
- " vertical-align: top;\n",
152
- " }\n",
153
- "\n",
154
- " .dataframe thead th {\n",
155
- " text-align: right;\n",
156
- " }\n",
157
- "</style>\n",
158
- "<table border=\"1\" class=\"dataframe\">\n",
159
- " <thead>\n",
160
- " <tr style=\"text-align: right;\">\n",
161
- " <th></th>\n",
162
- " <th></th>\n",
163
- " <th></th>\n",
164
- " <th>pitches</th>\n",
165
- " <th>swings</th>\n",
166
- " <th>whiffs</th>\n",
167
- " <th>whiff_rate</th>\n",
168
- " </tr>\n",
169
- " <tr>\n",
170
- " <th>batter_id</th>\n",
171
- " <th>batter_name</th>\n",
172
- " <th>batter_hand</th>\n",
173
- " <th></th>\n",
174
- " <th></th>\n",
175
- " <th></th>\n",
176
- " <th></th>\n",
177
- " </tr>\n",
178
- " </thead>\n",
179
- " <tbody>\n",
180
- " <tr>\n",
181
- " <th rowspan=\"2\" valign=\"top\">699073</th>\n",
182
- " <th rowspan=\"2\" valign=\"top\">Thayron Liranzo</th>\n",
183
- " <th>L</th>\n",
184
- " <td>1344</td>\n",
185
- " <td>554</td>\n",
186
- " <td>189</td>\n",
187
- " <td>0.341155</td>\n",
188
- " </tr>\n",
189
- " <tr>\n",
190
- " <th>R</th>\n",
191
- " <td>343</td>\n",
192
- " <td>160</td>\n",
193
- " <td>60</td>\n",
194
- " <td>0.375</td>\n",
195
- " </tr>\n",
196
- " </tbody>\n",
197
- "</table>\n",
198
- "</div>"
199
- ],
200
- "text/plain": [
201
- " pitches swings whiffs whiff_rate\n",
202
- "batter_id batter_name batter_hand \n",
203
- "699073 Thayron Liranzo L 1344 554 189 0.341155\n",
204
- " R 343 160 60 0.375"
205
- ]
206
- },
207
- "execution_count": 20,
208
- "metadata": {},
209
- "output_type": "execute_result"
210
- }
211
- ],
212
- "source": [
213
- "tl_df"
214
- ]
215
- }
216
- ],
217
- "metadata": {
218
- "kernelspec": {
219
- "display_name": "Python 3",
220
- "language": "python",
221
- "name": "python3"
222
- },
223
- "language_info": {
224
- "codemirror_mode": {
225
- "name": "ipython",
226
- "version": 3
227
- },
228
- "file_extension": ".py",
229
- "mimetype": "text/x-python",
230
- "name": "python",
231
- "nbconvert_exporter": "python",
232
- "pygments_lexer": "ipython3",
233
- "version": "3.9.13"
234
- }
235
- },
236
- "nbformat": 4,
237
- "nbformat_minor": 2
238
- }