OmPrakashSingh1704 committed on
Commit
d6e2b99
1 Parent(s): d5bcd11

Upload Train_Test.ipynb

Browse files
Files changed (1) hide show
  1. Train_Test.ipynb +1316 -0
Train_Test.ipynb ADDED
@@ -0,0 +1,1316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "id": "initial_id",
6
+ "metadata": {
7
+ "ExecuteTime": {
8
+ "end_time": "2024-07-30T12:35:06.718909Z",
9
+ "start_time": "2024-07-30T12:35:06.081202Z"
10
+ }
11
+ },
12
+ "source": [
13
+ "import pandas as pd\n",
14
+ "import numpy as np"
15
+ ],
16
+ "outputs": [],
17
+ "execution_count": 1
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "id": "13ad721e84c34936",
22
+ "metadata": {
23
+ "ExecuteTime": {
24
+ "end_time": "2024-07-30T12:35:08.731721Z",
25
+ "start_time": "2024-07-30T12:35:06.720903Z"
26
+ }
27
+ },
28
+ "source": [
29
+ "df=pd.read_csv(r'D:\\pynb\\Walmart\\KNN\\archive\\WMT_Grocery_202209.csv')\n",
30
+ "df"
31
+ ],
32
+ "outputs": [
33
+ {
34
+ "name": "stderr",
35
+ "output_type": "stream",
36
+ "text": [
37
+ "C:\\Users\\thaku\\AppData\\Local\\Temp\\ipykernel_13136\\1153799610.py:1: DtypeWarning: Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
38
+ " df=pd.read_csv(r'D:\\pynb\\Walmart\\KNN\\archive\\WMT_Grocery_202209.csv')\n"
39
+ ]
40
+ },
41
+ {
42
+ "data": {
43
+ "text/plain": [
44
+ " SHIPPING_LOCATION DEPARTMENT CATEGORY SUBCATEGORY \\\n",
45
+ "0 79936 Deli Hummus, Dips, & Salsa NaN \n",
46
+ "1 79936 Deli Hummus, Dips, & Salsa NaN \n",
47
+ "2 79936 Deli Hummus, Dips, & Salsa NaN \n",
48
+ "3 79936 Deli Hummus, Dips, & Salsa NaN \n",
49
+ "4 79936 Deli Hummus, Dips, & Salsa NaN \n",
50
+ "... ... ... ... ... \n",
51
+ "568529 70072 Alcohol Wine White Wine \n",
52
+ "568530 70072 Alcohol Wine White Wine \n",
53
+ "568531 70072 Alcohol Wine White Wine \n",
54
+ "568532 70072 Alcohol Wine White Wine \n",
55
+ "568533 70072 Alcohol Wine White Wine \n",
56
+ "\n",
57
+ " BREADCRUMBS SKU \\\n",
58
+ "0 Deli/Hummus, Dips, & Salsa 110895339 \n",
59
+ "1 Deli/Hummus, Dips, & Salsa 105455228 \n",
60
+ "2 Deli/Hummus, Dips, & Salsa 128642379 \n",
61
+ "3 Deli/Hummus, Dips, & Salsa 366126367 \n",
62
+ "4 Deli/Hummus, Dips, & Salsa 160090316 \n",
63
+ "... ... ... \n",
64
+ "568529 Alcohol/Wine 593600139 \n",
65
+ "568530 Alcohol/Wine 333403243 \n",
66
+ "568531 Alcohol/Wine 526588325 \n",
67
+ "568532 Alcohol/Wine 286992782 \n",
68
+ "568533 Alcohol/Wine 160015930 \n",
69
+ "\n",
70
+ " PRODUCT_URL \\\n",
71
+ "0 https://www.walmart.com/ip/Marketside-Roasted-... \n",
72
+ "1 https://www.walmart.com/ip/Marketside-Roasted-... \n",
73
+ "2 https://www.walmart.com/ip/Marketside-Classic-... \n",
74
+ "3 https://www.walmart.com/ip/Marketside-Everythi... \n",
75
+ "4 https://www.walmart.com/ip/Price-s-Jalapeno-Di... \n",
76
+ "... ... \n",
77
+ "568529 https://www.walmart.com/ip/Farm-Fresh-Blueberr... \n",
78
+ "568530 https://www.walmart.com/ip/Farm-Fresh-Peach-Mo... \n",
79
+ "568531 https://www.walmart.com/ip/Farm-Fresh-Raspberr... \n",
80
+ "568532 https://www.walmart.com/ip/Farm-Fresh-Mango-Mo... \n",
81
+ "568533 https://www.walmart.com/ip/Ole-Orleans-Heritag... \n",
82
+ "\n",
83
+ " PRODUCT_NAME BRAND \\\n",
84
+ "0 Marketside Roasted Red Pepper Hummus, 10 Oz Marketside \n",
85
+ "1 Marketside Roasted Garlic Hummus, 10 Oz Marketside \n",
86
+ "2 Marketside Classic Hummus, 10 Oz Marketside \n",
87
+ "3 Marketside Everything Hummus, 10 oz Marketside \n",
88
+ "4 Price's Jalapeno Dip, 12 Oz. Price's \n",
89
+ "... ... ... \n",
90
+ "568529 Farm Fresh Blueberry Moscato 750ml Farm Fresh Wine Company \n",
91
+ "568530 Farm Fresh Peach Moscato 750 Ml Farm Fresh Wine Company \n",
92
+ "568531 Farm Fresh Raspberry Moscato 750ml Farm Fresh Wine Company \n",
93
+ "568532 Farm Fresh Mango Moscato 750ml Farm Fresh Wine Company \n",
94
+ "568533 Ole Orleans Heritage Riesling 750ml Ole Orleans \n",
95
+ "\n",
96
+ " PRICE_RETAIL PRICE_CURRENT PRODUCT_SIZE PROMOTION \\\n",
97
+ "0 2.67 2.67 10 NaN \n",
98
+ "1 2.67 2.67 10 NaN \n",
99
+ "2 2.67 2.67 10 NaN \n",
100
+ "3 2.67 2.67 10 NaN \n",
101
+ "4 3.12 3.12 12 NaN \n",
102
+ "... ... ... ... ... \n",
103
+ "568529 9.98 9.98 750 NaN \n",
104
+ "568530 9.98 9.98 750 NaN \n",
105
+ "568531 9.98 9.98 750 NaN \n",
106
+ "568532 9.98 9.98 750 NaN \n",
107
+ "568533 18.98 18.98 750 NaN \n",
108
+ "\n",
109
+ " RunDate tid \n",
110
+ "0 2022-09-11 21:20:04 16163804 \n",
111
+ "1 2022-09-11 21:20:04 16163805 \n",
112
+ "2 2022-09-11 21:20:04 16163806 \n",
113
+ "3 2022-09-11 21:20:04 16163807 \n",
114
+ "4 2022-09-11 21:20:04 16163808 \n",
115
+ "... ... ... \n",
116
+ "568529 2022-09-11 21:20:04 16732333 \n",
117
+ "568530 2022-09-11 21:20:04 16732334 \n",
118
+ "568531 2022-09-11 21:20:04 16732335 \n",
119
+ "568532 2022-09-11 21:20:04 16732336 \n",
120
+ "568533 2022-09-11 21:20:04 16732337 \n",
121
+ "\n",
122
+ "[568534 rows x 15 columns]"
123
+ ],
124
+ "text/html": [
125
+ "<div>\n",
126
+ "<style scoped>\n",
127
+ " .dataframe tbody tr th:only-of-type {\n",
128
+ " vertical-align: middle;\n",
129
+ " }\n",
130
+ "\n",
131
+ " .dataframe tbody tr th {\n",
132
+ " vertical-align: top;\n",
133
+ " }\n",
134
+ "\n",
135
+ " .dataframe thead th {\n",
136
+ " text-align: right;\n",
137
+ " }\n",
138
+ "</style>\n",
139
+ "<table border=\"1\" class=\"dataframe\">\n",
140
+ " <thead>\n",
141
+ " <tr style=\"text-align: right;\">\n",
142
+ " <th></th>\n",
143
+ " <th>SHIPPING_LOCATION</th>\n",
144
+ " <th>DEPARTMENT</th>\n",
145
+ " <th>CATEGORY</th>\n",
146
+ " <th>SUBCATEGORY</th>\n",
147
+ " <th>BREADCRUMBS</th>\n",
148
+ " <th>SKU</th>\n",
149
+ " <th>PRODUCT_URL</th>\n",
150
+ " <th>PRODUCT_NAME</th>\n",
151
+ " <th>BRAND</th>\n",
152
+ " <th>PRICE_RETAIL</th>\n",
153
+ " <th>PRICE_CURRENT</th>\n",
154
+ " <th>PRODUCT_SIZE</th>\n",
155
+ " <th>PROMOTION</th>\n",
156
+ " <th>RunDate</th>\n",
157
+ " <th>tid</th>\n",
158
+ " </tr>\n",
159
+ " </thead>\n",
160
+ " <tbody>\n",
161
+ " <tr>\n",
162
+ " <th>0</th>\n",
163
+ " <td>79936</td>\n",
164
+ " <td>Deli</td>\n",
165
+ " <td>Hummus, Dips, &amp; Salsa</td>\n",
166
+ " <td>NaN</td>\n",
167
+ " <td>Deli/Hummus, Dips, &amp; Salsa</td>\n",
168
+ " <td>110895339</td>\n",
169
+ " <td>https://www.walmart.com/ip/Marketside-Roasted-...</td>\n",
170
+ " <td>Marketside Roasted Red Pepper Hummus, 10 Oz</td>\n",
171
+ " <td>Marketside</td>\n",
172
+ " <td>2.67</td>\n",
173
+ " <td>2.67</td>\n",
174
+ " <td>10</td>\n",
175
+ " <td>NaN</td>\n",
176
+ " <td>2022-09-11 21:20:04</td>\n",
177
+ " <td>16163804</td>\n",
178
+ " </tr>\n",
179
+ " <tr>\n",
180
+ " <th>1</th>\n",
181
+ " <td>79936</td>\n",
182
+ " <td>Deli</td>\n",
183
+ " <td>Hummus, Dips, &amp; Salsa</td>\n",
184
+ " <td>NaN</td>\n",
185
+ " <td>Deli/Hummus, Dips, &amp; Salsa</td>\n",
186
+ " <td>105455228</td>\n",
187
+ " <td>https://www.walmart.com/ip/Marketside-Roasted-...</td>\n",
188
+ " <td>Marketside Roasted Garlic Hummus, 10 Oz</td>\n",
189
+ " <td>Marketside</td>\n",
190
+ " <td>2.67</td>\n",
191
+ " <td>2.67</td>\n",
192
+ " <td>10</td>\n",
193
+ " <td>NaN</td>\n",
194
+ " <td>2022-09-11 21:20:04</td>\n",
195
+ " <td>16163805</td>\n",
196
+ " </tr>\n",
197
+ " <tr>\n",
198
+ " <th>2</th>\n",
199
+ " <td>79936</td>\n",
200
+ " <td>Deli</td>\n",
201
+ " <td>Hummus, Dips, &amp; Salsa</td>\n",
202
+ " <td>NaN</td>\n",
203
+ " <td>Deli/Hummus, Dips, &amp; Salsa</td>\n",
204
+ " <td>128642379</td>\n",
205
+ " <td>https://www.walmart.com/ip/Marketside-Classic-...</td>\n",
206
+ " <td>Marketside Classic Hummus, 10 Oz</td>\n",
207
+ " <td>Marketside</td>\n",
208
+ " <td>2.67</td>\n",
209
+ " <td>2.67</td>\n",
210
+ " <td>10</td>\n",
211
+ " <td>NaN</td>\n",
212
+ " <td>2022-09-11 21:20:04</td>\n",
213
+ " <td>16163806</td>\n",
214
+ " </tr>\n",
215
+ " <tr>\n",
216
+ " <th>3</th>\n",
217
+ " <td>79936</td>\n",
218
+ " <td>Deli</td>\n",
219
+ " <td>Hummus, Dips, &amp; Salsa</td>\n",
220
+ " <td>NaN</td>\n",
221
+ " <td>Deli/Hummus, Dips, &amp; Salsa</td>\n",
222
+ " <td>366126367</td>\n",
223
+ " <td>https://www.walmart.com/ip/Marketside-Everythi...</td>\n",
224
+ " <td>Marketside Everything Hummus, 10 oz</td>\n",
225
+ " <td>Marketside</td>\n",
226
+ " <td>2.67</td>\n",
227
+ " <td>2.67</td>\n",
228
+ " <td>10</td>\n",
229
+ " <td>NaN</td>\n",
230
+ " <td>2022-09-11 21:20:04</td>\n",
231
+ " <td>16163807</td>\n",
232
+ " </tr>\n",
233
+ " <tr>\n",
234
+ " <th>4</th>\n",
235
+ " <td>79936</td>\n",
236
+ " <td>Deli</td>\n",
237
+ " <td>Hummus, Dips, &amp; Salsa</td>\n",
238
+ " <td>NaN</td>\n",
239
+ " <td>Deli/Hummus, Dips, &amp; Salsa</td>\n",
240
+ " <td>160090316</td>\n",
241
+ " <td>https://www.walmart.com/ip/Price-s-Jalapeno-Di...</td>\n",
242
+ " <td>Price's Jalapeno Dip, 12 Oz.</td>\n",
243
+ " <td>Price's</td>\n",
244
+ " <td>3.12</td>\n",
245
+ " <td>3.12</td>\n",
246
+ " <td>12</td>\n",
247
+ " <td>NaN</td>\n",
248
+ " <td>2022-09-11 21:20:04</td>\n",
249
+ " <td>16163808</td>\n",
250
+ " </tr>\n",
251
+ " <tr>\n",
252
+ " <th>...</th>\n",
253
+ " <td>...</td>\n",
254
+ " <td>...</td>\n",
255
+ " <td>...</td>\n",
256
+ " <td>...</td>\n",
257
+ " <td>...</td>\n",
258
+ " <td>...</td>\n",
259
+ " <td>...</td>\n",
260
+ " <td>...</td>\n",
261
+ " <td>...</td>\n",
262
+ " <td>...</td>\n",
263
+ " <td>...</td>\n",
264
+ " <td>...</td>\n",
265
+ " <td>...</td>\n",
266
+ " <td>...</td>\n",
267
+ " <td>...</td>\n",
268
+ " </tr>\n",
269
+ " <tr>\n",
270
+ " <th>568529</th>\n",
271
+ " <td>70072</td>\n",
272
+ " <td>Alcohol</td>\n",
273
+ " <td>Wine</td>\n",
274
+ " <td>White Wine</td>\n",
275
+ " <td>Alcohol/Wine</td>\n",
276
+ " <td>593600139</td>\n",
277
+ " <td>https://www.walmart.com/ip/Farm-Fresh-Blueberr...</td>\n",
278
+ " <td>Farm Fresh Blueberry Moscato 750ml</td>\n",
279
+ " <td>Farm Fresh Wine Company</td>\n",
280
+ " <td>9.98</td>\n",
281
+ " <td>9.98</td>\n",
282
+ " <td>750</td>\n",
283
+ " <td>NaN</td>\n",
284
+ " <td>2022-09-11 21:20:04</td>\n",
285
+ " <td>16732333</td>\n",
286
+ " </tr>\n",
287
+ " <tr>\n",
288
+ " <th>568530</th>\n",
289
+ " <td>70072</td>\n",
290
+ " <td>Alcohol</td>\n",
291
+ " <td>Wine</td>\n",
292
+ " <td>White Wine</td>\n",
293
+ " <td>Alcohol/Wine</td>\n",
294
+ " <td>333403243</td>\n",
295
+ " <td>https://www.walmart.com/ip/Farm-Fresh-Peach-Mo...</td>\n",
296
+ " <td>Farm Fresh Peach Moscato 750 Ml</td>\n",
297
+ " <td>Farm Fresh Wine Company</td>\n",
298
+ " <td>9.98</td>\n",
299
+ " <td>9.98</td>\n",
300
+ " <td>750</td>\n",
301
+ " <td>NaN</td>\n",
302
+ " <td>2022-09-11 21:20:04</td>\n",
303
+ " <td>16732334</td>\n",
304
+ " </tr>\n",
305
+ " <tr>\n",
306
+ " <th>568531</th>\n",
307
+ " <td>70072</td>\n",
308
+ " <td>Alcohol</td>\n",
309
+ " <td>Wine</td>\n",
310
+ " <td>White Wine</td>\n",
311
+ " <td>Alcohol/Wine</td>\n",
312
+ " <td>526588325</td>\n",
313
+ " <td>https://www.walmart.com/ip/Farm-Fresh-Raspberr...</td>\n",
314
+ " <td>Farm Fresh Raspberry Moscato 750ml</td>\n",
315
+ " <td>Farm Fresh Wine Company</td>\n",
316
+ " <td>9.98</td>\n",
317
+ " <td>9.98</td>\n",
318
+ " <td>750</td>\n",
319
+ " <td>NaN</td>\n",
320
+ " <td>2022-09-11 21:20:04</td>\n",
321
+ " <td>16732335</td>\n",
322
+ " </tr>\n",
323
+ " <tr>\n",
324
+ " <th>568532</th>\n",
325
+ " <td>70072</td>\n",
326
+ " <td>Alcohol</td>\n",
327
+ " <td>Wine</td>\n",
328
+ " <td>White Wine</td>\n",
329
+ " <td>Alcohol/Wine</td>\n",
330
+ " <td>286992782</td>\n",
331
+ " <td>https://www.walmart.com/ip/Farm-Fresh-Mango-Mo...</td>\n",
332
+ " <td>Farm Fresh Mango Moscato 750ml</td>\n",
333
+ " <td>Farm Fresh Wine Company</td>\n",
334
+ " <td>9.98</td>\n",
335
+ " <td>9.98</td>\n",
336
+ " <td>750</td>\n",
337
+ " <td>NaN</td>\n",
338
+ " <td>2022-09-11 21:20:04</td>\n",
339
+ " <td>16732336</td>\n",
340
+ " </tr>\n",
341
+ " <tr>\n",
342
+ " <th>568533</th>\n",
343
+ " <td>70072</td>\n",
344
+ " <td>Alcohol</td>\n",
345
+ " <td>Wine</td>\n",
346
+ " <td>White Wine</td>\n",
347
+ " <td>Alcohol/Wine</td>\n",
348
+ " <td>160015930</td>\n",
349
+ " <td>https://www.walmart.com/ip/Ole-Orleans-Heritag...</td>\n",
350
+ " <td>Ole Orleans Heritage Riesling 750ml</td>\n",
351
+ " <td>Ole Orleans</td>\n",
352
+ " <td>18.98</td>\n",
353
+ " <td>18.98</td>\n",
354
+ " <td>750</td>\n",
355
+ " <td>NaN</td>\n",
356
+ " <td>2022-09-11 21:20:04</td>\n",
357
+ " <td>16732337</td>\n",
358
+ " </tr>\n",
359
+ " </tbody>\n",
360
+ "</table>\n",
361
+ "<p>568534 rows × 15 columns</p>\n",
362
+ "</div>"
363
+ ]
364
+ },
365
+ "execution_count": 2,
366
+ "metadata": {},
367
+ "output_type": "execute_result"
368
+ }
369
+ ],
370
+ "execution_count": 2
371
+ },
372
+ {
373
+ "cell_type": "code",
374
+ "id": "e65e1fbd9770b4",
375
+ "metadata": {
376
+ "ExecuteTime": {
377
+ "end_time": "2024-07-30T12:35:08.778596Z",
378
+ "start_time": "2024-07-30T12:35:08.732718Z"
379
+ }
380
+ },
381
+ "source": [
382
+ "df=df[['PRODUCT_NAME','DEPARTMENT','CATEGORY','BREADCRUMBS','BRAND']]\n",
383
+ "df['PRODUCT']=df['PRODUCT_NAME']"
384
+ ],
385
+ "outputs": [
386
+ {
387
+ "name": "stderr",
388
+ "output_type": "stream",
389
+ "text": [
390
+ "C:\\Users\\thaku\\AppData\\Local\\Temp\\ipykernel_13136\\2027505516.py:2: SettingWithCopyWarning: \n",
391
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
392
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
393
+ "\n",
394
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
395
+ " df['PRODUCT']=df['PRODUCT_NAME']\n"
396
+ ]
397
+ }
398
+ ],
399
+ "execution_count": 3
400
+ },
401
+ {
402
+ "metadata": {
403
+ "ExecuteTime": {
404
+ "end_time": "2024-07-30T12:35:08.903031Z",
405
+ "start_time": "2024-07-30T12:35:08.780590Z"
406
+ }
407
+ },
408
+ "cell_type": "code",
409
+ "source": "df.isnull().sum()",
410
+ "id": "fa1760637e52808f",
411
+ "outputs": [
412
+ {
413
+ "data": {
414
+ "text/plain": [
415
+ "PRODUCT_NAME 0\n",
416
+ "DEPARTMENT 0\n",
417
+ "CATEGORY 0\n",
418
+ "BREADCRUMBS 0\n",
419
+ "BRAND 27\n",
420
+ "PRODUCT 0\n",
421
+ "dtype: int64"
422
+ ]
423
+ },
424
+ "execution_count": 4,
425
+ "metadata": {},
426
+ "output_type": "execute_result"
427
+ }
428
+ ],
429
+ "execution_count": 4
430
+ },
431
+ {
432
+ "metadata": {
433
+ "ExecuteTime": {
434
+ "end_time": "2024-07-30T12:35:09.045434Z",
435
+ "start_time": "2024-07-30T12:35:08.905158Z"
436
+ }
437
+ },
438
+ "cell_type": "code",
439
+ "source": "df = df[df['BRAND'].apply(lambda x: isinstance(x, str))]",
440
+ "id": "e33c2a31c09617ba",
441
+ "outputs": [],
442
+ "execution_count": 5
443
+ },
444
+ {
445
+ "cell_type": "code",
446
+ "id": "dc247f93acd769a",
447
+ "metadata": {
448
+ "ExecuteTime": {
449
+ "end_time": "2024-07-30T12:35:09.406463Z",
450
+ "start_time": "2024-07-30T12:35:09.046431Z"
451
+ }
452
+ },
453
+ "source": [
454
+ "df = df.dropna()  # dropna() returns a new frame; rebind it so the rows are actually removed\n",
455
+ "def is_string(value):  # True when value is a str instance\n",
456
+ " return isinstance(value, str)\n",
457
+ "\n",
458
+ "# Identify rows in 'BRAND' column where the value is not a string\n",
459
+ "non_string_rows = df[~df['BRAND'].apply(is_string)].index\n",
460
+ "print(non_string_rows)\n",
461
+ "# Display the frame without those rows; the result is not assigned, so df itself is unchanged\n",
462
+ "df.drop(index=non_string_rows)"
463
+ ],
464
+ "outputs": [
465
+ {
466
+ "name": "stdout",
467
+ "output_type": "stream",
468
+ "text": [
469
+ "Index([], dtype='int64')\n"
470
+ ]
471
+ },
472
+ {
473
+ "data": {
474
+ "text/plain": [
475
+ " PRODUCT_NAME DEPARTMENT \\\n",
476
+ "0 Marketside Roasted Red Pepper Hummus, 10 Oz Deli \n",
477
+ "1 Marketside Roasted Garlic Hummus, 10 Oz Deli \n",
478
+ "2 Marketside Classic Hummus, 10 Oz Deli \n",
479
+ "3 Marketside Everything Hummus, 10 oz Deli \n",
480
+ "4 Price's Jalapeno Dip, 12 Oz. Deli \n",
481
+ "... ... ... \n",
482
+ "568529 Farm Fresh Blueberry Moscato 750ml Alcohol \n",
483
+ "568530 Farm Fresh Peach Moscato 750 Ml Alcohol \n",
484
+ "568531 Farm Fresh Raspberry Moscato 750ml Alcohol \n",
485
+ "568532 Farm Fresh Mango Moscato 750ml Alcohol \n",
486
+ "568533 Ole Orleans Heritage Riesling 750ml Alcohol \n",
487
+ "\n",
488
+ " CATEGORY BREADCRUMBS \\\n",
489
+ "0 Hummus, Dips, & Salsa Deli/Hummus, Dips, & Salsa \n",
490
+ "1 Hummus, Dips, & Salsa Deli/Hummus, Dips, & Salsa \n",
491
+ "2 Hummus, Dips, & Salsa Deli/Hummus, Dips, & Salsa \n",
492
+ "3 Hummus, Dips, & Salsa Deli/Hummus, Dips, & Salsa \n",
493
+ "4 Hummus, Dips, & Salsa Deli/Hummus, Dips, & Salsa \n",
494
+ "... ... ... \n",
495
+ "568529 Wine Alcohol/Wine \n",
496
+ "568530 Wine Alcohol/Wine \n",
497
+ "568531 Wine Alcohol/Wine \n",
498
+ "568532 Wine Alcohol/Wine \n",
499
+ "568533 Wine Alcohol/Wine \n",
500
+ "\n",
501
+ " BRAND PRODUCT \n",
502
+ "0 Marketside Marketside Roasted Red Pepper Hummus, 10 Oz \n",
503
+ "1 Marketside Marketside Roasted Garlic Hummus, 10 Oz \n",
504
+ "2 Marketside Marketside Classic Hummus, 10 Oz \n",
505
+ "3 Marketside Marketside Everything Hummus, 10 oz \n",
506
+ "4 Price's Price's Jalapeno Dip, 12 Oz. \n",
507
+ "... ... ... \n",
508
+ "568529 Farm Fresh Wine Company Farm Fresh Blueberry Moscato 750ml \n",
509
+ "568530 Farm Fresh Wine Company Farm Fresh Peach Moscato 750 Ml \n",
510
+ "568531 Farm Fresh Wine Company Farm Fresh Raspberry Moscato 750ml \n",
511
+ "568532 Farm Fresh Wine Company Farm Fresh Mango Moscato 750ml \n",
512
+ "568533 Ole Orleans Ole Orleans Heritage Riesling 750ml \n",
513
+ "\n",
514
+ "[568507 rows x 6 columns]"
515
+ ],
516
+ "text/html": [
517
+ "<div>\n",
518
+ "<style scoped>\n",
519
+ " .dataframe tbody tr th:only-of-type {\n",
520
+ " vertical-align: middle;\n",
521
+ " }\n",
522
+ "\n",
523
+ " .dataframe tbody tr th {\n",
524
+ " vertical-align: top;\n",
525
+ " }\n",
526
+ "\n",
527
+ " .dataframe thead th {\n",
528
+ " text-align: right;\n",
529
+ " }\n",
530
+ "</style>\n",
531
+ "<table border=\"1\" class=\"dataframe\">\n",
532
+ " <thead>\n",
533
+ " <tr style=\"text-align: right;\">\n",
534
+ " <th></th>\n",
535
+ " <th>PRODUCT_NAME</th>\n",
536
+ " <th>DEPARTMENT</th>\n",
537
+ " <th>CATEGORY</th>\n",
538
+ " <th>BREADCRUMBS</th>\n",
539
+ " <th>BRAND</th>\n",
540
+ " <th>PRODUCT</th>\n",
541
+ " </tr>\n",
542
+ " </thead>\n",
543
+ " <tbody>\n",
544
+ " <tr>\n",
545
+ " <th>0</th>\n",
546
+ " <td>Marketside Roasted Red Pepper Hummus, 10 Oz</td>\n",
547
+ " <td>Deli</td>\n",
548
+ " <td>Hummus, Dips, &amp; Salsa</td>\n",
549
+ " <td>Deli/Hummus, Dips, &amp; Salsa</td>\n",
550
+ " <td>Marketside</td>\n",
551
+ " <td>Marketside Roasted Red Pepper Hummus, 10 Oz</td>\n",
552
+ " </tr>\n",
553
+ " <tr>\n",
554
+ " <th>1</th>\n",
555
+ " <td>Marketside Roasted Garlic Hummus, 10 Oz</td>\n",
556
+ " <td>Deli</td>\n",
557
+ " <td>Hummus, Dips, &amp; Salsa</td>\n",
558
+ " <td>Deli/Hummus, Dips, &amp; Salsa</td>\n",
559
+ " <td>Marketside</td>\n",
560
+ " <td>Marketside Roasted Garlic Hummus, 10 Oz</td>\n",
561
+ " </tr>\n",
562
+ " <tr>\n",
563
+ " <th>2</th>\n",
564
+ " <td>Marketside Classic Hummus, 10 Oz</td>\n",
565
+ " <td>Deli</td>\n",
566
+ " <td>Hummus, Dips, &amp; Salsa</td>\n",
567
+ " <td>Deli/Hummus, Dips, &amp; Salsa</td>\n",
568
+ " <td>Marketside</td>\n",
569
+ " <td>Marketside Classic Hummus, 10 Oz</td>\n",
570
+ " </tr>\n",
571
+ " <tr>\n",
572
+ " <th>3</th>\n",
573
+ " <td>Marketside Everything Hummus, 10 oz</td>\n",
574
+ " <td>Deli</td>\n",
575
+ " <td>Hummus, Dips, &amp; Salsa</td>\n",
576
+ " <td>Deli/Hummus, Dips, &amp; Salsa</td>\n",
577
+ " <td>Marketside</td>\n",
578
+ " <td>Marketside Everything Hummus, 10 oz</td>\n",
579
+ " </tr>\n",
580
+ " <tr>\n",
581
+ " <th>4</th>\n",
582
+ " <td>Price's Jalapeno Dip, 12 Oz.</td>\n",
583
+ " <td>Deli</td>\n",
584
+ " <td>Hummus, Dips, &amp; Salsa</td>\n",
585
+ " <td>Deli/Hummus, Dips, &amp; Salsa</td>\n",
586
+ " <td>Price's</td>\n",
587
+ " <td>Price's Jalapeno Dip, 12 Oz.</td>\n",
588
+ " </tr>\n",
589
+ " <tr>\n",
590
+ " <th>...</th>\n",
591
+ " <td>...</td>\n",
592
+ " <td>...</td>\n",
593
+ " <td>...</td>\n",
594
+ " <td>...</td>\n",
595
+ " <td>...</td>\n",
596
+ " <td>...</td>\n",
597
+ " </tr>\n",
598
+ " <tr>\n",
599
+ " <th>568529</th>\n",
600
+ " <td>Farm Fresh Blueberry Moscato 750ml</td>\n",
601
+ " <td>Alcohol</td>\n",
602
+ " <td>Wine</td>\n",
603
+ " <td>Alcohol/Wine</td>\n",
604
+ " <td>Farm Fresh Wine Company</td>\n",
605
+ " <td>Farm Fresh Blueberry Moscato 750ml</td>\n",
606
+ " </tr>\n",
607
+ " <tr>\n",
608
+ " <th>568530</th>\n",
609
+ " <td>Farm Fresh Peach Moscato 750 Ml</td>\n",
610
+ " <td>Alcohol</td>\n",
611
+ " <td>Wine</td>\n",
612
+ " <td>Alcohol/Wine</td>\n",
613
+ " <td>Farm Fresh Wine Company</td>\n",
614
+ " <td>Farm Fresh Peach Moscato 750 Ml</td>\n",
615
+ " </tr>\n",
616
+ " <tr>\n",
617
+ " <th>568531</th>\n",
618
+ " <td>Farm Fresh Raspberry Moscato 750ml</td>\n",
619
+ " <td>Alcohol</td>\n",
620
+ " <td>Wine</td>\n",
621
+ " <td>Alcohol/Wine</td>\n",
622
+ " <td>Farm Fresh Wine Company</td>\n",
623
+ " <td>Farm Fresh Raspberry Moscato 750ml</td>\n",
624
+ " </tr>\n",
625
+ " <tr>\n",
626
+ " <th>568532</th>\n",
627
+ " <td>Farm Fresh Mango Moscato 750ml</td>\n",
628
+ " <td>Alcohol</td>\n",
629
+ " <td>Wine</td>\n",
630
+ " <td>Alcohol/Wine</td>\n",
631
+ " <td>Farm Fresh Wine Company</td>\n",
632
+ " <td>Farm Fresh Mango Moscato 750ml</td>\n",
633
+ " </tr>\n",
634
+ " <tr>\n",
635
+ " <th>568533</th>\n",
636
+ " <td>Ole Orleans Heritage Riesling 750ml</td>\n",
637
+ " <td>Alcohol</td>\n",
638
+ " <td>Wine</td>\n",
639
+ " <td>Alcohol/Wine</td>\n",
640
+ " <td>Ole Orleans</td>\n",
641
+ " <td>Ole Orleans Heritage Riesling 750ml</td>\n",
642
+ " </tr>\n",
643
+ " </tbody>\n",
644
+ "</table>\n",
645
+ "<p>568507 rows × 6 columns</p>\n",
646
+ "</div>"
647
+ ]
648
+ },
649
+ "execution_count": 6,
650
+ "metadata": {},
651
+ "output_type": "execute_result"
652
+ }
653
+ ],
654
+ "execution_count": 6
655
+ },
656
+ {
657
+ "cell_type": "code",
658
+ "id": "3478dbf45d0de013",
659
+ "metadata": {
660
+ "ExecuteTime": {
661
+ "end_time": "2024-07-30T12:35:09.421458Z",
662
+ "start_time": "2024-07-30T12:35:09.407461Z"
663
+ }
664
+ },
665
+ "source": [
666
+ "import ast,re\n",
667
+ "def preprocess_text(text):\n",
668
+ " # Remove non-alphabet characters and extra spaces\n",
669
+ " text = re.sub(r'[^a-zA-Z\\s]', '', text)\n",
670
+ " text = re.sub(r'\\s+', ' ', text).strip()\n",
671
+ " return text.lower()"
672
+ ],
673
+ "outputs": [],
674
+ "execution_count": 7
675
+ },
676
+ {
677
+ "cell_type": "code",
678
+ "id": "47b4b465b97821bb",
679
+ "metadata": {
680
+ "ExecuteTime": {
681
+ "end_time": "2024-07-30T12:35:17.915264Z",
682
+ "start_time": "2024-07-30T12:35:09.422456Z"
683
+ }
684
+ },
685
+ "source": [
686
+ "df['PRODUCT']=df['PRODUCT'].apply(preprocess_text)\n",
687
+ "df['DEPARTMENT']=df['DEPARTMENT'].apply(preprocess_text)\n",
688
+ "df['CATEGORY']=df['CATEGORY'].apply(preprocess_text)\n",
689
+ "df['BREADCRUMBS']=df['BREADCRUMBS'].apply(preprocess_text)\n",
690
+ "df['BRAND']=df['BRAND'].apply(preprocess_text)"
691
+ ],
692
+ "outputs": [],
693
+ "execution_count": 8
694
+ },
695
+ {
696
+ "cell_type": "code",
697
+ "id": "bca9973bcd761828",
698
+ "metadata": {
699
+ "ExecuteTime": {
700
+ "end_time": "2024-07-30T12:35:21.683604Z",
701
+ "start_time": "2024-07-30T12:35:17.918253Z"
702
+ }
703
+ },
704
+ "source": [
705
+ "df['PRODUCT']=df['PRODUCT'].apply(lambda x:x.split())\n",
706
+ "df['DEPARTMENT']=df['DEPARTMENT'].apply(lambda x:x.split())\n",
707
+ "df['CATEGORY']=df['CATEGORY'].apply(lambda x:x.split())\n",
708
+ "df['BREADCRUMBS']=df['BREADCRUMBS'].apply(lambda x:x.split())\n",
709
+ "df['BRAND']=df['BRAND'].apply(lambda x:x.split())"
710
+ ],
711
+ "outputs": [],
712
+ "execution_count": 9
713
+ },
714
+ {
715
+ "cell_type": "code",
716
+ "id": "8240161bbf8dd746",
717
+ "metadata": {
718
+ "ExecuteTime": {
719
+ "end_time": "2024-07-30T12:35:24.181188Z",
720
+ "start_time": "2024-07-30T12:35:21.686396Z"
721
+ }
722
+ },
723
+ "source": "df['tags']=df['PRODUCT']+df['DEPARTMENT']+df['CATEGORY']+df['BREADCRUMBS']+df['BRAND']",
724
+ "outputs": [],
725
+ "execution_count": 10
726
+ },
727
+ {
728
+ "cell_type": "code",
729
+ "id": "620840a23eee5d5e",
730
+ "metadata": {
731
+ "ExecuteTime": {
732
+ "end_time": "2024-07-30T12:35:24.212213Z",
733
+ "start_time": "2024-07-30T12:35:24.182142Z"
734
+ }
735
+ },
736
+ "source": [
737
+ "df"
738
+ ],
739
+ "outputs": [
740
+ {
741
+ "data": {
742
+ "text/plain": [
743
+ " PRODUCT_NAME DEPARTMENT \\\n",
744
+ "0 Marketside Roasted Red Pepper Hummus, 10 Oz [deli] \n",
745
+ "1 Marketside Roasted Garlic Hummus, 10 Oz [deli] \n",
746
+ "2 Marketside Classic Hummus, 10 Oz [deli] \n",
747
+ "3 Marketside Everything Hummus, 10 oz [deli] \n",
748
+ "4 Price's Jalapeno Dip, 12 Oz. [deli] \n",
749
+ "... ... ... \n",
750
+ "568529 Farm Fresh Blueberry Moscato 750ml [alcohol] \n",
751
+ "568530 Farm Fresh Peach Moscato 750 Ml [alcohol] \n",
752
+ "568531 Farm Fresh Raspberry Moscato 750ml [alcohol] \n",
753
+ "568532 Farm Fresh Mango Moscato 750ml [alcohol] \n",
754
+ "568533 Ole Orleans Heritage Riesling 750ml [alcohol] \n",
755
+ "\n",
756
+ " CATEGORY BREADCRUMBS \\\n",
757
+ "0 [hummus, dips, salsa] [delihummus, dips, salsa] \n",
758
+ "1 [hummus, dips, salsa] [delihummus, dips, salsa] \n",
759
+ "2 [hummus, dips, salsa] [delihummus, dips, salsa] \n",
760
+ "3 [hummus, dips, salsa] [delihummus, dips, salsa] \n",
761
+ "4 [hummus, dips, salsa] [delihummus, dips, salsa] \n",
762
+ "... ... ... \n",
763
+ "568529 [wine] [alcoholwine] \n",
764
+ "568530 [wine] [alcoholwine] \n",
765
+ "568531 [wine] [alcoholwine] \n",
766
+ "568532 [wine] [alcoholwine] \n",
767
+ "568533 [wine] [alcoholwine] \n",
768
+ "\n",
769
+ " BRAND \\\n",
770
+ "0 [marketside] \n",
771
+ "1 [marketside] \n",
772
+ "2 [marketside] \n",
773
+ "3 [marketside] \n",
774
+ "4 [prices] \n",
775
+ "... ... \n",
776
+ "568529 [farm, fresh, wine, company] \n",
777
+ "568530 [farm, fresh, wine, company] \n",
778
+ "568531 [farm, fresh, wine, company] \n",
779
+ "568532 [farm, fresh, wine, company] \n",
780
+ "568533 [ole, orleans] \n",
781
+ "\n",
782
+ " PRODUCT \\\n",
783
+ "0 [marketside, roasted, red, pepper, hummus, oz] \n",
784
+ "1 [marketside, roasted, garlic, hummus, oz] \n",
785
+ "2 [marketside, classic, hummus, oz] \n",
786
+ "3 [marketside, everything, hummus, oz] \n",
787
+ "4 [prices, jalapeno, dip, oz] \n",
788
+ "... ... \n",
789
+ "568529 [farm, fresh, blueberry, moscato, ml] \n",
790
+ "568530 [farm, fresh, peach, moscato, ml] \n",
791
+ "568531 [farm, fresh, raspberry, moscato, ml] \n",
792
+ "568532 [farm, fresh, mango, moscato, ml] \n",
793
+ "568533 [ole, orleans, heritage, riesling, ml] \n",
794
+ "\n",
795
+ " tags \n",
796
+ "0 [marketside, roasted, red, pepper, hummus, oz,... \n",
797
+ "1 [marketside, roasted, garlic, hummus, oz, deli... \n",
798
+ "2 [marketside, classic, hummus, oz, deli, hummus... \n",
799
+ "3 [marketside, everything, hummus, oz, deli, hum... \n",
800
+ "4 [prices, jalapeno, dip, oz, deli, hummus, dips... \n",
801
+ "... ... \n",
802
+ "568529 [farm, fresh, blueberry, moscato, ml, alcohol,... \n",
803
+ "568530 [farm, fresh, peach, moscato, ml, alcohol, win... \n",
804
+ "568531 [farm, fresh, raspberry, moscato, ml, alcohol,... \n",
805
+ "568532 [farm, fresh, mango, moscato, ml, alcohol, win... \n",
806
+ "568533 [ole, orleans, heritage, riesling, ml, alcohol... \n",
807
+ "\n",
808
+ "[568507 rows x 7 columns]"
809
+ ],
810
+ "text/html": [
811
+ "<div>\n",
812
+ "<style scoped>\n",
813
+ " .dataframe tbody tr th:only-of-type {\n",
814
+ " vertical-align: middle;\n",
815
+ " }\n",
816
+ "\n",
817
+ " .dataframe tbody tr th {\n",
818
+ " vertical-align: top;\n",
819
+ " }\n",
820
+ "\n",
821
+ " .dataframe thead th {\n",
822
+ " text-align: right;\n",
823
+ " }\n",
824
+ "</style>\n",
825
+ "<table border=\"1\" class=\"dataframe\">\n",
826
+ " <thead>\n",
827
+ " <tr style=\"text-align: right;\">\n",
828
+ " <th></th>\n",
829
+ " <th>PRODUCT_NAME</th>\n",
830
+ " <th>DEPARTMENT</th>\n",
831
+ " <th>CATEGORY</th>\n",
832
+ " <th>BREADCRUMBS</th>\n",
833
+ " <th>BRAND</th>\n",
834
+ " <th>PRODUCT</th>\n",
835
+ " <th>tags</th>\n",
836
+ " </tr>\n",
837
+ " </thead>\n",
838
+ " <tbody>\n",
839
+ " <tr>\n",
840
+ " <th>0</th>\n",
841
+ " <td>Marketside Roasted Red Pepper Hummus, 10 Oz</td>\n",
842
+ " <td>[deli]</td>\n",
843
+ " <td>[hummus, dips, salsa]</td>\n",
844
+ " <td>[delihummus, dips, salsa]</td>\n",
845
+ " <td>[marketside]</td>\n",
846
+ " <td>[marketside, roasted, red, pepper, hummus, oz]</td>\n",
847
+ " <td>[marketside, roasted, red, pepper, hummus, oz,...</td>\n",
848
+ " </tr>\n",
849
+ " <tr>\n",
850
+ " <th>1</th>\n",
851
+ " <td>Marketside Roasted Garlic Hummus, 10 Oz</td>\n",
852
+ " <td>[deli]</td>\n",
853
+ " <td>[hummus, dips, salsa]</td>\n",
854
+ " <td>[delihummus, dips, salsa]</td>\n",
855
+ " <td>[marketside]</td>\n",
856
+ " <td>[marketside, roasted, garlic, hummus, oz]</td>\n",
857
+ " <td>[marketside, roasted, garlic, hummus, oz, deli...</td>\n",
858
+ " </tr>\n",
859
+ " <tr>\n",
860
+ " <th>2</th>\n",
861
+ " <td>Marketside Classic Hummus, 10 Oz</td>\n",
862
+ " <td>[deli]</td>\n",
863
+ " <td>[hummus, dips, salsa]</td>\n",
864
+ " <td>[delihummus, dips, salsa]</td>\n",
865
+ " <td>[marketside]</td>\n",
866
+ " <td>[marketside, classic, hummus, oz]</td>\n",
867
+ " <td>[marketside, classic, hummus, oz, deli, hummus...</td>\n",
868
+ " </tr>\n",
869
+ " <tr>\n",
870
+ " <th>3</th>\n",
871
+ " <td>Marketside Everything Hummus, 10 oz</td>\n",
872
+ " <td>[deli]</td>\n",
873
+ " <td>[hummus, dips, salsa]</td>\n",
874
+ " <td>[delihummus, dips, salsa]</td>\n",
875
+ " <td>[marketside]</td>\n",
876
+ " <td>[marketside, everything, hummus, oz]</td>\n",
877
+ " <td>[marketside, everything, hummus, oz, deli, hum...</td>\n",
878
+ " </tr>\n",
879
+ " <tr>\n",
880
+ " <th>4</th>\n",
881
+ " <td>Price's Jalapeno Dip, 12 Oz.</td>\n",
882
+ " <td>[deli]</td>\n",
883
+ " <td>[hummus, dips, salsa]</td>\n",
884
+ " <td>[delihummus, dips, salsa]</td>\n",
885
+ " <td>[prices]</td>\n",
886
+ " <td>[prices, jalapeno, dip, oz]</td>\n",
887
+ " <td>[prices, jalapeno, dip, oz, deli, hummus, dips...</td>\n",
888
+ " </tr>\n",
889
+ " <tr>\n",
890
+ " <th>...</th>\n",
891
+ " <td>...</td>\n",
892
+ " <td>...</td>\n",
893
+ " <td>...</td>\n",
894
+ " <td>...</td>\n",
895
+ " <td>...</td>\n",
896
+ " <td>...</td>\n",
897
+ " <td>...</td>\n",
898
+ " </tr>\n",
899
+ " <tr>\n",
900
+ " <th>568529</th>\n",
901
+ " <td>Farm Fresh Blueberry Moscato 750ml</td>\n",
902
+ " <td>[alcohol]</td>\n",
903
+ " <td>[wine]</td>\n",
904
+ " <td>[alcoholwine]</td>\n",
905
+ " <td>[farm, fresh, wine, company]</td>\n",
906
+ " <td>[farm, fresh, blueberry, moscato, ml]</td>\n",
907
+ " <td>[farm, fresh, blueberry, moscato, ml, alcohol,...</td>\n",
908
+ " </tr>\n",
909
+ " <tr>\n",
910
+ " <th>568530</th>\n",
911
+ " <td>Farm Fresh Peach Moscato 750 Ml</td>\n",
912
+ " <td>[alcohol]</td>\n",
913
+ " <td>[wine]</td>\n",
914
+ " <td>[alcoholwine]</td>\n",
915
+ " <td>[farm, fresh, wine, company]</td>\n",
916
+ " <td>[farm, fresh, peach, moscato, ml]</td>\n",
917
+ " <td>[farm, fresh, peach, moscato, ml, alcohol, win...</td>\n",
918
+ " </tr>\n",
919
+ " <tr>\n",
920
+ " <th>568531</th>\n",
921
+ " <td>Farm Fresh Raspberry Moscato 750ml</td>\n",
922
+ " <td>[alcohol]</td>\n",
923
+ " <td>[wine]</td>\n",
924
+ " <td>[alcoholwine]</td>\n",
925
+ " <td>[farm, fresh, wine, company]</td>\n",
926
+ " <td>[farm, fresh, raspberry, moscato, ml]</td>\n",
927
+ " <td>[farm, fresh, raspberry, moscato, ml, alcohol,...</td>\n",
928
+ " </tr>\n",
929
+ " <tr>\n",
930
+ " <th>568532</th>\n",
931
+ " <td>Farm Fresh Mango Moscato 750ml</td>\n",
932
+ " <td>[alcohol]</td>\n",
933
+ " <td>[wine]</td>\n",
934
+ " <td>[alcoholwine]</td>\n",
935
+ " <td>[farm, fresh, wine, company]</td>\n",
936
+ " <td>[farm, fresh, mango, moscato, ml]</td>\n",
937
+ " <td>[farm, fresh, mango, moscato, ml, alcohol, win...</td>\n",
938
+ " </tr>\n",
939
+ " <tr>\n",
940
+ " <th>568533</th>\n",
941
+ " <td>Ole Orleans Heritage Riesling 750ml</td>\n",
942
+ " <td>[alcohol]</td>\n",
943
+ " <td>[wine]</td>\n",
944
+ " <td>[alcoholwine]</td>\n",
945
+ " <td>[ole, orleans]</td>\n",
946
+ " <td>[ole, orleans, heritage, riesling, ml]</td>\n",
947
+ " <td>[ole, orleans, heritage, riesling, ml, alcohol...</td>\n",
948
+ " </tr>\n",
949
+ " </tbody>\n",
950
+ "</table>\n",
951
+ "<p>568507 rows × 7 columns</p>\n",
952
+ "</div>"
953
+ ]
954
+ },
955
+ "execution_count": 11,
956
+ "metadata": {},
957
+ "output_type": "execute_result"
958
+ }
959
+ ],
960
+ "execution_count": 11
961
+ },
962
+ {
963
+ "cell_type": "code",
964
+ "id": "3a28e095285cd9a",
965
+ "metadata": {
966
+ "ExecuteTime": {
967
+ "end_time": "2024-07-30T12:35:24.243648Z",
968
+ "start_time": "2024-07-30T12:35:24.213211Z"
969
+ }
970
+ },
971
+ "source": "new_df=df[['PRODUCT_NAME',\"tags\"]].copy()  # explicit copy: later column assignments on new_df must not target a slice of df",
972
+ "outputs": [],
973
+ "execution_count": 12
974
+ },
975
+ {
976
+ "cell_type": "code",
977
+ "id": "c672ae1826940651",
978
+ "metadata": {
979
+ "ExecuteTime": {
980
+ "end_time": "2024-07-30T12:35:24.258999Z",
981
+ "start_time": "2024-07-30T12:35:24.245254Z"
982
+ }
983
+ },
984
+ "source": [
985
+ "new_df"
986
+ ],
987
+ "outputs": [
988
+ {
989
+ "data": {
990
+ "text/plain": [
991
+ " PRODUCT_NAME \\\n",
992
+ "0 Marketside Roasted Red Pepper Hummus, 10 Oz \n",
993
+ "1 Marketside Roasted Garlic Hummus, 10 Oz \n",
994
+ "2 Marketside Classic Hummus, 10 Oz \n",
995
+ "3 Marketside Everything Hummus, 10 oz \n",
996
+ "4 Price's Jalapeno Dip, 12 Oz. \n",
997
+ "... ... \n",
998
+ "568529 Farm Fresh Blueberry Moscato 750ml \n",
999
+ "568530 Farm Fresh Peach Moscato 750 Ml \n",
1000
+ "568531 Farm Fresh Raspberry Moscato 750ml \n",
1001
+ "568532 Farm Fresh Mango Moscato 750ml \n",
1002
+ "568533 Ole Orleans Heritage Riesling 750ml \n",
1003
+ "\n",
1004
+ " tags \n",
1005
+ "0 [marketside, roasted, red, pepper, hummus, oz,... \n",
1006
+ "1 [marketside, roasted, garlic, hummus, oz, deli... \n",
1007
+ "2 [marketside, classic, hummus, oz, deli, hummus... \n",
1008
+ "3 [marketside, everything, hummus, oz, deli, hum... \n",
1009
+ "4 [prices, jalapeno, dip, oz, deli, hummus, dips... \n",
1010
+ "... ... \n",
1011
+ "568529 [farm, fresh, blueberry, moscato, ml, alcohol,... \n",
1012
+ "568530 [farm, fresh, peach, moscato, ml, alcohol, win... \n",
1013
+ "568531 [farm, fresh, raspberry, moscato, ml, alcohol,... \n",
1014
+ "568532 [farm, fresh, mango, moscato, ml, alcohol, win... \n",
1015
+ "568533 [ole, orleans, heritage, riesling, ml, alcohol... \n",
1016
+ "\n",
1017
+ "[568507 rows x 2 columns]"
1018
+ ],
1019
+ "text/html": [
1020
+ "<div>\n",
1021
+ "<style scoped>\n",
1022
+ " .dataframe tbody tr th:only-of-type {\n",
1023
+ " vertical-align: middle;\n",
1024
+ " }\n",
1025
+ "\n",
1026
+ " .dataframe tbody tr th {\n",
1027
+ " vertical-align: top;\n",
1028
+ " }\n",
1029
+ "\n",
1030
+ " .dataframe thead th {\n",
1031
+ " text-align: right;\n",
1032
+ " }\n",
1033
+ "</style>\n",
1034
+ "<table border=\"1\" class=\"dataframe\">\n",
1035
+ " <thead>\n",
1036
+ " <tr style=\"text-align: right;\">\n",
1037
+ " <th></th>\n",
1038
+ " <th>PRODUCT_NAME</th>\n",
1039
+ " <th>tags</th>\n",
1040
+ " </tr>\n",
1041
+ " </thead>\n",
1042
+ " <tbody>\n",
1043
+ " <tr>\n",
1044
+ " <th>0</th>\n",
1045
+ " <td>Marketside Roasted Red Pepper Hummus, 10 Oz</td>\n",
1046
+ " <td>[marketside, roasted, red, pepper, hummus, oz,...</td>\n",
1047
+ " </tr>\n",
1048
+ " <tr>\n",
1049
+ " <th>1</th>\n",
1050
+ " <td>Marketside Roasted Garlic Hummus, 10 Oz</td>\n",
1051
+ " <td>[marketside, roasted, garlic, hummus, oz, deli...</td>\n",
1052
+ " </tr>\n",
1053
+ " <tr>\n",
1054
+ " <th>2</th>\n",
1055
+ " <td>Marketside Classic Hummus, 10 Oz</td>\n",
1056
+ " <td>[marketside, classic, hummus, oz, deli, hummus...</td>\n",
1057
+ " </tr>\n",
1058
+ " <tr>\n",
1059
+ " <th>3</th>\n",
1060
+ " <td>Marketside Everything Hummus, 10 oz</td>\n",
1061
+ " <td>[marketside, everything, hummus, oz, deli, hum...</td>\n",
1062
+ " </tr>\n",
1063
+ " <tr>\n",
1064
+ " <th>4</th>\n",
1065
+ " <td>Price's Jalapeno Dip, 12 Oz.</td>\n",
1066
+ " <td>[prices, jalapeno, dip, oz, deli, hummus, dips...</td>\n",
1067
+ " </tr>\n",
1068
+ " <tr>\n",
1069
+ " <th>...</th>\n",
1070
+ " <td>...</td>\n",
1071
+ " <td>...</td>\n",
1072
+ " </tr>\n",
1073
+ " <tr>\n",
1074
+ " <th>568529</th>\n",
1075
+ " <td>Farm Fresh Blueberry Moscato 750ml</td>\n",
1076
+ " <td>[farm, fresh, blueberry, moscato, ml, alcohol,...</td>\n",
1077
+ " </tr>\n",
1078
+ " <tr>\n",
1079
+ " <th>568530</th>\n",
1080
+ " <td>Farm Fresh Peach Moscato 750 Ml</td>\n",
1081
+ " <td>[farm, fresh, peach, moscato, ml, alcohol, win...</td>\n",
1082
+ " </tr>\n",
1083
+ " <tr>\n",
1084
+ " <th>568531</th>\n",
1085
+ " <td>Farm Fresh Raspberry Moscato 750ml</td>\n",
1086
+ " <td>[farm, fresh, raspberry, moscato, ml, alcohol,...</td>\n",
1087
+ " </tr>\n",
1088
+ " <tr>\n",
1089
+ " <th>568532</th>\n",
1090
+ " <td>Farm Fresh Mango Moscato 750ml</td>\n",
1091
+ " <td>[farm, fresh, mango, moscato, ml, alcohol, win...</td>\n",
1092
+ " </tr>\n",
1093
+ " <tr>\n",
1094
+ " <th>568533</th>\n",
1095
+ " <td>Ole Orleans Heritage Riesling 750ml</td>\n",
1096
+ " <td>[ole, orleans, heritage, riesling, ml, alcohol...</td>\n",
1097
+ " </tr>\n",
1098
+ " </tbody>\n",
1099
+ "</table>\n",
1100
+ "<p>568507 rows × 2 columns</p>\n",
1101
+ "</div>"
1102
+ ]
1103
+ },
1104
+ "execution_count": 13,
1105
+ "metadata": {},
1106
+ "output_type": "execute_result"
1107
+ }
1108
+ ],
1109
+ "execution_count": 13
1110
+ },
1111
+ {
1112
+ "cell_type": "code",
1113
+ "id": "9f206d66e3a02e2d",
1114
+ "metadata": {
1115
+ "ExecuteTime": {
1116
+ "end_time": "2024-07-30T12:35:26.205605Z",
1117
+ "start_time": "2024-07-30T12:35:24.260533Z"
1118
+ }
1119
+ },
1120
+ "source": [
1121
+ "from sklearn.feature_extraction.text import CountVectorizer\n",
1122
+ "from nltk.stem.porter import PorterStemmer\n",
1123
+ "ps=PorterStemmer()\n",
1124
+ "cv=CountVectorizer(max_features=5000,stop_words='english')"
1125
+ ],
1126
+ "outputs": [],
1127
+ "execution_count": 14
1128
+ },
1129
+ {
1130
+ "cell_type": "code",
1131
+ "id": "179547695cf71375",
1132
+ "metadata": {
1133
+ "ExecuteTime": {
1134
+ "end_time": "2024-07-30T12:35:26.221713Z",
1135
+ "start_time": "2024-07-30T12:35:26.206601Z"
1136
+ }
1137
+ },
1138
+ "source": [
1139
+ "def stem(text):\n",
1139
+ "    \"\"\"Return the Porter stem of each element of ``text``, joined by single spaces.\"\"\"\n",
1140
+ "    # Iterates element by element, so ``text`` is presumably a list of tokens (a plain string would be stemmed per character).\n",
1141
+ "    stemmed_tokens=(ps.stem(token) for token in text)\n",
1142
+ "    return \" \".join(stemmed_tokens)"
1144
+ ],
1145
+ "outputs": [],
1146
+ "execution_count": 15
1147
+ },
1148
+ {
1149
+ "cell_type": "code",
1150
+ "id": "40e19aacfa32d7f9",
1151
+ "metadata": {
1152
+ "ExecuteTime": {
1153
+ "end_time": "2024-07-30T12:37:14.151419Z",
1154
+ "start_time": "2024-07-30T12:35:26.222722Z"
1155
+ }
1156
+ },
1157
+ "source": [
1158
+ "new_df['tags']=new_df['tags'].apply(stem)\n"
1159
+ ],
1160
+ "outputs": [
1161
+ {
1162
+ "name": "stderr",
1163
+ "output_type": "stream",
1164
+ "text": [
1165
+ "C:\\Users\\thaku\\AppData\\Local\\Temp\\ipykernel_13136\\1459480162.py:1: SettingWithCopyWarning: \n",
1166
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
1167
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
1168
+ "\n",
1169
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
1170
+ " new_df['tags']=new_df['tags'].apply(stem)\n"
1171
+ ]
1172
+ }
1173
+ ],
1174
+ "execution_count": 16
1175
+ },
1176
+ {
1177
+ "cell_type": "code",
1178
+ "id": "24975d8282c44c17",
1179
+ "metadata": {
1180
+ "ExecuteTime": {
1181
+ "end_time": "2024-07-30T12:37:21.969928Z",
1182
+ "start_time": "2024-07-30T12:37:14.153418Z"
1183
+ }
1184
+ },
1185
+ "source": [
1186
+ "vectors=cv.fit_transform(new_df['tags'])  # keep the scipy sparse matrix: a dense (568507, 5000) int64 array needs 21.2 GiB"
1187
+ ],
1188
+ "outputs": [
1189
+ {
1190
+ "ename": "MemoryError",
1191
+ "evalue": "Unable to allocate 21.2 GiB for an array with shape (568507, 5000) and data type int64",
1192
+ "output_type": "error",
1193
+ "traceback": [
1194
+ "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
1195
+ "\u001B[1;31mMemoryError\u001B[0m Traceback (most recent call last)",
1196
+ "Cell \u001B[1;32mIn[17], line 1\u001B[0m\n\u001B[1;32m----> 1\u001B[0m vectors\u001B[38;5;241m=\u001B[39m\u001B[43mcv\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfit_transform\u001B[49m\u001B[43m(\u001B[49m\u001B[43mnew_df\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mtags\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m]\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mtoarray\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n",
1197
+ "File \u001B[1;32mD:\\pynb\\Walmart\\venv\\lib\\site-packages\\scipy\\sparse\\_compressed.py:1181\u001B[0m, in \u001B[0;36m_cs_matrix.toarray\u001B[1;34m(self, order, out)\u001B[0m\n\u001B[0;32m 1179\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m out \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;129;01mand\u001B[39;00m order \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m 1180\u001B[0m order \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_swap(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mcf\u001B[39m\u001B[38;5;124m'\u001B[39m)[\u001B[38;5;241m0\u001B[39m]\n\u001B[1;32m-> 1181\u001B[0m out \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_process_toarray_args\u001B[49m\u001B[43m(\u001B[49m\u001B[43morder\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mout\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 1182\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m (out\u001B[38;5;241m.\u001B[39mflags\u001B[38;5;241m.\u001B[39mc_contiguous \u001B[38;5;129;01mor\u001B[39;00m out\u001B[38;5;241m.\u001B[39mflags\u001B[38;5;241m.\u001B[39mf_contiguous):\n\u001B[0;32m 1183\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mOutput array must be C or F contiguous\u001B[39m\u001B[38;5;124m'\u001B[39m)\n",
1198
+ "File \u001B[1;32mD:\\pynb\\Walmart\\venv\\lib\\site-packages\\scipy\\sparse\\_base.py:1301\u001B[0m, in \u001B[0;36m_spbase._process_toarray_args\u001B[1;34m(self, order, out)\u001B[0m\n\u001B[0;32m 1299\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m out\n\u001B[0;32m 1300\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m-> 1301\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mnp\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mzeros\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mshape\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdtype\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mdtype\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43morder\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43morder\u001B[49m\u001B[43m)\u001B[49m\n",
1199
+ "\u001B[1;31mMemoryError\u001B[0m: Unable to allocate 21.2 GiB for an array with shape (568507, 5000) and data type int64"
1200
+ ]
1201
+ }
1202
+ ],
1203
+ "execution_count": 17
1204
+ },
1205
+ {
1206
+ "cell_type": "code",
1207
+ "id": "84d50839b49ad1ce",
1208
+ "metadata": {
1209
+ "ExecuteTime": {
1210
+ "end_time": "2024-07-30T12:37:21.971885Z",
1211
+ "start_time": "2024-07-30T12:37:21.971885Z"
1212
+ }
1213
+ },
1214
+ "source": [
1215
+ "from sklearn.metrics.pairwise import cosine_similarity\n",
1216
+ "similarity=cosine_similarity(vectors)\n"
1217
+ ],
1218
+ "outputs": [],
1219
+ "execution_count": null
1220
+ },
1221
+ {
1222
+ "cell_type": "code",
1223
+ "id": "9f8b1afa332ca4b7",
1224
+ "metadata": {},
1225
+ "source": [
1226
+ "def recommend(item):\n",
1226
+ "    \"\"\"Print the five products most similar to ``item`` (first exact PRODUCT_NAME match; IndexError if there is none).\"\"\"\n",
1227
+ "    # Row position, not index label: new_df's labels have gaps, while vectors and iloc are positional.\n",
1228
+ "    item_index=(new_df['PRODUCT_NAME']==item).to_numpy().nonzero()[0][0]\n",
1229
+ "    distance=cosine_similarity(vectors[item_index:item_index+1],vectors).ravel()  # one row on demand; a full pairwise matrix would need n x n entries\n",
1230
+ "    for i in sorted(enumerate(distance),reverse=True,key=lambda x:x[1])[1:6]:\n",
1231
+ "        print(new_df.iloc[i[0]]['PRODUCT_NAME'])\n",
1233
+ "\n",
1234
+ "def get_recommendations(user_description, count_vectorizer, count_matrix):\n",
1234
+ "    \"\"\"Rank every row of ``count_matrix`` by cosine similarity to a free-text description.\n",
1235
+ "\n",
1236
+ "    ``user_description`` is cleaned with ``preprocess_text`` and then projected into\n",
1237
+ "    the feature space of ``count_vectorizer``; ``count_matrix`` is expected to hold\n",
1238
+ "    the item vectors produced by that same vectorizer.\n",
1239
+ "\n",
1240
+ "    Returns the positional row indices of ``count_matrix``, ordered from the most\n",
1241
+ "    similar item to the least similar one.\n",
1242
+ "    \"\"\"\n",
1243
+ "    query_vector = count_vectorizer.transform([preprocess_text(user_description)])\n",
1244
+ "    scores = cosine_similarity(query_vector, count_matrix).flatten()\n",
1245
+ "    # argsort is ascending, so reverse it to put the best matches first\n",
1246
+ "    return scores.argsort()[::-1]\n"
1248
+ ],
1249
+ "outputs": [],
1250
+ "execution_count": null
1251
+ },
1252
+ {
1253
+ "cell_type": "code",
1254
+ "id": "72c21ab855a6ba41",
1255
+ "metadata": {},
1256
+ "source": [
1257
+ "recommend(\"THE FIRST YEARS\")"
1258
+ ],
1259
+ "outputs": [],
1260
+ "execution_count": null
1261
+ },
1262
+ {
1263
+ "cell_type": "code",
1264
+ "id": "206cf57c6ce8bbf9",
1265
+ "metadata": {},
1266
+ "source": [
1267
+ "new_df.iloc[get_recommendations('milk', cv, vectors)]"
1268
+ ],
1269
+ "outputs": [],
1270
+ "execution_count": null
1271
+ },
1272
+ {
1273
+ "cell_type": "code",
1274
+ "id": "76143eea-ffb5-4d4c-a700-b0d98de0bb01",
1275
+ "metadata": {},
1276
+ "source": [
1277
+ "import pickle\n",
1278
+ "with open(\"cv.pkl\",\"wb\") as file:\n",
1279
+ " pickle.dump(cv,file)\n",
1280
+ "with open(\"vectors.pkl\",\"wb\")as file:\n",
1281
+ " pickle.dump(vectors,file)"
1282
+ ],
1283
+ "outputs": [],
1284
+ "execution_count": null
1285
+ },
1286
+ {
1287
+ "cell_type": "code",
1288
+ "id": "c5ee2911-a4bc-4a39-bde9-a8ee8378cc88",
1289
+ "metadata": {},
1290
+ "source": [],
1291
+ "outputs": [],
1292
+ "execution_count": null
1293
+ }
1294
+ ],
1295
+ "metadata": {
1296
+ "kernelspec": {
1297
+ "display_name": "Python 3 (ipykernel)",
1298
+ "language": "python",
1299
+ "name": "python3"
1300
+ },
1301
+ "language_info": {
1302
+ "codemirror_mode": {
1303
+ "name": "ipython",
1304
+ "version": 3
1305
+ },
1306
+ "file_extension": ".py",
1307
+ "mimetype": "text/x-python",
1308
+ "name": "python",
1309
+ "nbconvert_exporter": "python",
1310
+ "pygments_lexer": "ipython3",
1311
+ "version": "3.8.19"
1312
+ }
1313
+ },
1314
+ "nbformat": 4,
1315
+ "nbformat_minor": 5
1316
+ }