Sebastian Urrea committed on
Commit
b59c73f
•
1 Parent(s): 558a5db
Files changed (1) hide show
  1. Model.ipynb +1326 -0
Model.ipynb ADDED
@@ -0,0 +1,1326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "name": "Model",
7
+ "provenance": []
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ },
16
+ "widgets": {
17
+ "application/vnd.jupyter.widget-state+json": {
18
+ "429c6e78aec043d0b77eb34cafb16e1b": {
19
+ "model_module": "@jupyter-widgets/controls",
20
+ "model_name": "HBoxModel",
21
+ "model_module_version": "1.5.0",
22
+ "state": {
23
+ "_dom_classes": [],
24
+ "_model_module": "@jupyter-widgets/controls",
25
+ "_model_module_version": "1.5.0",
26
+ "_model_name": "HBoxModel",
27
+ "_view_count": null,
28
+ "_view_module": "@jupyter-widgets/controls",
29
+ "_view_module_version": "1.5.0",
30
+ "_view_name": "HBoxView",
31
+ "box_style": "",
32
+ "children": [
33
+ "IPY_MODEL_494f319e19d54a91ab454f1e472552ce",
34
+ "IPY_MODEL_0d275830f6e44acc828911fbf156cc1d",
35
+ "IPY_MODEL_6d0c8a919baa45eea6b23f39512968ce"
36
+ ],
37
+ "layout": "IPY_MODEL_8a10fee82fdc4690a313ad54a990555f"
38
+ }
39
+ },
40
+ "494f319e19d54a91ab454f1e472552ce": {
41
+ "model_module": "@jupyter-widgets/controls",
42
+ "model_name": "HTMLModel",
43
+ "model_module_version": "1.5.0",
44
+ "state": {
45
+ "_dom_classes": [],
46
+ "_model_module": "@jupyter-widgets/controls",
47
+ "_model_module_version": "1.5.0",
48
+ "_model_name": "HTMLModel",
49
+ "_view_count": null,
50
+ "_view_module": "@jupyter-widgets/controls",
51
+ "_view_module_version": "1.5.0",
52
+ "_view_name": "HTMLView",
53
+ "description": "",
54
+ "description_tooltip": null,
55
+ "layout": "IPY_MODEL_b5790264731046f495ff046be1b36ab5",
56
+ "placeholder": "​",
57
+ "style": "IPY_MODEL_168cd756b824410d87bf6207e3b50627",
58
+ "value": "100%"
59
+ }
60
+ },
61
+ "0d275830f6e44acc828911fbf156cc1d": {
62
+ "model_module": "@jupyter-widgets/controls",
63
+ "model_name": "FloatProgressModel",
64
+ "model_module_version": "1.5.0",
65
+ "state": {
66
+ "_dom_classes": [],
67
+ "_model_module": "@jupyter-widgets/controls",
68
+ "_model_module_version": "1.5.0",
69
+ "_model_name": "FloatProgressModel",
70
+ "_view_count": null,
71
+ "_view_module": "@jupyter-widgets/controls",
72
+ "_view_module_version": "1.5.0",
73
+ "_view_name": "ProgressView",
74
+ "bar_style": "success",
75
+ "description": "",
76
+ "description_tooltip": null,
77
+ "layout": "IPY_MODEL_bd86beccde9d4e1e97d1ecb6927383e3",
78
+ "max": 3,
79
+ "min": 0,
80
+ "orientation": "horizontal",
81
+ "style": "IPY_MODEL_63513ddda83741528278a72af036d825",
82
+ "value": 3
83
+ }
84
+ },
85
+ "6d0c8a919baa45eea6b23f39512968ce": {
86
+ "model_module": "@jupyter-widgets/controls",
87
+ "model_name": "HTMLModel",
88
+ "model_module_version": "1.5.0",
89
+ "state": {
90
+ "_dom_classes": [],
91
+ "_model_module": "@jupyter-widgets/controls",
92
+ "_model_module_version": "1.5.0",
93
+ "_model_name": "HTMLModel",
94
+ "_view_count": null,
95
+ "_view_module": "@jupyter-widgets/controls",
96
+ "_view_module_version": "1.5.0",
97
+ "_view_name": "HTMLView",
98
+ "description": "",
99
+ "description_tooltip": null,
100
+ "layout": "IPY_MODEL_d012a21eb0854e1cabf8c46dbd23857c",
101
+ "placeholder": "​",
102
+ "style": "IPY_MODEL_7472528de85d48a1a545f3436ddb00e5",
103
+ "value": " 3/3 [00:00<00:00, 40.27it/s]"
104
+ }
105
+ },
106
+ "8a10fee82fdc4690a313ad54a990555f": {
107
+ "model_module": "@jupyter-widgets/base",
108
+ "model_name": "LayoutModel",
109
+ "model_module_version": "1.2.0",
110
+ "state": {
111
+ "_model_module": "@jupyter-widgets/base",
112
+ "_model_module_version": "1.2.0",
113
+ "_model_name": "LayoutModel",
114
+ "_view_count": null,
115
+ "_view_module": "@jupyter-widgets/base",
116
+ "_view_module_version": "1.2.0",
117
+ "_view_name": "LayoutView",
118
+ "align_content": null,
119
+ "align_items": null,
120
+ "align_self": null,
121
+ "border": null,
122
+ "bottom": null,
123
+ "display": null,
124
+ "flex": null,
125
+ "flex_flow": null,
126
+ "grid_area": null,
127
+ "grid_auto_columns": null,
128
+ "grid_auto_flow": null,
129
+ "grid_auto_rows": null,
130
+ "grid_column": null,
131
+ "grid_gap": null,
132
+ "grid_row": null,
133
+ "grid_template_areas": null,
134
+ "grid_template_columns": null,
135
+ "grid_template_rows": null,
136
+ "height": null,
137
+ "justify_content": null,
138
+ "justify_items": null,
139
+ "left": null,
140
+ "margin": null,
141
+ "max_height": null,
142
+ "max_width": null,
143
+ "min_height": null,
144
+ "min_width": null,
145
+ "object_fit": null,
146
+ "object_position": null,
147
+ "order": null,
148
+ "overflow": null,
149
+ "overflow_x": null,
150
+ "overflow_y": null,
151
+ "padding": null,
152
+ "right": null,
153
+ "top": null,
154
+ "visibility": null,
155
+ "width": null
156
+ }
157
+ },
158
+ "b5790264731046f495ff046be1b36ab5": {
159
+ "model_module": "@jupyter-widgets/base",
160
+ "model_name": "LayoutModel",
161
+ "model_module_version": "1.2.0",
162
+ "state": {
163
+ "_model_module": "@jupyter-widgets/base",
164
+ "_model_module_version": "1.2.0",
165
+ "_model_name": "LayoutModel",
166
+ "_view_count": null,
167
+ "_view_module": "@jupyter-widgets/base",
168
+ "_view_module_version": "1.2.0",
169
+ "_view_name": "LayoutView",
170
+ "align_content": null,
171
+ "align_items": null,
172
+ "align_self": null,
173
+ "border": null,
174
+ "bottom": null,
175
+ "display": null,
176
+ "flex": null,
177
+ "flex_flow": null,
178
+ "grid_area": null,
179
+ "grid_auto_columns": null,
180
+ "grid_auto_flow": null,
181
+ "grid_auto_rows": null,
182
+ "grid_column": null,
183
+ "grid_gap": null,
184
+ "grid_row": null,
185
+ "grid_template_areas": null,
186
+ "grid_template_columns": null,
187
+ "grid_template_rows": null,
188
+ "height": null,
189
+ "justify_content": null,
190
+ "justify_items": null,
191
+ "left": null,
192
+ "margin": null,
193
+ "max_height": null,
194
+ "max_width": null,
195
+ "min_height": null,
196
+ "min_width": null,
197
+ "object_fit": null,
198
+ "object_position": null,
199
+ "order": null,
200
+ "overflow": null,
201
+ "overflow_x": null,
202
+ "overflow_y": null,
203
+ "padding": null,
204
+ "right": null,
205
+ "top": null,
206
+ "visibility": null,
207
+ "width": null
208
+ }
209
+ },
210
+ "168cd756b824410d87bf6207e3b50627": {
211
+ "model_module": "@jupyter-widgets/controls",
212
+ "model_name": "DescriptionStyleModel",
213
+ "model_module_version": "1.5.0",
214
+ "state": {
215
+ "_model_module": "@jupyter-widgets/controls",
216
+ "_model_module_version": "1.5.0",
217
+ "_model_name": "DescriptionStyleModel",
218
+ "_view_count": null,
219
+ "_view_module": "@jupyter-widgets/base",
220
+ "_view_module_version": "1.2.0",
221
+ "_view_name": "StyleView",
222
+ "description_width": ""
223
+ }
224
+ },
225
+ "bd86beccde9d4e1e97d1ecb6927383e3": {
226
+ "model_module": "@jupyter-widgets/base",
227
+ "model_name": "LayoutModel",
228
+ "model_module_version": "1.2.0",
229
+ "state": {
230
+ "_model_module": "@jupyter-widgets/base",
231
+ "_model_module_version": "1.2.0",
232
+ "_model_name": "LayoutModel",
233
+ "_view_count": null,
234
+ "_view_module": "@jupyter-widgets/base",
235
+ "_view_module_version": "1.2.0",
236
+ "_view_name": "LayoutView",
237
+ "align_content": null,
238
+ "align_items": null,
239
+ "align_self": null,
240
+ "border": null,
241
+ "bottom": null,
242
+ "display": null,
243
+ "flex": null,
244
+ "flex_flow": null,
245
+ "grid_area": null,
246
+ "grid_auto_columns": null,
247
+ "grid_auto_flow": null,
248
+ "grid_auto_rows": null,
249
+ "grid_column": null,
250
+ "grid_gap": null,
251
+ "grid_row": null,
252
+ "grid_template_areas": null,
253
+ "grid_template_columns": null,
254
+ "grid_template_rows": null,
255
+ "height": null,
256
+ "justify_content": null,
257
+ "justify_items": null,
258
+ "left": null,
259
+ "margin": null,
260
+ "max_height": null,
261
+ "max_width": null,
262
+ "min_height": null,
263
+ "min_width": null,
264
+ "object_fit": null,
265
+ "object_position": null,
266
+ "order": null,
267
+ "overflow": null,
268
+ "overflow_x": null,
269
+ "overflow_y": null,
270
+ "padding": null,
271
+ "right": null,
272
+ "top": null,
273
+ "visibility": null,
274
+ "width": null
275
+ }
276
+ },
277
+ "63513ddda83741528278a72af036d825": {
278
+ "model_module": "@jupyter-widgets/controls",
279
+ "model_name": "ProgressStyleModel",
280
+ "model_module_version": "1.5.0",
281
+ "state": {
282
+ "_model_module": "@jupyter-widgets/controls",
283
+ "_model_module_version": "1.5.0",
284
+ "_model_name": "ProgressStyleModel",
285
+ "_view_count": null,
286
+ "_view_module": "@jupyter-widgets/base",
287
+ "_view_module_version": "1.2.0",
288
+ "_view_name": "StyleView",
289
+ "bar_color": null,
290
+ "description_width": ""
291
+ }
292
+ },
293
+ "d012a21eb0854e1cabf8c46dbd23857c": {
294
+ "model_module": "@jupyter-widgets/base",
295
+ "model_name": "LayoutModel",
296
+ "model_module_version": "1.2.0",
297
+ "state": {
298
+ "_model_module": "@jupyter-widgets/base",
299
+ "_model_module_version": "1.2.0",
300
+ "_model_name": "LayoutModel",
301
+ "_view_count": null,
302
+ "_view_module": "@jupyter-widgets/base",
303
+ "_view_module_version": "1.2.0",
304
+ "_view_name": "LayoutView",
305
+ "align_content": null,
306
+ "align_items": null,
307
+ "align_self": null,
308
+ "border": null,
309
+ "bottom": null,
310
+ "display": null,
311
+ "flex": null,
312
+ "flex_flow": null,
313
+ "grid_area": null,
314
+ "grid_auto_columns": null,
315
+ "grid_auto_flow": null,
316
+ "grid_auto_rows": null,
317
+ "grid_column": null,
318
+ "grid_gap": null,
319
+ "grid_row": null,
320
+ "grid_template_areas": null,
321
+ "grid_template_columns": null,
322
+ "grid_template_rows": null,
323
+ "height": null,
324
+ "justify_content": null,
325
+ "justify_items": null,
326
+ "left": null,
327
+ "margin": null,
328
+ "max_height": null,
329
+ "max_width": null,
330
+ "min_height": null,
331
+ "min_width": null,
332
+ "object_fit": null,
333
+ "object_position": null,
334
+ "order": null,
335
+ "overflow": null,
336
+ "overflow_x": null,
337
+ "overflow_y": null,
338
+ "padding": null,
339
+ "right": null,
340
+ "top": null,
341
+ "visibility": null,
342
+ "width": null
343
+ }
344
+ },
345
+ "7472528de85d48a1a545f3436ddb00e5": {
346
+ "model_module": "@jupyter-widgets/controls",
347
+ "model_name": "DescriptionStyleModel",
348
+ "model_module_version": "1.5.0",
349
+ "state": {
350
+ "_model_module": "@jupyter-widgets/controls",
351
+ "_model_module_version": "1.5.0",
352
+ "_model_name": "DescriptionStyleModel",
353
+ "_view_count": null,
354
+ "_view_module": "@jupyter-widgets/base",
355
+ "_view_module_version": "1.2.0",
356
+ "_view_name": "StyleView",
357
+ "description_width": ""
358
+ }
359
+ },
360
+ "f0b51ffeb656453589ecdb407522dea3": {
361
+ "model_module": "@jupyter-widgets/controls",
362
+ "model_name": "HBoxModel",
363
+ "model_module_version": "1.5.0",
364
+ "state": {
365
+ "_dom_classes": [],
366
+ "_model_module": "@jupyter-widgets/controls",
367
+ "_model_module_version": "1.5.0",
368
+ "_model_name": "HBoxModel",
369
+ "_view_count": null,
370
+ "_view_module": "@jupyter-widgets/controls",
371
+ "_view_module_version": "1.5.0",
372
+ "_view_name": "HBoxView",
373
+ "box_style": "",
374
+ "children": [
375
+ "IPY_MODEL_37a1c33bb8284ecbb33ef163fa7b7fd8",
376
+ "IPY_MODEL_a2428193e74442508eb1cb33ed697c43",
377
+ "IPY_MODEL_180b0aaca969493eb17aec2eba9de6b8"
378
+ ],
379
+ "layout": "IPY_MODEL_227dcef044b64faf8a91eaf4f0bb93d3"
380
+ }
381
+ },
382
+ "37a1c33bb8284ecbb33ef163fa7b7fd8": {
383
+ "model_module": "@jupyter-widgets/controls",
384
+ "model_name": "HTMLModel",
385
+ "model_module_version": "1.5.0",
386
+ "state": {
387
+ "_dom_classes": [],
388
+ "_model_module": "@jupyter-widgets/controls",
389
+ "_model_module_version": "1.5.0",
390
+ "_model_name": "HTMLModel",
391
+ "_view_count": null,
392
+ "_view_module": "@jupyter-widgets/controls",
393
+ "_view_module_version": "1.5.0",
394
+ "_view_name": "HTMLView",
395
+ "description": "",
396
+ "description_tooltip": null,
397
+ "layout": "IPY_MODEL_4e6495dc492d400bb3f3a9035087d9a4",
398
+ "placeholder": "​",
399
+ "style": "IPY_MODEL_fac9fe5ef6a44c3587f09d516a288c43",
400
+ "value": "100%"
401
+ }
402
+ },
403
+ "a2428193e74442508eb1cb33ed697c43": {
404
+ "model_module": "@jupyter-widgets/controls",
405
+ "model_name": "FloatProgressModel",
406
+ "model_module_version": "1.5.0",
407
+ "state": {
408
+ "_dom_classes": [],
409
+ "_model_module": "@jupyter-widgets/controls",
410
+ "_model_module_version": "1.5.0",
411
+ "_model_name": "FloatProgressModel",
412
+ "_view_count": null,
413
+ "_view_module": "@jupyter-widgets/controls",
414
+ "_view_module_version": "1.5.0",
415
+ "_view_name": "ProgressView",
416
+ "bar_style": "success",
417
+ "description": "",
418
+ "description_tooltip": null,
419
+ "layout": "IPY_MODEL_b23035bb26d841b38a646bd6f0c69652",
420
+ "max": 5,
421
+ "min": 0,
422
+ "orientation": "horizontal",
423
+ "style": "IPY_MODEL_5d454a22797c405688e73c1765040618",
424
+ "value": 5
425
+ }
426
+ },
427
+ "180b0aaca969493eb17aec2eba9de6b8": {
428
+ "model_module": "@jupyter-widgets/controls",
429
+ "model_name": "HTMLModel",
430
+ "model_module_version": "1.5.0",
431
+ "state": {
432
+ "_dom_classes": [],
433
+ "_model_module": "@jupyter-widgets/controls",
434
+ "_model_module_version": "1.5.0",
435
+ "_model_name": "HTMLModel",
436
+ "_view_count": null,
437
+ "_view_module": "@jupyter-widgets/controls",
438
+ "_view_module_version": "1.5.0",
439
+ "_view_name": "HTMLView",
440
+ "description": "",
441
+ "description_tooltip": null,
442
+ "layout": "IPY_MODEL_bd0857febb4249bb8f7ceb9d5777ba0a",
443
+ "placeholder": "​",
444
+ "style": "IPY_MODEL_f74c65693dd845d8a6da1fdd7577925f",
445
+ "value": " 5/5 [00:01<00:00, 4.77ba/s]"
446
+ }
447
+ },
448
+ "227dcef044b64faf8a91eaf4f0bb93d3": {
449
+ "model_module": "@jupyter-widgets/base",
450
+ "model_name": "LayoutModel",
451
+ "model_module_version": "1.2.0",
452
+ "state": {
453
+ "_model_module": "@jupyter-widgets/base",
454
+ "_model_module_version": "1.2.0",
455
+ "_model_name": "LayoutModel",
456
+ "_view_count": null,
457
+ "_view_module": "@jupyter-widgets/base",
458
+ "_view_module_version": "1.2.0",
459
+ "_view_name": "LayoutView",
460
+ "align_content": null,
461
+ "align_items": null,
462
+ "align_self": null,
463
+ "border": null,
464
+ "bottom": null,
465
+ "display": null,
466
+ "flex": null,
467
+ "flex_flow": null,
468
+ "grid_area": null,
469
+ "grid_auto_columns": null,
470
+ "grid_auto_flow": null,
471
+ "grid_auto_rows": null,
472
+ "grid_column": null,
473
+ "grid_gap": null,
474
+ "grid_row": null,
475
+ "grid_template_areas": null,
476
+ "grid_template_columns": null,
477
+ "grid_template_rows": null,
478
+ "height": null,
479
+ "justify_content": null,
480
+ "justify_items": null,
481
+ "left": null,
482
+ "margin": null,
483
+ "max_height": null,
484
+ "max_width": null,
485
+ "min_height": null,
486
+ "min_width": null,
487
+ "object_fit": null,
488
+ "object_position": null,
489
+ "order": null,
490
+ "overflow": null,
491
+ "overflow_x": null,
492
+ "overflow_y": null,
493
+ "padding": null,
494
+ "right": null,
495
+ "top": null,
496
+ "visibility": null,
497
+ "width": null
498
+ }
499
+ },
500
+ "4e6495dc492d400bb3f3a9035087d9a4": {
501
+ "model_module": "@jupyter-widgets/base",
502
+ "model_name": "LayoutModel",
503
+ "model_module_version": "1.2.0",
504
+ "state": {
505
+ "_model_module": "@jupyter-widgets/base",
506
+ "_model_module_version": "1.2.0",
507
+ "_model_name": "LayoutModel",
508
+ "_view_count": null,
509
+ "_view_module": "@jupyter-widgets/base",
510
+ "_view_module_version": "1.2.0",
511
+ "_view_name": "LayoutView",
512
+ "align_content": null,
513
+ "align_items": null,
514
+ "align_self": null,
515
+ "border": null,
516
+ "bottom": null,
517
+ "display": null,
518
+ "flex": null,
519
+ "flex_flow": null,
520
+ "grid_area": null,
521
+ "grid_auto_columns": null,
522
+ "grid_auto_flow": null,
523
+ "grid_auto_rows": null,
524
+ "grid_column": null,
525
+ "grid_gap": null,
526
+ "grid_row": null,
527
+ "grid_template_areas": null,
528
+ "grid_template_columns": null,
529
+ "grid_template_rows": null,
530
+ "height": null,
531
+ "justify_content": null,
532
+ "justify_items": null,
533
+ "left": null,
534
+ "margin": null,
535
+ "max_height": null,
536
+ "max_width": null,
537
+ "min_height": null,
538
+ "min_width": null,
539
+ "object_fit": null,
540
+ "object_position": null,
541
+ "order": null,
542
+ "overflow": null,
543
+ "overflow_x": null,
544
+ "overflow_y": null,
545
+ "padding": null,
546
+ "right": null,
547
+ "top": null,
548
+ "visibility": null,
549
+ "width": null
550
+ }
551
+ },
552
+ "fac9fe5ef6a44c3587f09d516a288c43": {
553
+ "model_module": "@jupyter-widgets/controls",
554
+ "model_name": "DescriptionStyleModel",
555
+ "model_module_version": "1.5.0",
556
+ "state": {
557
+ "_model_module": "@jupyter-widgets/controls",
558
+ "_model_module_version": "1.5.0",
559
+ "_model_name": "DescriptionStyleModel",
560
+ "_view_count": null,
561
+ "_view_module": "@jupyter-widgets/base",
562
+ "_view_module_version": "1.2.0",
563
+ "_view_name": "StyleView",
564
+ "description_width": ""
565
+ }
566
+ },
567
+ "b23035bb26d841b38a646bd6f0c69652": {
568
+ "model_module": "@jupyter-widgets/base",
569
+ "model_name": "LayoutModel",
570
+ "model_module_version": "1.2.0",
571
+ "state": {
572
+ "_model_module": "@jupyter-widgets/base",
573
+ "_model_module_version": "1.2.0",
574
+ "_model_name": "LayoutModel",
575
+ "_view_count": null,
576
+ "_view_module": "@jupyter-widgets/base",
577
+ "_view_module_version": "1.2.0",
578
+ "_view_name": "LayoutView",
579
+ "align_content": null,
580
+ "align_items": null,
581
+ "align_self": null,
582
+ "border": null,
583
+ "bottom": null,
584
+ "display": null,
585
+ "flex": null,
586
+ "flex_flow": null,
587
+ "grid_area": null,
588
+ "grid_auto_columns": null,
589
+ "grid_auto_flow": null,
590
+ "grid_auto_rows": null,
591
+ "grid_column": null,
592
+ "grid_gap": null,
593
+ "grid_row": null,
594
+ "grid_template_areas": null,
595
+ "grid_template_columns": null,
596
+ "grid_template_rows": null,
597
+ "height": null,
598
+ "justify_content": null,
599
+ "justify_items": null,
600
+ "left": null,
601
+ "margin": null,
602
+ "max_height": null,
603
+ "max_width": null,
604
+ "min_height": null,
605
+ "min_width": null,
606
+ "object_fit": null,
607
+ "object_position": null,
608
+ "order": null,
609
+ "overflow": null,
610
+ "overflow_x": null,
611
+ "overflow_y": null,
612
+ "padding": null,
613
+ "right": null,
614
+ "top": null,
615
+ "visibility": null,
616
+ "width": null
617
+ }
618
+ },
619
+ "5d454a22797c405688e73c1765040618": {
620
+ "model_module": "@jupyter-widgets/controls",
621
+ "model_name": "ProgressStyleModel",
622
+ "model_module_version": "1.5.0",
623
+ "state": {
624
+ "_model_module": "@jupyter-widgets/controls",
625
+ "_model_module_version": "1.5.0",
626
+ "_model_name": "ProgressStyleModel",
627
+ "_view_count": null,
628
+ "_view_module": "@jupyter-widgets/base",
629
+ "_view_module_version": "1.2.0",
630
+ "_view_name": "StyleView",
631
+ "bar_color": null,
632
+ "description_width": ""
633
+ }
634
+ },
635
+ "bd0857febb4249bb8f7ceb9d5777ba0a": {
636
+ "model_module": "@jupyter-widgets/base",
637
+ "model_name": "LayoutModel",
638
+ "model_module_version": "1.2.0",
639
+ "state": {
640
+ "_model_module": "@jupyter-widgets/base",
641
+ "_model_module_version": "1.2.0",
642
+ "_model_name": "LayoutModel",
643
+ "_view_count": null,
644
+ "_view_module": "@jupyter-widgets/base",
645
+ "_view_module_version": "1.2.0",
646
+ "_view_name": "LayoutView",
647
+ "align_content": null,
648
+ "align_items": null,
649
+ "align_self": null,
650
+ "border": null,
651
+ "bottom": null,
652
+ "display": null,
653
+ "flex": null,
654
+ "flex_flow": null,
655
+ "grid_area": null,
656
+ "grid_auto_columns": null,
657
+ "grid_auto_flow": null,
658
+ "grid_auto_rows": null,
659
+ "grid_column": null,
660
+ "grid_gap": null,
661
+ "grid_row": null,
662
+ "grid_template_areas": null,
663
+ "grid_template_columns": null,
664
+ "grid_template_rows": null,
665
+ "height": null,
666
+ "justify_content": null,
667
+ "justify_items": null,
668
+ "left": null,
669
+ "margin": null,
670
+ "max_height": null,
671
+ "max_width": null,
672
+ "min_height": null,
673
+ "min_width": null,
674
+ "object_fit": null,
675
+ "object_position": null,
676
+ "order": null,
677
+ "overflow": null,
678
+ "overflow_x": null,
679
+ "overflow_y": null,
680
+ "padding": null,
681
+ "right": null,
682
+ "top": null,
683
+ "visibility": null,
684
+ "width": null
685
+ }
686
+ },
687
+ "f74c65693dd845d8a6da1fdd7577925f": {
688
+ "model_module": "@jupyter-widgets/controls",
689
+ "model_name": "DescriptionStyleModel",
690
+ "model_module_version": "1.5.0",
691
+ "state": {
692
+ "_model_module": "@jupyter-widgets/controls",
693
+ "_model_module_version": "1.5.0",
694
+ "_model_name": "DescriptionStyleModel",
695
+ "_view_count": null,
696
+ "_view_module": "@jupyter-widgets/base",
697
+ "_view_module_version": "1.2.0",
698
+ "_view_name": "StyleView",
699
+ "description_width": ""
700
+ }
701
+ }
702
+ }
703
+ }
704
+ },
705
+ "cells": [
706
+ {
707
+ "cell_type": "code",
708
+ "source": [
709
+ "!pip install pip install datasets"
710
+ ],
711
+ "metadata": {
712
+ "colab": {
713
+ "base_uri": "https://localhost:8080/"
714
+ },
715
+ "id": "Wfi3o3CGfibZ",
716
+ "outputId": "5e023a28-ccae-4951-be60-7d839c1284c6"
717
+ },
718
+ "execution_count": 44,
719
+ "outputs": [
720
+ {
721
+ "output_type": "stream",
722
+ "name": "stdout",
723
+ "text": [
724
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
725
+ "Requirement already satisfied: pip in /usr/local/lib/python3.7/dist-packages (21.1.3)\n",
726
+ "Requirement already satisfied: install in /usr/local/lib/python3.7/dist-packages (1.3.5)\n",
727
+ "Requirement already satisfied: datasets in /usr/local/lib/python3.7/dist-packages (2.2.2)\n",
728
+ "Requirement already satisfied: xxhash in /usr/local/lib/python3.7/dist-packages (from datasets) (3.0.0)\n",
729
+ "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets) (4.64.0)\n",
730
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets) (1.3.5)\n",
731
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets) (1.21.6)\n",
732
+ "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (2.23.0)\n",
733
+ "Requirement already satisfied: huggingface-hub<1.0.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (0.7.0)\n",
734
+ "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets) (0.70.12.2)\n",
735
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets) (21.3)\n",
736
+ "Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (2022.5.0)\n",
737
+ "Requirement already satisfied: dill<0.3.5 in /usr/local/lib/python3.7/dist-packages (from datasets) (0.3.4)\n",
738
+ "Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (6.0.1)\n",
739
+ "Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.7/dist-packages (from datasets) (0.18.0)\n",
740
+ "Requirement already satisfied: aiohttp in /usr/local/lib/python3.7/dist-packages (from datasets) (3.8.1)\n",
741
+ "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets) (4.11.3)\n",
742
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (6.0)\n",
743
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (3.7.0)\n",
744
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (4.2.0)\n",
745
+ "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets) (3.0.9)\n",
746
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2022.5.18.1)\n",
747
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2.10)\n",
748
+ "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (1.25.11)\n",
749
+ "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (3.0.4)\n",
750
+ "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (2.0.12)\n",
751
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (1.3.0)\n",
752
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (21.4.0)\n",
753
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (6.0.2)\n",
754
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (1.7.2)\n",
755
+ "Requirement already satisfied: asynctest==0.13.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (0.13.0)\n",
756
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (1.2.0)\n",
757
+ "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets) (4.0.2)\n",
758
+ "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->datasets) (3.8.0)\n",
759
+ "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2.8.2)\n",
760
+ "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2022.1)\n",
761
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n"
762
+ ]
763
+ }
764
+ ]
765
+ },
766
+ {
767
+ "cell_type": "code",
768
+ "source": [
769
+ "!pip install transformers"
770
+ ],
771
+ "metadata": {
772
+ "colab": {
773
+ "base_uri": "https://localhost:8080/"
774
+ },
775
+ "id": "PcDXuQ0Vfj8V",
776
+ "outputId": "e036b413-32d0-463e-ce13-133748eb4680"
777
+ },
778
+ "execution_count": 45,
779
+ "outputs": [
780
+ {
781
+ "output_type": "stream",
782
+ "name": "stdout",
783
+ "text": [
784
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
785
+ "Requirement already satisfied: transformers in /usr/local/lib/python3.7/dist-packages (4.19.2)\n",
786
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n",
787
+ "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.64.0)\n",
788
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n",
789
+ "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
790
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.7.0)\n",
791
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.7.0)\n",
792
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n",
793
+ "Requirement already satisfied: tokenizers!=0.11.3,<0.13,>=0.11.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.12.1)\n",
794
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.6)\n",
795
+ "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.11.3)\n",
796
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (4.2.0)\n",
797
+ "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.9)\n",
798
+ "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.8.0)\n",
799
+ "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.25.11)\n",
800
+ "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
801
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
802
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2022.5.18.1)\n"
803
+ ]
804
+ }
805
+ ]
806
+ },
807
+ {
808
+ "cell_type": "markdown",
809
+ "source": [
810
+ "https://huggingface.co/datasets/amazon_reviews_multi/viewer/all_languages/train\n",
811
+ "\n",
812
+ "https://stackoverflow.com/questions/70814490/uploading-models-with-custom-forward-functions-to-the-huggingface-model-hub\n",
813
+ "\n",
814
+ "https://huggingface.co/luisu0124/Amazon_review/tree/main"
815
+ ],
816
+ "metadata": {
817
+ "id": "5FzhqM6OolIo"
818
+ }
819
+ },
820
+ {
821
+ "cell_type": "code",
822
+ "source": [
823
+ "from google.colab import drive\n",
824
+ "drive.mount('/content/drive')"
825
+ ],
826
+ "metadata": {
827
+ "colab": {
828
+ "base_uri": "https://localhost:8080/"
829
+ },
830
+ "id": "xW22d65ulA8P",
831
+ "outputId": "5298332f-c9e7-4788-caec-20d770f24714"
832
+ },
833
+ "execution_count": 46,
834
+ "outputs": [
835
+ {
836
+ "output_type": "stream",
837
+ "name": "stdout",
838
+ "text": [
839
+ "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
840
+ ]
841
+ }
842
+ ]
843
+ },
844
+ {
845
+ "cell_type": "code",
846
+ "execution_count": 47,
847
+ "metadata": {
848
+ "colab": {
849
+ "base_uri": "https://localhost:8080/"
850
+ },
851
+ "id": "ZVOTHjNifWfB",
852
+ "outputId": "72570153-20df-4551-93cf-f39a2916781e"
853
+ },
854
+ "outputs": [
855
+ {
856
+ "output_type": "stream",
857
+ "name": "stderr",
858
+ "text": [
859
+ "Some weights of the model checkpoint at google/bert_uncased_L-2_H-128_A-2 were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']\n",
860
+ "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
861
+ "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
862
+ ]
863
+ }
864
+ ],
865
+ "source": [
866
+ "import tqdm\n",
867
+ "\n",
868
+ "from datasets import load_dataset\n",
869
+ "import transformers\n",
870
+ "from transformers import AutoTokenizer, AutoModel, BertConfig\n",
871
+ "from transformers import AdamW\n",
872
+ "from transformers import get_scheduler\n",
873
+ "\n",
874
+ "import torch\n",
875
+ "import torch.nn as nn\n",
876
+ "from torch.utils.data import Dataset, DataLoader\n",
877
+ "\n",
878
+ "# setting device to `cuda` if gpu exists\n",
879
+ "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n",
880
+ "\n",
881
+ "# initialising the tokenizer and model\n",
882
+ "tokenizer = AutoTokenizer.from_pretrained(\"google/bert_uncased_L-2_H-128_A-2\")\n",
883
+ "#tokenizer = AutoTokenizer.from_pretrained(\"pysentimiento/robertuito-sentiment-analysis\")\n",
884
+ "#bert = AutoModel.from_pretrained(\"google/bert_uncased_L-2_H-128_A-2\")\n",
885
+ "bert = AutoModel.from_pretrained(\"google/bert_uncased_L-2_H-128_A-2\")\n"
886
+ ]
887
+ },
888
+ {
889
+ "cell_type": "markdown",
890
+ "source": [
891
+ "### Cargue de dataset"
892
+ ],
893
+ "metadata": {
894
+ "id": "76K5Uj0W71yU"
895
+ }
896
+ },
897
+ {
898
+ "cell_type": "code",
899
+ "source": [
900
+ "def tokenize_function(examples):\n",
901
+ " '''Function for tokenizing raw texts'''\n",
902
+ " return tokenizer(examples[\"review_body\"], padding=\"max_length\", truncation=True, max_length=128)\n",
903
+ " #return tokenizer(examples[\"text\"], padding=\"max_length\", truncation=True, max_length=128)\n",
904
+ "\n",
905
+ "\n",
906
+ "# downloading the Spanish (es) configuration of the amazon_reviews_multi dataset from 🤗 `datasets`\n",
907
+ "#raw_datasets = load_dataset(\"amazon_reviews_multi\")\n",
908
+ "raw_datasets = load_dataset(\"amazon_reviews_multi\",\"es\")\n",
909
+ "\n"
910
+ ],
911
+ "metadata": {
912
+ "colab": {
913
+ "base_uri": "https://localhost:8080/",
914
+ "height": 86,
915
+ "referenced_widgets": [
916
+ "429c6e78aec043d0b77eb34cafb16e1b",
917
+ "494f319e19d54a91ab454f1e472552ce",
918
+ "0d275830f6e44acc828911fbf156cc1d",
919
+ "6d0c8a919baa45eea6b23f39512968ce",
920
+ "8a10fee82fdc4690a313ad54a990555f",
921
+ "b5790264731046f495ff046be1b36ab5",
922
+ "168cd756b824410d87bf6207e3b50627",
923
+ "bd86beccde9d4e1e97d1ecb6927383e3",
924
+ "63513ddda83741528278a72af036d825",
925
+ "d012a21eb0854e1cabf8c46dbd23857c",
926
+ "7472528de85d48a1a545f3436ddb00e5"
927
+ ]
928
+ },
929
+ "id": "EwVxX4Zg70Aa",
930
+ "outputId": "20e9c36f-c0af-4ad6-824f-2820573dbf4a"
931
+ },
932
+ "execution_count": 48,
933
+ "outputs": [
934
+ {
935
+ "output_type": "stream",
936
+ "name": "stderr",
937
+ "text": [
938
+ "Reusing dataset amazon_reviews_multi (/root/.cache/huggingface/datasets/amazon_reviews_multi/es/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609)\n"
939
+ ]
940
+ },
941
+ {
942
+ "output_type": "display_data",
943
+ "data": {
944
+ "text/plain": [
945
+ " 0%| | 0/3 [00:00<?, ?it/s]"
946
+ ],
947
+ "application/vnd.jupyter.widget-view+json": {
948
+ "version_major": 2,
949
+ "version_minor": 0,
950
+ "model_id": "429c6e78aec043d0b77eb34cafb16e1b"
951
+ }
952
+ },
953
+ "metadata": {}
954
+ }
955
+ ]
956
+ },
957
+ {
958
+ "cell_type": "code",
959
+ "source": [
960
+ "# Running tokenizing function on the raw texts\n",
961
+ "tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)\n",
962
+ "\n",
963
+ "# for simplicity, keep only a shuffled 1000-example subset of the train split\n",
964
+ "tokenized_datasets = tokenized_datasets[\"train\"].shuffle(seed=42).select(range(1000))"
965
+ ],
966
+ "metadata": {
967
+ "colab": {
968
+ "base_uri": "https://localhost:8080/",
969
+ "height": 121,
970
+ "referenced_widgets": [
971
+ "f0b51ffeb656453589ecdb407522dea3",
972
+ "37a1c33bb8284ecbb33ef163fa7b7fd8",
973
+ "a2428193e74442508eb1cb33ed697c43",
974
+ "180b0aaca969493eb17aec2eba9de6b8",
975
+ "227dcef044b64faf8a91eaf4f0bb93d3",
976
+ "4e6495dc492d400bb3f3a9035087d9a4",
977
+ "fac9fe5ef6a44c3587f09d516a288c43",
978
+ "b23035bb26d841b38a646bd6f0c69652",
979
+ "5d454a22797c405688e73c1765040618",
980
+ "bd0857febb4249bb8f7ceb9d5777ba0a",
981
+ "f74c65693dd845d8a6da1fdd7577925f"
982
+ ]
983
+ },
984
+ "id": "M6hDICwh7pQv",
985
+ "outputId": "f944ff63-0a40-4b3b-c7d4-b549e4bd123b"
986
+ },
987
+ "execution_count": 49,
988
+ "outputs": [
989
+ {
990
+ "output_type": "stream",
991
+ "name": "stderr",
992
+ "text": [
993
+ "Loading cached processed dataset at /root/.cache/huggingface/datasets/amazon_reviews_multi/es/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-46cf96799dcd2584.arrow\n"
994
+ ]
995
+ },
996
+ {
997
+ "output_type": "display_data",
998
+ "data": {
999
+ "text/plain": [
1000
+ " 0%| | 0/5 [00:00<?, ?ba/s]"
1001
+ ],
1002
+ "application/vnd.jupyter.widget-view+json": {
1003
+ "version_major": 2,
1004
+ "version_minor": 0,
1005
+ "model_id": "f0b51ffeb656453589ecdb407522dea3"
1006
+ }
1007
+ },
1008
+ "metadata": {}
1009
+ },
1010
+ {
1011
+ "output_type": "stream",
1012
+ "name": "stderr",
1013
+ "text": [
1014
+ "Loading cached processed dataset at /root/.cache/huggingface/datasets/amazon_reviews_multi/es/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-69ce6d7f8f0abb0e.arrow\n",
1015
+ "Loading cached shuffled indices for dataset at /root/.cache/huggingface/datasets/amazon_reviews_multi/es/1.0.0/724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609/cache-d0478a74f9a092bf.arrow\n"
1016
+ ]
1017
+ }
1018
+ ]
1019
+ },
1020
+ {
1021
+ "cell_type": "code",
1022
+ "source": [
1023
+ "\n",
1024
+ "# Now lets create the torch Dataset class\n",
1025
+ "class ClassificationDataset(Dataset):\n",
1026
+ "\n",
1027
+ " def __init__(self, dataset):\n",
1028
+ " self.dataset = dataset\n",
1029
+ "\n",
1030
+ " def __len__(self):\n",
1031
+ " return len(self.dataset)\n",
1032
+ "\n",
1033
+ " def __getitem__(self, idx):\n",
1034
+ " d = self.dataset[idx]\n",
1035
+ "\n",
1036
+ " ids = torch.tensor(d['input_ids'])\n",
1037
+ " mask = torch.tensor(d['attention_mask'])\n",
1038
+ " label = torch.tensor(d['stars'])\n",
1039
+ " #label = torch.tensor(d['label'])\n",
1040
+ " return ids, mask, label\n"
1041
+ ],
1042
+ "metadata": {
1043
+ "id": "il2NccBehMG5"
1044
+ },
1045
+ "execution_count": 50,
1046
+ "outputs": []
1047
+ },
1048
+ {
1049
+ "cell_type": "code",
1050
+ "source": [
1051
+ "\n",
1052
+ "# Preparing the dataset and the Dataloader\n",
1053
+ "dataset = ClassificationDataset(tokenized_datasets)\n",
1054
+ "train_dataloader = DataLoader(dataset, shuffle=True, batch_size=8)\n"
1055
+ ],
1056
+ "metadata": {
1057
+ "id": "HathhLEjAS1E"
1058
+ },
1059
+ "execution_count": 51,
1060
+ "outputs": []
1061
+ },
1062
+ {
1063
+ "cell_type": "code",
1064
+ "source": [
1065
+ "\n",
1066
+ "# Now lets create a custom Bert model\n",
1067
+ "class CustomBert(transformers.PreTrainedModel):\n",
1068
+ " '''Custom model class\n",
1069
+ " ------------------\n",
1070
+ " Now the trick is to inherit the class not from `nn.Module` but from `transformers.PreTrainedModel`\n",
1071
+ " Also you need to pass the model config during initialisation'''\n",
1072
+ "\n",
1073
+ " def __init__(self, bert):\n",
1074
+ " super(CustomBert, self).__init__(config=BertConfig.from_pretrained('google/bert_uncased_L-2_H-128_A-2'))\n",
1075
+ " self.bert = bert\n",
1076
+ "\n",
1077
+ " self.l1 = nn.Linear(128, 1)\n",
1078
+ "\n",
1079
+ " self.do = nn.Dropout(0.1)\n",
1080
+ " self.relu = nn.ReLU()\n",
1081
+ " self.sigmoid = nn.Sigmoid()\n",
1082
+ "\n",
1083
+ " def forward(self, sent_id, mask):\n",
1084
+ " '''For simplicity I have added only one linear layer, you can create any type of network you want'''\n",
1085
+ " \n",
1086
+ " bert_out = self.bert(sent_id, attention_mask=mask)\n",
1087
+ " o = bert_out.last_hidden_state[:,0,:]\n",
1088
+ " o = self.do(o)\n",
1089
+ " o = self.relu(o)\n",
1090
+ " o = self.l1(o)\n",
1091
+ " o = self.sigmoid(o)\n",
1092
+ " return o\n",
1093
+ "\n"
1094
+ ],
1095
+ "metadata": {
1096
+ "id": "DJhL9wPMAgTC"
1097
+ },
1098
+ "execution_count": 52,
1099
+ "outputs": []
1100
+ },
1101
+ {
1102
+ "cell_type": "code",
1103
+ "source": [
1104
+ "# initialising model, loss and optimizer\n",
1105
+ "model = CustomBert(bert)\n",
1106
+ "model.to(device)\n",
1107
+ "criterion = torch.nn.BCELoss()\n",
1108
+ "optimizer = AdamW(model.parameters(), lr=5e-5)\n"
1109
+ ],
1110
+ "metadata": {
1111
+ "colab": {
1112
+ "base_uri": "https://localhost:8080/"
1113
+ },
1114
+ "id": "hpiM-RFRBHcO",
1115
+ "outputId": "2a3699b4-a263-48bd-ad53-4d66c15c3bd5"
1116
+ },
1117
+ "execution_count": 53,
1118
+ "outputs": [
1119
+ {
1120
+ "output_type": "stream",
1121
+ "name": "stderr",
1122
+ "text": [
1123
+ "/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
1124
+ " FutureWarning,\n"
1125
+ ]
1126
+ }
1127
+ ]
1128
+ },
1129
+ {
1130
+ "cell_type": "code",
1131
+ "source": [
1132
+ "\n",
1133
+ "# setting epochs, num_training_steps and the lr_scheduler\n",
1134
+ "num_epochs = 3\n",
1135
+ "num_training_steps = num_epochs * len(train_dataloader)\n",
1136
+ "lr_scheduler = get_scheduler(\n",
1137
+ " \"linear\",\n",
1138
+ " optimizer=optimizer,\n",
1139
+ " num_warmup_steps=0,\n",
1140
+ " num_training_steps=num_training_steps\n",
1141
+ ")\n"
1142
+ ],
1143
+ "metadata": {
1144
+ "id": "A60Axe6LlxAH"
1145
+ },
1146
+ "execution_count": 54,
1147
+ "outputs": []
1148
+ },
1149
+ {
1150
+ "cell_type": "code",
1151
+ "source": [
1152
+ "\n",
1153
+ "# training loop\n",
1154
+ "model.train()\n",
1155
+ "for epoch in tqdm.tqdm(range(num_epochs)):\n",
1156
+ " for batch in train_dataloader:\n",
1157
+ " ids, masks, labels = batch\n",
1158
+ " labels = labels.type(torch.float32)\n",
1159
+ " o = model(ids.to(device), masks.to(device))\n",
1160
+ " loss = criterion(torch.squeeze(o), labels.to(device))\n",
1161
+ " loss.backward()\n",
1162
+ "\n",
1163
+ " optimizer.step()\n",
1164
+ " lr_scheduler.step()\n",
1165
+ " optimizer.zero_grad()\n",
1166
+ "\n",
1167
+ "# save the tokenizer and the model in the `/content/drive/MyDrive/Models/amazon_reviews` directory on Google Drive\n",
1168
+ "tokenizer.save_pretrained(\"/content/drive/MyDrive/Models/amazon_reviews\")\n",
1169
+ "model.save_pretrained(\"/content/drive/MyDrive/Models/amazon_reviews\", push_to_hub=False)"
1170
+ ],
1171
+ "metadata": {
1172
+ "colab": {
1173
+ "base_uri": "https://localhost:8080/"
1174
+ },
1175
+ "id": "wh0I7w0NBkWQ",
1176
+ "outputId": "ed82ef5d-c58c-40c6-e6e9-4bb619604835"
1177
+ },
1178
+ "execution_count": 55,
1179
+ "outputs": [
1180
+ {
1181
+ "output_type": "stream",
1182
+ "name": "stderr",
1183
+ "text": [
1184
+ "\n",
1185
+ " 0%| | 0/3 [00:00<?, ?it/s]\u001b[A\n",
1186
+ " 33%|███▎ | 1/3 [00:17<00:34, 17.41s/it]\u001b[A\n",
1187
+ " 67%|██████▋ | 2/3 [00:34<00:17, 17.27s/it]\u001b[A\n",
1188
+ "100%|██████████| 3/3 [00:51<00:00, 17.23s/it]\n"
1189
+ ]
1190
+ }
1191
+ ]
1192
+ },
1193
+ {
1194
+ "cell_type": "code",
1195
+ "source": [
1196
+ "from transformers import pipeline\n",
1197
+ "classifier = pipeline('text-classification', model='luisu0124/Amazon_review')"
1198
+ ],
1199
+ "metadata": {
1200
+ "colab": {
1201
+ "base_uri": "https://localhost:8080/"
1202
+ },
1203
+ "id": "VuGgkDgv91m3",
1204
+ "outputId": "0016e442-0b31-4da4-f00a-a7233a096963"
1205
+ },
1206
+ "execution_count": 61,
1207
+ "outputs": [
1208
+ {
1209
+ "output_type": "stream",
1210
+ "name": "stderr",
1211
+ "text": [
1212
+ "Some weights of the model checkpoint at luisu0124/Amazon_review were not used when initializing BertForSequenceClassification: ['l1.weight', 'l1.bias']\n",
1213
+ "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
1214
+ "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
1215
+ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at luisu0124/Amazon_review and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
1216
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
1217
+ ]
1218
+ }
1219
+ ]
1220
+ },
1221
+ {
1222
+ "cell_type": "code",
1223
+ "source": [
1224
+ "classifier(\"Esta review es muy buena\")"
1225
+ ],
1226
+ "metadata": {
1227
+ "id": "wwaHAQLHoF7J",
1228
+ "colab": {
1229
+ "base_uri": "https://localhost:8080/"
1230
+ },
1231
+ "outputId": "09d3da72-2202-4787-9435-d50226f92337"
1232
+ },
1233
+ "execution_count": 62,
1234
+ "outputs": [
1235
+ {
1236
+ "output_type": "execute_result",
1237
+ "data": {
1238
+ "text/plain": [
1239
+ "[{'label': 'POSITIVE', 'score': 0.5269547700881958}]"
1240
+ ]
1241
+ },
1242
+ "metadata": {},
1243
+ "execution_count": 62
1244
+ }
1245
+ ]
1246
+ },
1247
+ {
1248
+ "cell_type": "code",
1249
+ "source": [
1250
+ "classifier(\"Este producto es bueno pero a su vez es malo\")"
1251
+ ],
1252
+ "metadata": {
1253
+ "id": "bZxF-PJRoIrt",
1254
+ "colab": {
1255
+ "base_uri": "https://localhost:8080/"
1256
+ },
1257
+ "outputId": "223523e5-590a-49f0-ef18-46a85697b6de"
1258
+ },
1259
+ "execution_count": 58,
1260
+ "outputs": [
1261
+ {
1262
+ "output_type": "execute_result",
1263
+ "data": {
1264
+ "text/plain": [
1265
+ "[{'label': 'NEGATIVE', 'score': 0.5181595683097839}]"
1266
+ ]
1267
+ },
1268
+ "metadata": {},
1269
+ "execution_count": 58
1270
+ }
1271
+ ]
1272
+ },
1273
+ {
1274
+ "cell_type": "code",
1275
+ "source": [
1276
+ "classifier(\"Excelente justo que buscaba\")"
1277
+ ],
1278
+ "metadata": {
1279
+ "id": "CehHU_mVoR2V",
1280
+ "colab": {
1281
+ "base_uri": "https://localhost:8080/"
1282
+ },
1283
+ "outputId": "c3eaba90-87b5-4068-b5d3-04c2e2c27ca2"
1284
+ },
1285
+ "execution_count": 59,
1286
+ "outputs": [
1287
+ {
1288
+ "output_type": "execute_result",
1289
+ "data": {
1290
+ "text/plain": [
1291
+ "[{'label': 'NEGATIVE', 'score': 0.5213820338249207}]"
1292
+ ]
1293
+ },
1294
+ "metadata": {},
1295
+ "execution_count": 59
1296
+ }
1297
+ ]
1298
+ },
1299
+ {
1300
+ "cell_type": "code",
1301
+ "source": [
1302
+ "classifier(\"odio\")"
1303
+ ],
1304
+ "metadata": {
1305
+ "id": "JWrTYblBoa3k",
1306
+ "colab": {
1307
+ "base_uri": "https://localhost:8080/"
1308
+ },
1309
+ "outputId": "7f63feac-c7e9-4185-ecd9-911ade565cc5"
1310
+ },
1311
+ "execution_count": 60,
1312
+ "outputs": [
1313
+ {
1314
+ "output_type": "execute_result",
1315
+ "data": {
1316
+ "text/plain": [
1317
+ "[{'label': 'NEGATIVE', 'score': 0.5219336152076721}]"
1318
+ ]
1319
+ },
1320
+ "metadata": {},
1321
+ "execution_count": 60
1322
+ }
1323
+ ]
1324
+ }
1325
+ ]
1326
+ }