Commit
·
792aeca
1
Parent(s):
b67821c
Update README.md
Browse files
README.md
CHANGED
@@ -17,6 +17,7 @@ TeenyTinyLlama is a series of small foundational models trained on Portuguese.
|
|
17 |
|
18 |
This repository contains a version of [TeenyTinyLlama-162m]() fine-tuned on a translated version of the IMDB dataset.
|
19 |
|
|
|
20 |
|
21 |
```python
|
22 |
# IMDB
|
@@ -26,12 +27,7 @@ import evaluate
|
|
26 |
import numpy as np
|
27 |
from datasets import load_dataset, Dataset, DatasetDict
|
28 |
from transformers import AutoTokenizer, DataCollatorWithPadding
|
29 |
-
from transformers import AutoModelForSequenceClassification, TrainingArguments,
|
30 |
-
|
31 |
-
|
32 |
-
evaluation_strategy="epoch"
|
33 |
-
save_strategy="epoch"
|
34 |
-
hub_model_id="nicholasKluge/Teeny-tiny-llama-162m-imdb"
|
35 |
|
36 |
# Load the task
|
37 |
dataset = load_dataset("christykoh/imdb_pt")
|
@@ -72,14 +68,12 @@ training_args = TrainingArguments(
|
|
72 |
per_device_eval_batch_size=16,
|
73 |
num_train_epochs=3,
|
74 |
weight_decay=0.01,
|
75 |
-
evaluation_strategy=
|
76 |
-
save_strategy=
|
77 |
load_best_model_at_end=True,
|
78 |
-
push_to_hub=
|
79 |
-
hub_token=
|
80 |
-
|
81 |
-
hub_model_id=hub_model_id,
|
82 |
-
tf32=False,
|
83 |
)
|
84 |
|
85 |
# Define the Trainer
|
|
|
17 |
|
18 |
This repository contains a version of [TeenyTinyLlama-162m]() fine-tuned on a translated version of the IMDB dataset.
|
19 |
|
20 |
+
## Reproducing
|
21 |
|
22 |
```python
|
23 |
# IMDB
|
|
|
27 |
import numpy as np
|
28 |
from datasets import load_dataset, Dataset, DatasetDict
|
29 |
from transformers import AutoTokenizer, DataCollatorWithPadding
|
30 |
+
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
# Load the task
|
33 |
dataset = load_dataset("christykoh/imdb_pt")
|
|
|
68 |
per_device_eval_batch_size=16,
|
69 |
num_train_epochs=3,
|
70 |
weight_decay=0.01,
|
71 |
+
evaluation_strategy="epoch",
|
72 |
+
save_strategy="epoch",
|
73 |
load_best_model_at_end=True,
|
74 |
+
push_to_hub=True,
|
75 |
+
hub_token="your_token_here",
|
76 |
+
hub_model_id="username/model-name-imdb",
|
|
|
|
|
77 |
)
|
78 |
|
79 |
# Define the Trainer
|