Spaces:
Sleeping
Sleeping
Keep 1/11 of the data (every 11th example):
Browse files
app.py
CHANGED
@@ -61,7 +61,8 @@ if torch.cuda.is_available():
|
|
61 |
# Load data.
|
62 |
raw_dataset = load_dataset("ai4privacy/pii-masking-400k", split='train')
|
63 |
raw_dataset = raw_dataset.filter(lambda example: example["language"].startswith("en"))
|
64 |
-
raw_dataset = raw_dataset.select(range(2000))
|
|
|
65 |
raw_dataset = raw_dataset.train_test_split(test_size=0.2)
|
66 |
print(raw_dataset)
|
67 |
print(raw_dataset.column_names)
|
@@ -177,7 +178,7 @@ def distillation_loss(student_logits, teacher_logits, true_labels, temperature,
|
|
177 |
# hyperparameters
|
178 |
batch_size = 32
|
179 |
lr = 1e-4
|
180 |
-
num_epochs =
|
181 |
temperature = 2.0
|
182 |
alpha = 0.5
|
183 |
|
|
|
61 |
# Load data.
|
62 |
raw_dataset = load_dataset("ai4privacy/pii-masking-400k", split='train')
|
63 |
raw_dataset = raw_dataset.filter(lambda example: example["language"].startswith("en"))
|
64 |
+
#raw_dataset = raw_dataset.select(range(2000))
|
65 |
+
raw_dataset = raw_dataset.filter(lambda example, idx: idx % 11 == 0, with_indices=True)
|
66 |
raw_dataset = raw_dataset.train_test_split(test_size=0.2)
|
67 |
print(raw_dataset)
|
68 |
print(raw_dataset.column_names)
|
|
|
178 |
# hyperparameters
|
179 |
batch_size = 32
|
180 |
lr = 1e-4
|
181 |
+
num_epochs = 50
|
182 |
temperature = 2.0
|
183 |
alpha = 0.5
|
184 |
|