CarolXia committed on
Commit
54f4eaa
·
1 Parent(s): 9ae9e39

Filter the data down to 1/11 (keep every 11th English example) and reduce num_epochs from 300 to 50

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -61,7 +61,8 @@ if torch.cuda.is_available():
61
  # Load data.
62
  raw_dataset = load_dataset("ai4privacy/pii-masking-400k", split='train')
63
  raw_dataset = raw_dataset.filter(lambda example: example["language"].startswith("en"))
64
- raw_dataset = raw_dataset.select(range(2000))
 
65
  raw_dataset = raw_dataset.train_test_split(test_size=0.2)
66
  print(raw_dataset)
67
  print(raw_dataset.column_names)
@@ -177,7 +178,7 @@ def distillation_loss(student_logits, teacher_logits, true_labels, temperature,
177
  # hyperparameters
178
  batch_size = 32
179
  lr = 1e-4
180
- num_epochs = 300
181
  temperature = 2.0
182
  alpha = 0.5
183
 
 
61
  # Load data.
62
  raw_dataset = load_dataset("ai4privacy/pii-masking-400k", split='train')
63
  raw_dataset = raw_dataset.filter(lambda example: example["language"].startswith("en"))
64
+ #raw_dataset = raw_dataset.select(range(2000))
65
+ raw_dataset = raw_dataset.filter(lambda example, idx: idx % 11 == 0, with_indices=True)
66
  raw_dataset = raw_dataset.train_test_split(test_size=0.2)
67
  print(raw_dataset)
68
  print(raw_dataset.column_names)
 
178
  # hyperparameters
179
  batch_size = 32
180
  lr = 1e-4
181
+ num_epochs = 50
182
  temperature = 2.0
183
  alpha = 0.5
184