Spaces:
Sleeping
Sleeping
go back to mdeberta and increase sample size
Browse files
app.py
CHANGED
@@ -43,11 +43,12 @@ dimension = len(id2label)
|
|
43 |
st.write("dimension", dimension)
|
44 |
|
45 |
student_model_config = teacher_model.config
|
46 |
-
student_model_config.num_attention_heads =
|
47 |
-
student_model_config.num_hidden_layers =
|
48 |
student_model = DebertaV2ForTokenClassification.from_pretrained(
|
49 |
-
"microsoft/
|
50 |
-
config=student_model_config
|
|
|
51 |
print(student_model)
|
52 |
print_trainable_parameters(student_model)
|
53 |
|
@@ -174,7 +175,7 @@ def distillation_loss(student_logits, teacher_logits, true_labels, temperature,
|
|
174 |
# hyperparameters
|
175 |
batch_size = 32
|
176 |
lr = 1e-4
|
177 |
-
num_epochs =
|
178 |
temperature = 2.0
|
179 |
alpha = 0.5
|
180 |
|
|
|
43 |
st.write("dimension", dimension)
|
44 |
|
45 |
student_model_config = teacher_model.config
|
46 |
+
student_model_config.num_attention_heads = 8
|
47 |
+
student_model_config.num_hidden_layers = 6
|
48 |
student_model = DebertaV2ForTokenClassification.from_pretrained(
|
49 |
+
"microsoft/mdeberta-v3-base",
|
50 |
+
config=student_model_config)
|
51 |
+
# ignore_mismatched_sizes=True)
|
52 |
print(student_model)
|
53 |
print_trainable_parameters(student_model)
|
54 |
|
|
|
175 |
# hyperparameters
|
176 |
batch_size = 32
|
177 |
lr = 1e-4
|
178 |
+
num_epochs = 30
|
179 |
temperature = 2.0
|
180 |
alpha = 0.5
|
181 |
|