Spaces:

CarolXia
/

kd-finetune

Sleeping

CarolXia committed on Dec 2, 2024

Commit

7bfd06b

1 Parent(s): fd99680

go back to mdeberta and increase sample size

Files changed (1) hide show

app.py CHANGED Viewed

@@ -43,11 +43,12 @@ dimension = len(id2label)
 st.write("dimension", dimension)
 student_model_config = teacher_model.config
-student_model_config.num_attention_heads = 6
-student_model_config.num_hidden_layers = 4
 student_model = DebertaV2ForTokenClassification.from_pretrained(
-    "microsoft/deberta-v3-small",
-    config=student_model_config, ignore_mismatched_sizes=True)
 print(student_model)
 print_trainable_parameters(student_model)
@@ -174,7 +175,7 @@ def distillation_loss(student_logits, teacher_logits, true_labels, temperature,
 # hyperparameters
 batch_size = 32
 lr = 1e-4
-num_epochs = 10
 temperature = 2.0
 alpha = 0.5

 st.write("dimension", dimension)
 student_model_config = teacher_model.config
+student_model_config.num_attention_heads = 8
+student_model_config.num_hidden_layers = 6
 student_model = DebertaV2ForTokenClassification.from_pretrained(
+    "microsoft/mdeberta-v3-base",
+    config=student_model_config)
+#    ignore_mismatched_sizes=True)
 print(student_model)
 print_trainable_parameters(student_model)
 # hyperparameters
 batch_size = 32
 lr = 1e-4
+num_epochs = 30
 temperature = 2.0
 alpha = 0.5