nogae committed (verified)
Commit f08df82 · 1 Parent(s): edb9033

End of training

README.md CHANGED
@@ -2,6 +2,7 @@
 language:
 - en
 license: apache-2.0
+base_model: sileod/deberta-v3-base-tasksource-nli
 tags:
 - generated_from_trainer
 metrics:
@@ -9,27 +10,19 @@ metrics:
 - precision
 - recall
 - f1
-base_model: sileod/deberta-v3-base-tasksource-nli
 model-index:
 - name: deberta-v3-bass-complex-questions_classifier
   results: []
-widget:
-- text: "Why did the company decide to enter the Latin America region?"
-  example_title: "Simple Question"
-- text: "What initiatives does the Company have to promote work-life balance among its employees? And what is the approach to developing a pipeline of future leaders?"
-  example_title: "Multi-Questions"
-- text: "How do the annual growth rates in international markets of GlobalMarket Expanders compare to those of OverseasGrowth Corp. in the market expansion services sector?"
-  example_title: "Comparative Question"
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
 # deberta-v3-bass-complex-questions_classifier
-This model is a fine-tuned version of [sileod/deberta-v3-base-tasksource-nli](https://huggingface.co/sileod/deberta-v3-base-tasksource-nli) on an unknown dataset. It is designed to classify questions into three categories: simple, multi, and compare.
 
+This model is a fine-tuned version of [sileod/deberta-v3-base-tasksource-nli](https://huggingface.co/sileod/deberta-v3-base-tasksource-nli) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.0
+- Loss: 0.0001
 - Accuracy: 1.0
 - Precision: 1.0
 - Recall: 1.0
@@ -37,19 +30,15 @@ It achieves the following results on the evaluation set:
 
 ## Model description
 
-The model is trained to classify the type of questions based on their complexity:
-- **Simple:** Contains one and ONLY one question.
-- **Multi:** Contains 2 or more questions.
-- **Compare:** Involves direct comparisons using specific, invented company names or refers to different aspects within the same company.
-
+More information needed
 
 ## Intended uses & limitations
 
-This model can be used for question classification tasks, such as organizing large datasets of questions or automating question routing in customer service systems. However, it may not generalize well to questions outside the scope of the training data, or questions in languages other than English.
+More information needed
 
 ## Training and evaluation data
 
-The training and evaluation datasets used for fine-tuning this model can be found in the "data" folder. They contain labeled questions categorized as simple, multi, and compare to facilitate training and evaluation of the model.
+More information needed
 
 ## Training procedure
 
@@ -66,16 +55,14 @@ The following hyperparameters were used during training:
 
 ### Training results
 
-The model achieves the following results on the evaluation set:
-- Loss: 0.0
-- Accuracy: 1.0
-- Precision: 1.0
-- Recall: 1.0
-- F1: 1.0
+| Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
+|:-------------:|:------:|:----:|:---------------:|:--------:|:---------:|:------:|:---:|
+| 0.0532 | 2.3585 | 500 | 0.0001 | 1.0 | 1.0 | 1.0 | 1.0 |
+
 
 ### Framework versions
 
-- Transformers 4.38.2
+- Transformers 4.41.2
 - Pytorch 2.1.1
 - Datasets 2.15.0
-- Tokenizers 0.15.2
+- Tokenizers 0.19.1
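
The commit removes the card's widget examples. For reference, a minimal usage sketch (not part of the commit) that exercises the classifier on those same three questions via the transformers pipeline; the repo id is an assumption pieced together from the commit author and the model name:

```python
# Minimal sketch; the repo id below is an assumption (commit author + model name).
from transformers import pipeline

classifier = pipeline(
    "text-classification",
    model="nogae/deberta-v3-bass-complex-questions_classifier",  # assumed repo id
)

# The three questions removed from the card's widget section above:
questions = [
    "Why did the company decide to enter the Latin America region?",
    "What initiatives does the Company have to promote work-life balance among its employees? And what is the approach to developing a pipeline of future leaders?",
    "How do the annual growth rates in international markets of GlobalMarket Expanders compare to those of OverseasGrowth Corp. in the market expansion services sector?",
]
for question in questions:
    print(classifier(question))  # e.g. [{'label': 'simple', 'score': 0.99}]
```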
all_results.json CHANGED
@@ -4,8 +4,8 @@
     "eval_F1": 1.0,
     "eval_Precision": 1.0,
     "eval_Recall": 1.0,
-    "eval_loss": 0.0,
-    "eval_runtime": 1.5241,
-    "eval_samples_per_second": 99.732,
-    "eval_steps_per_second": 12.466
+    "eval_loss": 0.00012063107715221122,
+    "eval_runtime": 2.3163,
+    "eval_samples_per_second": 91.524,
+    "eval_steps_per_second": 11.656
 }
config.json CHANGED
@@ -517,13 +517,17 @@
   "id2label": {
     "0": "multi",
     "1": "simple",
-    "2": "compare"
+    "2": "compare",
+    "3": "functional",
+    "4": "phatics"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
     "compare": 2,
+    "functional": 3,
     "multi": 0,
+    "phatics": 4,
     "simple": 1
   },
   "layer_norm_eps": 1e-07,
@@ -1053,7 +1057,7 @@
     "mmlu-4"
   ],
   "torch_dtype": "float32",
-  "transformers_version": "4.38.2",
+  "transformers_version": "4.41.2",
   "type_vocab_size": 0,
   "vocab_size": 128100
 }
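
This hunk widens the label space from three classes to five, adding functional and phatics alongside multi, simple, and compare. A minimal sketch of how the expanded id2label map decodes a prediction (repo id again assumed, as above):

```python
# Sketch of decoding logits with the expanded id2label map; the repo id is assumed.
import torch
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

repo = "nogae/deberta-v3-bass-complex-questions_classifier"  # assumed repo id
config = AutoConfig.from_pretrained(repo)
print(config.id2label)
# After this commit: {0: 'multi', 1: 'simple', 2: 'compare', 3: 'functional', 4: 'phatics'}

tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForSequenceClassification.from_pretrained(repo)

inputs = tokenizer(
    "Why did the company decide to enter the Latin America region?",
    return_tensors="pt",
)
with torch.no_grad():
    logits = model(**inputs).logits  # shape (1, 5): one score per label
print(config.id2label[logits.argmax(dim=-1).item()])
```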
eval_results.json CHANGED
@@ -4,8 +4,8 @@
     "eval_F1": 1.0,
     "eval_Precision": 1.0,
     "eval_Recall": 1.0,
-    "eval_loss": 0.0,
-    "eval_runtime": 1.5241,
-    "eval_samples_per_second": 99.732,
-    "eval_steps_per_second": 12.466
+    "eval_loss": 0.00012063107715221122,
+    "eval_runtime": 2.3163,
+    "eval_samples_per_second": 91.524,
+    "eval_steps_per_second": 11.656
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33c06471fbb9f7e92bcf178661da8f9e4390ba2e407dbc3c82c352906790466f
-size 737722356
+oid sha256:fb041a39d47f74a7a5a3e9017a757a4ea98c8c60ba2dc1e84362c490b04969f3
+size 737728508
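
The checkpoint size delta is consistent with exactly two extra rows in the classification head, matching the two new labels in config.json. A quick arithmetic check, assuming DeBERTa-v3-base's hidden size of 768 and the float32 dtype recorded in config.json:

```python
# Size delta of model.safetensors vs. two extra classifier rows (float32, hidden 768).
old_size, new_size = 737722356, 737728508
delta = new_size - old_size       # 6152 bytes
two_weight_rows = 2 * 768 * 4     # two new rows in the output projection: 6144 bytes
two_bias_entries = 2 * 4          # two new bias scalars: 8 bytes
assert delta == two_weight_rows + two_bias_entries  # 6152 == 6152
```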
runs/Jun03_13-17-09_Nogas-MacBookPro.local/events.out.tfevents.1717409830.Nogas-MacBookPro.local.49918.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfeedf8465204e37f797e5b598bc0371fa02f84570b1c6d5ccf3fd6286688c5f
+size 24123
runs/Jun03_13-17-09_Nogas-MacBookPro.local/events.out.tfevents.1717410191.Nogas-MacBookPro.local.49918.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45489a0dbdc089a95da848749e6e8ec6bd4ba9dfe28a5dd013cf375d7cdebc3a
+size 1976
runs/May23_14-54-53_NogasMacBookPro/events.out.tfevents.1716465295.NogasMacBookPro.52914.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b94c3dd58790b088e51adfc81a8c4f4335b50e03c257acbcab315f1e435b48c3
+size 23870
runs/May23_14-54-53_NogasMacBookPro/events.out.tfevents.1716465540.NogasMacBookPro.52914.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd9cbff03c5846632b6e84463cc4460988773caea11b56b6b911bd914b3383c3
+size 1032
train_results.json CHANGED
@@ -4,8 +4,8 @@
     "eval_F1": 1.0,
     "eval_Precision": 1.0,
     "eval_Recall": 1.0,
-    "eval_loss": 0.0,
-    "eval_runtime": 1.5401,
-    "eval_samples_per_second": 98.697,
-    "eval_steps_per_second": 12.337
+    "eval_loss": 0.0001206793385790661,
+    "eval_runtime": 3.4495,
+    "eval_samples_per_second": 61.459,
+    "eval_steps_per_second": 7.827
 }
trainer_state.json CHANGED
@@ -1,21 +1,40 @@
 {
-  "best_metric": null,
-  "best_model_checkpoint": null,
+  "best_metric": 0.0001206793385790661,
+  "best_model_checkpoint": "/Users/noga/Work_folder/Notebooks/LLM/classifier/new_classifier/complex_q_model/deberta-v3-bass-complex-questions_classifier/checkpoint-500",
   "epoch": 3.0,
   "eval_steps": 500,
-  "global_step": 456,
+  "global_step": 636,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
+    {
+      "epoch": 2.358490566037736,
+      "grad_norm": 0.005825825501233339,
+      "learning_rate": 1.069182389937107e-05,
+      "loss": 0.0532,
+      "step": 500
+    },
+    {
+      "epoch": 2.358490566037736,
+      "eval_Accuracy": 1.0,
+      "eval_F1": 1.0,
+      "eval_Precision": 1.0,
+      "eval_Recall": 1.0,
+      "eval_loss": 0.0001206793385790661,
+      "eval_runtime": 4.3,
+      "eval_samples_per_second": 49.303,
+      "eval_steps_per_second": 6.279,
+      "step": 500
+    },
     {
       "epoch": 3.0,
-      "step": 456,
-      "total_flos": 131122250941500.0,
-      "train_loss": 4.576031642500311e-07,
-      "train_runtime": 158.1929,
-      "train_samples_per_second": 23.041,
-      "train_steps_per_second": 2.883
+      "step": 636,
+      "total_flos": 175087193647410.0,
+      "train_loss": 0.04188158509538234,
+      "train_runtime": 276.1616,
+      "train_samples_per_second": 18.413,
+      "train_steps_per_second": 2.303
     },
     {
       "epoch": 3.0,
@@ -23,11 +42,11 @@
       "eval_F1": 1.0,
       "eval_Precision": 1.0,
       "eval_Recall": 1.0,
-      "eval_loss": 0.0,
-      "eval_runtime": 1.53,
-      "eval_samples_per_second": 99.348,
-      "eval_steps_per_second": 12.418,
-      "step": 456
+      "eval_loss": 0.0001206793385790661,
+      "eval_runtime": 3.6877,
+      "eval_samples_per_second": 57.489,
+      "eval_steps_per_second": 7.322,
+      "step": 636
     },
     {
       "epoch": 3.0,
@@ -35,11 +54,11 @@
       "eval_F1": 1.0,
       "eval_Precision": 1.0,
       "eval_Recall": 1.0,
-      "eval_loss": 0.0,
-      "eval_runtime": 1.5259,
-      "eval_samples_per_second": 99.613,
-      "eval_steps_per_second": 12.452,
-      "step": 456
+      "eval_loss": 0.0001206793385790661,
+      "eval_runtime": 3.1147,
+      "eval_samples_per_second": 68.065,
+      "eval_steps_per_second": 8.669,
+      "step": 636
     },
     {
       "epoch": 3.0,
@@ -47,19 +66,40 @@
       "eval_F1": 1.0,
       "eval_Precision": 1.0,
       "eval_Recall": 1.0,
-      "eval_loss": 0.0,
-      "eval_runtime": 1.5401,
-      "eval_samples_per_second": 98.697,
-      "eval_steps_per_second": 12.337,
-      "step": 456
+      "eval_loss": 0.0001206793385790661,
+      "eval_runtime": 3.4495,
+      "eval_samples_per_second": 61.459,
+      "eval_steps_per_second": 7.827,
+      "step": 636
     }
   ],
   "logging_steps": 500,
-  "max_steps": 456,
+  "max_steps": 636,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
-  "total_flos": 131122250941500.0,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 175087193647410.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec6c424cf30f375e05290cbeb72eb24dcaf90f1d416b4aa9439896ec9edfc099
-size 5112
+oid sha256:78ffb207fc30f1f375edfcc5b246e89aa5c93ddbee9efd8d848851fa1badbc73
+size 5304