nogae committed (verified)
Commit f08df82 · 1 Parent(s): edb9033

End of training

README.md CHANGED
@@ -2,6 +2,7 @@
 language:
 - en
 license: apache-2.0
+base_model: sileod/deberta-v3-base-tasksource-nli
 tags:
 - generated_from_trainer
 metrics:
@@ -9,27 +10,19 @@ metrics:
 - precision
 - recall
 - f1
-base_model: sileod/deberta-v3-base-tasksource-nli
 model-index:
 - name: deberta-v3-bass-complex-questions_classifier
   results: []
-widget:
-- text: "Why did the company decide to enter the Latin America region?"
-  example_title: "Simple Question"
-- text: "What initiatives does the Company have to promote work-life balance among its employees? And what is the approach to developing a pipeline of future leaders?"
-  example_title: "Multi-Questions"
-- text: "How do the annual growth rates in international markets of GlobalMarket Expanders compare to those of OverseasGrowth Corp. in the market expansion services sector?"
-  example_title: "Comparative Question"
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
 # deberta-v3-bass-complex-questions_classifier
-This model is a fine-tuned version of [sileod/deberta-v3-base-tasksource-nli](https://huggingface.co/sileod/deberta-v3-base-tasksource-nli) on an unknown dataset. It is designed to classify questions into three categories: simple, multi, and compare.
 
+This model is a fine-tuned version of [sileod/deberta-v3-base-tasksource-nli](https://huggingface.co/sileod/deberta-v3-base-tasksource-nli) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.0
+- Loss: 0.0001
 - Accuracy: 1.0
 - Precision: 1.0
 - Recall: 1.0
@@ -37,19 +30,15 @@ It achieves the following results on the evaluation set:
 
 ## Model description
 
-The model is trained to classify the type of questions based on their complexity:
-- **Simple:** Contains one and ONLY one question.
-- **Multi:** Contains 2 or more questions.
-- **Compare:** Involves direct comparisons using specific, invented company names or refers to different aspects within the same company.
-
+More information needed
 
 ## Intended uses & limitations
 
-This model can be used for question classification tasks, such as organizing large datasets of questions or automating question routing in customer service systems. However, it may not generalize well to questions outside the scope of the training data, or questions in languages other than English.
+More information needed
 
 ## Training and evaluation data
 
-The training and evaluation datasets used for fine-tuning this model can be found in the "data" folder. They contain labeled questions categorized as simple, multi, and compare to facilitate training and evaluation of the model.
+More information needed
 
 ## Training procedure
 
@@ -66,16 +55,14 @@ The following hyperparameters were used during training:
 
 ### Training results
 
-The model achieves the following results on the evaluation set:
-- Loss: 0.0
-- Accuracy: 1.0
-- Precision: 1.0
-- Recall: 1.0
-- F1: 1.0
+| Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
+|:-------------:|:------:|:----:|:---------------:|:--------:|:---------:|:------:|:---:|
+| 0.0532 | 2.3585 | 500 | 0.0001 | 1.0 | 1.0 | 1.0 | 1.0 |
+
 
 ### Framework versions
 
-- Transformers 4.38.2
+- Transformers 4.41.2
 - Pytorch 2.1.1
 - Datasets 2.15.0
-- Tokenizers 0.15.2
+- Tokenizers 0.19.1
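
The commit removes the card's widget examples. For reference, a minimal usage sketch (not part of the commit) that exercises the classifier on those same three questions via the transformers pipeline; the repo id is an assumption pieced together from the commit author and the model name:

```python
# Minimal sketch; the repo id below is an assumption (commit author + model name).
from transformers import pipeline

classifier = pipeline(
    "text-classification",
    model="nogae/deberta-v3-bass-complex-questions_classifier",  # assumed repo id
)

# The three questions removed from the card's widget section above:
questions = [
    "Why did the company decide to enter the Latin America region?",
    "What initiatives does the Company have to promote work-life balance among its employees? And what is the approach to developing a pipeline of future leaders?",
    "How do the annual growth rates in international markets of GlobalMarket Expanders compare to those of OverseasGrowth Corp. in the market expansion services sector?",
]
for question in questions:
    print(classifier(question))  # e.g. [{'label': 'simple', 'score': 0.99}]
```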
all_results.json CHANGED
@@ -4,8 +4,8 @@
     "eval_F1": 1.0,
     "eval_Precision": 1.0,
     "eval_Recall": 1.0,
-    "eval_loss": 0.0,
-    "eval_runtime": 1.5241,
-    "eval_samples_per_second": 99.732,
-    "eval_steps_per_second": 12.466
+    "eval_loss": 0.00012063107715221122,
+    "eval_runtime": 2.3163,
+    "eval_samples_per_second": 91.524,
+    "eval_steps_per_second": 11.656
 }
config.json CHANGED
@@ -517,13 +517,17 @@
   "id2label": {
     "0": "multi",
     "1": "simple",
-    "2": "compare"
+    "2": "compare",
+    "3": "functional",
+    "4": "phatics"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
     "compare": 2,
+    "functional": 3,
     "multi": 0,
+    "phatics": 4,
     "simple": 1
   },
   "layer_norm_eps": 1e-07,
@@ -1053,7 +1057,7 @@
     "mmlu-4"
   ],
   "torch_dtype": "float32",
-  "transformers_version": "4.38.2",
+  "transformers_version": "4.41.2",
   "type_vocab_size": 0,
   "vocab_size": 128100
 }
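
This hunk widens the label space from three classes to five, adding functional and phatics alongside multi, simple, and compare. A minimal sketch of how the expanded id2label map decodes a prediction (repo id again assumed, as above):

```python
# Sketch of decoding logits with the expanded id2label map; the repo id is assumed.
import torch
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

repo = "nogae/deberta-v3-bass-complex-questions_classifier"  # assumed repo id
config = AutoConfig.from_pretrained(repo)
print(config.id2label)
# After this commit: {0: 'multi', 1: 'simple', 2: 'compare', 3: 'functional', 4: 'phatics'}

tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForSequenceClassification.from_pretrained(repo)

inputs = tokenizer(
    "Why did the company decide to enter the Latin America region?",
    return_tensors="pt",
)
with torch.no_grad():
    logits = model(**inputs).logits  # shape (1, 5): one score per label
print(config.id2label[logits.argmax(dim=-1).item()])
```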
eval_results.json CHANGED
@@ -4,8 +4,8 @@
     "eval_F1": 1.0,
     "eval_Precision": 1.0,
     "eval_Recall": 1.0,
-    "eval_loss": 0.0,
-    "eval_runtime": 1.5241,
-    "eval_samples_per_second": 99.732,
-    "eval_steps_per_second": 12.466
+    "eval_loss": 0.00012063107715221122,
+    "eval_runtime": 2.3163,
+    "eval_samples_per_second": 91.524,
+    "eval_steps_per_second": 11.656
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33c06471fbb9f7e92bcf178661da8f9e4390ba2e407dbc3c82c352906790466f
-size 737722356
+oid sha256:fb041a39d47f74a7a5a3e9017a757a4ea98c8c60ba2dc1e84362c490b04969f3
+size 737728508
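
The checkpoint size delta is consistent with exactly two extra rows in the classification head, matching the two new labels in config.json. A quick arithmetic check, assuming DeBERTa-v3-base's hidden size of 768 and the float32 dtype recorded in config.json:

```python
# Size delta of model.safetensors vs. two extra classifier rows (float32, hidden 768).
old_size, new_size = 737722356, 737728508
delta = new_size - old_size       # 6152 bytes
two_weight_rows = 2 * 768 * 4     # two new rows in the output projection: 6144 bytes
two_bias_entries = 2 * 4          # two new bias scalars: 8 bytes
assert delta == two_weight_rows + two_bias_entries  # 6152 == 6152
```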
runs/Jun03_13-17-09_Nogas-MacBookPro.local/events.out.tfevents.1717409830.Nogas-MacBookPro.local.49918.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfeedf8465204e37f797e5b598bc0371fa02f84570b1c6d5ccf3fd6286688c5f
+size 24123
runs/Jun03_13-17-09_Nogas-MacBookPro.local/events.out.tfevents.1717410191.Nogas-MacBookPro.local.49918.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45489a0dbdc089a95da848749e6e8ec6bd4ba9dfe28a5dd013cf375d7cdebc3a
+size 1976
runs/May23_14-54-53_NogasMacBookPro/events.out.tfevents.1716465295.NogasMacBookPro.52914.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b94c3dd58790b088e51adfc81a8c4f4335b50e03c257acbcab315f1e435b48c3
+size 23870
runs/May23_14-54-53_NogasMacBookPro/events.out.tfevents.1716465540.NogasMacBookPro.52914.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd9cbff03c5846632b6e84463cc4460988773caea11b56b6b911bd914b3383c3
+size 1032
train_results.json CHANGED
@@ -4,8 +4,8 @@
     "eval_F1": 1.0,
     "eval_Precision": 1.0,
     "eval_Recall": 1.0,
-    "eval_loss": 0.0,
-    "eval_runtime": 1.5401,
-    "eval_samples_per_second": 98.697,
-    "eval_steps_per_second": 12.337
+    "eval_loss": 0.0001206793385790661,
+    "eval_runtime": 3.4495,
+    "eval_samples_per_second": 61.459,
+    "eval_steps_per_second": 7.827
 }
trainer_state.json CHANGED
@@ -1,21 +1,40 @@
 {
-  "best_metric": null,
-  "best_model_checkpoint": null,
+  "best_metric": 0.0001206793385790661,
+  "best_model_checkpoint": "/Users/noga/Work_folder/Notebooks/LLM/classifier/new_classifier/complex_q_model/deberta-v3-bass-complex-questions_classifier/checkpoint-500",
   "epoch": 3.0,
   "eval_steps": 500,
-  "global_step": 456,
+  "global_step": 636,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
+    {
+      "epoch": 2.358490566037736,
+      "grad_norm": 0.005825825501233339,
+      "learning_rate": 1.069182389937107e-05,
+      "loss": 0.0532,
+      "step": 500
+    },
+    {
+      "epoch": 2.358490566037736,
+      "eval_Accuracy": 1.0,
+      "eval_F1": 1.0,
+      "eval_Precision": 1.0,
+      "eval_Recall": 1.0,
+      "eval_loss": 0.0001206793385790661,
+      "eval_runtime": 4.3,
+      "eval_samples_per_second": 49.303,
+      "eval_steps_per_second": 6.279,
+      "step": 500
+    },
     {
       "epoch": 3.0,
-      "step": 456,
-      "total_flos": 131122250941500.0,
-      "train_loss": 4.576031642500311e-07,
-      "train_runtime": 158.1929,
-      "train_samples_per_second": 23.041,
-      "train_steps_per_second": 2.883
+      "step": 636,
+      "total_flos": 175087193647410.0,
+      "train_loss": 0.04188158509538234,
+      "train_runtime": 276.1616,
+      "train_samples_per_second": 18.413,
+      "train_steps_per_second": 2.303
     },
     {
       "epoch": 3.0,
@@ -23,11 +42,11 @@
       "eval_F1": 1.0,
       "eval_Precision": 1.0,
       "eval_Recall": 1.0,
-      "eval_loss": 0.0,
-      "eval_runtime": 1.53,
-      "eval_samples_per_second": 99.348,
-      "eval_steps_per_second": 12.418,
-      "step": 456
+      "eval_loss": 0.0001206793385790661,
+      "eval_runtime": 3.6877,
+      "eval_samples_per_second": 57.489,
+      "eval_steps_per_second": 7.322,
+      "step": 636
     },
     {
       "epoch": 3.0,
@@ -35,11 +54,11 @@
       "eval_F1": 1.0,
       "eval_Precision": 1.0,
       "eval_Recall": 1.0,
-      "eval_loss": 0.0,
-      "eval_runtime": 1.5259,
-      "eval_samples_per_second": 99.613,
-      "eval_steps_per_second": 12.452,
-      "step": 456
+      "eval_loss": 0.0001206793385790661,
+      "eval_runtime": 3.1147,
+      "eval_samples_per_second": 68.065,
+      "eval_steps_per_second": 8.669,
+      "step": 636
     },
     {
       "epoch": 3.0,
@@ -47,19 +66,40 @@
       "eval_F1": 1.0,
       "eval_Precision": 1.0,
       "eval_Recall": 1.0,
-      "eval_loss": 0.0,
-      "eval_runtime": 1.5401,
-      "eval_samples_per_second": 98.697,
-      "eval_steps_per_second": 12.337,
-      "step": 456
+      "eval_loss": 0.0001206793385790661,
+      "eval_runtime": 3.4495,
+      "eval_samples_per_second": 61.459,
+      "eval_steps_per_second": 7.827,
+      "step": 636
     }
   ],
   "logging_steps": 500,
-  "max_steps": 456,
+  "max_steps": 636,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
-  "total_flos": 131122250941500.0,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 175087193647410.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec6c424cf30f375e05290cbeb72eb24dcaf90f1d416b4aa9439896ec9edfc099
-size 5112
+oid sha256:78ffb207fc30f1f375edfcc5b246e89aa5c93ddbee9efd8d848851fa1badbc73
+size 5304