Rodrigo1771 committed (verified)
Commit b60a1c8 · Parent: 0a2088d

End of training
README.md CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
  license: apache-2.0
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
  tags:
+ - token-classification
  - generated_from_trainer
  datasets:
- - symptemist-75-ner
+ - Rodrigo1771/symptemist-75-ner
  metrics:
  - precision
  - recall
@@ -18,24 +19,24 @@ model-index:
        name: Token Classification
        type: token-classification
      dataset:
-       name: symptemist-75-ner
-       type: symptemist-75-ner
+       name: Rodrigo1771/symptemist-75-ner
+       type: Rodrigo1771/symptemist-75-ner
        config: SympTEMIST NER
        split: validation
        args: SympTEMIST NER
      metrics:
      - name: Precision
        type: precision
-       value: 0.6814159292035398
+       value: 0.6896
      - name: Recall
        type: recall
-       value: 0.7164750957854407
+       value: 0.7077175697865353
      - name: F1
        type: f1
-       value: 0.6985058697972252
+       value: 0.6985413290113451
      - name: Accuracy
        type: accuracy
-       value: 0.9498861047835991
+       value: 0.9496936058263018
  ---

  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -43,13 +44,13 @@ should probably proofread and complete it, then remove this comment. -->

  # output

- This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the symptemist-75-ner dataset.
+ This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/symptemist-75-ner dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.3328
- - Precision: 0.6814
- - Recall: 0.7165
+ - Loss: 0.3089
+ - Precision: 0.6896
+ - Recall: 0.7077
  - F1: 0.6985
- - Accuracy: 0.9499
+ - Accuracy: 0.9497

  ## Model description

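For reference, a minimal usage sketch of the resulting checkpoint (not part of this commit). The final Hub repository id is not stated here, so the path below is taken from the training log; substitute the published repo id once the model is on the Hub.

```python
from transformers import pipeline

# Path taken from the training log below; replace with the Hub repo id once published.
model_path = "/content/dissertation/scripts/ner/output"

ner = pipeline(
    "token-classification",
    model=model_path,
    aggregation_strategy="simple",  # merge B-/I- subword predictions into entity spans
)

# Illustrative Spanish clinical sentence, not taken from the dataset.
print(ner("El paciente refiere cefalea intensa y náuseas desde hace dos días."))
```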
all_results.json ADDED
@@ -0,0 +1,26 @@
+ {
+     "epoch": 10.0,
+     "eval_accuracy": 0.9496936058263018,
+     "eval_f1": 0.6985413290113451,
+     "eval_loss": 0.3088673949241638,
+     "eval_precision": 0.6896,
+     "eval_recall": 0.7077175697865353,
+     "eval_runtime": 5.5622,
+     "eval_samples": 2519,
+     "eval_samples_per_second": 452.882,
+     "eval_steps_per_second": 56.633,
+     "predict_accuracy": 0.9466344311112421,
+     "predict_f1": 0.6937328822297406,
+     "predict_loss": 0.33691754937171936,
+     "predict_precision": 0.694516129032258,
+     "predict_recall": 0.6929514000643707,
+     "predict_runtime": 8.8756,
+     "predict_samples_per_second": 455.971,
+     "predict_steps_per_second": 57.01,
+     "total_flos": 7718163558521760.0,
+     "train_loss": 0.03527186407196906,
+     "train_runtime": 706.0488,
+     "train_samples": 15848,
+     "train_samples_per_second": 224.46,
+     "train_steps_per_second": 3.513
+ }
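The precision, recall and F1 figures above are entity-level scores, while accuracy is token-level, as typically computed with seqeval in the token-classification training script. A minimal sketch of that computation; the label sequences are illustrative and not taken from this run.

```python
import evaluate

seqeval = evaluate.load("seqeval")

# Illustrative gold and predicted BIO sequences (SympTEMIST-style SINTOMA label assumed).
references = [["O", "B-SINTOMA", "I-SINTOMA", "O"]]
predictions = [["O", "B-SINTOMA", "O", "O"]]

scores = seqeval.compute(predictions=predictions, references=references)
print(scores["overall_precision"], scores["overall_recall"],
      scores["overall_f1"], scores["overall_accuracy"])
```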
eval_results.json ADDED
@@ -0,0 +1,12 @@
+ {
+     "epoch": 10.0,
+     "eval_accuracy": 0.9496936058263018,
+     "eval_f1": 0.6985413290113451,
+     "eval_loss": 0.3088673949241638,
+     "eval_precision": 0.6896,
+     "eval_recall": 0.7077175697865353,
+     "eval_runtime": 5.5622,
+     "eval_samples": 2519,
+     "eval_samples_per_second": 452.882,
+     "eval_steps_per_second": 56.633
+ }
predict_results.json ADDED
@@ -0,0 +1,10 @@
+ {
+     "predict_accuracy": 0.9466344311112421,
+     "predict_f1": 0.6937328822297406,
+     "predict_loss": 0.33691754937171936,
+     "predict_precision": 0.694516129032258,
+     "predict_recall": 0.6929514000643707,
+     "predict_runtime": 8.8756,
+     "predict_samples_per_second": 455.971,
+     "predict_steps_per_second": 57.01
+ }
predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1725475205.a5c501872057.1590.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac1c4dd87538fa491bda1fedf86880bd54d355c94e361fe11b34dd49f825123b
+ size 560
train.log CHANGED
@@ -860,3 +860,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
  {'eval_loss': 0.33284899592399597, 'eval_precision': 0.6814159292035398, 'eval_recall': 0.7164750957854407, 'eval_f1': 0.6985058697972252, 'eval_accuracy': 0.9498861047835991, 'eval_runtime': 5.5975, 'eval_samples_per_second': 450.025, 'eval_steps_per_second': 56.276, 'epoch': 10.0}
  {'train_runtime': 706.0488, 'train_samples_per_second': 224.46, 'train_steps_per_second': 3.513, 'train_loss': 0.03527186407196906, 'epoch': 10.0}

+ ***** train metrics *****
+ epoch = 10.0
+ total_flos = 7188099GF
+ train_loss = 0.0353
+ train_runtime = 0:11:46.04
+ train_samples = 15848
+ train_samples_per_second = 224.46
+ train_steps_per_second = 3.513
+ 09/04/2024 18:39:59 - INFO - __main__ - *** Evaluate ***
+ [INFO|trainer.py:811] 2024-09-04 18:39:59,625 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
+ [INFO|trainer.py:3819] 2024-09-04 18:39:59,627 >>
+ ***** Running Evaluation *****
+ [INFO|trainer.py:3821] 2024-09-04 18:39:59,627 >> Num examples = 2519
+ [INFO|trainer.py:3824] 2024-09-04 18:39:59,627 >> Batch size = 8
+
  0%| | 0/315 [00:00<?, ?it/s]
  3%|▎ | 9/315 [00:00<00:03, 80.04it/s]
  6%|▌ | 18/315 [00:00<00:03, 81.66it/s]
  9%|▊ | 27/315 [00:00<00:03, 81.02it/s]
  11%|█▏ | 36/315 [00:00<00:03, 82.17it/s]
  14%|█▍ | 45/315 [00:00<00:03, 82.54it/s]
  17%|█▋ | 54/315 [00:00<00:03, 82.93it/s]
  20%|██ | 63/315 [00:00<00:03, 80.55it/s]
  23%|██▎ | 72/315 [00:00<00:02, 81.25it/s]
  26%|██▌ | 81/315 [00:01<00:02, 79.65it/s]
  28%|██▊ | 89/315 [00:01<00:02, 79.00it/s]
  31%|███ | 97/315 [00:01<00:02, 78.04it/s]
  34%|███▎ | 106/315 [00:01<00:02, 79.53it/s]
  37%|███▋ | 115/315 [00:01<00:02, 80.97it/s]
  39%|███▉ | 124/315 [00:01<00:02, 79.09it/s]
  42%|████▏ | 133/315 [00:01<00:02, 79.74it/s]
  45%|████▌ | 142/315 [00:01<00:02, 79.98it/s]
  48%|████▊ | 151/315 [00:01<00:02, 81.79it/s]
  51%|█████ | 160/315 [00:01<00:01, 81.15it/s]
  54%|█████▎ | 169/315 [00:02<00:01, 81.16it/s]
  57%|█████▋ | 178/315 [00:02<00:01, 81.46it/s]
  59%|█████▉ | 187/315 [00:02<00:01, 81.11it/s]
  62%|██████▏ | 196/315 [00:02<00:01, 80.61it/s]
  65%|██████▌ | 205/315 [00:02<00:01, 78.27it/s]
  68%|██████▊ | 214/315 [00:02<00:01, 80.07it/s]
  71%|███████ | 223/315 [00:02<00:01, 80.98it/s]
  74%|███████▎ | 232/315 [00:02<00:01, 82.35it/s]
  77%|███████▋ | 241/315 [00:02<00:00, 81.14it/s]
  79%|███████▉ | 250/315 [00:03<00:00, 81.51it/s]
  82%|████████▏ | 259/315 [00:03<00:00, 81.21it/s]
  85%|████████▌ | 268/315 [00:03<00:00, 81.50it/s]
  88%|████████▊ | 277/315 [00:03<00:00, 82.85it/s]
  91%|█████████ | 286/315 [00:03<00:00, 80.71it/s]
  94%|█████████▎| 295/315 [00:03<00:00, 80.77it/s]
  97%|█████████▋| 304/315 [00:03<00:00, 81.83it/s]
  99%|█████████▉| 313/315 [00:03<00:00, 82.13it/s]
+ ***** eval metrics *****
+ epoch = 10.0
+ eval_accuracy = 0.9497
+ eval_f1 = 0.6985
+ eval_loss = 0.3089
+ eval_precision = 0.6896
+ eval_recall = 0.7077
+ eval_runtime = 0:00:05.56
+ eval_samples = 2519
+ eval_samples_per_second = 452.882
+ eval_steps_per_second = 56.633
+ 09/04/2024 18:40:05 - INFO - __main__ - *** Predict ***
+ [INFO|trainer.py:811] 2024-09-04 18:40:05,192 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
+ [INFO|trainer.py:3819] 2024-09-04 18:40:05,194 >>
+ ***** Running Prediction *****
+ [INFO|trainer.py:3821] 2024-09-04 18:40:05,194 >> Num examples = 4047
+ [INFO|trainer.py:3824] 2024-09-04 18:40:05,194 >> Batch size = 8
+
  0%| | 0/506 [00:00<?, ?it/s]
  2%|▏ | 10/506 [00:00<00:05, 95.19it/s]
  4%|▍ | 20/506 [00:00<00:05, 85.84it/s]
  6%|▌ | 29/506 [00:00<00:05, 85.46it/s]
  8%|▊ | 38/506 [00:00<00:05, 83.57it/s]
  9%|▉ | 47/506 [00:00<00:05, 83.68it/s]
  11%|█ | 56/506 [00:00<00:05, 84.03it/s]
  13%|█▎ | 65/506 [00:00<00:05, 82.78it/s]
  15%|█▍ | 74/506 [00:00<00:05, 82.71it/s]
  16%|█▋ | 83/506 [00:01<00:05, 77.99it/s]
  18%|█▊ | 91/506 [00:01<00:05, 78.20it/s]
  20%|█▉ | 100/506 [00:01<00:05, 79.97it/s]
  22%|██▏ | 109/506 [00:01<00:04, 79.45it/s]
  23%|██▎ | 118/506 [00:01<00:04, 80.16it/s]
  25%|██▌ | 127/506 [00:01<00:04, 78.08it/s]
  27%|██▋ | 135/506 [00:01<00:05, 72.06it/s]
  28%|██▊ | 144/506 [00:01<00:04, 75.38it/s]
  30%|███ | 152/506 [00:01<00:04, 75.99it/s]
  32%|███▏ | 160/506 [00:02<00:04, 74.53it/s]
  33%|███▎ | 168/506 [00:02<00:04, 76.02it/s]
  35%|███▍ | 177/506 [00:02<00:04, 77.49it/s]
  37%|███▋ | 186/506 [00:02<00:04, 79.41it/s]
  39%|███▊ | 195/506 [00:02<00:03, 79.79it/s]
  40%|████ | 204/506 [00:02<00:03, 80.39it/s]
  42%|████▏ | 213/506 [00:02<00:03, 80.49it/s]
  44%|████▍ | 222/506 [00:02<00:03, 78.46it/s]
  45%|████▌ | 230/506 [00:02<00:03, 77.13it/s]
  47%|████▋ | 238/506 [00:03<00:03, 77.47it/s]
  49%|████▉ | 247/506 [00:03<00:03, 79.02it/s]
  50%|█████ | 255/506 [00:03<00:03, 78.64it/s]
  52%|█████▏ | 264/506 [00:03<00:03, 80.02it/s]
  54%|█████▍ | 273/506 [00:03<00:02, 81.15it/s]
  56%|█████▌ | 282/506 [00:03<00:02, 80.47it/s]
  58%|█████▊ | 291/506 [00:03<00:02, 80.42it/s]
  59%|█████▉ | 300/506 [00:03<00:02, 81.29it/s]
  61%|██████ | 309/506 [00:03<00:02, 80.93it/s]
  63%|██████▎ | 318/506 [00:03<00:02, 81.34it/s]
  65%|██████▍ | 327/506 [00:04<00:02, 82.42it/s]
  66%|██████▋ | 336/506 [00:04<00:02, 82.95it/s]
  68%|██████▊ | 345/506 [00:04<00:01, 83.14it/s]
  70%|██████▉ | 354/506 [00:04<00:01, 79.24it/s]
  72%|███████▏ | 362/506 [00:04<00:01, 75.32it/s]
  73%|███████▎ | 370/506 [00:04<00:01, 72.65it/s]
  75%|███████▍ | 378/506 [00:04<00:01, 71.57it/s]
  76%|███████▋ | 386/506 [00:04<00:01, 69.45it/s]
  78%|███████▊ | 393/506 [00:05<00:01, 69.06it/s]
  79%|███████▉ | 401/506 [00:05<00:01, 69.56it/s]
  81%|████████ | 409/506 [00:05<00:01, 70.60it/s]
  82%|████████▏ | 417/506 [00:05<00:01, 73.12it/s]
  84%|████████▍ | 425/506 [00:05<00:01, 74.82it/s]
  86%|████████▌ | 434/506 [00:05<00:00, 76.80it/s]
  88%|████████▊ | 443/506 [00:05<00:00, 78.43it/s]
  89%|████████▉ | 451/506 [00:05<00:00, 78.15it/s]
  91%|█████████ | 460/506 [00:05<00:00, 80.40it/s]
  93%|█████████▎| 469/506 [00:05<00:00, 81.42it/s]
  94%|█████████▍| 478/506 [00:06<00:00, 82.24it/s]
  96%|█████████▌| 487/506 [00:06<00:00, 80.66it/s]
  98%|█████████▊| 496/506 [00:06<00:00, 81.16it/s]
+ [INFO|trainer.py:3503] 2024-09-04 18:40:14,231 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
+ [INFO|configuration_utils.py:472] 2024-09-04 18:40:14,233 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
+ [INFO|modeling_utils.py:2799] 2024-09-04 18:40:15,628 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
+ [INFO|tokenization_utils_base.py:2684] 2024-09-04 18:40:15,629 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
+ [INFO|tokenization_utils_base.py:2693] 2024-09-04 18:40:15,629 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
+ ***** predict metrics *****
+ predict_accuracy = 0.9466
+ predict_f1 = 0.6937
+ predict_loss = 0.3369
+ predict_precision = 0.6945
+ predict_recall = 0.693
+ predict_runtime = 0:00:08.87
+ predict_samples_per_second = 455.971
+ predict_steps_per_second = 57.01
+
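The "***** train/eval/predict metrics *****" blocks above, and the *_results.json files added in this commit, are what the Trainer's log_metrics and save_metrics helpers emit at the end of a run_ner.py-style script. A sketch only, assuming `trainer` is an already-configured transformers Trainer and `test_dataset` is a tokenized test split; neither object is defined in this commit.

```python
# Assumed objects: `trainer` (transformers.Trainer) and `test_dataset` (tokenized split).
metrics = trainer.evaluate()
trainer.log_metrics("eval", metrics)       # prints the "***** eval metrics *****" block
trainer.save_metrics("eval", metrics)      # writes eval_results.json (and updates all_results.json)

predictions, labels, metrics = trainer.predict(test_dataset, metric_key_prefix="predict")
trainer.log_metrics("predict", metrics)    # prints the "***** predict metrics *****" block
trainer.save_metrics("predict", metrics)   # writes predict_results.json
```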
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+     "epoch": 10.0,
+     "total_flos": 7718163558521760.0,
+     "train_loss": 0.03527186407196906,
+     "train_runtime": 706.0488,
+     "train_samples": 15848,
+     "train_samples_per_second": 224.46,
+     "train_steps_per_second": 3.513
+ }
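The human-readable values in the "***** train metrics *****" block of train.log are formatted views of the raw numbers stored here: total_flos appears in "GF" (the flos value shifted right by 2^30) and train_runtime as h:mm:ss. A small sketch that reproduces the formatting from this file, assuming it sits in the working directory:

```python
import datetime
import json

# Assumes train_results.json from this commit is in the current directory.
with open("train_results.json") as f:
    raw = json.load(f)

print(f"total_flos    = {int(raw['total_flos']) >> 30}GF")                     # -> 7188099GF
print(f"train_runtime = {datetime.timedelta(seconds=raw['train_runtime'])}")   # -> 0:11:46.048800
print(f"train_loss    = {raw['train_loss']:.4f}")                              # -> 0.0353
```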
trainer_state.json ADDED
@@ -0,0 +1,190 @@
+ {
+   "best_metric": 0.6985413290113451,
+   "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1736",
+   "epoch": 10.0,
+   "eval_steps": 500,
+   "global_step": 2480,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.9477686162533286,
+       "eval_f1": 0.63506625891947,
+       "eval_loss": 0.16486208140850067,
+       "eval_precision": 0.5941821649976157,
+       "eval_recall": 0.6819923371647509,
+       "eval_runtime": 5.5136,
+       "eval_samples_per_second": 456.871,
+       "eval_steps_per_second": 57.132,
+       "step": 248
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.9476402836151304,
+       "eval_f1": 0.6630581867388362,
+       "eval_loss": 0.18148483335971832,
+       "eval_precision": 0.6557815845824411,
+       "eval_recall": 0.6704980842911877,
+       "eval_runtime": 5.4181,
+       "eval_samples_per_second": 464.925,
+       "eval_steps_per_second": 58.139,
+       "step": 496
+     },
+     {
+       "epoch": 2.0161290322580645,
+       "grad_norm": 0.6250831484794617,
+       "learning_rate": 3.991935483870968e-05,
+       "loss": 0.134,
+       "step": 500
+     },
+     {
+       "epoch": 3.0,
+       "eval_accuracy": 0.9491802752735089,
+       "eval_f1": 0.6810897435897436,
+       "eval_loss": 0.2111387550830841,
+       "eval_precision": 0.6651017214397497,
+       "eval_recall": 0.6978653530377669,
+       "eval_runtime": 5.4844,
+       "eval_samples_per_second": 459.302,
+       "eval_steps_per_second": 57.435,
+       "step": 744
+     },
+     {
+       "epoch": 4.0,
+       "eval_accuracy": 0.9488434020982386,
+       "eval_f1": 0.6900026518164943,
+       "eval_loss": 0.25230270624160767,
+       "eval_precision": 0.6692386831275721,
+       "eval_recall": 0.7120963327859879,
+       "eval_runtime": 5.4481,
+       "eval_samples_per_second": 462.36,
+       "eval_steps_per_second": 57.818,
+       "step": 992
+     },
+     {
+       "epoch": 4.032258064516129,
+       "grad_norm": 0.7998089790344238,
+       "learning_rate": 2.9838709677419357e-05,
+       "loss": 0.026,
+       "step": 1000
+     },
+     {
+       "epoch": 5.0,
+       "eval_accuracy": 0.9490840257948603,
+       "eval_f1": 0.6847083552285864,
+       "eval_loss": 0.27709877490997314,
+       "eval_precision": 0.6584133400707428,
+       "eval_recall": 0.7131910235358512,
+       "eval_runtime": 5.6532,
+       "eval_samples_per_second": 445.585,
+       "eval_steps_per_second": 55.72,
+       "step": 1240
+     },
+     {
+       "epoch": 6.0,
+       "eval_accuracy": 0.9486348615611665,
+       "eval_f1": 0.6907651715039579,
+       "eval_loss": 0.2968369126319885,
+       "eval_precision": 0.6668364747834946,
+       "eval_recall": 0.7164750957854407,
+       "eval_runtime": 5.4549,
+       "eval_samples_per_second": 461.787,
+       "eval_steps_per_second": 57.746,
+       "step": 1488
+     },
+     {
+       "epoch": 6.048387096774194,
+       "grad_norm": 0.15673314034938812,
+       "learning_rate": 1.975806451612903e-05,
+       "loss": 0.0084,
+       "step": 1500
+     },
+     {
+       "epoch": 7.0,
+       "eval_accuracy": 0.9496936058263018,
+       "eval_f1": 0.6985413290113451,
+       "eval_loss": 0.3088673949241638,
+       "eval_precision": 0.6896,
+       "eval_recall": 0.7077175697865353,
+       "eval_runtime": 5.5771,
+       "eval_samples_per_second": 451.669,
+       "eval_steps_per_second": 56.481,
+       "step": 1736
+     },
+     {
+       "epoch": 8.0,
+       "eval_accuracy": 0.9498861047835991,
+       "eval_f1": 0.6946236559139785,
+       "eval_loss": 0.31877079606056213,
+       "eval_precision": 0.6825145272054939,
+       "eval_recall": 0.7071702244116037,
+       "eval_runtime": 5.3015,
+       "eval_samples_per_second": 475.15,
+       "eval_steps_per_second": 59.417,
+       "step": 1984
+     },
+     {
+       "epoch": 8.064516129032258,
+       "grad_norm": 0.22283445298671722,
+       "learning_rate": 9.67741935483871e-06,
+       "loss": 0.0042,
+       "step": 2000
+     },
+     {
+       "epoch": 9.0,
+       "eval_accuracy": 0.9494529821296801,
+       "eval_f1": 0.6979722518676629,
+       "eval_loss": 0.3295721411705017,
+       "eval_precision": 0.6808953669963561,
+       "eval_recall": 0.715927750410509,
+       "eval_runtime": 5.4512,
+       "eval_samples_per_second": 462.102,
+       "eval_steps_per_second": 57.786,
+       "step": 2232
+     },
+     {
+       "epoch": 10.0,
+       "eval_accuracy": 0.9498861047835991,
+       "eval_f1": 0.6985058697972252,
+       "eval_loss": 0.33284899592399597,
+       "eval_precision": 0.6814159292035398,
+       "eval_recall": 0.7164750957854407,
+       "eval_runtime": 5.5975,
+       "eval_samples_per_second": 450.025,
+       "eval_steps_per_second": 56.276,
+       "step": 2480
+     },
+     {
+       "epoch": 10.0,
+       "step": 2480,
+       "total_flos": 7718163558521760.0,
+       "train_loss": 0.03527186407196906,
+       "train_runtime": 706.0488,
+       "train_samples_per_second": 224.46,
+       "train_steps_per_second": 3.513
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 2480,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 10,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 7718163558521760.0,
+   "train_batch_size": 32,
+   "trial_name": null,
+   "trial_params": null
+ }
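trainer_state.json records the per-epoch evaluation history; best_model_checkpoint shows that checkpoint-1736 (epoch 7) gave the best eval_f1, and those are the figures reported in the updated README. A small sketch for inspecting the file, assuming it is in the working directory:

```python
import json

# Assumes trainer_state.json from this commit is in the current directory.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the evaluation entries from log_history and pick the best by eval_f1
# (here: checkpoint-1736, epoch 7, F1 ≈ 0.6985).
evals = [entry for entry in state["log_history"] if "eval_f1" in entry]
best = max(evals, key=lambda entry: entry["eval_f1"])

print("best checkpoint:", state["best_model_checkpoint"])
print(f"epoch {best['epoch']:.0f}: F1={best['eval_f1']:.4f}, "
      f"P={best['eval_precision']:.4f}, R={best['eval_recall']:.4f}")
```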