Rodrigo1771 committed on
Commit
ce338e3
·
verified ·
1 Parent(s): d150a88

End of training

Browse files
README.md CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
5
  tags:
 
6
  - generated_from_trainer
7
  datasets:
8
- - symptemist-8-ner
9
  metrics:
10
  - precision
11
  - recall
@@ -18,24 +19,24 @@ model-index:
18
  name: Token Classification
19
  type: token-classification
20
  dataset:
21
- name: symptemist-8-ner
22
- type: symptemist-8-ner
23
  config: SympTEMIST NER
24
  split: validation
25
  args: SympTEMIST NER
26
  metrics:
27
  - name: Precision
28
  type: precision
29
- value: 0.6711271230056614
30
  - name: Recall
31
  type: recall
32
- value: 0.7137383689107827
33
  - name: F1
34
  type: f1
35
- value: 0.6917771883289126
36
  - name: Accuracy
37
  type: accuracy
38
- value: 0.9491963168532838
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -43,13 +44,13 @@ should probably proofread and complete it, then remove this comment. -->
43
 
44
  # output
45
 
46
- This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the symptemist-8-ner dataset.
47
  It achieves the following results on the evaluation set:
48
- - Loss: 0.3125
49
- - Precision: 0.6711
50
- - Recall: 0.7137
51
- - F1: 0.6918
52
- - Accuracy: 0.9492
53
 
54
  ## Model description
55
 
 
3
  license: apache-2.0
4
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
5
  tags:
6
+ - token-classification
7
  - generated_from_trainer
8
  datasets:
9
+ - Rodrigo1771/symptemist-8-ner
10
  metrics:
11
  - precision
12
  - recall
 
19
  name: Token Classification
20
  type: token-classification
21
  dataset:
22
+ name: Rodrigo1771/symptemist-8-ner
23
+ type: Rodrigo1771/symptemist-8-ner
24
  config: SympTEMIST NER
25
  split: validation
26
  args: SympTEMIST NER
27
  metrics:
28
  - name: Precision
29
  type: precision
30
+ value: 0.6832101372756072
31
  - name: Recall
32
  type: recall
33
+ value: 0.7082649151614668
34
  - name: F1
35
  type: f1
36
+ value: 0.6955119591507659
37
  - name: Accuracy
38
  type: accuracy
39
+ value: 0.9498058968847252
40
  ---
41
 
42
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
44
 
45
  # output
46
 
47
+ This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/symptemist-8-ner dataset.
48
  It achieves the following results on the evaluation set:
49
+ - Loss: 0.3003
50
+ - Precision: 0.6832
51
+ - Recall: 0.7083
52
+ - F1: 0.6955
53
+ - Accuracy: 0.9498
54
 
55
  ## Model description
56
 
all_results.json CHANGED
@@ -1,26 +1,26 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_accuracy": 0.9496936058263018,
4
- "eval_f1": 0.6985413290113451,
5
- "eval_loss": 0.3088673949241638,
6
- "eval_precision": 0.6896,
7
- "eval_recall": 0.7077175697865353,
8
- "eval_runtime": 5.5622,
9
  "eval_samples": 2519,
10
- "eval_samples_per_second": 452.882,
11
- "eval_steps_per_second": 56.633,
12
- "predict_accuracy": 0.9466344311112421,
13
- "predict_f1": 0.6937328822297406,
14
- "predict_loss": 0.33691754937171936,
15
- "predict_precision": 0.694516129032258,
16
- "predict_recall": 0.6929514000643707,
17
- "predict_runtime": 8.8756,
18
- "predict_samples_per_second": 455.971,
19
- "predict_steps_per_second": 57.01,
20
- "total_flos": 7718163558521760.0,
21
- "train_loss": 0.03527186407196906,
22
- "train_runtime": 706.0488,
23
- "train_samples": 15848,
24
- "train_samples_per_second": 224.46,
25
- "train_steps_per_second": 3.513
26
  }
 
1
  {
2
+ "epoch": 9.976133651551313,
3
+ "eval_accuracy": 0.9498058968847252,
4
+ "eval_f1": 0.6955119591507659,
5
+ "eval_loss": 0.300260066986084,
6
+ "eval_precision": 0.6832101372756072,
7
+ "eval_recall": 0.7082649151614668,
8
+ "eval_runtime": 5.3532,
9
  "eval_samples": 2519,
10
+ "eval_samples_per_second": 470.558,
11
+ "eval_steps_per_second": 58.843,
12
+ "predict_accuracy": 0.9465066682391328,
13
+ "predict_f1": 0.6871925091255355,
14
+ "predict_loss": 0.32003945112228394,
15
+ "predict_precision": 0.6778334376956794,
16
+ "predict_recall": 0.6968136466044416,
17
+ "predict_runtime": 9.1273,
18
+ "predict_samples_per_second": 443.395,
19
+ "predict_steps_per_second": 55.438,
20
+ "total_flos": 6479980841102670.0,
21
+ "train_loss": 0.03990328233493002,
22
+ "train_runtime": 605.7066,
23
+ "train_samples": 13389,
24
+ "train_samples_per_second": 221.048,
25
+ "train_steps_per_second": 3.451
26
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_accuracy": 0.9496936058263018,
4
- "eval_f1": 0.6985413290113451,
5
- "eval_loss": 0.3088673949241638,
6
- "eval_precision": 0.6896,
7
- "eval_recall": 0.7077175697865353,
8
- "eval_runtime": 5.5622,
9
  "eval_samples": 2519,
10
- "eval_samples_per_second": 452.882,
11
- "eval_steps_per_second": 56.633
12
  }
 
1
  {
2
+ "epoch": 9.976133651551313,
3
+ "eval_accuracy": 0.9498058968847252,
4
+ "eval_f1": 0.6955119591507659,
5
+ "eval_loss": 0.300260066986084,
6
+ "eval_precision": 0.6832101372756072,
7
+ "eval_recall": 0.7082649151614668,
8
+ "eval_runtime": 5.3532,
9
  "eval_samples": 2519,
10
+ "eval_samples_per_second": 470.558,
11
+ "eval_steps_per_second": 58.843
12
  }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "predict_accuracy": 0.9466344311112421,
3
- "predict_f1": 0.6937328822297406,
4
- "predict_loss": 0.33691754937171936,
5
- "predict_precision": 0.694516129032258,
6
- "predict_recall": 0.6929514000643707,
7
- "predict_runtime": 8.8756,
8
- "predict_samples_per_second": 455.971,
9
- "predict_steps_per_second": 57.01
10
  }
 
1
  {
2
+ "predict_accuracy": 0.9465066682391328,
3
+ "predict_f1": 0.6871925091255355,
4
+ "predict_loss": 0.32003945112228394,
5
+ "predict_precision": 0.6778334376956794,
6
+ "predict_recall": 0.6968136466044416,
7
+ "predict_runtime": 9.1273,
8
+ "predict_samples_per_second": 443.395,
9
+ "predict_steps_per_second": 55.438
10
  }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1725476139.a5c501872057.6105.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7c46d59ae0e93e011e758737f2c949894e6db42b7b574263d9a4123ea40093f
3
+ size 560
train.log CHANGED
@@ -860,3 +860,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
860
  {'eval_loss': 0.31253090500831604, 'eval_precision': 0.6711271230056614, 'eval_recall': 0.7137383689107827, 'eval_f1': 0.6917771883289126, 'eval_accuracy': 0.9491963168532838, 'eval_runtime': 5.6829, 'eval_samples_per_second': 443.26, 'eval_steps_per_second': 55.429, 'epoch': 9.98}
861
  {'train_runtime': 605.7066, 'train_samples_per_second': 221.048, 'train_steps_per_second': 3.451, 'train_loss': 0.03990328233493002, 'epoch': 9.98}
862
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
863
  0%| | 0/315 [00:00<?, ?it/s]
864
  3%|▎ | 8/315 [00:00<00:03, 79.67it/s]
865
  5%|▌ | 16/315 [00:00<00:03, 77.77it/s]
866
  8%|▊ | 25/315 [00:00<00:03, 79.52it/s]
867
  11%|█ | 34/315 [00:00<00:03, 81.49it/s]
868
  14%|█▎ | 43/315 [00:00<00:03, 83.27it/s]
869
  17%|█▋ | 52/315 [00:00<00:03, 81.72it/s]
870
  19%|█▉ | 61/315 [00:00<00:03, 79.31it/s]
871
  22%|██▏ | 69/315 [00:00<00:03, 78.47it/s]
872
  25%|██▍ | 78/315 [00:00<00:02, 79.72it/s]
873
  27%|██▋ | 86/315 [00:01<00:02, 78.27it/s]
874
  30%|███ | 95/315 [00:01<00:02, 79.80it/s]
875
  33%|███▎ | 103/315 [00:01<00:02, 78.32it/s]
876
  36%|███▌ | 112/315 [00:01<00:02, 80.29it/s]
877
  38%|███▊ | 121/315 [00:01<00:02, 79.55it/s]
878
  41%|████ | 129/315 [00:01<00:02, 78.75it/s]
879
  43%|████▎ | 137/315 [00:01<00:02, 78.82it/s]
880
  46%|████▋ | 146/315 [00:01<00:02, 79.39it/s]
881
  49%|████▉ | 155/315 [00:01<00:01, 81.02it/s]
882
  52%|█████▏ | 164/315 [00:02<00:01, 80.30it/s]
883
  55%|█████▍ | 173/315 [00:02<00:01, 80.93it/s]
884
  58%|█████▊ | 182/315 [00:02<00:01, 80.02it/s]
885
  61%|██████ | 191/315 [00:02<00:01, 80.51it/s]
886
  63%|██████▎ | 200/315 [00:02<00:01, 79.29it/s]
887
  66%|██████▌ | 208/315 [00:02<00:01, 78.49it/s]
888
  69%|██████▉ | 217/315 [00:02<00:01, 80.52it/s]
889
  72%|███████▏ | 226/315 [00:02<00:01, 82.31it/s]
890
  75%|███████▍ | 235/315 [00:02<00:00, 83.74it/s]
891
  77%|███████▋ | 244/315 [00:03<00:00, 81.82it/s]
892
  80%|████████ | 253/315 [00:03<00:00, 81.65it/s]
893
  83%|████████▎ | 262/315 [00:03<00:00, 82.45it/s]
894
  86%|████████▌ | 271/315 [00:03<00:00, 81.81it/s]
895
  89%|████████▉ | 280/315 [00:03<00:00, 82.55it/s]
896
  92%|█████████▏| 289/315 [00:03<00:00, 80.62it/s]
897
  95%|█████████▍| 298/315 [00:03<00:00, 80.79it/s]
898
  97%|█████████▋| 307/315 [00:03<00:00, 81.16it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
899
  0%| | 0/506 [00:00<?, ?it/s]
900
  2%|▏ | 10/506 [00:00<00:05, 94.71it/s]
901
  4%|▍ | 20/506 [00:00<00:05, 81.40it/s]
902
  6%|▌ | 29/506 [00:00<00:05, 82.81it/s]
903
  8%|▊ | 38/506 [00:00<00:05, 81.92it/s]
904
  9%|▉ | 47/506 [00:00<00:05, 82.66it/s]
905
  11%|█ | 56/506 [00:00<00:05, 83.27it/s]
906
  13%|█▎ | 65/506 [00:00<00:05, 82.44it/s]
907
  15%|█▍ | 74/506 [00:00<00:05, 83.07it/s]
908
  16%|█▋ | 83/506 [00:01<00:05, 78.19it/s]
909
  18%|█▊ | 91/506 [00:01<00:05, 78.64it/s]
910
  20%|█▉ | 99/506 [00:01<00:05, 78.19it/s]
911
  21%|██▏ | 108/506 [00:01<00:05, 78.67it/s]
912
  23%|██▎ | 117/506 [00:01<00:04, 80.48it/s]
913
  25%|██▍ | 126/506 [00:01<00:04, 78.13it/s]
914
  26%|██▋ | 134/506 [00:01<00:05, 72.17it/s]
915
  28%|██▊ | 142/506 [00:01<00:04, 74.13it/s]
916
  30%|██▉ | 151/506 [00:01<00:04, 76.31it/s]
917
  31%|███▏ | 159/506 [00:02<00:04, 73.31it/s]
918
  33%|███▎ | 167/506 [00:02<00:04, 73.99it/s]
919
  35%|███▍ | 176/506 [00:02<00:04, 75.97it/s]
920
  37%|███▋ | 185/506 [00:02<00:04, 78.12it/s]
921
  38%|███▊ | 194/506 [00:02<00:03, 78.82it/s]
922
  40%|████ | 203/506 [00:02<00:03, 80.17it/s]
923
  42%|████▏ | 212/506 [00:02<00:03, 79.80it/s]
924
  44%|████▎ | 221/506 [00:02<00:03, 79.76it/s]
925
  45%|████▌ | 229/506 [00:02<00:03, 78.81it/s]
926
  47%|████▋ | 237/506 [00:03<00:03, 78.04it/s]
927
  49%|████▊ | 246/506 [00:03<00:03, 79.77it/s]
928
  50%|█████ | 254/506 [00:03<00:03, 78.96it/s]
929
  52%|█████▏ | 263/506 [00:03<00:03, 79.11it/s]
930
  54%|█████▎ | 271/506 [00:03<00:03, 75.85it/s]
931
  55%|█████▌ | 279/506 [00:03<00:03, 74.36it/s]
932
  57%|█████▋ | 287/506 [00:03<00:03, 72.60it/s]
933
  58%|█████▊ | 295/506 [00:03<00:02, 71.46it/s]
934
  60%|█████▉ | 303/506 [00:03<00:02, 70.59it/s]
935
  61%|██████▏ | 311/506 [00:04<00:02, 70.00it/s]
936
  63%|██████▎ | 320/506 [00:04<00:02, 73.65it/s]
937
  65%|██████▌ | 329/506 [00:04<00:02, 76.95it/s]
938
  67%|██████▋ | 338/506 [00:04<00:02, 78.75it/s]
939
  69%|██████▊ | 347/506 [00:04<00:01, 80.66it/s]
940
  70%|███████ | 356/506 [00:04<00:01, 82.02it/s]
941
  72%|███████▏ | 365/506 [00:04<00:01, 81.74it/s]
942
  74%|███████▍ | 374/506 [00:04<00:01, 79.69it/s]
943
  75%|███████▌ | 382/506 [00:04<00:01, 79.52it/s]
944
  77%|███████▋ | 390/506 [00:05<00:01, 77.40it/s]
945
  79%|███████▊ | 398/506 [00:05<00:01, 71.06it/s]
946
  80%|████████ | 407/506 [00:05<00:01, 73.71it/s]
947
  82%|████████▏ | 415/506 [00:05<00:01, 74.33it/s]
948
  84%|████████▎ | 423/506 [00:05<00:01, 75.66it/s]
949
  85%|████████▌ | 432/506 [00:05<00:00, 77.90it/s]
950
  87%|████████▋ | 440/506 [00:05<00:00, 78.07it/s]
951
  89%|████████▊ | 448/506 [00:05<00:00, 78.35it/s]
952
  90%|█████████ | 457/506 [00:05<00:00, 79.22it/s]
953
  92%|█████████▏| 466/506 [00:05<00:00, 80.30it/s]
954
  94%|█████████▍| 475/506 [00:06<00:00, 81.34it/s]
955
  96%|█████████▌| 484/506 [00:06<00:00, 77.78it/s]
956
  97%|█████████▋| 492/506 [00:06<00:00, 74.43it/s]
957
  99%|█████████▉| 500/506 [00:06<00:00, 72.82it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
860
  {'eval_loss': 0.31253090500831604, 'eval_precision': 0.6711271230056614, 'eval_recall': 0.7137383689107827, 'eval_f1': 0.6917771883289126, 'eval_accuracy': 0.9491963168532838, 'eval_runtime': 5.6829, 'eval_samples_per_second': 443.26, 'eval_steps_per_second': 55.429, 'epoch': 9.98}
861
  {'train_runtime': 605.7066, 'train_samples_per_second': 221.048, 'train_steps_per_second': 3.451, 'train_loss': 0.03990328233493002, 'epoch': 9.98}
862
 
863
+ ***** train metrics *****
864
+ epoch = 9.9761
865
+ total_flos = 6034952GF
866
+ train_loss = 0.0399
867
+ train_runtime = 0:10:05.70
868
+ train_samples = 13389
869
+ train_samples_per_second = 221.048
870
+ train_steps_per_second = 3.451
871
+ 09/04/2024 18:55:34 - INFO - __main__ - *** Evaluate ***
872
+ [INFO|trainer.py:811] 2024-09-04 18:55:34,018 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
873
+ [INFO|trainer.py:3819] 2024-09-04 18:55:34,020 >>
874
+ ***** Running Evaluation *****
875
+ [INFO|trainer.py:3821] 2024-09-04 18:55:34,020 >> Num examples = 2519
876
+ [INFO|trainer.py:3824] 2024-09-04 18:55:34,020 >> Batch size = 8
877
+
878
  0%| | 0/315 [00:00<?, ?it/s]
879
  3%|▎ | 8/315 [00:00<00:03, 79.67it/s]
880
  5%|▌ | 16/315 [00:00<00:03, 77.77it/s]
881
  8%|▊ | 25/315 [00:00<00:03, 79.52it/s]
882
  11%|█ | 34/315 [00:00<00:03, 81.49it/s]
883
  14%|█▎ | 43/315 [00:00<00:03, 83.27it/s]
884
  17%|█▋ | 52/315 [00:00<00:03, 81.72it/s]
885
  19%|█▉ | 61/315 [00:00<00:03, 79.31it/s]
886
  22%|██▏ | 69/315 [00:00<00:03, 78.47it/s]
887
  25%|██▍ | 78/315 [00:00<00:02, 79.72it/s]
888
  27%|██▋ | 86/315 [00:01<00:02, 78.27it/s]
889
  30%|███ | 95/315 [00:01<00:02, 79.80it/s]
890
  33%|███▎ | 103/315 [00:01<00:02, 78.32it/s]
891
  36%|███▌ | 112/315 [00:01<00:02, 80.29it/s]
892
  38%|███▊ | 121/315 [00:01<00:02, 79.55it/s]
893
  41%|████ | 129/315 [00:01<00:02, 78.75it/s]
894
  43%|████▎ | 137/315 [00:01<00:02, 78.82it/s]
895
  46%|████▋ | 146/315 [00:01<00:02, 79.39it/s]
896
  49%|████▉ | 155/315 [00:01<00:01, 81.02it/s]
897
  52%|█████▏ | 164/315 [00:02<00:01, 80.30it/s]
898
  55%|█████▍ | 173/315 [00:02<00:01, 80.93it/s]
899
  58%|█████▊ | 182/315 [00:02<00:01, 80.02it/s]
900
  61%|██████ | 191/315 [00:02<00:01, 80.51it/s]
901
  63%|██████▎ | 200/315 [00:02<00:01, 79.29it/s]
902
  66%|██████▌ | 208/315 [00:02<00:01, 78.49it/s]
903
  69%|██████▉ | 217/315 [00:02<00:01, 80.52it/s]
904
  72%|███████▏ | 226/315 [00:02<00:01, 82.31it/s]
905
  75%|███████▍ | 235/315 [00:02<00:00, 83.74it/s]
906
  77%|███████▋ | 244/315 [00:03<00:00, 81.82it/s]
907
  80%|████████ | 253/315 [00:03<00:00, 81.65it/s]
908
  83%|████████▎ | 262/315 [00:03<00:00, 82.45it/s]
909
  86%|████████▌ | 271/315 [00:03<00:00, 81.81it/s]
910
  89%|████████▉ | 280/315 [00:03<00:00, 82.55it/s]
911
  92%|█████████▏| 289/315 [00:03<00:00, 80.62it/s]
912
  95%|█████████▍| 298/315 [00:03<00:00, 80.79it/s]
913
  97%|█████████▋| 307/315 [00:03<00:00, 81.16it/s]
914
+ ***** eval metrics *****
915
+ epoch = 9.9761
916
+ eval_accuracy = 0.9498
917
+ eval_f1 = 0.6955
918
+ eval_loss = 0.3003
919
+ eval_precision = 0.6832
920
+ eval_recall = 0.7083
921
+ eval_runtime = 0:00:05.35
922
+ eval_samples = 2519
923
+ eval_samples_per_second = 470.558
924
+ eval_steps_per_second = 58.843
925
+ 09/04/2024 18:55:39 - INFO - __main__ - *** Predict ***
926
+ [INFO|trainer.py:811] 2024-09-04 18:55:39,376 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
927
+ [INFO|trainer.py:3819] 2024-09-04 18:55:39,378 >>
928
+ ***** Running Prediction *****
929
+ [INFO|trainer.py:3821] 2024-09-04 18:55:39,378 >> Num examples = 4047
930
+ [INFO|trainer.py:3824] 2024-09-04 18:55:39,378 >> Batch size = 8
931
+
932
  0%| | 0/506 [00:00<?, ?it/s]
933
  2%|▏ | 10/506 [00:00<00:05, 94.71it/s]
934
  4%|▍ | 20/506 [00:00<00:05, 81.40it/s]
935
  6%|▌ | 29/506 [00:00<00:05, 82.81it/s]
936
  8%|▊ | 38/506 [00:00<00:05, 81.92it/s]
937
  9%|▉ | 47/506 [00:00<00:05, 82.66it/s]
938
  11%|█ | 56/506 [00:00<00:05, 83.27it/s]
939
  13%|█▎ | 65/506 [00:00<00:05, 82.44it/s]
940
  15%|█▍ | 74/506 [00:00<00:05, 83.07it/s]
941
  16%|█▋ | 83/506 [00:01<00:05, 78.19it/s]
942
  18%|█▊ | 91/506 [00:01<00:05, 78.64it/s]
943
  20%|█▉ | 99/506 [00:01<00:05, 78.19it/s]
944
  21%|██▏ | 108/506 [00:01<00:05, 78.67it/s]
945
  23%|██▎ | 117/506 [00:01<00:04, 80.48it/s]
946
  25%|██▍ | 126/506 [00:01<00:04, 78.13it/s]
947
  26%|██▋ | 134/506 [00:01<00:05, 72.17it/s]
948
  28%|██▊ | 142/506 [00:01<00:04, 74.13it/s]
949
  30%|██▉ | 151/506 [00:01<00:04, 76.31it/s]
950
  31%|███▏ | 159/506 [00:02<00:04, 73.31it/s]
951
  33%|███▎ | 167/506 [00:02<00:04, 73.99it/s]
952
  35%|███▍ | 176/506 [00:02<00:04, 75.97it/s]
953
  37%|███▋ | 185/506 [00:02<00:04, 78.12it/s]
954
  38%|███▊ | 194/506 [00:02<00:03, 78.82it/s]
955
  40%|████ | 203/506 [00:02<00:03, 80.17it/s]
956
  42%|████▏ | 212/506 [00:02<00:03, 79.80it/s]
957
  44%|████▎ | 221/506 [00:02<00:03, 79.76it/s]
958
  45%|████▌ | 229/506 [00:02<00:03, 78.81it/s]
959
  47%|████▋ | 237/506 [00:03<00:03, 78.04it/s]
960
  49%|████▊ | 246/506 [00:03<00:03, 79.77it/s]
961
  50%|█████ | 254/506 [00:03<00:03, 78.96it/s]
962
  52%|█████▏ | 263/506 [00:03<00:03, 79.11it/s]
963
  54%|█████▎ | 271/506 [00:03<00:03, 75.85it/s]
964
  55%|█████▌ | 279/506 [00:03<00:03, 74.36it/s]
965
  57%|█████▋ | 287/506 [00:03<00:03, 72.60it/s]
966
  58%|█████▊ | 295/506 [00:03<00:02, 71.46it/s]
967
  60%|█████▉ | 303/506 [00:03<00:02, 70.59it/s]
968
  61%|██████▏ | 311/506 [00:04<00:02, 70.00it/s]
969
  63%|██████▎ | 320/506 [00:04<00:02, 73.65it/s]
970
  65%|██████▌ | 329/506 [00:04<00:02, 76.95it/s]
971
  67%|██████▋ | 338/506 [00:04<00:02, 78.75it/s]
972
  69%|██████▊ | 347/506 [00:04<00:01, 80.66it/s]
973
  70%|███████ | 356/506 [00:04<00:01, 82.02it/s]
974
  72%|███████▏ | 365/506 [00:04<00:01, 81.74it/s]
975
  74%|███████▍ | 374/506 [00:04<00:01, 79.69it/s]
976
  75%|███████▌ | 382/506 [00:04<00:01, 79.52it/s]
977
  77%|███████▋ | 390/506 [00:05<00:01, 77.40it/s]
978
  79%|███████▊ | 398/506 [00:05<00:01, 71.06it/s]
979
  80%|████████ | 407/506 [00:05<00:01, 73.71it/s]
980
  82%|████████▏ | 415/506 [00:05<00:01, 74.33it/s]
981
  84%|████████▎ | 423/506 [00:05<00:01, 75.66it/s]
982
  85%|████████▌ | 432/506 [00:05<00:00, 77.90it/s]
983
  87%|████████▋ | 440/506 [00:05<00:00, 78.07it/s]
984
  89%|████████▊ | 448/506 [00:05<00:00, 78.35it/s]
985
  90%|█████████ | 457/506 [00:05<00:00, 79.22it/s]
986
  92%|█████████▏| 466/506 [00:05<00:00, 80.30it/s]
987
  94%|█████████▍| 475/506 [00:06<00:00, 81.34it/s]
988
  96%|█████████▌| 484/506 [00:06<00:00, 77.78it/s]
989
  97%|█████████▋| 492/506 [00:06<00:00, 74.43it/s]
990
  99%|█████████▉| 500/506 [00:06<00:00, 72.82it/s]
991
+ [INFO|trainer.py:3503] 2024-09-04 18:55:48,667 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
992
+ [INFO|configuration_utils.py:472] 2024-09-04 18:55:48,669 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
993
+ [INFO|modeling_utils.py:2799] 2024-09-04 18:55:50,042 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
994
+ [INFO|tokenization_utils_base.py:2684] 2024-09-04 18:55:50,043 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
995
+ [INFO|tokenization_utils_base.py:2693] 2024-09-04 18:55:50,043 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
996
+ ***** predict metrics *****
997
+ predict_accuracy = 0.9465
998
+ predict_f1 = 0.6872
999
+ predict_loss = 0.32
1000
+ predict_precision = 0.6778
1001
+ predict_recall = 0.6968
1002
+ predict_runtime = 0:00:09.12
1003
+ predict_samples_per_second = 443.395
1004
+ predict_steps_per_second = 55.438
1005
+
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 10.0,
3
- "total_flos": 7718163558521760.0,
4
- "train_loss": 0.03527186407196906,
5
- "train_runtime": 706.0488,
6
- "train_samples": 15848,
7
- "train_samples_per_second": 224.46,
8
- "train_steps_per_second": 3.513
9
  }
 
1
  {
2
+ "epoch": 9.976133651551313,
3
+ "total_flos": 6479980841102670.0,
4
+ "train_loss": 0.03990328233493002,
5
+ "train_runtime": 605.7066,
6
+ "train_samples": 13389,
7
+ "train_samples_per_second": 221.048,
8
+ "train_steps_per_second": 3.451
9
  }
trainer_state.json CHANGED
@@ -1,173 +1,173 @@
1
  {
2
- "best_metric": 0.6985413290113451,
3
- "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1736",
4
- "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 2480,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 1.0,
13
- "eval_accuracy": 0.9477686162533286,
14
- "eval_f1": 0.63506625891947,
15
- "eval_loss": 0.16486208140850067,
16
- "eval_precision": 0.5941821649976157,
17
- "eval_recall": 0.6819923371647509,
18
- "eval_runtime": 5.5136,
19
- "eval_samples_per_second": 456.871,
20
- "eval_steps_per_second": 57.132,
21
- "step": 248
22
  },
23
  {
24
  "epoch": 2.0,
25
- "eval_accuracy": 0.9476402836151304,
26
- "eval_f1": 0.6630581867388362,
27
- "eval_loss": 0.18148483335971832,
28
- "eval_precision": 0.6557815845824411,
29
- "eval_recall": 0.6704980842911877,
30
- "eval_runtime": 5.4181,
31
- "eval_samples_per_second": 464.925,
32
- "eval_steps_per_second": 58.139,
33
- "step": 496
34
  },
35
  {
36
- "epoch": 2.0161290322580645,
37
- "grad_norm": 0.6250831484794617,
38
- "learning_rate": 3.991935483870968e-05,
39
- "loss": 0.134,
40
  "step": 500
41
  },
42
  {
43
- "epoch": 3.0,
44
- "eval_accuracy": 0.9491802752735089,
45
- "eval_f1": 0.6810897435897436,
46
- "eval_loss": 0.2111387550830841,
47
- "eval_precision": 0.6651017214397497,
48
  "eval_recall": 0.6978653530377669,
49
- "eval_runtime": 5.4844,
50
- "eval_samples_per_second": 459.302,
51
- "eval_steps_per_second": 57.435,
52
- "step": 744
53
  },
54
  {
55
  "epoch": 4.0,
56
- "eval_accuracy": 0.9488434020982386,
57
- "eval_f1": 0.6900026518164943,
58
- "eval_loss": 0.25230270624160767,
59
- "eval_precision": 0.6692386831275721,
60
- "eval_recall": 0.7120963327859879,
61
- "eval_runtime": 5.4481,
62
- "eval_samples_per_second": 462.36,
63
- "eval_steps_per_second": 57.818,
64
- "step": 992
65
  },
66
  {
67
- "epoch": 4.032258064516129,
68
- "grad_norm": 0.7998089790344238,
69
- "learning_rate": 2.9838709677419357e-05,
70
- "loss": 0.026,
71
  "step": 1000
72
  },
73
  {
74
- "epoch": 5.0,
75
- "eval_accuracy": 0.9490840257948603,
76
- "eval_f1": 0.6847083552285864,
77
- "eval_loss": 0.27709877490997314,
78
- "eval_precision": 0.6584133400707428,
79
- "eval_recall": 0.7131910235358512,
80
- "eval_runtime": 5.6532,
81
- "eval_samples_per_second": 445.585,
82
- "eval_steps_per_second": 55.72,
83
- "step": 1240
84
  },
85
  {
86
  "epoch": 6.0,
87
- "eval_accuracy": 0.9486348615611665,
88
- "eval_f1": 0.6907651715039579,
89
- "eval_loss": 0.2968369126319885,
90
- "eval_precision": 0.6668364747834946,
91
- "eval_recall": 0.7164750957854407,
92
- "eval_runtime": 5.4549,
93
- "eval_samples_per_second": 461.787,
94
- "eval_steps_per_second": 57.746,
95
- "step": 1488
96
  },
97
  {
98
- "epoch": 6.048387096774194,
99
- "grad_norm": 0.15673314034938812,
100
- "learning_rate": 1.975806451612903e-05,
101
- "loss": 0.0084,
102
- "step": 1500
 
 
 
 
 
103
  },
104
  {
105
- "epoch": 7.0,
106
- "eval_accuracy": 0.9496936058263018,
107
- "eval_f1": 0.6985413290113451,
108
- "eval_loss": 0.3088673949241638,
109
- "eval_precision": 0.6896,
110
- "eval_recall": 0.7077175697865353,
111
- "eval_runtime": 5.5771,
112
- "eval_samples_per_second": 451.669,
113
- "eval_steps_per_second": 56.481,
114
- "step": 1736
115
  },
116
  {
117
  "epoch": 8.0,
118
- "eval_accuracy": 0.9498861047835991,
119
- "eval_f1": 0.6946236559139785,
120
- "eval_loss": 0.31877079606056213,
121
- "eval_precision": 0.6825145272054939,
122
- "eval_recall": 0.7071702244116037,
123
- "eval_runtime": 5.3015,
124
- "eval_samples_per_second": 475.15,
125
- "eval_steps_per_second": 59.417,
126
- "step": 1984
127
  },
128
  {
129
- "epoch": 8.064516129032258,
130
- "grad_norm": 0.22283445298671722,
131
- "learning_rate": 9.67741935483871e-06,
132
- "loss": 0.0042,
133
- "step": 2000
 
 
 
 
 
134
  },
135
  {
136
- "epoch": 9.0,
137
- "eval_accuracy": 0.9494529821296801,
138
- "eval_f1": 0.6979722518676629,
139
- "eval_loss": 0.3295721411705017,
140
- "eval_precision": 0.6808953669963561,
141
- "eval_recall": 0.715927750410509,
142
- "eval_runtime": 5.4512,
143
- "eval_samples_per_second": 462.102,
144
- "eval_steps_per_second": 57.786,
145
- "step": 2232
146
  },
147
  {
148
- "epoch": 10.0,
149
- "eval_accuracy": 0.9498861047835991,
150
- "eval_f1": 0.6985058697972252,
151
- "eval_loss": 0.33284899592399597,
152
- "eval_precision": 0.6814159292035398,
153
- "eval_recall": 0.7164750957854407,
154
- "eval_runtime": 5.5975,
155
- "eval_samples_per_second": 450.025,
156
- "eval_steps_per_second": 56.276,
157
- "step": 2480
158
  },
159
  {
160
- "epoch": 10.0,
161
- "step": 2480,
162
- "total_flos": 7718163558521760.0,
163
- "train_loss": 0.03527186407196906,
164
- "train_runtime": 706.0488,
165
- "train_samples_per_second": 224.46,
166
- "train_steps_per_second": 3.513
167
  }
168
  ],
169
  "logging_steps": 500,
170
- "max_steps": 2480,
171
  "num_input_tokens_seen": 0,
172
  "num_train_epochs": 10,
173
  "save_steps": 500,
@@ -183,7 +183,7 @@
183
  "attributes": {}
184
  }
185
  },
186
- "total_flos": 7718163558521760.0,
187
  "train_batch_size": 32,
188
  "trial_name": null,
189
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6955119591507659,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1676",
4
+ "epoch": 9.976133651551313,
5
  "eval_steps": 500,
6
+ "global_step": 2090,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.9976133651551312,
13
+ "eval_accuracy": 0.9457473772017069,
14
+ "eval_f1": 0.6225502672435734,
15
+ "eval_loss": 0.15105590224266052,
16
+ "eval_precision": 0.5818268315889629,
17
+ "eval_recall": 0.6694033935413246,
18
+ "eval_runtime": 5.6572,
19
+ "eval_samples_per_second": 445.271,
20
+ "eval_steps_per_second": 55.681,
21
+ "step": 209
22
  },
23
  {
24
  "epoch": 2.0,
25
+ "eval_accuracy": 0.9464852898713465,
26
+ "eval_f1": 0.653414882772681,
27
+ "eval_loss": 0.1794203370809555,
28
+ "eval_precision": 0.6113495469718646,
29
+ "eval_recall": 0.7016967706622879,
30
+ "eval_runtime": 5.5563,
31
+ "eval_samples_per_second": 453.36,
32
+ "eval_steps_per_second": 56.692,
33
+ "step": 419
34
  },
35
  {
36
+ "epoch": 2.386634844868735,
37
+ "grad_norm": 1.321442723274231,
38
+ "learning_rate": 3.8038277511961725e-05,
39
+ "loss": 0.1282,
40
  "step": 500
41
  },
42
  {
43
+ "epoch": 2.9976133651551313,
44
+ "eval_accuracy": 0.9470146620039142,
45
+ "eval_f1": 0.6769312450225644,
46
+ "eval_loss": 0.21023894846439362,
47
+ "eval_precision": 0.6572164948453608,
48
  "eval_recall": 0.6978653530377669,
49
+ "eval_runtime": 5.4361,
50
+ "eval_samples_per_second": 463.385,
51
+ "eval_steps_per_second": 57.946,
52
+ "step": 628
53
  },
54
  {
55
  "epoch": 4.0,
56
+ "eval_accuracy": 0.9489075684173377,
57
+ "eval_f1": 0.6789039638201649,
58
+ "eval_loss": 0.23289668560028076,
59
+ "eval_precision": 0.660455486542443,
60
+ "eval_recall": 0.6984126984126984,
61
+ "eval_runtime": 5.4098,
62
+ "eval_samples_per_second": 465.634,
63
+ "eval_steps_per_second": 58.227,
64
+ "step": 838
65
  },
66
  {
67
+ "epoch": 4.77326968973747,
68
+ "grad_norm": 0.4721558690071106,
69
+ "learning_rate": 2.6076555023923443e-05,
70
+ "loss": 0.0255,
71
  "step": 1000
72
  },
73
  {
74
+ "epoch": 4.997613365155131,
75
+ "eval_accuracy": 0.9490679842150855,
76
+ "eval_f1": 0.6805555555555556,
77
+ "eval_loss": 0.2591019570827484,
78
+ "eval_precision": 0.6645800730307773,
79
+ "eval_recall": 0.6973180076628352,
80
+ "eval_runtime": 5.2875,
81
+ "eval_samples_per_second": 476.407,
82
+ "eval_steps_per_second": 59.575,
83
+ "step": 1047
84
  },
85
  {
86
  "epoch": 6.0,
87
+ "eval_accuracy": 0.9491481921139594,
88
+ "eval_f1": 0.6778487752928647,
89
+ "eval_loss": 0.27368706464767456,
90
+ "eval_precision": 0.6599274235355106,
91
+ "eval_recall": 0.6967706622879036,
92
+ "eval_runtime": 5.2916,
93
+ "eval_samples_per_second": 476.037,
94
+ "eval_steps_per_second": 59.528,
95
+ "step": 1257
96
  },
97
  {
98
+ "epoch": 6.997613365155131,
99
+ "eval_accuracy": 0.949244441592608,
100
+ "eval_f1": 0.6884462151394423,
101
+ "eval_loss": 0.28756192326545715,
102
+ "eval_precision": 0.6687306501547987,
103
+ "eval_recall": 0.7093596059113301,
104
+ "eval_runtime": 5.2943,
105
+ "eval_samples_per_second": 475.796,
106
+ "eval_steps_per_second": 59.498,
107
+ "step": 1466
108
  },
109
  {
110
+ "epoch": 7.159904534606206,
111
+ "grad_norm": 0.6170095205307007,
112
+ "learning_rate": 1.4114832535885167e-05,
113
+ "loss": 0.0085,
114
+ "step": 1500
 
 
 
 
 
115
  },
116
  {
117
  "epoch": 8.0,
118
+ "eval_accuracy": 0.9498058968847252,
119
+ "eval_f1": 0.6955119591507659,
120
+ "eval_loss": 0.300260066986084,
121
+ "eval_precision": 0.6832101372756072,
122
+ "eval_recall": 0.7082649151614668,
123
+ "eval_runtime": 5.5232,
124
+ "eval_samples_per_second": 456.079,
125
+ "eval_steps_per_second": 57.032,
126
+ "step": 1676
127
  },
128
  {
129
+ "epoch": 8.99761336515513,
130
+ "eval_accuracy": 0.9491802752735089,
131
+ "eval_f1": 0.6925720327782183,
132
+ "eval_loss": 0.30722716450691223,
133
+ "eval_precision": 0.6697341513292433,
134
+ "eval_recall": 0.7170224411603722,
135
+ "eval_runtime": 5.6022,
136
+ "eval_samples_per_second": 449.646,
137
+ "eval_steps_per_second": 56.228,
138
+ "step": 1885
139
  },
140
  {
141
+ "epoch": 9.54653937947494,
142
+ "grad_norm": 0.3939690589904785,
143
+ "learning_rate": 2.15311004784689e-06,
144
+ "loss": 0.004,
145
+ "step": 2000
 
 
 
 
 
146
  },
147
  {
148
+ "epoch": 9.976133651551313,
149
+ "eval_accuracy": 0.9491963168532838,
150
+ "eval_f1": 0.6917771883289126,
151
+ "eval_loss": 0.31253090500831604,
152
+ "eval_precision": 0.6711271230056614,
153
+ "eval_recall": 0.7137383689107827,
154
+ "eval_runtime": 5.6829,
155
+ "eval_samples_per_second": 443.26,
156
+ "eval_steps_per_second": 55.429,
157
+ "step": 2090
158
  },
159
  {
160
+ "epoch": 9.976133651551313,
161
+ "step": 2090,
162
+ "total_flos": 6479980841102670.0,
163
+ "train_loss": 0.03990328233493002,
164
+ "train_runtime": 605.7066,
165
+ "train_samples_per_second": 221.048,
166
+ "train_steps_per_second": 3.451
167
  }
168
  ],
169
  "logging_steps": 500,
170
+ "max_steps": 2090,
171
  "num_input_tokens_seen": 0,
172
  "num_train_epochs": 10,
173
  "save_steps": 500,
 
183
  "attributes": {}
184
  }
185
  },
186
+ "total_flos": 6479980841102670.0,
187
  "train_batch_size": 32,
188
  "trial_name": null,
189
  "trial_params": null