Rodrigo1771 committed on
Commit
c81a169
·
verified ·
1 Parent(s): a858ec5

End of training

Browse files
README.md CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
5
  tags:
 
6
  - generated_from_trainer
7
  datasets:
8
- - drugtemist-fasttext-75-ner
9
  metrics:
10
  - precision
11
  - recall
@@ -18,24 +19,24 @@ model-index:
18
  name: Token Classification
19
  type: token-classification
20
  dataset:
21
- name: drugtemist-fasttext-75-ner
22
- type: drugtemist-fasttext-75-ner
23
  config: DrugTEMIST NER
24
  split: validation
25
  args: DrugTEMIST NER
26
  metrics:
27
  - name: Precision
28
  type: precision
29
- value: 0.9416590701914311
30
  - name: Recall
31
  type: recall
32
- value: 0.9494485294117647
33
  - name: F1
34
  type: f1
35
- value: 0.945537757437071
36
  - name: Accuracy
37
  type: accuracy
38
- value: 0.9990525491919205
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -43,12 +44,12 @@ should probably proofread and complete it, then remove this comment. -->
43
 
44
  # output
45
 
46
- This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the drugtemist-fasttext-75-ner dataset.
47
  It achieves the following results on the evaluation set:
48
- - Loss: 0.0052
49
- - Precision: 0.9417
50
- - Recall: 0.9494
51
- - F1: 0.9455
52
  - Accuracy: 0.9991
53
 
54
  ## Model description
 
3
  license: apache-2.0
4
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
5
  tags:
6
+ - token-classification
7
  - generated_from_trainer
8
  datasets:
9
+ - Rodrigo1771/drugtemist-fasttext-75-ner
10
  metrics:
11
  - precision
12
  - recall
 
19
  name: Token Classification
20
  type: token-classification
21
  dataset:
22
+ name: Rodrigo1771/drugtemist-fasttext-75-ner
23
+ type: Rodrigo1771/drugtemist-fasttext-75-ner
24
  config: DrugTEMIST NER
25
  split: validation
26
  args: DrugTEMIST NER
27
  metrics:
28
  - name: Precision
29
  type: precision
30
+ value: 0.9447963800904977
31
  - name: Recall
32
  type: recall
33
+ value: 0.9595588235294118
34
  - name: F1
35
  type: f1
36
+ value: 0.9521203830369357
37
  - name: Accuracy
38
  type: accuracy
39
+ value: 0.9991418018042759
40
  ---
41
 
42
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
44
 
45
  # output
46
 
47
+ This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/drugtemist-fasttext-75-ner dataset.
48
  It achieves the following results on the evaluation set:
49
+ - Loss: 0.0044
50
+ - Precision: 0.9448
51
+ - Recall: 0.9596
52
+ - F1: 0.9521
53
  - Accuracy: 0.9991
54
 
55
  ## Model description
all_results.json CHANGED
@@ -1,26 +1,26 @@
1
  {
2
- "epoch": 9.99288256227758,
3
- "eval_accuracy": 0.9758743323218038,
4
- "eval_f1": 0.8053395240858967,
5
- "eval_loss": 0.17113561928272247,
6
- "eval_precision": 0.7991246256622898,
7
- "eval_recall": 0.8116518483855872,
8
- "eval_runtime": 14.1813,
9
  "eval_samples": 6810,
10
- "eval_samples_per_second": 480.21,
11
- "eval_steps_per_second": 60.079,
12
- "predict_accuracy": 0.9755136059661639,
13
- "predict_f1": 0.8018404526518684,
14
- "predict_loss": 0.161112979054451,
15
- "predict_precision": 0.7862455798073406,
16
- "predict_recall": 0.818066480588683,
17
- "predict_runtime": 29.1457,
18
- "predict_samples_per_second": 501.411,
19
- "predict_steps_per_second": 62.685,
20
- "total_flos": 2.130317416831723e+16,
21
- "train_loss": 0.017706579814779112,
22
- "train_runtime": 1901.5665,
23
- "train_samples": 44938,
24
- "train_samples_per_second": 236.321,
25
- "train_steps_per_second": 3.692
26
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9991418018042759,
4
+ "eval_f1": 0.9521203830369357,
5
+ "eval_loss": 0.004440780263394117,
6
+ "eval_precision": 0.9447963800904977,
7
+ "eval_recall": 0.9595588235294118,
8
+ "eval_runtime": 14.1123,
9
  "eval_samples": 6810,
10
+ "eval_samples_per_second": 482.558,
11
+ "eval_steps_per_second": 60.373,
12
+ "predict_accuracy": 0.9987636326290384,
13
+ "predict_f1": 0.9231204070096098,
14
+ "predict_loss": 0.006668921560049057,
15
+ "predict_precision": 0.8967600219659527,
16
+ "predict_recall": 0.9510774606872452,
17
+ "predict_runtime": 28.5821,
18
+ "predict_samples_per_second": 511.3,
19
+ "predict_steps_per_second": 63.921,
20
+ "total_flos": 1.5694885147146138e+16,
21
+ "train_loss": 0.002533856062142209,
22
+ "train_runtime": 1422.7831,
23
+ "train_samples": 31229,
24
+ "train_samples_per_second": 219.492,
25
+ "train_steps_per_second": 3.43
26
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 9.99288256227758,
3
- "eval_accuracy": 0.9758743323218038,
4
- "eval_f1": 0.8053395240858967,
5
- "eval_loss": 0.17113561928272247,
6
- "eval_precision": 0.7991246256622898,
7
- "eval_recall": 0.8116518483855872,
8
- "eval_runtime": 14.1813,
9
  "eval_samples": 6810,
10
- "eval_samples_per_second": 480.21,
11
- "eval_steps_per_second": 60.079
12
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9991418018042759,
4
+ "eval_f1": 0.9521203830369357,
5
+ "eval_loss": 0.004440780263394117,
6
+ "eval_precision": 0.9447963800904977,
7
+ "eval_recall": 0.9595588235294118,
8
+ "eval_runtime": 14.1123,
9
  "eval_samples": 6810,
10
+ "eval_samples_per_second": 482.558,
11
+ "eval_steps_per_second": 60.373
12
  }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "predict_accuracy": 0.9755136059661639,
3
- "predict_f1": 0.8018404526518684,
4
- "predict_loss": 0.161112979054451,
5
- "predict_precision": 0.7862455798073406,
6
- "predict_recall": 0.818066480588683,
7
- "predict_runtime": 29.1457,
8
- "predict_samples_per_second": 501.411,
9
- "predict_steps_per_second": 62.685
10
  }
 
1
  {
2
+ "predict_accuracy": 0.9987636326290384,
3
+ "predict_f1": 0.9231204070096098,
4
+ "predict_loss": 0.006668921560049057,
5
+ "predict_precision": 0.8967600219659527,
6
+ "predict_recall": 0.9510774606872452,
7
+ "predict_runtime": 28.5821,
8
+ "predict_samples_per_second": 511.3,
9
+ "predict_steps_per_second": 63.921
10
  }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1725915390.0ada7e7d1d89.13010.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03d8cb7f565d31640d583d92b327a96be3b79169b4e94e4f8f1d5458aa42d308
3
+ size 560
train.log CHANGED
@@ -1463,3 +1463,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
1463
  {'eval_loss': 0.005196314305067062, 'eval_precision': 0.9416590701914311, 'eval_recall': 0.9494485294117647, 'eval_f1': 0.945537757437071, 'eval_accuracy': 0.9990525491919205, 'eval_runtime': 14.6459, 'eval_samples_per_second': 464.976, 'eval_steps_per_second': 58.173, 'epoch': 10.0}
1464
  {'train_runtime': 1422.7831, 'train_samples_per_second': 219.492, 'train_steps_per_second': 3.43, 'train_loss': 0.002533856062142209, 'epoch': 10.0}
1465
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1466
  0%| | 0/852 [00:00<?, ?it/s]
1467
  1%| | 10/852 [00:00<00:08, 93.57it/s]
1468
  2%|▏ | 20/852 [00:00<00:10, 80.55it/s]
1469
  3%|▎ | 29/852 [00:00<00:10, 80.58it/s]
1470
  4%|▍ | 38/852 [00:00<00:10, 81.11it/s]
1471
  6%|▌ | 47/852 [00:00<00:09, 81.65it/s]
1472
  7%|▋ | 56/852 [00:00<00:09, 83.14it/s]
1473
  8%|▊ | 65/852 [00:00<00:10, 78.65it/s]
1474
  9%|▊ | 73/852 [00:00<00:10, 77.04it/s]
1475
  10%|▉ | 81/852 [00:01<00:09, 77.70it/s]
1476
  11%|█ | 90/852 [00:01<00:09, 79.30it/s]
1477
  12%|█▏ | 98/852 [00:01<00:09, 79.14it/s]
1478
  13%|█▎ | 107/852 [00:01<00:09, 79.14it/s]
1479
  14%|█▎ | 116/852 [00:01<00:09, 80.00it/s]
1480
  15%|█▍ | 125/852 [00:01<00:08, 81.72it/s]
1481
  16%|█▌ | 134/852 [00:01<00:09, 76.77it/s]
1482
  17%|█▋ | 142/852 [00:01<00:09, 74.23it/s]
1483
  18%|█▊ | 150/852 [00:01<00:09, 75.01it/s]
1484
  19%|█▊ | 159/852 [00:02<00:08, 78.54it/s]
1485
  20%|█▉ | 168/852 [00:02<00:08, 79.78it/s]
1486
  21%|██ | 177/852 [00:02<00:08, 80.22it/s]
1487
  22%|██▏ | 186/852 [00:02<00:08, 81.84it/s]
1488
  23%|██▎ | 195/852 [00:02<00:08, 81.79it/s]
1489
  24%|██▍ | 204/852 [00:02<00:07, 82.81it/s]
1490
  25%|██▌ | 213/852 [00:02<00:07, 81.58it/s]
1491
  26%|██▌ | 222/852 [00:02<00:07, 81.05it/s]
1492
  27%|██▋ | 231/852 [00:02<00:07, 82.05it/s]
1493
  28%|██▊ | 240/852 [00:02<00:07, 81.07it/s]
1494
  29%|██▉ | 249/852 [00:03<00:07, 80.20it/s]
1495
  30%|███ | 258/852 [00:03<00:07, 81.36it/s]
1496
  31%|███▏ | 267/852 [00:03<00:07, 81.25it/s]
1497
  32%|███▏ | 276/852 [00:03<00:07, 81.52it/s]
1498
  33%|███▎ | 285/852 [00:03<00:06, 81.87it/s]
1499
  35%|███▍ | 294/852 [00:03<00:06, 80.80it/s]
1500
  36%|███▌ | 303/852 [00:03<00:07, 78.20it/s]
1501
  37%|███▋ | 312/852 [00:03<00:06, 77.55it/s]
1502
  38%|███▊ | 321/852 [00:04<00:06, 79.73it/s]
1503
  39%|███▊ | 330/852 [00:04<00:06, 79.60it/s]
1504
  40%|███▉ | 338/852 [00:04<00:06, 79.58it/s]
1505
  41%|████ | 347/852 [00:04<00:06, 79.98it/s]
1506
  42%|████▏ | 356/852 [00:04<00:06, 79.10it/s]
1507
  43%|████▎ | 365/852 [00:04<00:06, 79.97it/s]
1508
  44%|████▍ | 374/852 [00:04<00:05, 79.68it/s]
1509
  45%|████▍ | 382/852 [00:04<00:05, 78.69it/s]
1510
  46%|████▌ | 391/852 [00:04<00:05, 79.19it/s]
1511
  47%|████▋ | 399/852 [00:04<00:05, 78.98it/s]
1512
  48%|████▊ | 407/852 [00:05<00:05, 77.08it/s]
1513
  49%|████▉ | 416/852 [00:05<00:05, 78.27it/s]
1514
  50%|████▉ | 424/852 [00:05<00:05, 78.61it/s]
1515
  51%|█████ | 432/852 [00:05<00:05, 78.46it/s]
1516
  52%|█████▏ | 441/852 [00:05<00:05, 80.76it/s]
1517
  53%|█████▎ | 450/852 [00:05<00:04, 81.55it/s]
1518
  54%|█████▍ | 459/852 [00:05<00:04, 82.07it/s]
1519
  55%|█████▍ | 468/852 [00:05<00:04, 79.16it/s]
1520
  56%|█████▌ | 476/852 [00:05<00:04, 76.30it/s]
1521
  57%|█████▋ | 484/852 [00:06<00:04, 76.49it/s]
1522
  58%|█████▊ | 493/852 [00:06<00:04, 78.76it/s]
1523
  59%|█████▉ | 502/852 [00:06<00:04, 80.51it/s]
1524
  60%|█████▉ | 511/852 [00:06<00:04, 80.85it/s]
1525
  61%|██████ | 520/852 [00:06<00:04, 82.11it/s]
1526
  62%|██████▏ | 529/852 [00:06<00:04, 79.93it/s]
1527
  63%|██████▎ | 538/852 [00:06<00:03, 81.28it/s]
1528
  64%|██████▍ | 547/852 [00:06<00:03, 81.59it/s]
1529
  65%|██████▌ | 556/852 [00:06<00:03, 78.91it/s]
1530
  66%|██████▋ | 565/852 [00:07<00:03, 81.03it/s]
1531
  67%|██████▋ | 574/852 [00:07<00:03, 81.36it/s]
1532
  68%|██████▊ | 583/852 [00:07<00:03, 80.74it/s]
1533
  69%|██████▉ | 592/852 [00:07<00:03, 80.30it/s]
1534
  71%|███████ | 601/852 [00:07<00:03, 80.88it/s]
1535
  72%|███████▏ | 610/852 [00:07<00:02, 81.38it/s]
1536
  73%|███████▎ | 619/852 [00:07<00:02, 79.27it/s]
1537
  74%|███████▎ | 627/852 [00:07<00:02, 79.27it/s]
1538
  75%|███████▍ | 635/852 [00:07<00:02, 75.84it/s]
1539
  75%|███████▌ | 643/852 [00:08<00:02, 75.01it/s]
1540
  77%|███████▋ | 652/852 [00:08<00:02, 78.03it/s]
1541
  78%|███████▊ | 661/852 [00:08<00:02, 79.05it/s]
1542
  79%|███████▊ | 670/852 [00:08<00:02, 79.91it/s]
1543
  80%|███████▉ | 679/852 [00:08<00:02, 80.51it/s]
1544
  81%|████████ | 688/852 [00:08<00:02, 81.73it/s]
1545
  82%|████████▏ | 697/852 [00:08<00:01, 80.82it/s]
1546
  83%|████████▎ | 706/852 [00:08<00:01, 82.01it/s]
1547
  84%|████████▍ | 715/852 [00:08<00:01, 81.67it/s]
1548
  85%|████████▍ | 724/852 [00:09<00:01, 81.05it/s]
1549
  86%|████████▌ | 733/852 [00:09<00:01, 81.95it/s]
1550
  87%|████████▋ | 742/852 [00:09<00:01, 82.90it/s]
1551
  88%|████████▊ | 751/852 [00:09<00:01, 82.87it/s]
1552
  89%|████████▉ | 760/852 [00:09<00:01, 84.18it/s]
1553
  90%|█████████ | 769/852 [00:09<00:01, 82.73it/s]
1554
  91%|█████████▏| 778/852 [00:09<00:00, 82.09it/s]
1555
  92%|█████████▏| 787/852 [00:09<00:00, 80.10it/s]
1556
  93%|█████████▎| 796/852 [00:09<00:00, 79.31it/s]
1557
  94%|█████████▍| 805/852 [00:10<00:00, 80.66it/s]
1558
  96%|█████████▌| 814/852 [00:10<00:00, 79.72it/s]
1559
  97%|█████████▋| 823/852 [00:10<00:00, 81.12it/s]
1560
  98%|█████████▊| 832/852 [00:10<00:00, 81.37it/s]
1561
  99%|█████████▊| 841/852 [00:10<00:00, 81.30it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1562
  0%| | 0/1827 [00:00<?, ?it/s]
1563
  0%| | 9/1827 [00:00<00:22, 81.47it/s]
1564
  1%| | 18/1827 [00:00<00:24, 73.10it/s]
1565
  1%|▏ | 27/1827 [00:00<00:23, 77.38it/s]
1566
  2%|▏ | 35/1827 [00:00<00:22, 78.12it/s]
1567
  2%|▏ | 44/1827 [00:00<00:22, 79.84it/s]
1568
  3%|▎ | 53/1827 [00:00<00:22, 79.92it/s]
1569
  3%|▎ | 62/1827 [00:00<00:22, 78.68it/s]
1570
  4%|▍ | 71/1827 [00:00<00:22, 79.81it/s]
1571
  4%|▍ | 80/1827 [00:01<00:21, 82.54it/s]
1572
  5%|▍ | 89/1827 [00:01<00:21, 81.34it/s]
1573
  5%|▌ | 98/1827 [00:01<00:21, 82.29it/s]
1574
  6%|▌ | 107/1827 [00:01<00:20, 84.00it/s]
1575
  6%|▋ | 116/1827 [00:01<00:20, 82.38it/s]
1576
  7%|▋ | 125/1827 [00:01<00:20, 82.00it/s]
1577
  7%|▋ | 134/1827 [00:01<00:20, 83.17it/s]
1578
  8%|▊ | 143/1827 [00:01<00:20, 83.98it/s]
1579
  8%|▊ | 152/1827 [00:01<00:20, 83.56it/s]
1580
  9%|▉ | 161/1827 [00:01<00:20, 79.98it/s]
1581
  9%|▉ | 170/1827 [00:02<00:20, 80.90it/s]
1582
  10%|▉ | 179/1827 [00:02<00:20, 82.04it/s]
1583
  10%|█ | 188/1827 [00:02<00:19, 82.53it/s]
1584
  11%|█ | 197/1827 [00:02<00:19, 82.41it/s]
1585
  11%|█▏ | 206/1827 [00:02<00:20, 81.02it/s]
1586
  12%|█▏ | 215/1827 [00:02<00:20, 79.98it/s]
1587
  12%|█▏ | 224/1827 [00:02<00:19, 81.33it/s]
1588
  13%|█▎ | 233/1827 [00:02<00:19, 79.96it/s]
1589
  13%|█▎ | 242/1827 [00:02<00:20, 79.19it/s]
1590
  14%|█▎ | 250/1827 [00:03<00:19, 79.35it/s]
1591
  14%|█▍ | 259/1827 [00:03<00:19, 79.72it/s]
1592
  15%|█▍ | 268/1827 [00:03<00:19, 80.61it/s]
1593
  15%|█▌ | 277/1827 [00:03<00:18, 82.50it/s]
1594
  16%|█▌ | 286/1827 [00:03<00:18, 83.69it/s]
1595
  16%|█▌ | 295/1827 [00:03<00:18, 83.07it/s]
1596
  17%|█▋ | 304/1827 [00:03<00:18, 81.73it/s]
1597
  17%|█▋ | 313/1827 [00:03<00:18, 82.33it/s]
1598
  18%|█▊ | 322/1827 [00:03<00:18, 82.60it/s]
1599
  18%|█▊ | 331/1827 [00:04<00:18, 82.65it/s]
1600
  19%|█▊ | 340/1827 [00:04<00:17, 83.95it/s]
1601
  19%|█▉ | 349/1827 [00:04<00:18, 79.59it/s]
1602
  20%|█▉ | 358/1827 [00:04<00:18, 81.31it/s]
1603
  20%|██ | 367/1827 [00:04<00:17, 82.07it/s]
1604
  21%|██ | 376/1827 [00:04<00:17, 81.81it/s]
1605
  21%|██ | 385/1827 [00:04<00:17, 81.80it/s]
1606
  22%|██▏ | 394/1827 [00:04<00:17, 80.82it/s]
1607
  22%|██▏ | 403/1827 [00:04<00:17, 82.60it/s]
1608
  23%|██▎ | 412/1827 [00:05<00:17, 83.11it/s]
1609
  23%|██▎ | 421/1827 [00:05<00:17, 80.56it/s]
1610
  24%|██▎ | 430/1827 [00:05<00:17, 79.87it/s]
1611
  24%|██▍ | 439/1827 [00:05<00:17, 79.77it/s]
1612
  24%|██▍ | 447/1827 [00:05<00:17, 79.67it/s]
1613
  25%|██▍ | 456/1827 [00:05<00:16, 80.72it/s]
1614
  25%|██▌ | 465/1827 [00:05<00:16, 82.42it/s]
1615
  26%|██▌ | 474/1827 [00:05<00:16, 80.58it/s]
1616
  26%|██▋ | 483/1827 [00:05<00:16, 79.48it/s]
1617
  27%|██▋ | 491/1827 [00:06<00:17, 78.12it/s]
1618
  27%|██▋ | 500/1827 [00:06<00:16, 78.98it/s]
1619
  28%|██▊ | 509/1827 [00:06<00:16, 79.79it/s]
1620
  28%|██▊ | 518/1827 [00:06<00:16, 80.70it/s]
1621
  29%|██▉ | 527/1827 [00:06<00:15, 81.95it/s]
1622
  29%|██▉ | 536/1827 [00:06<00:15, 81.48it/s]
1623
  30%|██▉ | 545/1827 [00:06<00:15, 81.56it/s]
1624
  30%|███ | 554/1827 [00:06<00:15, 80.14it/s]
1625
  31%|███ | 563/1827 [00:06<00:15, 80.95it/s]
1626
  31%|███▏ | 572/1827 [00:07<00:15, 82.12it/s]
1627
  32%|███▏ | 581/1827 [00:07<00:15, 81.74it/s]
1628
  32%|███▏ | 590/1827 [00:07<00:15, 81.57it/s]
1629
  33%|███▎ | 599/1827 [00:07<00:15, 81.15it/s]
1630
  33%|███▎ | 608/1827 [00:07<00:14, 81.57it/s]
1631
  34%|███▍ | 617/1827 [00:07<00:15, 80.16it/s]
1632
  34%|███▍ | 626/1827 [00:07<00:14, 82.03it/s]
1633
  35%|███▍ | 635/1827 [00:07<00:14, 82.57it/s]
1634
  35%|███▌ | 644/1827 [00:07<00:14, 83.46it/s]
1635
  36%|███▌ | 653/1827 [00:08<00:15, 77.96it/s]
1636
  36%|███▌ | 662/1827 [00:08<00:14, 79.18it/s]
1637
  37%|███▋ | 671/1827 [00:08<00:14, 79.75it/s]
1638
  37%|███▋ | 680/1827 [00:08<00:14, 80.75it/s]
1639
  38%|███▊ | 689/1827 [00:08<00:13, 82.05it/s]
1640
  38%|███▊ | 698/1827 [00:08<00:13, 83.26it/s]
1641
  39%|███▊ | 707/1827 [00:08<00:13, 82.35it/s]
1642
  39%|███▉ | 716/1827 [00:08<00:13, 83.46it/s]
1643
  40%|███▉ | 725/1827 [00:08<00:13, 84.47it/s]
1644
  40%|████ | 734/1827 [00:09<00:12, 84.78it/s]
1645
  41%|████ | 743/1827 [00:09<00:12, 84.36it/s]
1646
  41%|████ | 752/1827 [00:09<00:12, 84.90it/s]
1647
  42%|████▏ | 761/1827 [00:09<00:12, 84.81it/s]
1648
  42%|████▏ | 770/1827 [00:09<00:12, 85.40it/s]
1649
  43%|████▎ | 779/1827 [00:09<00:12, 82.72it/s]
1650
  43%|████▎ | 788/1827 [00:09<00:12, 82.68it/s]
1651
  44%|████▎ | 797/1827 [00:09<00:13, 78.45it/s]
1652
  44%|████▍ | 806/1827 [00:09<00:12, 80.13it/s]
1653
  45%|████▍ | 815/1827 [00:10<00:12, 81.93it/s]
1654
  45%|████▌ | 824/1827 [00:10<00:12, 81.92it/s]
1655
  46%|████▌ | 833/1827 [00:10<00:12, 80.09it/s]
1656
  46%|████▌ | 842/1827 [00:10<00:12, 80.84it/s]
1657
  47%|████▋ | 851/1827 [00:10<00:11, 82.26it/s]
1658
  47%|████▋ | 860/1827 [00:10<00:11, 82.48it/s]
1659
  48%|████▊ | 869/1827 [00:10<00:11, 80.82it/s]
1660
  48%|████▊ | 878/1827 [00:10<00:11, 79.77it/s]
1661
  48%|████▊ | 886/1827 [00:10<00:11, 79.80it/s]
1662
  49%|████▉ | 895/1827 [00:11<00:11, 80.41it/s]
1663
  49%|████▉ | 904/1827 [00:11<00:11, 82.22it/s]
1664
  50%|████▉ | 913/1827 [00:11<00:10, 83.47it/s]
1665
  50%|█████ | 922/1827 [00:11<00:10, 83.69it/s]
1666
  51%|█████ | 931/1827 [00:11<00:11, 81.45it/s]
1667
  51%|█████▏ | 940/1827 [00:11<00:10, 82.66it/s]
1668
  52%|█████▏ | 949/1827 [00:11<00:10, 80.89it/s]
1669
  52%|█████▏ | 958/1827 [00:11<00:10, 81.33it/s]
1670
  53%|█████▎ | 967/1827 [00:11<00:10, 82.58it/s]
1671
  53%|█████▎ | 976/1827 [00:11<00:10, 81.82it/s]
1672
  54%|█████▍ | 985/1827 [00:12<00:10, 82.23it/s]
1673
  54%|█████▍ | 994/1827 [00:12<00:10, 83.02it/s]
1674
  55%|█████▍ | 1003/1827 [00:12<00:09, 83.11it/s]
1675
  55%|█████▌ | 1012/1827 [00:12<00:09, 83.22it/s]
1676
  56%|█████▌ | 1021/1827 [00:12<00:09, 84.03it/s]
1677
  56%|█████▋ | 1030/1827 [00:12<00:09, 85.02it/s]
1678
  57%|█████▋ | 1039/1827 [00:12<00:09, 82.97it/s]
1679
  57%|█████▋ | 1048/1827 [00:12<00:09, 83.08it/s]
1680
  58%|█████▊ | 1057/1827 [00:12<00:09, 83.42it/s]
1681
  58%|█████▊ | 1066/1827 [00:13<00:09, 83.10it/s]
1682
  59%|█████▉ | 1075/1827 [00:13<00:08, 84.02it/s]
1683
  59%|█████▉ | 1084/1827 [00:13<00:08, 84.75it/s]
1684
  60%|█████▉ | 1093/1827 [00:13<00:08, 85.02it/s]
1685
  60%|██████ | 1102/1827 [00:13<00:08, 84.64it/s]
1686
  61%|██████ | 1111/1827 [00:13<00:08, 84.52it/s]
1687
  61%|██████▏ | 1120/1827 [00:13<00:08, 83.58it/s]
1688
  62%|██████▏ | 1129/1827 [00:13<00:08, 83.47it/s]
1689
  62%|██████▏ | 1138/1827 [00:13<00:08, 83.97it/s]
1690
  63%|██████▎ | 1147/1827 [00:14<00:08, 84.60it/s]
1691
  63%|██████▎ | 1156/1827 [00:14<00:07, 84.68it/s]
1692
  64%|██████▍ | 1165/1827 [00:14<00:08, 81.69it/s]
1693
  64%|██████▍ | 1174/1827 [00:14<00:07, 83.04it/s]
1694
  65%|██████▍ | 1183/1827 [00:14<00:07, 81.23it/s]
1695
  65%|██████▌ | 1192/1827 [00:14<00:07, 82.81it/s]
1696
  66%|██████▌ | 1201/1827 [00:14<00:07, 83.12it/s]
1697
  66%|██████▌ | 1210/1827 [00:14<00:07, 82.78it/s]
1698
  67%|██████▋ | 1219/1827 [00:14<00:07, 81.18it/s]
1699
  67%|██████▋ | 1228/1827 [00:15<00:07, 83.02it/s]
1700
  68%|██████▊ | 1237/1827 [00:15<00:07, 83.76it/s]
1701
  68%|██████▊ | 1246/1827 [00:15<00:06, 84.24it/s]
1702
  69%|██████▊ | 1255/1827 [00:15<00:07, 81.04it/s]
1703
  69%|██████▉ | 1264/1827 [00:15<00:06, 81.41it/s]
1704
  70%|██████▉ | 1273/1827 [00:15<00:06, 81.98it/s]
1705
  70%|███████ | 1282/1827 [00:15<00:06, 83.21it/s]
1706
  71%|███████ | 1291/1827 [00:15<00:06, 83.31it/s]
1707
  71%|███████ | 1300/1827 [00:15<00:06, 83.92it/s]
1708
  72%|███████▏ | 1309/1827 [00:15<00:06, 84.33it/s]
1709
  72%|███████▏ | 1318/1827 [00:16<00:05, 84.99it/s]
1710
  73%|███████▎ | 1327/1827 [00:16<00:05, 84.67it/s]
1711
  73%|███████▎ | 1336/1827 [00:16<00:05, 85.18it/s]
1712
  74%|███████▎ | 1345/1827 [00:16<00:05, 84.30it/s]
1713
  74%|███████▍ | 1354/1827 [00:16<00:05, 84.28it/s]
1714
  75%|███████▍ | 1363/1827 [00:16<00:05, 84.42it/s]
1715
  75%|███████▌ | 1372/1827 [00:16<00:05, 83.96it/s]
1716
  76%|███████▌ | 1381/1827 [00:16<00:05, 84.35it/s]
1717
  76%|███████▌ | 1390/1827 [00:16<00:05, 83.32it/s]
1718
  77%|███████▋ | 1399/1827 [00:17<00:05, 81.72it/s]
1719
  77%|███████▋ | 1408/1827 [00:17<00:05, 82.44it/s]
1720
  78%|███████▊ | 1417/1827 [00:17<00:04, 83.25it/s]
1721
  78%|███████▊ | 1426/1827 [00:17<00:04, 81.63it/s]
1722
  79%|███████▊ | 1435/1827 [00:17<00:04, 81.84it/s]
1723
  79%|███████▉ | 1444/1827 [00:17<00:04, 79.43it/s]
1724
  79%|███████▉ | 1452/1827 [00:17<00:04, 78.88it/s]
1725
  80%|███████▉ | 1460/1827 [00:17<00:04, 79.00it/s]
1726
  80%|████████ | 1469/1827 [00:17<00:04, 77.76it/s]
1727
  81%|████████ | 1478/1827 [00:18<00:04, 78.91it/s]
1728
  81%|████████▏ | 1487/1827 [00:18<00:04, 79.81it/s]
1729
  82%|████████▏ | 1495/1827 [00:18<00:04, 77.98it/s]
1730
  82%|████████▏ | 1503/1827 [00:18<00:04, 78.22it/s]
1731
  83%|████████▎ | 1512/1827 [00:18<00:03, 79.84it/s]
1732
  83%|████████▎ | 1520/1827 [00:18<00:03, 79.72it/s]
1733
  84%|████████▎ | 1528/1827 [00:18<00:03, 78.62it/s]
1734
  84%|████████▍ | 1536/1827 [00:18<00:03, 77.91it/s]
1735
  85%|████████▍ | 1545/1827 [00:18<00:03, 78.92it/s]
1736
  85%|████████▌ | 1554/1827 [00:19<00:03, 79.49it/s]
1737
  85%|████████▌ | 1562/1827 [00:19<00:03, 79.02it/s]
1738
  86%|████████▌ | 1570/1827 [00:19<00:03, 78.77it/s]
1739
  86%|████████▋ | 1579/1827 [00:19<00:03, 80.51it/s]
1740
  87%|████████▋ | 1588/1827 [00:19<00:03, 79.62it/s]
1741
  87%|████████▋ | 1597/1827 [00:19<00:02, 79.98it/s]
1742
  88%|████████▊ | 1606/1827 [00:19<00:02, 81.13it/s]
1743
  88%|████████▊ | 1615/1827 [00:19<00:02, 80.28it/s]
1744
  89%|████████▉ | 1624/1827 [00:19<00:02, 74.42it/s]
1745
  89%|████████▉ | 1632/1827 [00:20<00:02, 74.97it/s]
1746
  90%|████████▉ | 1641/1827 [00:20<00:02, 76.63it/s]
1747
  90%|█████████ | 1650/1827 [00:20<00:02, 78.43it/s]
1748
  91%|█████████ | 1659/1827 [00:20<00:02, 79.16it/s]
1749
  91%|█████████ | 1667/1827 [00:20<00:02, 78.81it/s]
1750
  92%|█████████▏| 1676/1827 [00:20<00:01, 80.04it/s]
1751
  92%|█████████▏| 1685/1827 [00:20<00:01, 81.58it/s]
1752
  93%|█████████▎| 1694/1827 [00:20<00:01, 79.80it/s]
1753
  93%|█████████▎| 1703/1827 [00:20<00:01, 80.70it/s]
1754
  94%|█████████▎| 1712/1827 [00:20<00:01, 82.14it/s]
1755
  94%|█████████▍| 1721/1827 [00:21<00:01, 79.30it/s]
1756
  95%|█████████▍| 1730/1827 [00:21<00:01, 81.07it/s]
1757
  95%|█████████▌| 1739/1827 [00:21<00:01, 82.15it/s]
1758
  96%|█████████▌| 1748/1827 [00:21<00:00, 83.61it/s]
1759
  96%|█████████▌| 1757/1827 [00:21<00:00, 83.43it/s]
1760
  97%|█████████▋| 1766/1827 [00:21<00:00, 83.99it/s]
1761
  97%|█████████▋| 1775/1827 [00:21<00:00, 84.37it/s]
1762
  98%|█████████▊| 1784/1827 [00:21<00:00, 85.02it/s]
1763
  98%|█████████▊| 1793/1827 [00:21<00:00, 82.53it/s]
1764
  99%|█████████▊| 1802/1827 [00:22<00:00, 82.32it/s]
1765
  99%|█████████▉| 1811/1827 [00:22<00:00, 81.76it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1463
  {'eval_loss': 0.005196314305067062, 'eval_precision': 0.9416590701914311, 'eval_recall': 0.9494485294117647, 'eval_f1': 0.945537757437071, 'eval_accuracy': 0.9990525491919205, 'eval_runtime': 14.6459, 'eval_samples_per_second': 464.976, 'eval_steps_per_second': 58.173, 'epoch': 10.0}
1464
  {'train_runtime': 1422.7831, 'train_samples_per_second': 219.492, 'train_steps_per_second': 3.43, 'train_loss': 0.002533856062142209, 'epoch': 10.0}
1465
 
1466
+ ***** train metrics *****
1467
+ epoch = 10.0
1468
+ total_flos = 14617000GF
1469
+ train_loss = 0.0025
1470
+ train_runtime = 0:23:42.78
1471
+ train_samples = 31229
1472
+ train_samples_per_second = 219.492
1473
+ train_steps_per_second = 3.43
1474
+ 09/09/2024 20:56:16 - INFO - __main__ - *** Evaluate ***
1475
+ [INFO|trainer.py:811] 2024-09-09 20:56:16,801 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
1476
+ [INFO|trainer.py:3819] 2024-09-09 20:56:16,803 >>
1477
+ ***** Running Evaluation *****
1478
+ [INFO|trainer.py:3821] 2024-09-09 20:56:16,803 >> Num examples = 6810
1479
+ [INFO|trainer.py:3824] 2024-09-09 20:56:16,803 >> Batch size = 8
1480
+
1481
  0%| | 0/852 [00:00<?, ?it/s]
1482
  1%| | 10/852 [00:00<00:08, 93.57it/s]
1483
  2%|▏ | 20/852 [00:00<00:10, 80.55it/s]
1484
  3%|▎ | 29/852 [00:00<00:10, 80.58it/s]
1485
  4%|▍ | 38/852 [00:00<00:10, 81.11it/s]
1486
  6%|▌ | 47/852 [00:00<00:09, 81.65it/s]
1487
  7%|▋ | 56/852 [00:00<00:09, 83.14it/s]
1488
  8%|▊ | 65/852 [00:00<00:10, 78.65it/s]
1489
  9%|▊ | 73/852 [00:00<00:10, 77.04it/s]
1490
  10%|▉ | 81/852 [00:01<00:09, 77.70it/s]
1491
  11%|█ | 90/852 [00:01<00:09, 79.30it/s]
1492
  12%|█▏ | 98/852 [00:01<00:09, 79.14it/s]
1493
  13%|█▎ | 107/852 [00:01<00:09, 79.14it/s]
1494
  14%|█▎ | 116/852 [00:01<00:09, 80.00it/s]
1495
  15%|█▍ | 125/852 [00:01<00:08, 81.72it/s]
1496
  16%|█▌ | 134/852 [00:01<00:09, 76.77it/s]
1497
  17%|█▋ | 142/852 [00:01<00:09, 74.23it/s]
1498
  18%|█▊ | 150/852 [00:01<00:09, 75.01it/s]
1499
  19%|█▊ | 159/852 [00:02<00:08, 78.54it/s]
1500
  20%|█▉ | 168/852 [00:02<00:08, 79.78it/s]
1501
  21%|██ | 177/852 [00:02<00:08, 80.22it/s]
1502
  22%|██▏ | 186/852 [00:02<00:08, 81.84it/s]
1503
  23%|██▎ | 195/852 [00:02<00:08, 81.79it/s]
1504
  24%|██▍ | 204/852 [00:02<00:07, 82.81it/s]
1505
  25%|██▌ | 213/852 [00:02<00:07, 81.58it/s]
1506
  26%|██▌ | 222/852 [00:02<00:07, 81.05it/s]
1507
  27%|██▋ | 231/852 [00:02<00:07, 82.05it/s]
1508
  28%|██▊ | 240/852 [00:02<00:07, 81.07it/s]
1509
  29%|██▉ | 249/852 [00:03<00:07, 80.20it/s]
1510
  30%|███ | 258/852 [00:03<00:07, 81.36it/s]
1511
  31%|███▏ | 267/852 [00:03<00:07, 81.25it/s]
1512
  32%|███▏ | 276/852 [00:03<00:07, 81.52it/s]
1513
  33%|███▎ | 285/852 [00:03<00:06, 81.87it/s]
1514
  35%|███▍ | 294/852 [00:03<00:06, 80.80it/s]
1515
  36%|███▌ | 303/852 [00:03<00:07, 78.20it/s]
1516
  37%|███▋ | 312/852 [00:03<00:06, 77.55it/s]
1517
  38%|███▊ | 321/852 [00:04<00:06, 79.73it/s]
1518
  39%|███▊ | 330/852 [00:04<00:06, 79.60it/s]
1519
  40%|███▉ | 338/852 [00:04<00:06, 79.58it/s]
1520
  41%|████ | 347/852 [00:04<00:06, 79.98it/s]
1521
  42%|████▏ | 356/852 [00:04<00:06, 79.10it/s]
1522
  43%|████▎ | 365/852 [00:04<00:06, 79.97it/s]
1523
  44%|████▍ | 374/852 [00:04<00:05, 79.68it/s]
1524
  45%|████▍ | 382/852 [00:04<00:05, 78.69it/s]
1525
  46%|████▌ | 391/852 [00:04<00:05, 79.19it/s]
1526
  47%|████▋ | 399/852 [00:04<00:05, 78.98it/s]
1527
  48%|████▊ | 407/852 [00:05<00:05, 77.08it/s]
1528
  49%|████▉ | 416/852 [00:05<00:05, 78.27it/s]
1529
  50%|████▉ | 424/852 [00:05<00:05, 78.61it/s]
1530
  51%|█████ | 432/852 [00:05<00:05, 78.46it/s]
1531
  52%|█████▏ | 441/852 [00:05<00:05, 80.76it/s]
1532
  53%|█████▎ | 450/852 [00:05<00:04, 81.55it/s]
1533
  54%|█████▍ | 459/852 [00:05<00:04, 82.07it/s]
1534
  55%|█████▍ | 468/852 [00:05<00:04, 79.16it/s]
1535
  56%|█████▌ | 476/852 [00:05<00:04, 76.30it/s]
1536
  57%|█████▋ | 484/852 [00:06<00:04, 76.49it/s]
1537
  58%|█████▊ | 493/852 [00:06<00:04, 78.76it/s]
1538
  59%|█████▉ | 502/852 [00:06<00:04, 80.51it/s]
1539
  60%|█████▉ | 511/852 [00:06<00:04, 80.85it/s]
1540
  61%|██████ | 520/852 [00:06<00:04, 82.11it/s]
1541
  62%|██████▏ | 529/852 [00:06<00:04, 79.93it/s]
1542
  63%|██████▎ | 538/852 [00:06<00:03, 81.28it/s]
1543
  64%|██████▍ | 547/852 [00:06<00:03, 81.59it/s]
1544
  65%|██████▌ | 556/852 [00:06<00:03, 78.91it/s]
1545
  66%|██████▋ | 565/852 [00:07<00:03, 81.03it/s]
1546
  67%|██████▋ | 574/852 [00:07<00:03, 81.36it/s]
1547
  68%|██████▊ | 583/852 [00:07<00:03, 80.74it/s]
1548
  69%|██████▉ | 592/852 [00:07<00:03, 80.30it/s]
1549
  71%|███████ | 601/852 [00:07<00:03, 80.88it/s]
1550
  72%|███████▏ | 610/852 [00:07<00:02, 81.38it/s]
1551
  73%|███████▎ | 619/852 [00:07<00:02, 79.27it/s]
1552
  74%|███████▎ | 627/852 [00:07<00:02, 79.27it/s]
1553
  75%|███████▍ | 635/852 [00:07<00:02, 75.84it/s]
1554
  75%|███████▌ | 643/852 [00:08<00:02, 75.01it/s]
1555
  77%|███████▋ | 652/852 [00:08<00:02, 78.03it/s]
1556
  78%|███████▊ | 661/852 [00:08<00:02, 79.05it/s]
1557
  79%|███████▊ | 670/852 [00:08<00:02, 79.91it/s]
1558
  80%|███████▉ | 679/852 [00:08<00:02, 80.51it/s]
1559
  81%|████████ | 688/852 [00:08<00:02, 81.73it/s]
1560
  82%|████████▏ | 697/852 [00:08<00:01, 80.82it/s]
1561
  83%|████████▎ | 706/852 [00:08<00:01, 82.01it/s]
1562
  84%|████████▍ | 715/852 [00:08<00:01, 81.67it/s]
1563
  85%|████████▍ | 724/852 [00:09<00:01, 81.05it/s]
1564
  86%|████████▌ | 733/852 [00:09<00:01, 81.95it/s]
1565
  87%|████████▋ | 742/852 [00:09<00:01, 82.90it/s]
1566
  88%|████████▊ | 751/852 [00:09<00:01, 82.87it/s]
1567
  89%|████████▉ | 760/852 [00:09<00:01, 84.18it/s]
1568
  90%|█████████ | 769/852 [00:09<00:01, 82.73it/s]
1569
  91%|█████████▏| 778/852 [00:09<00:00, 82.09it/s]
1570
  92%|█████████▏| 787/852 [00:09<00:00, 80.10it/s]
1571
  93%|█████████▎| 796/852 [00:09<00:00, 79.31it/s]
1572
  94%|█████████▍| 805/852 [00:10<00:00, 80.66it/s]
1573
  96%|█████████▌| 814/852 [00:10<00:00, 79.72it/s]
1574
  97%|█████████▋| 823/852 [00:10<00:00, 81.12it/s]
1575
  98%|█████████▊| 832/852 [00:10<00:00, 81.37it/s]
1576
  99%|█████████▊| 841/852 [00:10<00:00, 81.30it/s]
1577
+ ***** eval metrics *****
1578
+ epoch = 10.0
1579
+ eval_accuracy = 0.9991
1580
+ eval_f1 = 0.9521
1581
+ eval_loss = 0.0044
1582
+ eval_precision = 0.9448
1583
+ eval_recall = 0.9596
1584
+ eval_runtime = 0:00:14.11
1585
+ eval_samples = 6810
1586
+ eval_samples_per_second = 482.558
1587
+ eval_steps_per_second = 60.373
1588
+ 09/09/2024 20:56:30 - INFO - __main__ - *** Predict ***
1589
+ [INFO|trainer.py:811] 2024-09-09 20:56:30,918 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens, id. If ner_tags, tokens, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
1590
+ [INFO|trainer.py:3819] 2024-09-09 20:56:30,920 >>
1591
+ ***** Running Prediction *****
1592
+ [INFO|trainer.py:3821] 2024-09-09 20:56:30,920 >> Num examples = 14614
1593
+ [INFO|trainer.py:3824] 2024-09-09 20:56:30,920 >> Batch size = 8
1594
+
1595
  0%| | 0/1827 [00:00<?, ?it/s]
1596
  0%| | 9/1827 [00:00<00:22, 81.47it/s]
1597
  1%| | 18/1827 [00:00<00:24, 73.10it/s]
1598
  1%|▏ | 27/1827 [00:00<00:23, 77.38it/s]
1599
  2%|▏ | 35/1827 [00:00<00:22, 78.12it/s]
1600
  2%|▏ | 44/1827 [00:00<00:22, 79.84it/s]
1601
  3%|▎ | 53/1827 [00:00<00:22, 79.92it/s]
1602
  3%|▎ | 62/1827 [00:00<00:22, 78.68it/s]
1603
  4%|▍ | 71/1827 [00:00<00:22, 79.81it/s]
1604
  4%|▍ | 80/1827 [00:01<00:21, 82.54it/s]
1605
  5%|▍ | 89/1827 [00:01<00:21, 81.34it/s]
1606
  5%|▌ | 98/1827 [00:01<00:21, 82.29it/s]
1607
  6%|▌ | 107/1827 [00:01<00:20, 84.00it/s]
1608
  6%|▋ | 116/1827 [00:01<00:20, 82.38it/s]
1609
  7%|▋ | 125/1827 [00:01<00:20, 82.00it/s]
1610
  7%|▋ | 134/1827 [00:01<00:20, 83.17it/s]
1611
  8%|▊ | 143/1827 [00:01<00:20, 83.98it/s]
1612
  8%|▊ | 152/1827 [00:01<00:20, 83.56it/s]
1613
  9%|▉ | 161/1827 [00:01<00:20, 79.98it/s]
1614
  9%|▉ | 170/1827 [00:02<00:20, 80.90it/s]
1615
  10%|▉ | 179/1827 [00:02<00:20, 82.04it/s]
1616
  10%|█ | 188/1827 [00:02<00:19, 82.53it/s]
1617
  11%|█ | 197/1827 [00:02<00:19, 82.41it/s]
1618
  11%|█▏ | 206/1827 [00:02<00:20, 81.02it/s]
1619
  12%|█▏ | 215/1827 [00:02<00:20, 79.98it/s]
1620
  12%|█▏ | 224/1827 [00:02<00:19, 81.33it/s]
1621
  13%|█▎ | 233/1827 [00:02<00:19, 79.96it/s]
1622
  13%|█▎ | 242/1827 [00:02<00:20, 79.19it/s]
1623
  14%|█▎ | 250/1827 [00:03<00:19, 79.35it/s]
1624
  14%|█▍ | 259/1827 [00:03<00:19, 79.72it/s]
1625
  15%|█▍ | 268/1827 [00:03<00:19, 80.61it/s]
1626
  15%|█▌ | 277/1827 [00:03<00:18, 82.50it/s]
1627
  16%|█▌ | 286/1827 [00:03<00:18, 83.69it/s]
1628
  16%|█▌ | 295/1827 [00:03<00:18, 83.07it/s]
1629
  17%|█▋ | 304/1827 [00:03<00:18, 81.73it/s]
1630
  17%|█▋ | 313/1827 [00:03<00:18, 82.33it/s]
1631
  18%|█▊ | 322/1827 [00:03<00:18, 82.60it/s]
1632
  18%|█▊ | 331/1827 [00:04<00:18, 82.65it/s]
1633
  19%|█▊ | 340/1827 [00:04<00:17, 83.95it/s]
1634
  19%|█▉ | 349/1827 [00:04<00:18, 79.59it/s]
1635
  20%|█▉ | 358/1827 [00:04<00:18, 81.31it/s]
1636
  20%|██ | 367/1827 [00:04<00:17, 82.07it/s]
1637
  21%|██ | 376/1827 [00:04<00:17, 81.81it/s]
1638
  21%|██ | 385/1827 [00:04<00:17, 81.80it/s]
1639
  22%|██▏ | 394/1827 [00:04<00:17, 80.82it/s]
1640
  22%|██▏ | 403/1827 [00:04<00:17, 82.60it/s]
1641
  23%|██▎ | 412/1827 [00:05<00:17, 83.11it/s]
1642
  23%|██▎ | 421/1827 [00:05<00:17, 80.56it/s]
1643
  24%|██▎ | 430/1827 [00:05<00:17, 79.87it/s]
1644
  24%|██▍ | 439/1827 [00:05<00:17, 79.77it/s]
1645
  24%|██▍ | 447/1827 [00:05<00:17, 79.67it/s]
1646
  25%|██▍ | 456/1827 [00:05<00:16, 80.72it/s]
1647
  25%|██▌ | 465/1827 [00:05<00:16, 82.42it/s]
1648
  26%|██▌ | 474/1827 [00:05<00:16, 80.58it/s]
1649
  26%|██▋ | 483/1827 [00:05<00:16, 79.48it/s]
1650
  27%|██▋ | 491/1827 [00:06<00:17, 78.12it/s]
1651
  27%|██▋ | 500/1827 [00:06<00:16, 78.98it/s]
1652
  28%|██▊ | 509/1827 [00:06<00:16, 79.79it/s]
1653
  28%|██▊ | 518/1827 [00:06<00:16, 80.70it/s]
1654
  29%|██▉ | 527/1827 [00:06<00:15, 81.95it/s]
1655
  29%|██▉ | 536/1827 [00:06<00:15, 81.48it/s]
1656
  30%|██▉ | 545/1827 [00:06<00:15, 81.56it/s]
1657
  30%|███ | 554/1827 [00:06<00:15, 80.14it/s]
1658
  31%|███ | 563/1827 [00:06<00:15, 80.95it/s]
1659
  31%|███▏ | 572/1827 [00:07<00:15, 82.12it/s]
1660
  32%|███▏ | 581/1827 [00:07<00:15, 81.74it/s]
1661
  32%|███▏ | 590/1827 [00:07<00:15, 81.57it/s]
1662
  33%|███▎ | 599/1827 [00:07<00:15, 81.15it/s]
1663
  33%|███▎ | 608/1827 [00:07<00:14, 81.57it/s]
1664
  34%|███▍ | 617/1827 [00:07<00:15, 80.16it/s]
1665
  34%|███▍ | 626/1827 [00:07<00:14, 82.03it/s]
1666
  35%|███▍ | 635/1827 [00:07<00:14, 82.57it/s]
1667
  35%|███▌ | 644/1827 [00:07<00:14, 83.46it/s]
1668
  36%|███▌ | 653/1827 [00:08<00:15, 77.96it/s]
1669
  36%|███▌ | 662/1827 [00:08<00:14, 79.18it/s]
1670
  37%|███▋ | 671/1827 [00:08<00:14, 79.75it/s]
1671
  37%|███▋ | 680/1827 [00:08<00:14, 80.75it/s]
1672
  38%|███▊ | 689/1827 [00:08<00:13, 82.05it/s]
1673
  38%|███▊ | 698/1827 [00:08<00:13, 83.26it/s]
1674
  39%|███▊ | 707/1827 [00:08<00:13, 82.35it/s]
1675
  39%|███▉ | 716/1827 [00:08<00:13, 83.46it/s]
1676
  40%|███▉ | 725/1827 [00:08<00:13, 84.47it/s]
1677
  40%|████ | 734/1827 [00:09<00:12, 84.78it/s]
1678
  41%|████ | 743/1827 [00:09<00:12, 84.36it/s]
1679
  41%|████ | 752/1827 [00:09<00:12, 84.90it/s]
1680
  42%|████▏ | 761/1827 [00:09<00:12, 84.81it/s]
1681
  42%|████▏ | 770/1827 [00:09<00:12, 85.40it/s]
1682
  43%|████▎ | 779/1827 [00:09<00:12, 82.72it/s]
1683
  43%|████▎ | 788/1827 [00:09<00:12, 82.68it/s]
1684
  44%|████▎ | 797/1827 [00:09<00:13, 78.45it/s]
1685
  44%|████▍ | 806/1827 [00:09<00:12, 80.13it/s]
1686
  45%|████▍ | 815/1827 [00:10<00:12, 81.93it/s]
1687
  45%|████▌ | 824/1827 [00:10<00:12, 81.92it/s]
1688
  46%|████▌ | 833/1827 [00:10<00:12, 80.09it/s]
1689
  46%|████▌ | 842/1827 [00:10<00:12, 80.84it/s]
1690
  47%|████▋ | 851/1827 [00:10<00:11, 82.26it/s]
1691
  47%|████▋ | 860/1827 [00:10<00:11, 82.48it/s]
1692
  48%|████▊ | 869/1827 [00:10<00:11, 80.82it/s]
1693
  48%|████▊ | 878/1827 [00:10<00:11, 79.77it/s]
1694
  48%|████▊ | 886/1827 [00:10<00:11, 79.80it/s]
1695
  49%|████▉ | 895/1827 [00:11<00:11, 80.41it/s]
1696
  49%|████▉ | 904/1827 [00:11<00:11, 82.22it/s]
1697
  50%|████▉ | 913/1827 [00:11<00:10, 83.47it/s]
1698
  50%|█████ | 922/1827 [00:11<00:10, 83.69it/s]
1699
  51%|█████ | 931/1827 [00:11<00:11, 81.45it/s]
1700
  51%|█████▏ | 940/1827 [00:11<00:10, 82.66it/s]
1701
  52%|█████▏ | 949/1827 [00:11<00:10, 80.89it/s]
1702
  52%|█████▏ | 958/1827 [00:11<00:10, 81.33it/s]
1703
  53%|█████▎ | 967/1827 [00:11<00:10, 82.58it/s]
1704
  53%|█████▎ | 976/1827 [00:11<00:10, 81.82it/s]
1705
  54%|█████▍ | 985/1827 [00:12<00:10, 82.23it/s]
1706
  54%|█████▍ | 994/1827 [00:12<00:10, 83.02it/s]
1707
  55%|█████▍ | 1003/1827 [00:12<00:09, 83.11it/s]
1708
  55%|█████▌ | 1012/1827 [00:12<00:09, 83.22it/s]
1709
  56%|█████▌ | 1021/1827 [00:12<00:09, 84.03it/s]
1710
  56%|█████▋ | 1030/1827 [00:12<00:09, 85.02it/s]
1711
  57%|█████▋ | 1039/1827 [00:12<00:09, 82.97it/s]
1712
  57%|█████▋ | 1048/1827 [00:12<00:09, 83.08it/s]
1713
  58%|█████▊ | 1057/1827 [00:12<00:09, 83.42it/s]
1714
  58%|█████▊ | 1066/1827 [00:13<00:09, 83.10it/s]
1715
  59%|█████▉ | 1075/1827 [00:13<00:08, 84.02it/s]
1716
  59%|█████▉ | 1084/1827 [00:13<00:08, 84.75it/s]
1717
  60%|█████▉ | 1093/1827 [00:13<00:08, 85.02it/s]
1718
  60%|██████ | 1102/1827 [00:13<00:08, 84.64it/s]
1719
  61%|██████ | 1111/1827 [00:13<00:08, 84.52it/s]
1720
  61%|██████▏ | 1120/1827 [00:13<00:08, 83.58it/s]
1721
  62%|██████▏ | 1129/1827 [00:13<00:08, 83.47it/s]
1722
  62%|██████▏ | 1138/1827 [00:13<00:08, 83.97it/s]
1723
  63%|██████▎ | 1147/1827 [00:14<00:08, 84.60it/s]
1724
  63%|██████▎ | 1156/1827 [00:14<00:07, 84.68it/s]
1725
  64%|██████▍ | 1165/1827 [00:14<00:08, 81.69it/s]
1726
  64%|██████▍ | 1174/1827 [00:14<00:07, 83.04it/s]
1727
  65%|██████▍ | 1183/1827 [00:14<00:07, 81.23it/s]
1728
  65%|██████▌ | 1192/1827 [00:14<00:07, 82.81it/s]
1729
  66%|██████▌ | 1201/1827 [00:14<00:07, 83.12it/s]
1730
  66%|██████▌ | 1210/1827 [00:14<00:07, 82.78it/s]
1731
  67%|██████▋ | 1219/1827 [00:14<00:07, 81.18it/s]
1732
  67%|██████▋ | 1228/1827 [00:15<00:07, 83.02it/s]
1733
  68%|██████▊ | 1237/1827 [00:15<00:07, 83.76it/s]
1734
  68%|██████▊ | 1246/1827 [00:15<00:06, 84.24it/s]
1735
  69%|██████▊ | 1255/1827 [00:15<00:07, 81.04it/s]
1736
  69%|██████▉ | 1264/1827 [00:15<00:06, 81.41it/s]
1737
  70%|██████▉ | 1273/1827 [00:15<00:06, 81.98it/s]
1738
  70%|███████ | 1282/1827 [00:15<00:06, 83.21it/s]
1739
  71%|███████ | 1291/1827 [00:15<00:06, 83.31it/s]
1740
  71%|███████ | 1300/1827 [00:15<00:06, 83.92it/s]
1741
  72%|███████▏ | 1309/1827 [00:15<00:06, 84.33it/s]
1742
  72%|███████▏ | 1318/1827 [00:16<00:05, 84.99it/s]
1743
  73%|███████▎ | 1327/1827 [00:16<00:05, 84.67it/s]
1744
  73%|███████▎ | 1336/1827 [00:16<00:05, 85.18it/s]
1745
  74%|███████▎ | 1345/1827 [00:16<00:05, 84.30it/s]
1746
  74%|███████▍ | 1354/1827 [00:16<00:05, 84.28it/s]
1747
  75%|███████▍ | 1363/1827 [00:16<00:05, 84.42it/s]
1748
  75%|███████▌ | 1372/1827 [00:16<00:05, 83.96it/s]
1749
  76%|███████▌ | 1381/1827 [00:16<00:05, 84.35it/s]
1750
  76%|███████▌ | 1390/1827 [00:16<00:05, 83.32it/s]
1751
  77%|███████▋ | 1399/1827 [00:17<00:05, 81.72it/s]
1752
  77%|███████▋ | 1408/1827 [00:17<00:05, 82.44it/s]
1753
  78%|███████▊ | 1417/1827 [00:17<00:04, 83.25it/s]
1754
  78%|███████▊ | 1426/1827 [00:17<00:04, 81.63it/s]
1755
  79%|███████▊ | 1435/1827 [00:17<00:04, 81.84it/s]
1756
  79%|███████▉ | 1444/1827 [00:17<00:04, 79.43it/s]
1757
  79%|███████▉ | 1452/1827 [00:17<00:04, 78.88it/s]
1758
  80%|███████▉ | 1460/1827 [00:17<00:04, 79.00it/s]
1759
  80%|████████ | 1469/1827 [00:17<00:04, 77.76it/s]
1760
  81%|████████ | 1478/1827 [00:18<00:04, 78.91it/s]
1761
  81%|████████▏ | 1487/1827 [00:18<00:04, 79.81it/s]
1762
  82%|████████▏ | 1495/1827 [00:18<00:04, 77.98it/s]
1763
  82%|████████▏ | 1503/1827 [00:18<00:04, 78.22it/s]
1764
  83%|████████▎ | 1512/1827 [00:18<00:03, 79.84it/s]
1765
  83%|████████▎ | 1520/1827 [00:18<00:03, 79.72it/s]
1766
  84%|████████▎ | 1528/1827 [00:18<00:03, 78.62it/s]
1767
  84%|████████▍ | 1536/1827 [00:18<00:03, 77.91it/s]
1768
  85%|████████▍ | 1545/1827 [00:18<00:03, 78.92it/s]
1769
  85%|████████▌ | 1554/1827 [00:19<00:03, 79.49it/s]
1770
  85%|████████▌ | 1562/1827 [00:19<00:03, 79.02it/s]
1771
  86%|████████▌ | 1570/1827 [00:19<00:03, 78.77it/s]
1772
  86%|████████▋ | 1579/1827 [00:19<00:03, 80.51it/s]
1773
  87%|████████▋ | 1588/1827 [00:19<00:03, 79.62it/s]
1774
  87%|████████▋ | 1597/1827 [00:19<00:02, 79.98it/s]
1775
  88%|████████▊ | 1606/1827 [00:19<00:02, 81.13it/s]
1776
  88%|████████▊ | 1615/1827 [00:19<00:02, 80.28it/s]
1777
  89%|████████▉ | 1624/1827 [00:19<00:02, 74.42it/s]
1778
  89%|████████▉ | 1632/1827 [00:20<00:02, 74.97it/s]
1779
  90%|████████▉ | 1641/1827 [00:20<00:02, 76.63it/s]
1780
  90%|█████████ | 1650/1827 [00:20<00:02, 78.43it/s]
1781
  91%|█████████ | 1659/1827 [00:20<00:02, 79.16it/s]
1782
  91%|█████████ | 1667/1827 [00:20<00:02, 78.81it/s]
1783
  92%|█████████▏| 1676/1827 [00:20<00:01, 80.04it/s]
1784
  92%|█████████▏| 1685/1827 [00:20<00:01, 81.58it/s]
1785
  93%|█████████▎| 1694/1827 [00:20<00:01, 79.80it/s]
1786
  93%|█████████▎| 1703/1827 [00:20<00:01, 80.70it/s]
1787
  94%|█████████▎| 1712/1827 [00:20<00:01, 82.14it/s]
1788
  94%|█████████▍| 1721/1827 [00:21<00:01, 79.30it/s]
1789
  95%|█████████▍| 1730/1827 [00:21<00:01, 81.07it/s]
1790
  95%|█████████▌| 1739/1827 [00:21<00:01, 82.15it/s]
1791
  96%|█████████▌| 1748/1827 [00:21<00:00, 83.61it/s]
1792
  96%|█████████▌| 1757/1827 [00:21<00:00, 83.43it/s]
1793
  97%|█████████▋| 1766/1827 [00:21<00:00, 83.99it/s]
1794
  97%|█████████▋| 1775/1827 [00:21<00:00, 84.37it/s]
1795
  98%|█████████▊| 1784/1827 [00:21<00:00, 85.02it/s]
1796
  98%|█████████▊| 1793/1827 [00:21<00:00, 82.53it/s]
1797
  99%|█████████▊| 1802/1827 [00:22<00:00, 82.32it/s]
1798
  99%|█████████▉| 1811/1827 [00:22<00:00, 81.76it/s]
1799
+ [INFO|trainer.py:3503] 2024-09-09 20:57:00,376 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
1800
+ [INFO|configuration_utils.py:472] 2024-09-09 20:57:00,377 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
1801
+ [INFO|modeling_utils.py:2799] 2024-09-09 20:57:01,738 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1802
+ [INFO|tokenization_utils_base.py:2684] 2024-09-09 20:57:01,739 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1803
+ [INFO|tokenization_utils_base.py:2693] 2024-09-09 20:57:01,739 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
1804
+ ***** predict metrics *****
1805
+ predict_accuracy = 0.9988
1806
+ predict_f1 = 0.9231
1807
+ predict_loss = 0.0067
1808
+ predict_precision = 0.8968
1809
+ predict_recall = 0.9511
1810
+ predict_runtime = 0:00:28.58
1811
+ predict_samples_per_second = 511.3
1812
+ predict_steps_per_second = 63.921
1813
+
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 9.99288256227758,
3
- "total_flos": 2.130317416831723e+16,
4
- "train_loss": 0.017706579814779112,
5
- "train_runtime": 1901.5665,
6
- "train_samples": 44938,
7
- "train_samples_per_second": 236.321,
8
- "train_steps_per_second": 3.692
9
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "total_flos": 1.5694885147146138e+16,
4
+ "train_loss": 0.002533856062142209,
5
+ "train_runtime": 1422.7831,
6
+ "train_samples": 31229,
7
+ "train_samples_per_second": 219.492,
8
+ "train_steps_per_second": 3.43
9
  }
trainer_state.json CHANGED
@@ -1,243 +1,208 @@
1
  {
2
- "best_metric": 0.8053395240858967,
3
- "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-7020",
4
- "epoch": 9.99288256227758,
5
  "eval_steps": 500,
6
- "global_step": 7020,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.7117437722419929,
13
- "grad_norm": 0.739061713218689,
14
- "learning_rate": 4.643874643874644e-05,
15
- "loss": 0.1165,
 
 
 
 
 
 
 
 
 
 
 
 
16
  "step": 500
17
  },
18
  {
19
- "epoch": 0.999288256227758,
20
- "eval_accuracy": 0.9735468988149999,
21
- "eval_f1": 0.7736125182933693,
22
- "eval_loss": 0.08085697889328003,
23
- "eval_precision": 0.7454979388153613,
24
- "eval_recall": 0.8039307440336921,
25
- "eval_runtime": 14.5471,
26
- "eval_samples_per_second": 468.136,
27
- "eval_steps_per_second": 58.569,
28
- "step": 702
29
- },
30
- {
31
- "epoch": 1.4234875444839858,
32
- "grad_norm": 0.5600239038467407,
33
- "learning_rate": 4.287749287749288e-05,
34
- "loss": 0.0461,
35
  "step": 1000
36
  },
37
  {
38
- "epoch": 2.0,
39
- "eval_accuracy": 0.9746797204333557,
40
- "eval_f1": 0.7832803271240344,
41
- "eval_loss": 0.09561743587255478,
42
- "eval_precision": 0.7611479028697572,
43
- "eval_recall": 0.8067384183434722,
44
- "eval_runtime": 14.2524,
45
- "eval_samples_per_second": 477.815,
46
- "eval_steps_per_second": 59.779,
47
- "step": 1405
48
  },
49
  {
50
- "epoch": 2.135231316725979,
51
- "grad_norm": 0.4771190881729126,
52
- "learning_rate": 3.931623931623932e-05,
53
- "loss": 0.0273,
54
  "step": 1500
55
  },
56
  {
57
- "epoch": 2.8469750889679717,
58
- "grad_norm": 0.29212868213653564,
59
- "learning_rate": 3.575498575498576e-05,
60
- "loss": 0.0165,
 
 
 
 
 
 
 
 
 
 
 
 
61
  "step": 2000
62
  },
63
  {
64
- "epoch": 2.999288256227758,
65
- "eval_accuracy": 0.974439424938553,
66
- "eval_f1": 0.7853282741175118,
67
- "eval_loss": 0.10571284592151642,
68
- "eval_precision": 0.7721003843545106,
69
- "eval_recall": 0.7990173139915769,
70
- "eval_runtime": 14.2529,
71
- "eval_samples_per_second": 477.797,
72
- "eval_steps_per_second": 59.777,
73
- "step": 2107
74
  },
75
  {
76
- "epoch": 3.5587188612099645,
77
- "grad_norm": 0.08157803863286972,
78
- "learning_rate": 3.2193732193732194e-05,
79
- "loss": 0.011,
80
  "step": 2500
81
  },
82
  {
83
- "epoch": 4.0,
84
- "eval_accuracy": 0.9750504620539086,
85
- "eval_f1": 0.7971327796108773,
86
- "eval_loss": 0.12744221091270447,
87
- "eval_precision": 0.775858250276855,
88
- "eval_recall": 0.8196069255966308,
89
- "eval_runtime": 14.6113,
90
- "eval_samples_per_second": 466.079,
91
- "eval_steps_per_second": 58.311,
92
- "step": 2810
93
- },
94
- {
95
- "epoch": 4.270462633451958,
96
- "grad_norm": 0.45329615473747253,
97
- "learning_rate": 2.863247863247863e-05,
98
- "loss": 0.0079,
99
  "step": 3000
100
  },
101
  {
102
- "epoch": 4.98220640569395,
103
- "grad_norm": 0.4696062207221985,
104
- "learning_rate": 2.5071225071225073e-05,
105
- "loss": 0.006,
106
- "step": 3500
 
 
 
 
 
107
  },
108
  {
109
- "epoch": 4.999288256227758,
110
- "eval_accuracy": 0.9744943496230793,
111
- "eval_f1": 0.7975886853698122,
112
- "eval_loss": 0.13583102822303772,
113
- "eval_precision": 0.7904411764705882,
114
- "eval_recall": 0.8048666354702855,
115
- "eval_runtime": 14.3515,
116
- "eval_samples_per_second": 474.516,
117
- "eval_steps_per_second": 59.367,
118
- "step": 3512
119
  },
120
  {
121
- "epoch": 5.693950177935943,
122
- "grad_norm": 0.6243860721588135,
123
- "learning_rate": 2.150997150997151e-05,
124
- "loss": 0.0045,
 
 
 
 
 
 
 
 
 
 
 
 
125
  "step": 4000
126
  },
127
  {
128
- "epoch": 6.0,
129
- "eval_accuracy": 0.9746179301632636,
130
- "eval_f1": 0.7948299953423382,
131
- "eval_loss": 0.14197705686092377,
132
- "eval_precision": 0.7911451089476125,
133
- "eval_recall": 0.7985493682732803,
134
- "eval_runtime": 14.3392,
135
- "eval_samples_per_second": 474.921,
136
- "eval_steps_per_second": 59.417,
137
- "step": 4215
138
  },
139
  {
140
- "epoch": 6.405693950177936,
141
- "grad_norm": 0.2963380813598633,
142
- "learning_rate": 1.794871794871795e-05,
143
- "loss": 0.0037,
144
  "step": 4500
145
  },
146
  {
147
- "epoch": 6.999288256227758,
148
- "eval_accuracy": 0.9748925535858953,
149
- "eval_f1": 0.796227293898463,
150
- "eval_loss": 0.160146102309227,
151
- "eval_precision": 0.7925359295317571,
152
- "eval_recall": 0.7999532054281703,
153
- "eval_runtime": 14.175,
154
- "eval_samples_per_second": 480.423,
155
- "eval_steps_per_second": 60.106,
156
- "step": 4917
157
- },
158
- {
159
- "epoch": 7.117437722419929,
160
- "grad_norm": 0.4029097557067871,
161
- "learning_rate": 1.4387464387464389e-05,
162
- "loss": 0.0028,
163
- "step": 5000
164
- },
165
- {
166
- "epoch": 7.829181494661921,
167
- "grad_norm": 0.04551521688699722,
168
- "learning_rate": 1.0826210826210826e-05,
169
- "loss": 0.0022,
170
- "step": 5500
171
- },
172
- {
173
- "epoch": 8.0,
174
- "eval_accuracy": 0.9758331388084089,
175
- "eval_f1": 0.8050679995350458,
176
- "eval_loss": 0.1620824635028839,
177
- "eval_precision": 0.7999537999538,
178
- "eval_recall": 0.8102480112306972,
179
- "eval_runtime": 14.4773,
180
- "eval_samples_per_second": 470.391,
181
- "eval_steps_per_second": 58.851,
182
- "step": 5620
183
- },
184
- {
185
- "epoch": 8.540925266903916,
186
- "grad_norm": 0.3445014953613281,
187
- "learning_rate": 7.264957264957266e-06,
188
- "loss": 0.0016,
189
- "step": 6000
190
- },
191
- {
192
- "epoch": 8.999288256227757,
193
- "eval_accuracy": 0.9757713485383168,
194
- "eval_f1": 0.8028807062376583,
195
- "eval_loss": 0.16807720065116882,
196
- "eval_precision": 0.7972318339100346,
197
- "eval_recall": 0.8086102012166588,
198
- "eval_runtime": 14.1973,
199
- "eval_samples_per_second": 479.67,
200
- "eval_steps_per_second": 60.012,
201
- "step": 6322
202
- },
203
- {
204
- "epoch": 9.252669039145907,
205
- "grad_norm": 0.12732785940170288,
206
- "learning_rate": 3.7037037037037037e-06,
207
- "loss": 0.0013,
208
- "step": 6500
209
- },
210
- {
211
- "epoch": 9.9644128113879,
212
- "grad_norm": 0.05971187725663185,
213
- "learning_rate": 1.4245014245014247e-07,
214
- "loss": 0.0013,
215
- "step": 7000
216
- },
217
- {
218
- "epoch": 9.99288256227758,
219
- "eval_accuracy": 0.9758743323218038,
220
- "eval_f1": 0.8053395240858967,
221
- "eval_loss": 0.17113561928272247,
222
- "eval_precision": 0.7991246256622898,
223
- "eval_recall": 0.8116518483855872,
224
- "eval_runtime": 14.5918,
225
- "eval_samples_per_second": 466.7,
226
- "eval_steps_per_second": 58.389,
227
- "step": 7020
228
- },
229
- {
230
- "epoch": 9.99288256227758,
231
- "step": 7020,
232
- "total_flos": 2.130317416831723e+16,
233
- "train_loss": 0.017706579814779112,
234
- "train_runtime": 1901.5665,
235
- "train_samples_per_second": 236.321,
236
- "train_steps_per_second": 3.692
237
  }
238
  ],
239
  "logging_steps": 500,
240
- "max_steps": 7020,
241
  "num_input_tokens_seen": 0,
242
  "num_train_epochs": 10,
243
  "save_steps": 500,
@@ -253,7 +218,7 @@
253
  "attributes": {}
254
  }
255
  },
256
- "total_flos": 2.130317416831723e+16,
257
  "train_batch_size": 32,
258
  "trial_name": null,
259
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9521203830369357,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2440",
4
+ "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 4880,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.9987641945981572,
14
+ "eval_f1": 0.9355123674911661,
15
+ "eval_loss": 0.003916793502867222,
16
+ "eval_precision": 0.9005102040816326,
17
+ "eval_recall": 0.9733455882352942,
18
+ "eval_runtime": 13.936,
19
+ "eval_samples_per_second": 488.661,
20
+ "eval_steps_per_second": 61.136,
21
+ "step": 488
22
+ },
23
+ {
24
+ "epoch": 1.0245901639344261,
25
+ "grad_norm": 0.24995087087154388,
26
+ "learning_rate": 4.487704918032787e-05,
27
+ "loss": 0.0189,
28
  "step": 500
29
  },
30
  {
31
+ "epoch": 2.0,
32
+ "eval_accuracy": 0.9989495654084337,
33
+ "eval_f1": 0.9413886384129847,
34
+ "eval_loss": 0.0032420416828244925,
35
+ "eval_precision": 0.9238938053097345,
36
+ "eval_recall": 0.9595588235294118,
37
+ "eval_runtime": 14.0058,
38
+ "eval_samples_per_second": 486.228,
39
+ "eval_steps_per_second": 60.832,
40
+ "step": 976
41
+ },
42
+ {
43
+ "epoch": 2.0491803278688523,
44
+ "grad_norm": 0.04940846189856529,
45
+ "learning_rate": 3.975409836065574e-05,
46
+ "loss": 0.0027,
47
  "step": 1000
48
  },
49
  {
50
+ "epoch": 3.0,
51
+ "eval_accuracy": 0.9989358342373021,
52
+ "eval_f1": 0.9402784014369105,
53
+ "eval_loss": 0.0043699671514332294,
54
+ "eval_precision": 0.9192273924495171,
55
+ "eval_recall": 0.9623161764705882,
56
+ "eval_runtime": 14.0607,
57
+ "eval_samples_per_second": 484.329,
58
+ "eval_steps_per_second": 60.594,
59
+ "step": 1464
60
  },
61
  {
62
+ "epoch": 3.0737704918032787,
63
+ "grad_norm": 0.01636342518031597,
64
+ "learning_rate": 3.463114754098361e-05,
65
+ "loss": 0.0015,
66
  "step": 1500
67
  },
68
  {
69
+ "epoch": 4.0,
70
+ "eval_accuracy": 0.9990594147774864,
71
+ "eval_f1": 0.9445208619899129,
72
+ "eval_loss": 0.0036185304634273052,
73
+ "eval_precision": 0.9423604757548033,
74
+ "eval_recall": 0.9466911764705882,
75
+ "eval_runtime": 14.26,
76
+ "eval_samples_per_second": 477.56,
77
+ "eval_steps_per_second": 59.748,
78
+ "step": 1952
79
+ },
80
+ {
81
+ "epoch": 4.098360655737705,
82
+ "grad_norm": 0.006754329428076744,
83
+ "learning_rate": 2.9508196721311478e-05,
84
+ "loss": 0.0007,
85
  "step": 2000
86
  },
87
  {
88
+ "epoch": 5.0,
89
+ "eval_accuracy": 0.9991418018042759,
90
+ "eval_f1": 0.9521203830369357,
91
+ "eval_loss": 0.004440780263394117,
92
+ "eval_precision": 0.9447963800904977,
93
+ "eval_recall": 0.9595588235294118,
94
+ "eval_runtime": 14.0071,
95
+ "eval_samples_per_second": 486.182,
96
+ "eval_steps_per_second": 60.826,
97
+ "step": 2440
98
  },
99
  {
100
+ "epoch": 5.122950819672131,
101
+ "grad_norm": 0.0005009469459764659,
102
+ "learning_rate": 2.4385245901639343e-05,
103
+ "loss": 0.0004,
104
  "step": 2500
105
  },
106
  {
107
+ "epoch": 6.0,
108
+ "eval_accuracy": 0.9990456836063548,
109
+ "eval_f1": 0.9464368886818817,
110
+ "eval_loss": 0.005531433038413525,
111
+ "eval_precision": 0.959395656279509,
112
+ "eval_recall": 0.9338235294117647,
113
+ "eval_runtime": 14.0724,
114
+ "eval_samples_per_second": 483.926,
115
+ "eval_steps_per_second": 60.544,
116
+ "step": 2928
117
+ },
118
+ {
119
+ "epoch": 6.147540983606557,
120
+ "grad_norm": 0.7484572529792786,
121
+ "learning_rate": 1.9262295081967212e-05,
122
+ "loss": 0.0002,
123
  "step": 3000
124
  },
125
  {
126
+ "epoch": 7.0,
127
+ "eval_accuracy": 0.9989907589218284,
128
+ "eval_f1": 0.9427393495190106,
129
+ "eval_loss": 0.004938796162605286,
130
+ "eval_precision": 0.9397260273972603,
131
+ "eval_recall": 0.9457720588235294,
132
+ "eval_runtime": 13.9575,
133
+ "eval_samples_per_second": 487.909,
134
+ "eval_steps_per_second": 61.042,
135
+ "step": 3416
136
  },
137
  {
138
+ "epoch": 7.172131147540983,
139
+ "grad_norm": 0.00041869020787999034,
140
+ "learning_rate": 1.4139344262295081e-05,
141
+ "loss": 0.0002,
142
+ "step": 3500
 
 
 
 
 
143
  },
144
  {
145
+ "epoch": 8.0,
146
+ "eval_accuracy": 0.999073145948618,
147
+ "eval_f1": 0.9468864468864469,
148
+ "eval_loss": 0.005282656755298376,
149
+ "eval_precision": 0.9434306569343066,
150
+ "eval_recall": 0.9503676470588235,
151
+ "eval_runtime": 14.1087,
152
+ "eval_samples_per_second": 482.68,
153
+ "eval_steps_per_second": 60.388,
154
+ "step": 3904
155
+ },
156
+ {
157
+ "epoch": 8.19672131147541,
158
+ "grad_norm": 0.004756764974445105,
159
+ "learning_rate": 9.016393442622952e-06,
160
+ "loss": 0.0001,
161
  "step": 4000
162
  },
163
  {
164
+ "epoch": 9.0,
165
+ "eval_accuracy": 0.9990525491919205,
166
+ "eval_f1": 0.946404031149794,
167
+ "eval_loss": 0.0050343358889222145,
168
+ "eval_precision": 0.94337899543379,
169
+ "eval_recall": 0.9494485294117647,
170
+ "eval_runtime": 14.373,
171
+ "eval_samples_per_second": 473.804,
172
+ "eval_steps_per_second": 59.278,
173
+ "step": 4392
174
  },
175
  {
176
+ "epoch": 9.221311475409836,
177
+ "grad_norm": 0.00033881032140925527,
178
+ "learning_rate": 3.89344262295082e-06,
179
+ "loss": 0.0001,
180
  "step": 4500
181
  },
182
  {
183
+ "epoch": 10.0,
184
+ "eval_accuracy": 0.9990525491919205,
185
+ "eval_f1": 0.945537757437071,
186
+ "eval_loss": 0.005196314305067062,
187
+ "eval_precision": 0.9416590701914311,
188
+ "eval_recall": 0.9494485294117647,
189
+ "eval_runtime": 14.6459,
190
+ "eval_samples_per_second": 464.976,
191
+ "eval_steps_per_second": 58.173,
192
+ "step": 4880
193
+ },
194
+ {
195
+ "epoch": 10.0,
196
+ "step": 4880,
197
+ "total_flos": 1.5694885147146138e+16,
198
+ "train_loss": 0.002533856062142209,
199
+ "train_runtime": 1422.7831,
200
+ "train_samples_per_second": 219.492,
201
+ "train_steps_per_second": 3.43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  }
203
  ],
204
  "logging_steps": 500,
205
+ "max_steps": 4880,
206
  "num_input_tokens_seen": 0,
207
  "num_train_epochs": 10,
208
  "save_steps": 500,
 
218
  "attributes": {}
219
  }
220
  },
221
+ "total_flos": 1.5694885147146138e+16,
222
  "train_batch_size": 32,
223
  "trial_name": null,
224
  "trial_params": null