Rodrigo1771 committed on
Commit
7af7a0d
·
verified ·
1 Parent(s): 12d998a

End of training

Browse files
README.md CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
5
  tags:
 
6
  - generated_from_trainer
7
  datasets:
8
- - drugtemist-85-ner
9
  metrics:
10
  - precision
11
  - recall
@@ -18,24 +19,24 @@ model-index:
18
  name: Token Classification
19
  type: token-classification
20
  dataset:
21
- name: drugtemist-85-ner
22
- type: drugtemist-85-ner
23
  config: DrugTEMIST NER
24
  split: validation
25
  args: DrugTEMIST NER
26
  metrics:
27
  - name: Precision
28
  type: precision
29
- value: 0.9347826086956522
30
  - name: Recall
31
  type: recall
32
- value: 0.9485294117647058
33
  - name: F1
34
  type: f1
35
- value: 0.9416058394160585
36
  - name: Accuracy
37
  type: accuracy
38
- value: 0.9989083718950389
39
  ---
40
 
41
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -43,12 +44,12 @@ should probably proofread and complete it, then remove this comment. -->
43
 
44
  # output
45
 
46
- This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the drugtemist-85-ner dataset.
47
  It achieves the following results on the evaluation set:
48
- - Loss: 0.0058
49
- - Precision: 0.9348
50
- - Recall: 0.9485
51
- - F1: 0.9416
52
  - Accuracy: 0.9989
53
 
54
  ## Model description
 
3
  license: apache-2.0
4
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
5
  tags:
6
+ - token-classification
7
  - generated_from_trainer
8
  datasets:
9
+ - Rodrigo1771/drugtemist-85-ner
10
  metrics:
11
  - precision
12
  - recall
 
19
  name: Token Classification
20
  type: token-classification
21
  dataset:
22
+ name: Rodrigo1771/drugtemist-85-ner
23
+ type: Rodrigo1771/drugtemist-85-ner
24
  config: DrugTEMIST NER
25
  split: validation
26
  args: DrugTEMIST NER
27
  metrics:
28
  - name: Precision
29
  type: precision
30
+ value: 0.9461187214611873
31
  - name: Recall
32
  type: recall
33
+ value: 0.9522058823529411
34
  - name: F1
35
  type: f1
36
+ value: 0.9491525423728814
37
  - name: Accuracy
38
  type: accuracy
39
+ value: 0.9989426998228679
40
  ---
41
 
42
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
44
 
45
  # output
46
 
47
+ This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/drugtemist-85-ner dataset.
48
  It achieves the following results on the evaluation set:
49
+ - Loss: 0.0048
50
+ - Precision: 0.9461
51
+ - Recall: 0.9522
52
+ - F1: 0.9492
53
  - Accuracy: 0.9989
54
 
55
  ## Model description
all_results.json CHANGED
@@ -1,26 +1,26 @@
1
  {
2
- "epoch": 9.98998998998999,
3
- "eval_accuracy": 0.9764853694371592,
4
- "eval_f1": 0.8040201005025126,
5
- "eval_loss": 0.14244574308395386,
6
- "eval_precision": 0.803175344384777,
7
- "eval_recall": 0.8048666354702855,
8
- "eval_runtime": 14.2417,
9
  "eval_samples": 6810,
10
- "eval_samples_per_second": 478.175,
11
- "eval_steps_per_second": 59.825,
12
- "predict_accuracy": 0.9760745138469196,
13
- "predict_f1": 0.7984671441135821,
14
- "predict_loss": 0.12913450598716736,
15
- "predict_precision": 0.7908163265306123,
16
- "predict_recall": 0.8062674448109617,
17
- "predict_runtime": 28.8701,
18
- "predict_samples_per_second": 506.199,
19
- "predict_steps_per_second": 63.284,
20
- "total_flos": 1.5071241212671032e+16,
21
- "train_loss": 0.022475468706272407,
22
- "train_runtime": 1385.1143,
23
- "train_samples": 31947,
24
- "train_samples_per_second": 230.645,
25
- "train_steps_per_second": 3.603
26
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9989426998228679,
4
+ "eval_f1": 0.9491525423728814,
5
+ "eval_loss": 0.004777050111442804,
6
+ "eval_precision": 0.9461187214611873,
7
+ "eval_recall": 0.9522058823529411,
8
+ "eval_runtime": 13.9476,
9
  "eval_samples": 6810,
10
+ "eval_samples_per_second": 488.256,
11
+ "eval_steps_per_second": 61.086,
12
+ "predict_accuracy": 0.9987478324070453,
13
+ "predict_f1": 0.9243073407597828,
14
+ "predict_loss": 0.005894536152482033,
15
+ "predict_precision": 0.9069506726457399,
16
+ "predict_recall": 0.9423412929528246,
17
+ "predict_runtime": 28.2179,
18
+ "predict_samples_per_second": 517.899,
19
+ "predict_steps_per_second": 64.746,
20
+ "total_flos": 1.4714840952259542e+16,
21
+ "train_loss": 0.002772659832779558,
22
+ "train_runtime": 1349.0548,
23
+ "train_samples": 29797,
24
+ "train_samples_per_second": 220.873,
25
+ "train_steps_per_second": 3.454
26
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 9.98998998998999,
3
- "eval_accuracy": 0.9764853694371592,
4
- "eval_f1": 0.8040201005025126,
5
- "eval_loss": 0.14244574308395386,
6
- "eval_precision": 0.803175344384777,
7
- "eval_recall": 0.8048666354702855,
8
- "eval_runtime": 14.2417,
9
  "eval_samples": 6810,
10
- "eval_samples_per_second": 478.175,
11
- "eval_steps_per_second": 59.825
12
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9989426998228679,
4
+ "eval_f1": 0.9491525423728814,
5
+ "eval_loss": 0.004777050111442804,
6
+ "eval_precision": 0.9461187214611873,
7
+ "eval_recall": 0.9522058823529411,
8
+ "eval_runtime": 13.9476,
9
  "eval_samples": 6810,
10
+ "eval_samples_per_second": 488.256,
11
+ "eval_steps_per_second": 61.086
12
  }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "predict_accuracy": 0.9760745138469196,
3
- "predict_f1": 0.7984671441135821,
4
- "predict_loss": 0.12913450598716736,
5
- "predict_precision": 0.7908163265306123,
6
- "predict_recall": 0.8062674448109617,
7
- "predict_runtime": 28.8701,
8
- "predict_samples_per_second": 506.199,
9
- "predict_steps_per_second": 63.284
10
  }
 
1
  {
2
+ "predict_accuracy": 0.9987478324070453,
3
+ "predict_f1": 0.9243073407597828,
4
+ "predict_loss": 0.005894536152482033,
5
+ "predict_precision": 0.9069506726457399,
6
+ "predict_recall": 0.9423412929528246,
7
+ "predict_runtime": 28.2179,
8
+ "predict_samples_per_second": 517.899,
9
+ "predict_steps_per_second": 64.746
10
  }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1725579302.2a66098fac87.9264.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ddb06f8e19acc3cfe83ba96582312f0eda7c54cb22293014a850cd5aff12d3d
3
+ size 560
train.log CHANGED
@@ -1450,3 +1450,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
1450
  {'eval_loss': 0.005793666001409292, 'eval_precision': 0.9347826086956522, 'eval_recall': 0.9485294117647058, 'eval_f1': 0.9416058394160585, 'eval_accuracy': 0.9989083718950389, 'eval_runtime': 14.4595, 'eval_samples_per_second': 470.971, 'eval_steps_per_second': 58.923, 'epoch': 10.0}
1451
  {'train_runtime': 1349.0548, 'train_samples_per_second': 220.873, 'train_steps_per_second': 3.454, 'train_loss': 0.002772659832779558, 'epoch': 10.0}
1452
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1453
  0%| | 0/852 [00:00<?, ?it/s]
1454
  1%| | 10/852 [00:00<00:09, 89.77it/s]
1455
  2%|▏ | 19/852 [00:00<00:10, 77.96it/s]
1456
  3%|▎ | 27/852 [00:00<00:10, 78.13it/s]
1457
  4%|▍ | 36/852 [00:00<00:10, 78.74it/s]
1458
  5%|▌ | 45/852 [00:00<00:10, 80.47it/s]
1459
  6%|▋ | 54/852 [00:00<00:09, 81.97it/s]
1460
  7%|▋ | 63/852 [00:00<00:09, 81.96it/s]
1461
  8%|▊ | 72/852 [00:00<00:09, 80.15it/s]
1462
  10%|▉ | 81/852 [00:01<00:09, 80.19it/s]
1463
  11%|█ | 90/852 [00:01<00:09, 80.63it/s]
1464
  12%|█▏ | 99/852 [00:01<00:09, 80.42it/s]
1465
  13%|█▎ | 108/852 [00:01<00:09, 79.74it/s]
1466
  14%|█▎ | 117/852 [00:01<00:09, 80.86it/s]
1467
  15%|█▍ | 126/852 [00:01<00:09, 78.38it/s]
1468
  16%|█▌ | 135/852 [00:01<00:09, 79.16it/s]
1469
  17%|█▋ | 143/852 [00:01<00:08, 79.17it/s]
1470
  18%|█▊ | 151/852 [00:01<00:08, 78.56it/s]
1471
  19%|█▉ | 160/852 [00:01<00:08, 81.22it/s]
1472
  20%|█▉ | 169/852 [00:02<00:08, 81.16it/s]
1473
  21%|██ | 178/852 [00:02<00:08, 81.76it/s]
1474
  22%|██▏ | 187/852 [00:02<00:08, 82.46it/s]
1475
  23%|██▎ | 196/852 [00:02<00:07, 82.29it/s]
1476
  24%|██▍ | 205/852 [00:02<00:07, 83.12it/s]
1477
  25%|██▌ | 214/852 [00:02<00:07, 80.08it/s]
1478
  26%|██▌ | 223/852 [00:02<00:07, 81.13it/s]
1479
  27%|██▋ | 232/852 [00:02<00:07, 81.21it/s]
1480
  28%|██▊ | 241/852 [00:03<00:07, 78.10it/s]
1481
  29%|██▉ | 250/852 [00:03<00:07, 79.34it/s]
1482
  30%|███ | 259/852 [00:03<00:07, 80.76it/s]
1483
  31%|███▏ | 268/852 [00:03<00:07, 74.97it/s]
1484
  33%|███▎ | 277/852 [00:03<00:07, 77.71it/s]
1485
  34%|███▎ | 286/852 [00:03<00:07, 80.09it/s]
1486
  35%|███▍ | 295/852 [00:03<00:06, 80.08it/s]
1487
  36%|███▌ | 304/852 [00:03<00:06, 81.59it/s]
1488
  37%|███▋ | 313/852 [00:03<00:06, 80.30it/s]
1489
  38%|███▊ | 322/852 [00:04<00:06, 82.39it/s]
1490
  39%|███▉ | 331/852 [00:04<00:06, 82.16it/s]
1491
  40%|███▉ | 340/852 [00:04<00:06, 82.30it/s]
1492
  41%|████ | 349/852 [00:04<00:06, 82.47it/s]
1493
  42%|████▏ | 358/852 [00:04<00:06, 80.49it/s]
1494
  43%|████▎ | 367/852 [00:04<00:05, 81.17it/s]
1495
  44%|████▍ | 376/852 [00:04<00:05, 81.82it/s]
1496
  45%|████▌ | 385/852 [00:04<00:05, 81.26it/s]
1497
  46%|████▌ | 394/852 [00:04<00:05, 81.23it/s]
1498
  47%|████▋ | 403/852 [00:05<00:05, 81.17it/s]
1499
  48%|████▊ | 412/852 [00:05<00:05, 79.22it/s]
1500
  49%|████▉ | 421/852 [00:05<00:05, 80.98it/s]
1501
  50%|█████ | 430/852 [00:05<00:05, 80.11it/s]
1502
  52%|█████▏ | 439/852 [00:05<00:05, 81.83it/s]
1503
  53%|█████▎ | 448/852 [00:05<00:04, 82.37it/s]
1504
  54%|█████▎ | 457/852 [00:05<00:04, 83.25it/s]
1505
  55%|█████▍ | 466/852 [00:05<00:04, 80.61it/s]
1506
  56%|█████▌ | 475/852 [00:05<00:04, 77.21it/s]
1507
  57%|█████▋ | 484/852 [00:06<00:04, 77.43it/s]
1508
  58%|█████▊ | 493/852 [00:06<00:04, 79.71it/s]
1509
  59%|█████▉ | 502/852 [00:06<00:04, 81.52it/s]
1510
  60%|█████▉ | 511/852 [00:06<00:04, 81.79it/s]
1511
  61%|██████ | 520/852 [00:06<00:04, 82.93it/s]
1512
  62%|██████▏ | 529/852 [00:06<00:03, 80.76it/s]
1513
  63%|██████▎ | 538/852 [00:06<00:03, 82.65it/s]
1514
  64%|██████▍ | 547/852 [00:06<00:03, 83.43it/s]
1515
  65%|██████▌ | 556/852 [00:06<00:03, 80.56it/s]
1516
  66%|██████▋ | 565/852 [00:06<00:03, 82.97it/s]
1517
  67%|██████▋ | 574/852 [00:07<00:03, 83.67it/s]
1518
  68%|██████▊ | 583/852 [00:07<00:03, 83.26it/s]
1519
  69%|██████▉ | 592/852 [00:07<00:03, 82.65it/s]
1520
  71%|███████ | 601/852 [00:07<00:03, 82.89it/s]
1521
  72%|███████▏ | 610/852 [00:07<00:02, 83.17it/s]
1522
  73%|███████▎ | 619/852 [00:07<00:02, 80.86it/s]
1523
  74%|███████▎ | 628/852 [00:07<00:02, 80.15it/s]
1524
  75%|███████▍ | 637/852 [00:07<00:02, 80.82it/s]
1525
  76%|███████▌ | 646/852 [00:07<00:02, 78.59it/s]
1526
  77%|███████▋ | 655/852 [00:08<00:02, 80.46it/s]
1527
  78%|███████▊ | 664/852 [00:08<00:02, 81.10it/s]
1528
  79%|███████▉ | 673/852 [00:08<00:02, 80.96it/s]
1529
  80%|████████ | 682/852 [00:08<00:02, 81.12it/s]
1530
  81%|████████ | 691/852 [00:08<00:01, 82.73it/s]
1531
  82%|████████▏ | 700/852 [00:08<00:01, 83.32it/s]
1532
  83%|████████▎ | 709/852 [00:08<00:01, 84.29it/s]
1533
  84%|████████▍ | 718/852 [00:08<00:01, 83.48it/s]
1534
  85%|████████▌ | 727/852 [00:08<00:01, 83.89it/s]
1535
  86%|████████▋ | 736/852 [00:09<00:01, 83.25it/s]
1536
  87%|████████▋ | 745/852 [00:09<00:01, 84.23it/s]
1537
  88%|████████▊ | 754/852 [00:09<00:01, 84.42it/s]
1538
  90%|████████▉ | 763/852 [00:09<00:01, 85.20it/s]
1539
  91%|█████████ | 772/852 [00:09<00:00, 83.63it/s]
1540
  92%|█████████▏| 781/852 [00:09<00:00, 82.15it/s]
1541
  93%|█████████▎| 790/852 [00:09<00:00, 82.20it/s]
1542
  94%|█████████▍| 799/852 [00:09<00:00, 82.36it/s]
1543
  95%|█████████▍| 808/852 [00:09<00:00, 83.98it/s]
1544
  96%|█████████▌| 817/852 [00:10<00:00, 82.39it/s]
1545
  97%|█████████▋| 826/852 [00:10<00:00, 83.62it/s]
1546
  98%|█████████▊| 835/852 [00:10<00:00, 83.71it/s]
1547
  99%|█████████▉| 844/852 [00:10<00:00, 82.35it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1548
  0%| | 0/1827 [00:00<?, ?it/s]
1549
  1%| | 10/1827 [00:00<00:19, 90.92it/s]
1550
  1%| | 20/1827 [00:00<00:22, 80.87it/s]
1551
  2%|▏ | 29/1827 [00:00<00:21, 82.68it/s]
1552
  2%|▏ | 38/1827 [00:00<00:21, 81.95it/s]
1553
  3%|▎ | 47/1827 [00:00<00:21, 82.63it/s]
1554
  3%|▎ | 56/1827 [00:00<00:21, 82.47it/s]
1555
  4%|▎ | 65/1827 [00:00<00:21, 80.47it/s]
1556
  4%|▍ | 74/1827 [00:00<00:21, 81.66it/s]
1557
  5%|▍ | 83/1827 [00:01<00:21, 82.68it/s]
1558
  5%|▌ | 92/1827 [00:01<00:21, 82.58it/s]
1559
  6%|▌ | 101/1827 [00:01<00:20, 84.52it/s]
1560
  6%|▌ | 110/1827 [00:01<00:20, 85.29it/s]
1561
  7%|▋ | 119/1827 [00:01<00:20, 83.68it/s]
1562
  7%|▋ | 128/1827 [00:01<00:20, 82.79it/s]
1563
  7%|▋ | 137/1827 [00:01<00:20, 83.78it/s]
1564
  8%|▊ | 146/1827 [00:01<00:19, 84.13it/s]
1565
  8%|▊ | 155/1827 [00:01<00:19, 83.64it/s]
1566
  9%|▉ | 164/1827 [00:01<00:20, 81.81it/s]
1567
  9%|▉ | 173/1827 [00:02<00:19, 82.80it/s]
1568
  10%|▉ | 182/1827 [00:02<00:19, 84.13it/s]
1569
  10%|█ | 191/1827 [00:02<00:19, 84.70it/s]
1570
  11%|█ | 200/1827 [00:02<00:19, 83.62it/s]
1571
  11%|█▏ | 209/1827 [00:02<00:19, 84.42it/s]
1572
  12%|█▏ | 218/1827 [00:02<00:19, 83.10it/s]
1573
  12%|█▏ | 227/1827 [00:02<00:19, 83.75it/s]
1574
  13%|█▎ | 236/1827 [00:02<00:19, 80.40it/s]
1575
  13%|█▎ | 245/1827 [00:02<00:19, 81.38it/s]
1576
  14%|█▍ | 254/1827 [00:03<00:19, 81.71it/s]
1577
  14%|█▍ | 263/1827 [00:03<00:19, 80.90it/s]
1578
  15%|█▍ | 272/1827 [00:03<00:18, 83.07it/s]
1579
  15%|█▌ | 281/1827 [00:03<00:18, 84.44it/s]
1580
  16%|█▌ | 290/1827 [00:03<00:18, 83.94it/s]
1581
  16%|█▋ | 299/1827 [00:03<00:18, 84.45it/s]
1582
  17%|█▋ | 308/1827 [00:03<00:18, 83.65it/s]
1583
  17%|█▋ | 317/1827 [00:03<00:18, 83.14it/s]
1584
  18%|█▊ | 326/1827 [00:03<00:17, 84.05it/s]
1585
  18%|█▊ | 335/1827 [00:04<00:17, 83.67it/s]
1586
  19%|█▉ | 344/1827 [00:04<00:17, 84.53it/s]
1587
  19%|█▉ | 353/1827 [00:04<00:18, 79.63it/s]
1588
  20%|█▉ | 362/1827 [00:04<00:18, 80.95it/s]
1589
  20%|██ | 371/1827 [00:04<00:17, 82.08it/s]
1590
  21%|██ | 380/1827 [00:04<00:17, 83.24it/s]
1591
  21%|██▏ | 389/1827 [00:04<00:17, 82.54it/s]
1592
  22%|██▏ | 398/1827 [00:04<00:17, 81.91it/s]
1593
  22%|██▏ | 407/1827 [00:04<00:17, 83.46it/s]
1594
  23%|██▎ | 416/1827 [00:05<00:17, 81.20it/s]
1595
  23%|██▎ | 425/1827 [00:05<00:17, 82.39it/s]
1596
  24%|██▍ | 434/1827 [00:05<00:17, 81.62it/s]
1597
  24%|██▍ | 443/1827 [00:05<00:17, 80.18it/s]
1598
  25%|██▍ | 452/1827 [00:05<00:16, 82.39it/s]
1599
  25%|██▌ | 461/1827 [00:05<00:16, 83.63it/s]
1600
  26%|██▌ | 470/1827 [00:05<00:16, 83.17it/s]
1601
  26%|██▌ | 479/1827 [00:05<00:16, 81.50it/s]
1602
  27%|██▋ | 488/1827 [00:05<00:16, 81.09it/s]
1603
  27%|██▋ | 497/1827 [00:06<00:16, 80.25it/s]
1604
  28%|██▊ | 506/1827 [00:06<00:16, 81.31it/s]
1605
  28%|██▊ | 515/1827 [00:06<00:15, 82.50it/s]
1606
  29%|██▊ | 524/1827 [00:06<00:15, 84.16it/s]
1607
  29%|██▉ | 533/1827 [00:06<00:15, 83.22it/s]
1608
  30%|██▉ | 542/1827 [00:06<00:15, 84.11it/s]
1609
  30%|███ | 551/1827 [00:06<00:15, 83.08it/s]
1610
  31%|███ | 560/1827 [00:06<00:15, 82.78it/s]
1611
  31%|███ | 569/1827 [00:06<00:15, 83.19it/s]
1612
  32%|███▏ | 578/1827 [00:06<00:15, 82.89it/s]
1613
  32%|███▏ | 587/1827 [00:07<00:14, 82.68it/s]
1614
  33%|███▎ | 596/1827 [00:07<00:14, 82.70it/s]
1615
  33%|███▎ | 605/1827 [00:07<00:14, 82.71it/s]
1616
  34%|███▎ | 614/1827 [00:07<00:14, 82.53it/s]
1617
  34%|███▍ | 623/1827 [00:07<00:14, 83.35it/s]
1618
  35%|███▍ | 632/1827 [00:07<00:14, 83.93it/s]
1619
  35%|███▌ | 641/1827 [00:07<00:14, 84.33it/s]
1620
  36%|███▌ | 650/1827 [00:07<00:14, 79.40it/s]
1621
  36%|███▌ | 659/1827 [00:07<00:14, 79.16it/s]
1622
  37%|███▋ | 668/1827 [00:08<00:14, 80.89it/s]
1623
  37%|███▋ | 677/1827 [00:08<00:14, 78.78it/s]
1624
  38%|███▊ | 686/1827 [00:08<00:14, 80.15it/s]
1625
  38%|███▊ | 695/1827 [00:08<00:13, 81.68it/s]
1626
  39%|███▊ | 704/1827 [00:08<00:14, 78.28it/s]
1627
  39%|███▉ | 713/1827 [00:08<00:13, 80.50it/s]
1628
  40%|███▉ | 722/1827 [00:08<00:13, 82.68it/s]
1629
  40%|████ | 731/1827 [00:08<00:13, 83.49it/s]
1630
  41%|████ | 740/1827 [00:08<00:13, 82.99it/s]
1631
  41%|████ | 749/1827 [00:09<00:12, 84.43it/s]
1632
  41%|████▏ | 758/1827 [00:09<00:12, 84.86it/s]
1633
  42%|████▏ | 767/1827 [00:09<00:12, 85.25it/s]
1634
  42%|████▏ | 776/1827 [00:09<00:12, 85.62it/s]
1635
  43%|████▎ | 785/1827 [00:09<00:12, 82.78it/s]
1636
  43%|████▎ | 794/1827 [00:09<00:12, 84.06it/s]
1637
  44%|████▍ | 803/1827 [00:09<00:12, 83.01it/s]
1638
  44%|████▍ | 812/1827 [00:09<00:12, 83.78it/s]
1639
  45%|████▍ | 821/1827 [00:09<00:11, 84.28it/s]
1640
  45%|████▌ | 830/1827 [00:10<00:11, 84.61it/s]
1641
  46%|████▌ | 839/1827 [00:10<00:11, 83.72it/s]
1642
  46%|████▋ | 848/1827 [00:10<00:11, 85.15it/s]
1643
  47%|████▋ | 857/1827 [00:10<00:11, 85.42it/s]
1644
  47%|████▋ | 866/1827 [00:10<00:11, 85.71it/s]
1645
  48%|████▊ | 875/1827 [00:10<00:11, 86.06it/s]
1646
  48%|████▊ | 884/1827 [00:10<00:10, 86.43it/s]
1647
  49%|████▉ | 893/1827 [00:10<00:10, 85.57it/s]
1648
  49%|████▉ | 902/1827 [00:10<00:10, 86.11it/s]
1649
  50%|████▉ | 911/1827 [00:10<00:10, 86.50it/s]
1650
  50%|█████ | 920/1827 [00:11<00:10, 86.07it/s]
1651
  51%|█████ | 929/1827 [00:11<00:10, 86.11it/s]
1652
  51%|█████▏ | 938/1827 [00:11<00:10, 82.86it/s]
1653
  52%|█████▏ | 947/1827 [00:11<00:10, 81.36it/s]
1654
  52%|█████▏ | 956/1827 [00:11<00:10, 82.29it/s]
1655
  53%|█████▎ | 965/1827 [00:11<00:10, 83.27it/s]
1656
  53%|█████▎ | 974/1827 [00:11<00:10, 83.79it/s]
1657
  54%|█████▍ | 983/1827 [00:11<00:10, 83.54it/s]
1658
  54%|█████▍ | 992/1827 [00:11<00:10, 83.43it/s]
1659
  55%|█████▍ | 1001/1827 [00:12<00:09, 84.18it/s]
1660
  55%|█████▌ | 1010/1827 [00:12<00:09, 83.88it/s]
1661
  56%|█████▌ | 1019/1827 [00:12<00:09, 84.69it/s]
1662
  56%|█████▋ | 1028/1827 [00:12<00:09, 85.70it/s]
1663
  57%|█████▋ | 1037/1827 [00:12<00:09, 84.01it/s]
1664
  57%|█████▋ | 1046/1827 [00:12<00:09, 84.57it/s]
1665
  58%|█████▊ | 1055/1827 [00:12<00:09, 84.49it/s]
1666
  58%|█████▊ | 1064/1827 [00:12<00:09, 84.45it/s]
1667
  59%|█████▊ | 1073/1827 [00:12<00:08, 85.10it/s]
1668
  59%|█████▉ | 1082/1827 [00:13<00:08, 85.50it/s]
1669
  60%|█████▉ | 1091/1827 [00:13<00:08, 85.18it/s]
1670
  60%|██████ | 1100/1827 [00:13<00:08, 85.85it/s]
1671
  61%|██████ | 1109/1827 [00:13<00:08, 85.34it/s]
1672
  61%|██████ | 1118/1827 [00:13<00:08, 83.96it/s]
1673
  62%|██████▏ | 1127/1827 [00:13<00:08, 83.91it/s]
1674
  62%|██████▏ | 1136/1827 [00:13<00:08, 84.94it/s]
1675
  63%|██████▎ | 1145/1827 [00:13<00:08, 84.62it/s]
1676
  63%|██████▎ | 1154/1827 [00:13<00:07, 84.47it/s]
1677
  64%|██████▎ | 1163/1827 [00:13<00:08, 82.48it/s]
1678
  64%|██████▍ | 1172/1827 [00:14<00:07, 83.17it/s]
1679
  65%|██████▍ | 1181/1827 [00:14<00:07, 81.16it/s]
1680
  65%|██████▌ | 1190/1827 [00:14<00:07, 83.08it/s]
1681
  66%|██████▌ | 1199/1827 [00:14<00:07, 83.48it/s]
1682
  66%|██████▌ | 1208/1827 [00:14<00:07, 84.04it/s]
1683
  67%|██████▋ | 1217/1827 [00:14<00:07, 82.40it/s]
1684
  67%|██████▋ | 1226/1827 [00:14<00:07, 82.82it/s]
1685
  68%|██████▊ | 1235/1827 [00:14<00:07, 83.66it/s]
1686
  68%|██████▊ | 1244/1827 [00:14<00:06, 84.34it/s]
1687
  69%|██████▊ | 1253/1827 [00:15<00:07, 81.83it/s]
1688
  69%|██████▉ | 1262/1827 [00:15<00:06, 81.92it/s]
1689
  70%|██████▉ | 1271/1827 [00:15<00:06, 82.56it/s]
1690
  70%|███████ | 1280/1827 [00:15<00:06, 83.63it/s]
1691
  71%|███████ | 1289/1827 [00:15<00:06, 84.21it/s]
1692
  71%|███████ | 1298/1827 [00:15<00:06, 85.46it/s]
1693
  72%|███████▏ | 1307/1827 [00:15<00:06, 85.59it/s]
1694
  72%|███████▏ | 1316/1827 [00:15<00:05, 86.19it/s]
1695
  73%|███████▎ | 1325/1827 [00:15<00:05, 85.74it/s]
1696
  73%|███████▎ | 1334/1827 [00:16<00:05, 86.33it/s]
1697
  74%|███████▎ | 1343/1827 [00:16<00:05, 84.87it/s]
1698
  74%|███████▍ | 1352/1827 [00:16<00:05, 85.11it/s]
1699
  74%|███████▍ | 1361/1827 [00:16<00:05, 85.88it/s]
1700
  75%|███████▍ | 1370/1827 [00:16<00:05, 85.65it/s]
1701
  75%|███████▌ | 1379/1827 [00:16<00:05, 85.85it/s]
1702
  76%|███████▌ | 1388/1827 [00:16<00:05, 85.76it/s]
1703
  76%|███████▋ | 1397/1827 [00:16<00:05, 85.56it/s]
1704
  77%|███████▋ | 1406/1827 [00:16<00:04, 85.93it/s]
1705
  77%|███████▋ | 1415/1827 [00:16<00:04, 85.21it/s]
1706
  78%|███████▊ | 1424/1827 [00:17<00:04, 84.33it/s]
1707
  78%|███████▊ | 1433/1827 [00:17<00:04, 83.44it/s]
1708
  79%|███████▉ | 1442/1827 [00:17<00:04, 81.50it/s]
1709
  79%|███████▉ | 1451/1827 [00:17<00:04, 82.65it/s]
1710
  80%|███████▉ | 1460/1827 [00:17<00:04, 83.66it/s]
1711
  80%|████████ | 1469/1827 [00:17<00:04, 81.75it/s]
1712
  81%|████████ | 1478/1827 [00:17<00:04, 81.42it/s]
1713
  81%|████████▏ | 1487/1827 [00:17<00:04, 82.24it/s]
1714
  82%|████████▏ | 1496/1827 [00:17<00:04, 80.08it/s]
1715
  82%|████████▏ | 1505/1827 [00:18<00:03, 80.52it/s]
1716
  83%|████████▎ | 1514/1827 [00:18<00:03, 82.13it/s]
1717
  83%|████████▎ | 1523/1827 [00:18<00:03, 82.92it/s]
1718
  84%|████████▍ | 1532/1827 [00:18<00:03, 82.73it/s]
1719
  84%|████████▍ | 1541/1827 [00:18<00:03, 83.28it/s]
1720
  85%|████████▍ | 1550/1827 [00:18<00:03, 82.29it/s]
1721
  85%|████████▌ | 1559/1827 [00:18<00:03, 82.49it/s]
1722
  86%|████████▌ | 1568/1827 [00:18<00:03, 82.39it/s]
1723
  86%|████████▋ | 1577/1827 [00:18<00:03, 82.15it/s]
1724
  87%|████████▋ | 1586/1827 [00:19<00:02, 82.43it/s]
1725
  87%|████████▋ | 1595/1827 [00:19<00:02, 81.93it/s]
1726
  88%|████████▊ | 1604/1827 [00:19<00:02, 82.96it/s]
1727
  88%|████████▊ | 1613/1827 [00:19<00:02, 83.75it/s]
1728
  89%|████████▉ | 1622/1827 [00:19<00:02, 78.81it/s]
1729
  89%|████████▉ | 1630/1827 [00:19<00:02, 76.60it/s]
1730
  90%|████████▉ | 1638/1827 [00:19<00:02, 77.25it/s]
1731
  90%|█████████ | 1647/1827 [00:19<00:02, 79.86it/s]
1732
  91%|█████████ | 1656/1827 [00:19<00:02, 81.63it/s]
1733
  91%|█████████ | 1665/1827 [00:20<00:01, 81.29it/s]
1734
  92%|█████████▏| 1674/1827 [00:20<00:01, 82.22it/s]
1735
  92%|█████████▏| 1683/1827 [00:20<00:01, 84.30it/s]
1736
  93%|█████████▎| 1692/1827 [00:20<00:01, 82.83it/s]
1737
  93%|█████████▎| 1701/1827 [00:20<00:01, 83.31it/s]
1738
  94%|█████████▎| 1710/1827 [00:20<00:01, 84.27it/s]
1739
  94%|█████████▍| 1719/1827 [00:20<00:01, 81.37it/s]
1740
  95%|█████████▍| 1728/1827 [00:20<00:01, 82.82it/s]
1741
  95%|█████████▌| 1737/1827 [00:20<00:01, 83.49it/s]
1742
  96%|█████████▌| 1746/1827 [00:20<00:00, 84.78it/s]
1743
  96%|█████████▌| 1755/1827 [00:21<00:00, 85.12it/s]
1744
  97%|█████████▋| 1764/1827 [00:21<00:00, 81.96it/s]
1745
  97%|█████████▋| 1773/1827 [00:21<00:00, 82.90it/s]
1746
  98%|█████████▊| 1782/1827 [00:21<00:00, 84.18it/s]
1747
  98%|█████████▊| 1791/1827 [00:21<00:00, 84.49it/s]
1748
  99%|█████████▊| 1800/1827 [00:21<00:00, 81.26it/s]
1749
  99%|█████████▉| 1809/1827 [00:21<00:00, 82.04it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1450
  {'eval_loss': 0.005793666001409292, 'eval_precision': 0.9347826086956522, 'eval_recall': 0.9485294117647058, 'eval_f1': 0.9416058394160585, 'eval_accuracy': 0.9989083718950389, 'eval_runtime': 14.4595, 'eval_samples_per_second': 470.971, 'eval_steps_per_second': 58.923, 'epoch': 10.0}
1451
  {'train_runtime': 1349.0548, 'train_samples_per_second': 220.873, 'train_steps_per_second': 3.454, 'train_loss': 0.002772659832779558, 'epoch': 10.0}
1452
 
1453
+ ***** train metrics *****
1454
+ epoch = 10.0
1455
+ total_flos = 13704263GF
1456
+ train_loss = 0.0028
1457
+ train_runtime = 0:22:29.05
1458
+ train_samples = 29797
1459
+ train_samples_per_second = 220.873
1460
+ train_steps_per_second = 3.454
1461
+ 09/05/2024 23:34:48 - INFO - __main__ - *** Evaluate ***
1462
+ [INFO|trainer.py:811] 2024-09-05 23:34:48,810 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
1463
+ [INFO|trainer.py:3819] 2024-09-05 23:34:48,813 >>
1464
+ ***** Running Evaluation *****
1465
+ [INFO|trainer.py:3821] 2024-09-05 23:34:48,813 >> Num examples = 6810
1466
+ [INFO|trainer.py:3824] 2024-09-05 23:34:48,813 >> Batch size = 8
1467
+
1468
  0%| | 0/852 [00:00<?, ?it/s]
1469
  1%| | 10/852 [00:00<00:09, 89.77it/s]
1470
  2%|▏ | 19/852 [00:00<00:10, 77.96it/s]
1471
  3%|▎ | 27/852 [00:00<00:10, 78.13it/s]
1472
  4%|▍ | 36/852 [00:00<00:10, 78.74it/s]
1473
  5%|▌ | 45/852 [00:00<00:10, 80.47it/s]
1474
  6%|▋ | 54/852 [00:00<00:09, 81.97it/s]
1475
  7%|▋ | 63/852 [00:00<00:09, 81.96it/s]
1476
  8%|▊ | 72/852 [00:00<00:09, 80.15it/s]
1477
  10%|▉ | 81/852 [00:01<00:09, 80.19it/s]
1478
  11%|█ | 90/852 [00:01<00:09, 80.63it/s]
1479
  12%|█▏ | 99/852 [00:01<00:09, 80.42it/s]
1480
  13%|█▎ | 108/852 [00:01<00:09, 79.74it/s]
1481
  14%|█▎ | 117/852 [00:01<00:09, 80.86it/s]
1482
  15%|█▍ | 126/852 [00:01<00:09, 78.38it/s]
1483
  16%|█▌ | 135/852 [00:01<00:09, 79.16it/s]
1484
  17%|█▋ | 143/852 [00:01<00:08, 79.17it/s]
1485
  18%|█▊ | 151/852 [00:01<00:08, 78.56it/s]
1486
  19%|█▉ | 160/852 [00:01<00:08, 81.22it/s]
1487
  20%|█▉ | 169/852 [00:02<00:08, 81.16it/s]
1488
  21%|██ | 178/852 [00:02<00:08, 81.76it/s]
1489
  22%|██▏ | 187/852 [00:02<00:08, 82.46it/s]
1490
  23%|██▎ | 196/852 [00:02<00:07, 82.29it/s]
1491
  24%|██▍ | 205/852 [00:02<00:07, 83.12it/s]
1492
  25%|██▌ | 214/852 [00:02<00:07, 80.08it/s]
1493
  26%|██▌ | 223/852 [00:02<00:07, 81.13it/s]
1494
  27%|██▋ | 232/852 [00:02<00:07, 81.21it/s]
1495
  28%|██▊ | 241/852 [00:03<00:07, 78.10it/s]
1496
  29%|██▉ | 250/852 [00:03<00:07, 79.34it/s]
1497
  30%|███ | 259/852 [00:03<00:07, 80.76it/s]
1498
  31%|███▏ | 268/852 [00:03<00:07, 74.97it/s]
1499
  33%|███▎ | 277/852 [00:03<00:07, 77.71it/s]
1500
  34%|███▎ | 286/852 [00:03<00:07, 80.09it/s]
1501
  35%|███▍ | 295/852 [00:03<00:06, 80.08it/s]
1502
  36%|███▌ | 304/852 [00:03<00:06, 81.59it/s]
1503
  37%|███▋ | 313/852 [00:03<00:06, 80.30it/s]
1504
  38%|███▊ | 322/852 [00:04<00:06, 82.39it/s]
1505
  39%|███▉ | 331/852 [00:04<00:06, 82.16it/s]
1506
  40%|███▉ | 340/852 [00:04<00:06, 82.30it/s]
1507
  41%|████ | 349/852 [00:04<00:06, 82.47it/s]
1508
  42%|████▏ | 358/852 [00:04<00:06, 80.49it/s]
1509
  43%|████▎ | 367/852 [00:04<00:05, 81.17it/s]
1510
  44%|████▍ | 376/852 [00:04<00:05, 81.82it/s]
1511
  45%|████▌ | 385/852 [00:04<00:05, 81.26it/s]
1512
  46%|████▌ | 394/852 [00:04<00:05, 81.23it/s]
1513
  47%|████▋ | 403/852 [00:05<00:05, 81.17it/s]
1514
  48%|████▊ | 412/852 [00:05<00:05, 79.22it/s]
1515
  49%|████▉ | 421/852 [00:05<00:05, 80.98it/s]
1516
  50%|█████ | 430/852 [00:05<00:05, 80.11it/s]
1517
  52%|█████▏ | 439/852 [00:05<00:05, 81.83it/s]
1518
  53%|█████▎ | 448/852 [00:05<00:04, 82.37it/s]
1519
  54%|█████▎ | 457/852 [00:05<00:04, 83.25it/s]
1520
  55%|█████▍ | 466/852 [00:05<00:04, 80.61it/s]
1521
  56%|█████▌ | 475/852 [00:05<00:04, 77.21it/s]
1522
  57%|█████▋ | 484/852 [00:06<00:04, 77.43it/s]
1523
  58%|█████▊ | 493/852 [00:06<00:04, 79.71it/s]
1524
  59%|█████▉ | 502/852 [00:06<00:04, 81.52it/s]
1525
  60%|█████▉ | 511/852 [00:06<00:04, 81.79it/s]
1526
  61%|██████ | 520/852 [00:06<00:04, 82.93it/s]
1527
  62%|██████▏ | 529/852 [00:06<00:03, 80.76it/s]
1528
  63%|██████▎ | 538/852 [00:06<00:03, 82.65it/s]
1529
  64%|██████▍ | 547/852 [00:06<00:03, 83.43it/s]
1530
  65%|██████▌ | 556/852 [00:06<00:03, 80.56it/s]
1531
  66%|██████▋ | 565/852 [00:06<00:03, 82.97it/s]
1532
  67%|██████▋ | 574/852 [00:07<00:03, 83.67it/s]
1533
  68%|██████▊ | 583/852 [00:07<00:03, 83.26it/s]
1534
  69%|██████▉ | 592/852 [00:07<00:03, 82.65it/s]
1535
  71%|███████ | 601/852 [00:07<00:03, 82.89it/s]
1536
  72%|███████▏ | 610/852 [00:07<00:02, 83.17it/s]
1537
  73%|███████▎ | 619/852 [00:07<00:02, 80.86it/s]
1538
  74%|███████▎ | 628/852 [00:07<00:02, 80.15it/s]
1539
  75%|███████▍ | 637/852 [00:07<00:02, 80.82it/s]
1540
  76%|███████▌ | 646/852 [00:07<00:02, 78.59it/s]
1541
  77%|███████▋ | 655/852 [00:08<00:02, 80.46it/s]
1542
  78%|███████▊ | 664/852 [00:08<00:02, 81.10it/s]
1543
  79%|███████▉ | 673/852 [00:08<00:02, 80.96it/s]
1544
  80%|████████ | 682/852 [00:08<00:02, 81.12it/s]
1545
  81%|████████ | 691/852 [00:08<00:01, 82.73it/s]
1546
  82%|████████▏ | 700/852 [00:08<00:01, 83.32it/s]
1547
  83%|████████▎ | 709/852 [00:08<00:01, 84.29it/s]
1548
  84%|████████▍ | 718/852 [00:08<00:01, 83.48it/s]
1549
  85%|████████▌ | 727/852 [00:08<00:01, 83.89it/s]
1550
  86%|████████▋ | 736/852 [00:09<00:01, 83.25it/s]
1551
  87%|████████▋ | 745/852 [00:09<00:01, 84.23it/s]
1552
  88%|████████▊ | 754/852 [00:09<00:01, 84.42it/s]
1553
  90%|████████▉ | 763/852 [00:09<00:01, 85.20it/s]
1554
  91%|█████████ | 772/852 [00:09<00:00, 83.63it/s]
1555
  92%|█████████▏| 781/852 [00:09<00:00, 82.15it/s]
1556
  93%|█████████▎| 790/852 [00:09<00:00, 82.20it/s]
1557
  94%|█████████▍| 799/852 [00:09<00:00, 82.36it/s]
1558
  95%|█████████▍| 808/852 [00:09<00:00, 83.98it/s]
1559
  96%|█████████▌| 817/852 [00:10<00:00, 82.39it/s]
1560
  97%|█████████▋| 826/852 [00:10<00:00, 83.62it/s]
1561
  98%|█████████▊| 835/852 [00:10<00:00, 83.71it/s]
1562
  99%|█████████▉| 844/852 [00:10<00:00, 82.35it/s]
1563
+ ***** eval metrics *****
1564
+ epoch = 10.0
1565
+ eval_accuracy = 0.9989
1566
+ eval_f1 = 0.9492
1567
+ eval_loss = 0.0048
1568
+ eval_precision = 0.9461
1569
+ eval_recall = 0.9522
1570
+ eval_runtime = 0:00:13.94
1571
+ eval_samples = 6810
1572
+ eval_samples_per_second = 488.256
1573
+ eval_steps_per_second = 61.086
1574
+ 09/05/2024 23:35:02 - INFO - __main__ - *** Predict ***
1575
+ [INFO|trainer.py:811] 2024-09-05 23:35:02,765 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
1576
+ [INFO|trainer.py:3819] 2024-09-05 23:35:02,767 >>
1577
+ ***** Running Prediction *****
1578
+ [INFO|trainer.py:3821] 2024-09-05 23:35:02,767 >> Num examples = 14614
1579
+ [INFO|trainer.py:3824] 2024-09-05 23:35:02,767 >> Batch size = 8
1580
+
1581
  0%| | 0/1827 [00:00<?, ?it/s]
1582
  1%| | 10/1827 [00:00<00:19, 90.92it/s]
1583
  1%| | 20/1827 [00:00<00:22, 80.87it/s]
1584
  2%|▏ | 29/1827 [00:00<00:21, 82.68it/s]
1585
  2%|▏ | 38/1827 [00:00<00:21, 81.95it/s]
1586
  3%|▎ | 47/1827 [00:00<00:21, 82.63it/s]
1587
  3%|▎ | 56/1827 [00:00<00:21, 82.47it/s]
1588
  4%|▎ | 65/1827 [00:00<00:21, 80.47it/s]
1589
  4%|▍ | 74/1827 [00:00<00:21, 81.66it/s]
1590
  5%|▍ | 83/1827 [00:01<00:21, 82.68it/s]
1591
  5%|▌ | 92/1827 [00:01<00:21, 82.58it/s]
1592
  6%|▌ | 101/1827 [00:01<00:20, 84.52it/s]
1593
  6%|▌ | 110/1827 [00:01<00:20, 85.29it/s]
1594
  7%|▋ | 119/1827 [00:01<00:20, 83.68it/s]
1595
  7%|▋ | 128/1827 [00:01<00:20, 82.79it/s]
1596
  7%|▋ | 137/1827 [00:01<00:20, 83.78it/s]
1597
  8%|▊ | 146/1827 [00:01<00:19, 84.13it/s]
1598
  8%|▊ | 155/1827 [00:01<00:19, 83.64it/s]
1599
  9%|▉ | 164/1827 [00:01<00:20, 81.81it/s]
1600
  9%|▉ | 173/1827 [00:02<00:19, 82.80it/s]
1601
  10%|▉ | 182/1827 [00:02<00:19, 84.13it/s]
1602
  10%|█ | 191/1827 [00:02<00:19, 84.70it/s]
1603
  11%|█ | 200/1827 [00:02<00:19, 83.62it/s]
1604
  11%|█▏ | 209/1827 [00:02<00:19, 84.42it/s]
1605
  12%|█▏ | 218/1827 [00:02<00:19, 83.10it/s]
1606
  12%|█▏ | 227/1827 [00:02<00:19, 83.75it/s]
1607
  13%|█▎ | 236/1827 [00:02<00:19, 80.40it/s]
1608
  13%|█▎ | 245/1827 [00:02<00:19, 81.38it/s]
1609
  14%|█▍ | 254/1827 [00:03<00:19, 81.71it/s]
1610
  14%|█▍ | 263/1827 [00:03<00:19, 80.90it/s]
1611
  15%|█▍ | 272/1827 [00:03<00:18, 83.07it/s]
1612
  15%|█▌ | 281/1827 [00:03<00:18, 84.44it/s]
1613
  16%|█▌ | 290/1827 [00:03<00:18, 83.94it/s]
1614
  16%|█▋ | 299/1827 [00:03<00:18, 84.45it/s]
1615
  17%|█▋ | 308/1827 [00:03<00:18, 83.65it/s]
1616
  17%|█▋ | 317/1827 [00:03<00:18, 83.14it/s]
1617
  18%|█▊ | 326/1827 [00:03<00:17, 84.05it/s]
1618
  18%|█▊ | 335/1827 [00:04<00:17, 83.67it/s]
1619
  19%|█▉ | 344/1827 [00:04<00:17, 84.53it/s]
1620
  19%|█▉ | 353/1827 [00:04<00:18, 79.63it/s]
1621
  20%|█▉ | 362/1827 [00:04<00:18, 80.95it/s]
1622
  20%|██ | 371/1827 [00:04<00:17, 82.08it/s]
1623
  21%|██ | 380/1827 [00:04<00:17, 83.24it/s]
1624
  21%|██▏ | 389/1827 [00:04<00:17, 82.54it/s]
1625
  22%|██▏ | 398/1827 [00:04<00:17, 81.91it/s]
1626
  22%|██▏ | 407/1827 [00:04<00:17, 83.46it/s]
1627
  23%|██▎ | 416/1827 [00:05<00:17, 81.20it/s]
1628
  23%|██▎ | 425/1827 [00:05<00:17, 82.39it/s]
1629
  24%|██▍ | 434/1827 [00:05<00:17, 81.62it/s]
1630
  24%|██▍ | 443/1827 [00:05<00:17, 80.18it/s]
1631
  25%|██▍ | 452/1827 [00:05<00:16, 82.39it/s]
1632
  25%|██▌ | 461/1827 [00:05<00:16, 83.63it/s]
1633
  26%|██▌ | 470/1827 [00:05<00:16, 83.17it/s]
1634
  26%|██▌ | 479/1827 [00:05<00:16, 81.50it/s]
1635
  27%|██▋ | 488/1827 [00:05<00:16, 81.09it/s]
1636
  27%|██▋ | 497/1827 [00:06<00:16, 80.25it/s]
1637
  28%|██▊ | 506/1827 [00:06<00:16, 81.31it/s]
1638
  28%|██▊ | 515/1827 [00:06<00:15, 82.50it/s]
1639
  29%|██▊ | 524/1827 [00:06<00:15, 84.16it/s]
1640
  29%|██▉ | 533/1827 [00:06<00:15, 83.22it/s]
1641
  30%|██▉ | 542/1827 [00:06<00:15, 84.11it/s]
1642
  30%|███ | 551/1827 [00:06<00:15, 83.08it/s]
1643
  31%|███ | 560/1827 [00:06<00:15, 82.78it/s]
1644
  31%|███ | 569/1827 [00:06<00:15, 83.19it/s]
1645
  32%|███▏ | 578/1827 [00:06<00:15, 82.89it/s]
1646
  32%|███▏ | 587/1827 [00:07<00:14, 82.68it/s]
1647
  33%|███▎ | 596/1827 [00:07<00:14, 82.70it/s]
1648
  33%|███▎ | 605/1827 [00:07<00:14, 82.71it/s]
1649
  34%|███▎ | 614/1827 [00:07<00:14, 82.53it/s]
1650
  34%|███▍ | 623/1827 [00:07<00:14, 83.35it/s]
1651
  35%|███▍ | 632/1827 [00:07<00:14, 83.93it/s]
1652
  35%|███▌ | 641/1827 [00:07<00:14, 84.33it/s]
1653
  36%|███▌ | 650/1827 [00:07<00:14, 79.40it/s]
1654
  36%|███▌ | 659/1827 [00:07<00:14, 79.16it/s]
1655
  37%|███▋ | 668/1827 [00:08<00:14, 80.89it/s]
1656
  37%|███▋ | 677/1827 [00:08<00:14, 78.78it/s]
1657
  38%|███▊ | 686/1827 [00:08<00:14, 80.15it/s]
1658
  38%|███▊ | 695/1827 [00:08<00:13, 81.68it/s]
1659
  39%|███▊ | 704/1827 [00:08<00:14, 78.28it/s]
1660
  39%|███▉ | 713/1827 [00:08<00:13, 80.50it/s]
1661
  40%|███▉ | 722/1827 [00:08<00:13, 82.68it/s]
1662
  40%|████ | 731/1827 [00:08<00:13, 83.49it/s]
1663
  41%|████ | 740/1827 [00:08<00:13, 82.99it/s]
1664
  41%|████ | 749/1827 [00:09<00:12, 84.43it/s]
1665
  41%|████▏ | 758/1827 [00:09<00:12, 84.86it/s]
1666
  42%|████▏ | 767/1827 [00:09<00:12, 85.25it/s]
1667
  42%|████▏ | 776/1827 [00:09<00:12, 85.62it/s]
1668
  43%|████▎ | 785/1827 [00:09<00:12, 82.78it/s]
1669
  43%|████▎ | 794/1827 [00:09<00:12, 84.06it/s]
1670
  44%|████▍ | 803/1827 [00:09<00:12, 83.01it/s]
1671
  44%|████▍ | 812/1827 [00:09<00:12, 83.78it/s]
1672
  45%|████▍ | 821/1827 [00:09<00:11, 84.28it/s]
1673
  45%|████▌ | 830/1827 [00:10<00:11, 84.61it/s]
1674
  46%|████▌ | 839/1827 [00:10<00:11, 83.72it/s]
1675
  46%|████▋ | 848/1827 [00:10<00:11, 85.15it/s]
1676
  47%|████▋ | 857/1827 [00:10<00:11, 85.42it/s]
1677
  47%|████▋ | 866/1827 [00:10<00:11, 85.71it/s]
1678
  48%|████▊ | 875/1827 [00:10<00:11, 86.06it/s]
1679
  48%|████▊ | 884/1827 [00:10<00:10, 86.43it/s]
1680
  49%|████▉ | 893/1827 [00:10<00:10, 85.57it/s]
1681
  49%|████▉ | 902/1827 [00:10<00:10, 86.11it/s]
1682
  50%|████▉ | 911/1827 [00:10<00:10, 86.50it/s]
1683
  50%|█████ | 920/1827 [00:11<00:10, 86.07it/s]
1684
  51%|█████ | 929/1827 [00:11<00:10, 86.11it/s]
1685
  51%|█████▏ | 938/1827 [00:11<00:10, 82.86it/s]
1686
  52%|█████▏ | 947/1827 [00:11<00:10, 81.36it/s]
1687
  52%|█████▏ | 956/1827 [00:11<00:10, 82.29it/s]
1688
  53%|█████▎ | 965/1827 [00:11<00:10, 83.27it/s]
1689
  53%|█████▎ | 974/1827 [00:11<00:10, 83.79it/s]
1690
  54%|█████▍ | 983/1827 [00:11<00:10, 83.54it/s]
1691
  54%|█████▍ | 992/1827 [00:11<00:10, 83.43it/s]
1692
  55%|█████▍ | 1001/1827 [00:12<00:09, 84.18it/s]
1693
  55%|█████▌ | 1010/1827 [00:12<00:09, 83.88it/s]
1694
  56%|█████▌ | 1019/1827 [00:12<00:09, 84.69it/s]
1695
  56%|█████▋ | 1028/1827 [00:12<00:09, 85.70it/s]
1696
  57%|█████▋ | 1037/1827 [00:12<00:09, 84.01it/s]
1697
  57%|█████▋ | 1046/1827 [00:12<00:09, 84.57it/s]
1698
  58%|█████▊ | 1055/1827 [00:12<00:09, 84.49it/s]
1699
  58%|████��▊ | 1064/1827 [00:12<00:09, 84.45it/s]
1700
  59%|█████▊ | 1073/1827 [00:12<00:08, 85.10it/s]
1701
  59%|█████▉ | 1082/1827 [00:13<00:08, 85.50it/s]
1702
  60%|█████▉ | 1091/1827 [00:13<00:08, 85.18it/s]
1703
  60%|██████ | 1100/1827 [00:13<00:08, 85.85it/s]
1704
  61%|██████ | 1109/1827 [00:13<00:08, 85.34it/s]
1705
  61%|██████ | 1118/1827 [00:13<00:08, 83.96it/s]
1706
  62%|██████▏ | 1127/1827 [00:13<00:08, 83.91it/s]
1707
  62%|██████▏ | 1136/1827 [00:13<00:08, 84.94it/s]
1708
  63%|██████▎ | 1145/1827 [00:13<00:08, 84.62it/s]
1709
  63%|██████▎ | 1154/1827 [00:13<00:07, 84.47it/s]
1710
  64%|██████▎ | 1163/1827 [00:13<00:08, 82.48it/s]
1711
  64%|██████▍ | 1172/1827 [00:14<00:07, 83.17it/s]
1712
  65%|██████▍ | 1181/1827 [00:14<00:07, 81.16it/s]
1713
  65%|██████▌ | 1190/1827 [00:14<00:07, 83.08it/s]
1714
  66%|██████▌ | 1199/1827 [00:14<00:07, 83.48it/s]
1715
  66%|██████▌ | 1208/1827 [00:14<00:07, 84.04it/s]
1716
  67%|██████▋ | 1217/1827 [00:14<00:07, 82.40it/s]
1717
  67%|██████▋ | 1226/1827 [00:14<00:07, 82.82it/s]
1718
  68%|██████▊ | 1235/1827 [00:14<00:07, 83.66it/s]
1719
  68%|██████▊ | 1244/1827 [00:14<00:06, 84.34it/s]
1720
  69%|██████▊ | 1253/1827 [00:15<00:07, 81.83it/s]
1721
  69%|██████▉ | 1262/1827 [00:15<00:06, 81.92it/s]
1722
  70%|██████▉ | 1271/1827 [00:15<00:06, 82.56it/s]
1723
  70%|███████ | 1280/1827 [00:15<00:06, 83.63it/s]
1724
  71%|███████ | 1289/1827 [00:15<00:06, 84.21it/s]
1725
  71%|███████ | 1298/1827 [00:15<00:06, 85.46it/s]
1726
  72%|███████▏ | 1307/1827 [00:15<00:06, 85.59it/s]
1727
  72%|███████▏ | 1316/1827 [00:15<00:05, 86.19it/s]
1728
  73%|███████▎ | 1325/1827 [00:15<00:05, 85.74it/s]
1729
  73%|███████▎ | 1334/1827 [00:16<00:05, 86.33it/s]
1730
  74%|███████▎ | 1343/1827 [00:16<00:05, 84.87it/s]
1731
  74%|███████▍ | 1352/1827 [00:16<00:05, 85.11it/s]
1732
  74%|███████▍ | 1361/1827 [00:16<00:05, 85.88it/s]
1733
  75%|███████▍ | 1370/1827 [00:16<00:05, 85.65it/s]
1734
  75%|███████▌ | 1379/1827 [00:16<00:05, 85.85it/s]
1735
  76%|███████▌ | 1388/1827 [00:16<00:05, 85.76it/s]
1736
  76%|███████▋ | 1397/1827 [00:16<00:05, 85.56it/s]
1737
  77%|███████▋ | 1406/1827 [00:16<00:04, 85.93it/s]
1738
  77%|███████▋ | 1415/1827 [00:16<00:04, 85.21it/s]
1739
  78%|███████▊ | 1424/1827 [00:17<00:04, 84.33it/s]
1740
  78%|███████▊ | 1433/1827 [00:17<00:04, 83.44it/s]
1741
  79%|███████▉ | 1442/1827 [00:17<00:04, 81.50it/s]
1742
  79%|███████▉ | 1451/1827 [00:17<00:04, 82.65it/s]
1743
  80%|███████▉ | 1460/1827 [00:17<00:04, 83.66it/s]
1744
  80%|████████ | 1469/1827 [00:17<00:04, 81.75it/s]
1745
  81%|████████ | 1478/1827 [00:17<00:04, 81.42it/s]
1746
  81%|████████▏ | 1487/1827 [00:17<00:04, 82.24it/s]
1747
  82%|████████▏ | 1496/1827 [00:17<00:04, 80.08it/s]
1748
  82%|████████▏ | 1505/1827 [00:18<00:03, 80.52it/s]
1749
  83%|████████▎ | 1514/1827 [00:18<00:03, 82.13it/s]
1750
  83%|████████▎ | 1523/1827 [00:18<00:03, 82.92it/s]
1751
  84%|████████▍ | 1532/1827 [00:18<00:03, 82.73it/s]
1752
  84%|████████▍ | 1541/1827 [00:18<00:03, 83.28it/s]
1753
  85%|████████▍ | 1550/1827 [00:18<00:03, 82.29it/s]
1754
  85%|████████▌ | 1559/1827 [00:18<00:03, 82.49it/s]
1755
  86%|████████▌ | 1568/1827 [00:18<00:03, 82.39it/s]
1756
  86%|████████▋ | 1577/1827 [00:18<00:03, 82.15it/s]
1757
  87%|████████▋ | 1586/1827 [00:19<00:02, 82.43it/s]
1758
  87%|████████▋ | 1595/1827 [00:19<00:02, 81.93it/s]
1759
  88%|████████▊ | 1604/1827 [00:19<00:02, 82.96it/s]
1760
  88%|████████▊ | 1613/1827 [00:19<00:02, 83.75it/s]
1761
  89%|████████▉ | 1622/1827 [00:19<00:02, 78.81it/s]
1762
  89%|████████▉ | 1630/1827 [00:19<00:02, 76.60it/s]
1763
  90%|████████▉ | 1638/1827 [00:19<00:02, 77.25it/s]
1764
  90%|█████████ | 1647/1827 [00:19<00:02, 79.86it/s]
1765
  91%|█████████ | 1656/1827 [00:19<00:02, 81.63it/s]
1766
  91%|█████████ | 1665/1827 [00:20<00:01, 81.29it/s]
1767
  92%|█████████▏| 1674/1827 [00:20<00:01, 82.22it/s]
1768
  92%|█████████▏| 1683/1827 [00:20<00:01, 84.30it/s]
1769
  93%|█████████▎| 1692/1827 [00:20<00:01, 82.83it/s]
1770
  93%|█████████▎| 1701/1827 [00:20<00:01, 83.31it/s]
1771
  94%|█████████▎| 1710/1827 [00:20<00:01, 84.27it/s]
1772
  94%|█████████▍| 1719/1827 [00:20<00:01, 81.37it/s]
1773
  95%|█████████▍| 1728/1827 [00:20<00:01, 82.82it/s]
1774
  95%|█████████▌| 1737/1827 [00:20<00:01, 83.49it/s]
1775
  96%|█████████▌| 1746/1827 [00:20<00:00, 84.78it/s]
1776
  96%|█████████▌| 1755/1827 [00:21<00:00, 85.12it/s]
1777
  97%|█████████▋| 1764/1827 [00:21<00:00, 81.96it/s]
1778
  97%|█████████▋| 1773/1827 [00:21<00:00, 82.90it/s]
1779
  98%|█████████▊| 1782/1827 [00:21<00:00, 84.18it/s]
1780
  98%|█████████▊| 1791/1827 [00:21<00:00, 84.49it/s]
1781
  99%|█████████▊| 1800/1827 [00:21<00:00, 81.26it/s]
1782
  99%|█████████▉| 1809/1827 [00:21<00:00, 82.04it/s]
1783
+ [INFO|trainer.py:3503] 2024-09-05 23:35:31,629 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
1784
+ [INFO|configuration_utils.py:472] 2024-09-05 23:35:31,631 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
1785
+ [INFO|modeling_utils.py:2799] 2024-09-05 23:35:32,981 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1786
+ [INFO|tokenization_utils_base.py:2684] 2024-09-05 23:35:32,982 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1787
+ [INFO|tokenization_utils_base.py:2693] 2024-09-05 23:35:32,982 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
1788
+ ***** predict metrics *****
1789
+ predict_accuracy = 0.9987
1790
+ predict_f1 = 0.9243
1791
+ predict_loss = 0.0059
1792
+ predict_precision = 0.907
1793
+ predict_recall = 0.9423
1794
+ predict_runtime = 0:00:28.21
1795
+ predict_samples_per_second = 517.899
1796
+ predict_steps_per_second = 64.746
1797
+
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 9.98998998998999,
3
- "total_flos": 1.5071241212671032e+16,
4
- "train_loss": 0.022475468706272407,
5
- "train_runtime": 1385.1143,
6
- "train_samples": 31947,
7
- "train_samples_per_second": 230.645,
8
- "train_steps_per_second": 3.603
9
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "total_flos": 1.4714840952259542e+16,
4
+ "train_loss": 0.002772659832779558,
5
+ "train_runtime": 1349.0548,
6
+ "train_samples": 29797,
7
+ "train_samples_per_second": 220.873,
8
+ "train_steps_per_second": 3.454
9
  }
trainer_state.json CHANGED
@@ -1,208 +1,208 @@
1
  {
2
- "best_metric": 0.8040201005025126,
3
- "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-3996",
4
- "epoch": 9.98998998998999,
5
  "eval_steps": 500,
6
- "global_step": 4990,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.998998998998999,
13
- "eval_accuracy": 0.9730869045820918,
14
- "eval_f1": 0.75963794837412,
15
- "eval_loss": 0.07385822385549545,
16
- "eval_precision": 0.7270588235294118,
17
- "eval_recall": 0.7952737482452036,
18
- "eval_runtime": 14.2023,
19
- "eval_samples_per_second": 479.501,
20
- "eval_steps_per_second": 59.99,
21
- "step": 499
22
- },
23
- {
24
- "epoch": 1.001001001001001,
25
- "grad_norm": 0.8427119851112366,
26
- "learning_rate": 4.4989979959919844e-05,
27
- "loss": 0.105,
28
  "step": 500
29
  },
30
  {
31
  "epoch": 2.0,
32
- "eval_accuracy": 0.972949592870776,
33
- "eval_f1": 0.7655806561471223,
34
- "eval_loss": 0.09075114130973816,
35
- "eval_precision": 0.7435501653803749,
36
- "eval_recall": 0.7889564810481984,
37
- "eval_runtime": 14.1492,
38
- "eval_samples_per_second": 481.299,
39
- "eval_steps_per_second": 60.215,
40
- "step": 999
41
- },
42
- {
43
- "epoch": 2.002002002002002,
44
- "grad_norm": 0.9121108651161194,
45
- "learning_rate": 3.997995991983968e-05,
46
- "loss": 0.0448,
47
  "step": 1000
48
  },
49
  {
50
- "epoch": 2.998998998998999,
51
- "eval_accuracy": 0.9743913658395924,
52
- "eval_f1": 0.7829875042989798,
53
- "eval_loss": 0.09297410398721695,
54
- "eval_precision": 0.7675882220723759,
55
- "eval_recall": 0.7990173139915769,
56
- "eval_runtime": 14.1376,
57
- "eval_samples_per_second": 481.693,
58
- "eval_steps_per_second": 60.265,
59
- "step": 1498
60
  },
61
  {
62
- "epoch": 3.003003003003003,
63
- "grad_norm": 0.5380845069885254,
64
- "learning_rate": 3.496993987975952e-05,
65
- "loss": 0.0255,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 4.0,
70
- "eval_accuracy": 0.9757438861960537,
71
- "eval_f1": 0.789358010410642,
72
- "eval_loss": 0.10520397126674652,
73
- "eval_precision": 0.7805994051704416,
74
- "eval_recall": 0.798315395414132,
75
- "eval_runtime": 14.3621,
76
- "eval_samples_per_second": 474.163,
77
- "eval_steps_per_second": 59.323,
78
- "step": 1998
79
- },
80
- {
81
- "epoch": 4.004004004004004,
82
- "grad_norm": 0.2705754339694977,
83
- "learning_rate": 2.9959919839679363e-05,
84
- "loss": 0.0164,
85
  "step": 2000
86
  },
87
  {
88
- "epoch": 4.998998998998999,
89
- "eval_accuracy": 0.9750435964683428,
90
- "eval_f1": 0.7879346074142298,
91
- "eval_loss": 0.10997848957777023,
92
- "eval_precision": 0.7756119673617408,
93
- "eval_recall": 0.8006551240056153,
94
- "eval_runtime": 14.2341,
95
- "eval_samples_per_second": 478.428,
96
- "eval_steps_per_second": 59.856,
97
- "step": 2497
98
  },
99
  {
100
- "epoch": 5.005005005005005,
101
- "grad_norm": 0.27666428685188293,
102
- "learning_rate": 2.49498997995992e-05,
103
- "loss": 0.0112,
104
  "step": 2500
105
  },
106
  {
107
  "epoch": 6.0,
108
- "eval_accuracy": 0.9768011863731858,
109
- "eval_f1": 0.799447386599125,
110
- "eval_loss": 0.12663568556308746,
111
- "eval_precision": 0.7869446962828649,
112
- "eval_recall": 0.8123537669630323,
113
- "eval_runtime": 14.5018,
114
- "eval_samples_per_second": 469.597,
115
- "eval_steps_per_second": 58.751,
116
- "step": 2997
117
- },
118
- {
119
- "epoch": 6.006006006006006,
120
- "grad_norm": 0.24552026391029358,
121
- "learning_rate": 1.993987975951904e-05,
122
- "loss": 0.0073,
123
  "step": 3000
124
  },
125
  {
126
- "epoch": 6.998998998998999,
127
- "eval_accuracy": 0.976293133041317,
128
- "eval_f1": 0.7968804562914678,
129
- "eval_loss": 0.12882493436336517,
130
- "eval_precision": 0.792911744266852,
131
- "eval_recall": 0.8008890968647637,
132
- "eval_runtime": 14.3476,
133
- "eval_samples_per_second": 474.643,
134
- "eval_steps_per_second": 59.383,
135
- "step": 3496
136
  },
137
  {
138
- "epoch": 7.007007007007007,
139
- "grad_norm": 0.2008085697889328,
140
- "learning_rate": 1.492985971943888e-05,
141
- "loss": 0.0054,
142
  "step": 3500
143
  },
144
  {
145
  "epoch": 8.0,
146
- "eval_accuracy": 0.9764853694371592,
147
- "eval_f1": 0.8040201005025126,
148
- "eval_loss": 0.14244574308395386,
149
- "eval_precision": 0.803175344384777,
150
- "eval_recall": 0.8048666354702855,
151
- "eval_runtime": 14.514,
152
- "eval_samples_per_second": 469.202,
153
- "eval_steps_per_second": 58.702,
154
- "step": 3996
155
- },
156
- {
157
- "epoch": 8.008008008008009,
158
- "grad_norm": 0.12597906589508057,
159
- "learning_rate": 9.919839679358718e-06,
160
- "loss": 0.0038,
161
  "step": 4000
162
  },
163
  {
164
- "epoch": 8.998998998999,
165
- "eval_accuracy": 0.9765059661938567,
166
- "eval_f1": 0.7970779220779219,
167
- "eval_loss": 0.14552859961986542,
168
- "eval_precision": 0.7901149425287356,
169
- "eval_recall": 0.8041647168928404,
170
- "eval_runtime": 14.2396,
171
- "eval_samples_per_second": 478.242,
172
- "eval_steps_per_second": 59.833,
173
- "step": 4495
174
  },
175
  {
176
- "epoch": 9.00900900900901,
177
- "grad_norm": 0.2577208876609802,
178
- "learning_rate": 4.9098196392785576e-06,
179
- "loss": 0.0028,
180
  "step": 4500
181
  },
182
  {
183
- "epoch": 9.98998998998999,
184
- "eval_accuracy": 0.9768286487154489,
185
- "eval_f1": 0.7984262902105993,
186
- "eval_loss": 0.14972682297229767,
187
- "eval_precision": 0.7898351648351648,
188
- "eval_recall": 0.8072063640617688,
189
- "eval_runtime": 14.3927,
190
- "eval_samples_per_second": 473.158,
191
- "eval_steps_per_second": 59.197,
192
- "step": 4990
193
- },
194
- {
195
- "epoch": 9.98998998998999,
196
- "step": 4990,
197
- "total_flos": 1.5071241212671032e+16,
198
- "train_loss": 0.022475468706272407,
199
- "train_runtime": 1385.1143,
200
- "train_samples_per_second": 230.645,
201
- "train_steps_per_second": 3.603
202
  }
203
  ],
204
  "logging_steps": 500,
205
- "max_steps": 4990,
206
  "num_input_tokens_seen": 0,
207
  "num_train_epochs": 10,
208
  "save_steps": 500,
@@ -218,7 +218,7 @@
218
  "attributes": {}
219
  }
220
  },
221
- "total_flos": 1.5071241212671032e+16,
222
  "train_batch_size": 32,
223
  "trial_name": null,
224
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9491525423728814,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2330",
4
+ "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 4660,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.9989495654084337,
14
+ "eval_f1": 0.9351598173515981,
15
+ "eval_loss": 0.0030670168343931437,
16
+ "eval_precision": 0.9292196007259528,
17
+ "eval_recall": 0.9411764705882353,
18
+ "eval_runtime": 13.9946,
19
+ "eval_samples_per_second": 486.615,
20
+ "eval_steps_per_second": 60.88,
21
+ "step": 466
22
+ },
23
+ {
24
+ "epoch": 1.0729613733905579,
25
+ "grad_norm": 0.07174628973007202,
26
+ "learning_rate": 4.4635193133047216e-05,
27
+ "loss": 0.0199,
28
  "step": 500
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "eval_accuracy": 0.9989358342373021,
33
+ "eval_f1": 0.9386834986474301,
34
+ "eval_loss": 0.0030621723271906376,
35
+ "eval_precision": 0.9212389380530973,
36
+ "eval_recall": 0.9568014705882353,
37
+ "eval_runtime": 14.0045,
38
+ "eval_samples_per_second": 486.272,
39
+ "eval_steps_per_second": 60.838,
40
+ "step": 932
41
+ },
42
+ {
43
+ "epoch": 2.1459227467811157,
44
+ "grad_norm": 0.13906870782375336,
45
+ "learning_rate": 3.927038626609442e-05,
46
+ "loss": 0.0026,
47
  "step": 1000
48
  },
49
  {
50
+ "epoch": 3.0,
51
+ "eval_accuracy": 0.9989152374806047,
52
+ "eval_f1": 0.9360919540229885,
53
+ "eval_loss": 0.004003152716904879,
54
+ "eval_precision": 0.9365225390984361,
55
+ "eval_recall": 0.9356617647058824,
56
+ "eval_runtime": 13.9451,
57
+ "eval_samples_per_second": 488.343,
58
+ "eval_steps_per_second": 61.097,
59
+ "step": 1398
60
  },
61
  {
62
+ "epoch": 3.218884120171674,
63
+ "grad_norm": 0.14111244678497314,
64
+ "learning_rate": 3.3905579399141636e-05,
65
+ "loss": 0.0011,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "eval_accuracy": 0.9987230010847625,
71
+ "eval_f1": 0.9308584686774942,
72
+ "eval_loss": 0.005216046702116728,
73
+ "eval_precision": 0.9400187441424555,
74
+ "eval_recall": 0.921875,
75
+ "eval_runtime": 14.2384,
76
+ "eval_samples_per_second": 478.283,
77
+ "eval_steps_per_second": 59.838,
78
+ "step": 1864
79
+ },
80
+ {
81
+ "epoch": 4.291845493562231,
82
+ "grad_norm": 0.09000600874423981,
83
+ "learning_rate": 2.8540772532188842e-05,
84
+ "loss": 0.001,
85
  "step": 2000
86
  },
87
  {
88
+ "epoch": 5.0,
89
+ "eval_accuracy": 0.9989426998228679,
90
+ "eval_f1": 0.9491525423728814,
91
+ "eval_loss": 0.004777050111442804,
92
+ "eval_precision": 0.9461187214611873,
93
+ "eval_recall": 0.9522058823529411,
94
+ "eval_runtime": 13.9397,
95
+ "eval_samples_per_second": 488.533,
96
+ "eval_steps_per_second": 61.12,
97
+ "step": 2330
98
  },
99
  {
100
+ "epoch": 5.364806866952789,
101
+ "grad_norm": 0.0045097870752215385,
102
+ "learning_rate": 2.3175965665236052e-05,
103
+ "loss": 0.0005,
104
  "step": 2500
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "eval_accuracy": 0.9988809095527758,
109
+ "eval_f1": 0.9448244414044688,
110
+ "eval_loss": 0.004644877277314663,
111
+ "eval_precision": 0.9375565610859729,
112
+ "eval_recall": 0.9522058823529411,
113
+ "eval_runtime": 14.0259,
114
+ "eval_samples_per_second": 485.531,
115
+ "eval_steps_per_second": 60.745,
116
+ "step": 2796
117
+ },
118
+ {
119
+ "epoch": 6.437768240343348,
120
+ "grad_norm": 0.029634617269039154,
121
+ "learning_rate": 1.7811158798283262e-05,
122
+ "loss": 0.0004,
123
  "step": 3000
124
  },
125
  {
126
+ "epoch": 7.0,
127
+ "eval_accuracy": 0.9989564309939994,
128
+ "eval_f1": 0.9446460980036298,
129
+ "eval_loss": 0.0049773636274039745,
130
+ "eval_precision": 0.9327956989247311,
131
+ "eval_recall": 0.9568014705882353,
132
+ "eval_runtime": 13.9217,
133
+ "eval_samples_per_second": 489.166,
134
+ "eval_steps_per_second": 61.2,
135
+ "step": 3262
136
  },
137
  {
138
+ "epoch": 7.510729613733906,
139
+ "grad_norm": 0.003798937890678644,
140
+ "learning_rate": 1.2446351931330473e-05,
141
+ "loss": 0.0002,
142
  "step": 3500
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "eval_accuracy": 0.9989221030661705,
147
+ "eval_f1": 0.9435520881138136,
148
+ "eval_loss": 0.005484889727085829,
149
+ "eval_precision": 0.9422548120989918,
150
+ "eval_recall": 0.9448529411764706,
151
+ "eval_runtime": 13.8923,
152
+ "eval_samples_per_second": 490.2,
153
+ "eval_steps_per_second": 61.329,
154
+ "step": 3728
155
+ },
156
+ {
157
+ "epoch": 8.583690987124463,
158
+ "grad_norm": 0.0004711664514616132,
159
+ "learning_rate": 7.0815450643776825e-06,
160
+ "loss": 0.0001,
161
  "step": 4000
162
  },
163
  {
164
+ "epoch": 9.0,
165
+ "eval_accuracy": 0.9989426998228679,
166
+ "eval_f1": 0.9441903019213176,
167
+ "eval_loss": 0.0057435426861047745,
168
+ "eval_precision": 0.9398907103825137,
169
+ "eval_recall": 0.9485294117647058,
170
+ "eval_runtime": 13.9755,
171
+ "eval_samples_per_second": 487.281,
172
+ "eval_steps_per_second": 60.964,
173
+ "step": 4194
174
  },
175
  {
176
+ "epoch": 9.656652360515022,
177
+ "grad_norm": 0.005946693476289511,
178
+ "learning_rate": 1.7167381974248929e-06,
179
+ "loss": 0.0001,
180
  "step": 4500
181
  },
182
  {
183
+ "epoch": 10.0,
184
+ "eval_accuracy": 0.9989083718950389,
185
+ "eval_f1": 0.9416058394160585,
186
+ "eval_loss": 0.005793666001409292,
187
+ "eval_precision": 0.9347826086956522,
188
+ "eval_recall": 0.9485294117647058,
189
+ "eval_runtime": 14.4595,
190
+ "eval_samples_per_second": 470.971,
191
+ "eval_steps_per_second": 58.923,
192
+ "step": 4660
193
+ },
194
+ {
195
+ "epoch": 10.0,
196
+ "step": 4660,
197
+ "total_flos": 1.4714840952259542e+16,
198
+ "train_loss": 0.002772659832779558,
199
+ "train_runtime": 1349.0548,
200
+ "train_samples_per_second": 220.873,
201
+ "train_steps_per_second": 3.454
202
  }
203
  ],
204
  "logging_steps": 500,
205
+ "max_steps": 4660,
206
  "num_input_tokens_seen": 0,
207
  "num_train_epochs": 10,
208
  "save_steps": 500,
 
218
  "attributes": {}
219
  }
220
  },
221
+ "total_flos": 1.4714840952259542e+16,
222
  "train_batch_size": 32,
223
  "trial_name": null,
224
  "trial_params": null