itsanastasiaminina committed on
Commit
ff6a98a
·
verified ·
1 Parent(s): 93d2d3d

Training in progress, epoch 3

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2ea6143365bb1b955914e1894d1a9de3c8e82b53b2d92377c19fa703ff614cb
3
  size 498625128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b087d365f9b47bcb5ac8af147a7ce444d50256e0f588622fd8a7c4b178e62a
3
  size 498625128
run-0/checkpoint-1689/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17de67ad92dabf88985a54e46778d6c5d57f8027da6db3b5f37de0c6b94ff74b
3
  size 498625128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b087d365f9b47bcb5ac8af147a7ce444d50256e0f588622fd8a7c4b178e62a
3
  size 498625128
run-0/checkpoint-1689/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18c48b13f56fb2a15d11eeb5145150b84f70944494eec88405255d80e4fcdee9
3
  size 997370106
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59729c12013004750cb65b3640afab9eb6d6b96e9d2d269fdbf4552e3fe0ac90
3
  size 997370106
run-0/checkpoint-1689/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:071c5e9be8c89f7e7601ef6e67c044701f3e899071b5431a452dcbeb2c3af0a4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68f1d83a24dea24b3ee8fe08ca2d8c2be0524aab6848d54221b60f93a60135a4
3
  size 14244
run-0/checkpoint-1689/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80de0fe2308c3690dd22295d36b26938d5f0098babe16dede9fcbd27e598439d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e4b58894049dba5ecea76fd6426a18215864e341b6a3172f1c3fd8e0a5dbbbe
3
  size 1064
run-0/checkpoint-1689/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.622779879885066,
3
  "best_model_checkpoint": "students_scores_model/run-0/checkpoint-1689",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
@@ -10,50 +10,50 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.8880994671403197,
13
- "grad_norm": 9.486105918884277,
14
- "learning_rate": 1.6060239187203855e-05,
15
- "loss": 1.0687,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_f1": 0.559496193541961,
21
- "eval_loss": 1.0184900760650635,
22
- "eval_runtime": 37.7502,
23
- "eval_samples_per_second": 59.602,
24
- "eval_steps_per_second": 7.47,
25
  "step": 563
26
  },
27
  {
28
  "epoch": 1.7761989342806395,
29
- "grad_norm": 16.897672653198242,
30
- "learning_rate": 9.306564171558836e-06,
31
- "loss": 0.8626,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_f1": 0.6141765793522762,
37
- "eval_loss": 0.877347469329834,
38
- "eval_runtime": 37.6032,
39
- "eval_samples_per_second": 59.835,
40
- "eval_steps_per_second": 7.499,
41
  "step": 1126
42
  },
43
  {
44
  "epoch": 2.664298401420959,
45
- "grad_norm": 14.929506301879883,
46
- "learning_rate": 2.552889155913817e-06,
47
- "loss": 0.7589,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 3.0,
52
- "eval_f1": 0.622779879885066,
53
- "eval_loss": 0.8738921880722046,
54
- "eval_runtime": 38.6362,
55
- "eval_samples_per_second": 58.236,
56
- "eval_steps_per_second": 7.299,
57
  "step": 1689
58
  }
59
  ],
@@ -78,9 +78,9 @@
78
  "train_batch_size": 16,
79
  "trial_name": null,
80
  "trial_params": {
81
- "learning_rate": 2.2813914202848873e-05,
82
  "num_train_epochs": 3,
83
  "per_device_train_batch_size": 16,
84
- "weight_decay": 0.0012064342197859815
85
  }
86
  }
 
1
  {
2
+ "best_metric": 0.625181470032157,
3
  "best_model_checkpoint": "students_scores_model/run-0/checkpoint-1689",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.8880994671403197,
13
+ "grad_norm": 15.162298202514648,
14
+ "learning_rate": 1.1081212788560944e-05,
15
+ "loss": 1.0807,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_f1": 0.5653313080683391,
21
+ "eval_loss": 0.9786226153373718,
22
+ "eval_runtime": 37.7159,
23
+ "eval_samples_per_second": 59.657,
24
+ "eval_steps_per_second": 7.477,
25
  "step": 563
26
  },
27
  {
28
  "epoch": 1.7761989342806395,
29
+ "grad_norm": 24.097002029418945,
30
+ "learning_rate": 6.421325156701843e-06,
31
+ "loss": 0.8868,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_f1": 0.6070480622856711,
37
+ "eval_loss": 0.8847860097885132,
38
+ "eval_runtime": 37.644,
39
+ "eval_samples_per_second": 59.771,
40
+ "eval_steps_per_second": 7.491,
41
  "step": 1126
42
  },
43
  {
44
  "epoch": 2.664298401420959,
45
+ "grad_norm": 11.891419410705566,
46
+ "learning_rate": 1.7614375248427405e-06,
47
+ "loss": 0.7962,
48
  "step": 1500
49
  },
50
  {
51
  "epoch": 3.0,
52
+ "eval_f1": 0.625181470032157,
53
+ "eval_loss": 0.87553471326828,
54
+ "eval_runtime": 38.7494,
55
+ "eval_samples_per_second": 58.065,
56
+ "eval_steps_per_second": 7.278,
57
  "step": 1689
58
  }
59
  ],
 
78
  "train_batch_size": 16,
79
  "trial_name": null,
80
  "trial_params": {
81
+ "learning_rate": 1.5741100420420047e-05,
82
  "num_train_epochs": 3,
83
  "per_device_train_batch_size": 16,
84
+ "weight_decay": 0.06367619176381757
85
  }
86
  }
run-0/checkpoint-1689/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86e506878344e67934428f3176c568da4bb4fdfd8b8918c71494a2c9eb08433b
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d79a0a1b1999dd0f4d771c0a4c6c2e334edf8b223fe42e75cbf50a67fc6c2de
3
  size 5368
runs/Dec09_12-41-02_ea67bac3fd95/events.out.tfevents.1733748065.ea67bac3fd95.23.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94897b1553c80b0d65888b8e664fb3a6ad62dc3aceab4d7d97de979095b887e1
3
- size 6428
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a8eb11f2865b9a7b2d36c666ed0103b6464a718040f159b71a69474b09a27d1
3
+ size 6639