HeshamElsherif685 commited on
Commit
c9c88cd
1 Parent(s): 7a2f9fb

Training in progress, step 500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:911ad2e3d0d2ebd06f39bfde975f4ec1fed24558e2f4f205bc9e42c412daaceb
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c29e0288f4ab949b658612b4efc595bc16ad1c35b955dd771212845f742fd59c
3
  size 268290900
run-1/checkpoint-1000/trainer_state.json CHANGED
@@ -10,43 +10,43 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5783870967741935,
14
- "eval_loss": 0.2046278566122055,
15
- "eval_runtime": 5.6717,
16
- "eval_samples_per_second": 546.572,
17
- "eval_steps_per_second": 11.46,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5409613251686096,
23
  "learning_rate": 1.4758909853249476e-05,
24
- "loss": 0.3241,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.8125806451612904,
30
- "eval_loss": 0.10322821140289307,
31
- "eval_runtime": 6.1936,
32
- "eval_samples_per_second": 500.513,
33
- "eval_steps_per_second": 10.495,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8687096774193548,
39
- "eval_loss": 0.0720166265964508,
40
- "eval_runtime": 5.6413,
41
- "eval_samples_per_second": 549.518,
42
- "eval_steps_per_second": 11.522,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.46297112107276917,
48
  "learning_rate": 9.517819706498952e-06,
49
- "loss": 0.1192,
50
  "step": 1000
51
  }
52
  ],
@@ -71,8 +71,8 @@
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
- "alpha": 0.1698724421938158,
75
  "num_train_epochs": 6,
76
- "temperature": 11
77
  }
78
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.567741935483871,
14
+ "eval_loss": 0.19664955139160156,
15
+ "eval_runtime": 5.5309,
16
+ "eval_samples_per_second": 560.487,
17
+ "eval_steps_per_second": 11.752,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5286217927932739,
23
  "learning_rate": 1.4758909853249476e-05,
24
+ "loss": 0.3117,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8083870967741935,
30
+ "eval_loss": 0.10034344345331192,
31
+ "eval_runtime": 6.1068,
32
+ "eval_samples_per_second": 507.628,
33
+ "eval_steps_per_second": 10.644,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8664516129032258,
39
+ "eval_loss": 0.07075813412666321,
40
+ "eval_runtime": 5.786,
41
+ "eval_samples_per_second": 535.779,
42
+ "eval_steps_per_second": 11.234,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.45189452171325684,
48
  "learning_rate": 9.517819706498952e-06,
49
+ "loss": 0.1157,
50
  "step": 1000
51
  }
52
  ],
 
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
+ "alpha": 0.4479260334486119,
75
  "num_train_epochs": 6,
76
+ "temperature": 18
77
  }
78
  }
run-1/checkpoint-1500/trainer_state.json CHANGED
@@ -10,59 +10,59 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5783870967741935,
14
- "eval_loss": 0.2046278566122055,
15
- "eval_runtime": 5.6717,
16
- "eval_samples_per_second": 546.572,
17
- "eval_steps_per_second": 11.46,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5409613251686096,
23
  "learning_rate": 1.4758909853249476e-05,
24
- "loss": 0.3241,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.8125806451612904,
30
- "eval_loss": 0.10322821140289307,
31
- "eval_runtime": 6.1936,
32
- "eval_samples_per_second": 500.513,
33
- "eval_steps_per_second": 10.495,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8687096774193548,
39
- "eval_loss": 0.0720166265964508,
40
- "eval_runtime": 5.6413,
41
- "eval_samples_per_second": 549.518,
42
- "eval_steps_per_second": 11.522,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.46297112107276917,
48
  "learning_rate": 9.517819706498952e-06,
49
- "loss": 0.1192,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.8912903225806451,
55
- "eval_loss": 0.058526117354631424,
56
- "eval_runtime": 5.7211,
57
- "eval_samples_per_second": 541.849,
58
- "eval_steps_per_second": 11.361,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
- "grad_norm": 0.3392227292060852,
64
  "learning_rate": 4.276729559748428e-06,
65
- "loss": 0.0816,
66
  "step": 1500
67
  }
68
  ],
@@ -87,8 +87,8 @@
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
- "alpha": 0.1698724421938158,
91
  "num_train_epochs": 6,
92
- "temperature": 11
93
  }
94
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.567741935483871,
14
+ "eval_loss": 0.19664955139160156,
15
+ "eval_runtime": 5.5309,
16
+ "eval_samples_per_second": 560.487,
17
+ "eval_steps_per_second": 11.752,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5286217927932739,
23
  "learning_rate": 1.4758909853249476e-05,
24
+ "loss": 0.3117,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8083870967741935,
30
+ "eval_loss": 0.10034344345331192,
31
+ "eval_runtime": 6.1068,
32
+ "eval_samples_per_second": 507.628,
33
+ "eval_steps_per_second": 10.644,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8664516129032258,
39
+ "eval_loss": 0.07075813412666321,
40
+ "eval_runtime": 5.786,
41
+ "eval_samples_per_second": 535.779,
42
+ "eval_steps_per_second": 11.234,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.45189452171325684,
48
  "learning_rate": 9.517819706498952e-06,
49
+ "loss": 0.1157,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.8890322580645161,
55
+ "eval_loss": 0.057840555906295776,
56
+ "eval_runtime": 5.8497,
57
+ "eval_samples_per_second": 529.945,
58
+ "eval_steps_per_second": 11.112,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
+ "grad_norm": 0.327290415763855,
64
  "learning_rate": 4.276729559748428e-06,
65
+ "loss": 0.0799,
66
  "step": 1500
67
  }
68
  ],
 
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
+ "alpha": 0.4479260334486119,
91
  "num_train_epochs": 6,
92
+ "temperature": 18
93
  }
94
  }
run-1/checkpoint-1908/trainer_state.json CHANGED
@@ -10,68 +10,68 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5783870967741935,
14
- "eval_loss": 0.2046278566122055,
15
- "eval_runtime": 5.6717,
16
- "eval_samples_per_second": 546.572,
17
- "eval_steps_per_second": 11.46,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5409613251686096,
23
  "learning_rate": 1.4758909853249476e-05,
24
- "loss": 0.3241,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.8125806451612904,
30
- "eval_loss": 0.10322821140289307,
31
- "eval_runtime": 6.1936,
32
- "eval_samples_per_second": 500.513,
33
- "eval_steps_per_second": 10.495,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8687096774193548,
39
- "eval_loss": 0.0720166265964508,
40
- "eval_runtime": 5.6413,
41
- "eval_samples_per_second": 549.518,
42
- "eval_steps_per_second": 11.522,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.46297112107276917,
48
  "learning_rate": 9.517819706498952e-06,
49
- "loss": 0.1192,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.8912903225806451,
55
- "eval_loss": 0.058526117354631424,
56
- "eval_runtime": 5.7211,
57
- "eval_samples_per_second": 541.849,
58
- "eval_steps_per_second": 11.361,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
- "grad_norm": 0.3392227292060852,
64
  "learning_rate": 4.276729559748428e-06,
65
- "loss": 0.0816,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
- "eval_accuracy": 0.8941935483870967,
71
- "eval_loss": 0.05137300118803978,
72
- "eval_runtime": 5.9715,
73
- "eval_samples_per_second": 519.136,
74
- "eval_steps_per_second": 10.885,
75
  "step": 1590
76
  }
77
  ],
@@ -96,8 +96,8 @@
96
  "train_batch_size": 48,
97
  "trial_name": null,
98
  "trial_params": {
99
- "alpha": 0.1698724421938158,
100
  "num_train_epochs": 6,
101
- "temperature": 11
102
  }
103
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.567741935483871,
14
+ "eval_loss": 0.19664955139160156,
15
+ "eval_runtime": 5.5309,
16
+ "eval_samples_per_second": 560.487,
17
+ "eval_steps_per_second": 11.752,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5286217927932739,
23
  "learning_rate": 1.4758909853249476e-05,
24
+ "loss": 0.3117,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8083870967741935,
30
+ "eval_loss": 0.10034344345331192,
31
+ "eval_runtime": 6.1068,
32
+ "eval_samples_per_second": 507.628,
33
+ "eval_steps_per_second": 10.644,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8664516129032258,
39
+ "eval_loss": 0.07075813412666321,
40
+ "eval_runtime": 5.786,
41
+ "eval_samples_per_second": 535.779,
42
+ "eval_steps_per_second": 11.234,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.45189452171325684,
48
  "learning_rate": 9.517819706498952e-06,
49
+ "loss": 0.1157,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.8890322580645161,
55
+ "eval_loss": 0.057840555906295776,
56
+ "eval_runtime": 5.8497,
57
+ "eval_samples_per_second": 529.945,
58
+ "eval_steps_per_second": 11.112,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
+ "grad_norm": 0.327290415763855,
64
  "learning_rate": 4.276729559748428e-06,
65
+ "loss": 0.0799,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
+ "eval_accuracy": 0.8938709677419355,
71
+ "eval_loss": 0.05099958926439285,
72
+ "eval_runtime": 6.1102,
73
+ "eval_samples_per_second": 507.352,
74
+ "eval_steps_per_second": 10.638,
75
  "step": 1590
76
  }
77
  ],
 
96
  "train_batch_size": 48,
97
  "trial_name": null,
98
  "trial_params": {
99
+ "alpha": 0.4479260334486119,
100
  "num_train_epochs": 6,
101
+ "temperature": 18
102
  }
103
  }
run-1/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eacb34859c1848548f9646318e1d765056a5be2f637bbdd796cf5bd49701af2d
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c29e0288f4ab949b658612b4efc595bc16ad1c35b955dd771212845f742fd59c
3
  size 268290900
run-1/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bd322f129186b261f05e3f9f627f18bad9945db582fe9b3175b22d33e9d4025
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21d5d1e2a71a38ce22eb489c68c85dda378278838f448f2ce48fb6cb9fbd870f
3
  size 536643898
run-1/checkpoint-500/trainer_state.json CHANGED
@@ -10,18 +10,18 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5783870967741935,
14
- "eval_loss": 0.2046278566122055,
15
- "eval_runtime": 5.6717,
16
- "eval_samples_per_second": 546.572,
17
- "eval_steps_per_second": 11.46,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5409613251686096,
23
  "learning_rate": 1.4758909853249476e-05,
24
- "loss": 0.3241,
25
  "step": 500
26
  }
27
  ],
@@ -46,8 +46,8 @@
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
- "alpha": 0.1698724421938158,
50
  "num_train_epochs": 6,
51
- "temperature": 11
52
  }
53
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.567741935483871,
14
+ "eval_loss": 0.19664955139160156,
15
+ "eval_runtime": 5.5309,
16
+ "eval_samples_per_second": 560.487,
17
+ "eval_steps_per_second": 11.752,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5286217927932739,
23
  "learning_rate": 1.4758909853249476e-05,
24
+ "loss": 0.3117,
25
  "step": 500
26
  }
27
  ],
 
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
+ "alpha": 0.4479260334486119,
50
  "num_train_epochs": 6,
51
+ "temperature": 18
52
  }
53
  }
run-1/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8eb768576f75bcbf8b2b232d539edf6d6c6d5e888deb050362d521063dfbb74
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f879f71f2d173f98b817b4311cd97605daaadeaea76b087d279d7808a2906e61
3
  size 5176
runs/Aug10_17-36-17_bc748cae6928/events.out.tfevents.1723318310.bc748cae6928.590.10 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59f0459d3e5e682153ad23fe042f5fcbf7562fd3998f87d81a96ee41d15aa5aa
3
+ size 14622
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a795e38fc17d23ea7e491b0b97849dc60c777dbce157fcfdc837d87b44f51896
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f879f71f2d173f98b817b4311cd97605daaadeaea76b087d279d7808a2906e61
3
  size 5176