feng-2052 committed on
Commit
0b6e38d
1 Parent(s): 73d8c0e

Training in progress, step 500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2b79ce96a29dfd90b469227636d4c69f39baa91d8c602442edf0d95debb71c6
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:294322a3921efabcf477dc40b7b403c369a45a4957c3e80807fc97e64c4a09cf
3
  size 268290900
run-2/checkpoint-1000/trainer_state.json CHANGED
@@ -10,50 +10,50 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.587741935483871,
14
- "eval_loss": 0.19460442662239075,
15
- "eval_runtime": 5.4234,
16
- "eval_samples_per_second": 571.599,
17
- "eval_steps_per_second": 11.985,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5228433012962341,
23
- "learning_rate": 1.685534591194969e-05,
24
- "loss": 0.3144,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.8212903225806452,
30
- "eval_loss": 0.09393113851547241,
31
- "eval_runtime": 5.9301,
32
- "eval_samples_per_second": 522.759,
33
- "eval_steps_per_second": 10.961,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8793548387096775,
39
- "eval_loss": 0.06259110569953918,
40
- "eval_runtime": 5.99,
41
- "eval_samples_per_second": 517.529,
42
- "eval_steps_per_second": 10.851,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.4349106550216675,
48
- "learning_rate": 1.371069182389937e-05,
49
- "loss": 0.109,
50
  "step": 1000
51
  }
52
  ],
53
  "logging_steps": 500,
54
- "max_steps": 3180,
55
  "num_input_tokens_seen": 0,
56
- "num_train_epochs": 10,
57
  "save_steps": 500,
58
  "stateful_callbacks": {
59
  "TrainerControl": {
@@ -71,8 +71,8 @@
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
- "alpha": 0.11018489395767606,
75
- "num_train_epochs": 10,
76
- "temperature": 13
77
  }
78
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6690322580645162,
14
+ "eval_loss": 0.42948082089424133,
15
+ "eval_runtime": 5.3921,
16
+ "eval_samples_per_second": 574.917,
17
+ "eval_steps_per_second": 12.055,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.8646999001502991,
23
+ "learning_rate": 1.371069182389937e-05,
24
+ "loss": 0.6591,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8229032258064516,
30
+ "eval_loss": 0.16900987923145294,
31
+ "eval_runtime": 5.4801,
32
+ "eval_samples_per_second": 565.687,
33
+ "eval_steps_per_second": 11.861,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8729032258064516,
39
+ "eval_loss": 0.0963619202375412,
40
+ "eval_runtime": 5.6559,
41
+ "eval_samples_per_second": 548.098,
42
+ "eval_steps_per_second": 11.492,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.7748064398765564,
48
+ "learning_rate": 7.421383647798742e-06,
49
+ "loss": 0.1958,
50
  "step": 1000
51
  }
52
  ],
53
  "logging_steps": 500,
54
+ "max_steps": 1590,
55
  "num_input_tokens_seen": 0,
56
+ "num_train_epochs": 5,
57
  "save_steps": 500,
58
  "stateful_callbacks": {
59
  "TrainerControl": {
 
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
+ "alpha": 0.1651888586206014,
75
+ "num_train_epochs": 5,
76
+ "temperature": 2
77
  }
78
  }
run-2/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd23b96be7e9aeadde19459e2d9f65b515679421480dfd74d9a3d3c96f50456d
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:294322a3921efabcf477dc40b7b403c369a45a4957c3e80807fc97e64c4a09cf
3
  size 268290900
run-2/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:968bf33e52d67e7f5bc0284c9b15119d0d36792cf76bafe4b7c7dc6d084a9bac
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c90eb90b8269b7374850d7f98358b4fcfca644626c4d7e9cece862afe8f3803
3
  size 536643898
run-2/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04366f62f8f88f5a8265df59adb051b320463277845db80e7fa43f13110c18c9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e1264523e958cf7990dc5f42d876cc12129475c4603804cf66868aaf25c2c24
3
  size 1064
run-2/checkpoint-500/trainer_state.json CHANGED
@@ -10,25 +10,25 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.587741935483871,
14
- "eval_loss": 0.19460442662239075,
15
- "eval_runtime": 5.4234,
16
- "eval_samples_per_second": 571.599,
17
- "eval_steps_per_second": 11.985,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5228433012962341,
23
- "learning_rate": 1.685534591194969e-05,
24
- "loss": 0.3144,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
- "max_steps": 3180,
30
  "num_input_tokens_seen": 0,
31
- "num_train_epochs": 10,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
@@ -46,8 +46,8 @@
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
- "alpha": 0.11018489395767606,
50
- "num_train_epochs": 10,
51
- "temperature": 13
52
  }
53
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6690322580645162,
14
+ "eval_loss": 0.42948082089424133,
15
+ "eval_runtime": 5.3921,
16
+ "eval_samples_per_second": 574.917,
17
+ "eval_steps_per_second": 12.055,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.8646999001502991,
23
+ "learning_rate": 1.371069182389937e-05,
24
+ "loss": 0.6591,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
+ "max_steps": 1590,
30
  "num_input_tokens_seen": 0,
31
+ "num_train_epochs": 5,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
 
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
+ "alpha": 0.1651888586206014,
50
+ "num_train_epochs": 5,
51
+ "temperature": 2
52
  }
53
  }
run-2/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54a21424fcc7b0ab6ff9f908cdab480c20a3c07931af6e79061f4c4e5b5311db
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3bb46aada442b39115d169cd9c125713b5e861f4eaf782c3cede72bc75c5fe3
3
  size 5304
runs/Nov12_03-50-03_fde6a76996dd/events.out.tfevents.1731385960.fde6a76996dd.16977.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7299387b0655afa7853258c09274f2fb1a3e6607c6a114e90ca322d5189eb5a7
3
+ size 13811
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a2c86f2e5c80ae7557f0774f6f26d6131e8b67e4418482403db97fa0118b688
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3bb46aada442b39115d169cd9c125713b5e861f4eaf782c3cede72bc75c5fe3
3
  size 5304