feng-2052 commited on
Commit
7492c27
1 Parent(s): b1aedac

Training in progress, step 500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:181452760f4889fb1745514a4de50facf35adde53641198d68d40c6fbe42eedd
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db7b0ba484d365a8232c614da6b35c1ca8564e014b51324208c27b61981f817d
3
  size 268290900
run-0/checkpoint-1000/trainer_state.json CHANGED
@@ -10,50 +10,50 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5751612903225807,
14
- "eval_loss": 0.1956518292427063,
15
- "eval_runtime": 5.3591,
16
- "eval_samples_per_second": 578.453,
17
- "eval_steps_per_second": 12.129,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5248314738273621,
23
- "learning_rate": 1.550763701707098e-05,
24
- "loss": 0.3123,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.812258064516129,
30
- "eval_loss": 0.09787755459547043,
31
- "eval_runtime": 5.5145,
32
- "eval_samples_per_second": 562.15,
33
- "eval_steps_per_second": 11.787,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8709677419354839,
39
- "eval_loss": 0.06755243241786957,
40
- "eval_runtime": 5.3827,
41
- "eval_samples_per_second": 575.922,
42
- "eval_steps_per_second": 12.076,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.4460333585739136,
48
- "learning_rate": 1.101527403414196e-05,
49
- "loss": 0.1131,
50
  "step": 1000
51
  }
52
  ],
53
  "logging_steps": 500,
54
- "max_steps": 2226,
55
  "num_input_tokens_seen": 0,
56
- "num_train_epochs": 7,
57
  "save_steps": 500,
58
  "stateful_callbacks": {
59
  "TrainerControl": {
@@ -71,8 +71,8 @@
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
- "alpha": 0.5049198984839713,
75
- "num_train_epochs": 7,
76
- "temperature": 16
77
  }
78
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6083870967741936,
14
+ "eval_loss": 0.26111724972724915,
15
+ "eval_runtime": 5.3925,
16
+ "eval_samples_per_second": 574.869,
17
+ "eval_steps_per_second": 12.054,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.612690269947052,
23
+ "learning_rate": 1.371069182389937e-05,
24
+ "loss": 0.4067,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8261290322580646,
30
+ "eval_loss": 0.1251312643289566,
31
+ "eval_runtime": 6.2656,
32
+ "eval_samples_per_second": 494.764,
33
+ "eval_steps_per_second": 10.374,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8719354838709678,
39
+ "eval_loss": 0.08443494886159897,
40
+ "eval_runtime": 5.7046,
41
+ "eval_samples_per_second": 543.422,
42
+ "eval_steps_per_second": 11.394,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.5761224031448364,
48
+ "learning_rate": 7.421383647798742e-06,
49
+ "loss": 0.1453,
50
  "step": 1000
51
  }
52
  ],
53
  "logging_steps": 500,
54
+ "max_steps": 1590,
55
  "num_input_tokens_seen": 0,
56
+ "num_train_epochs": 5,
57
  "save_steps": 500,
58
  "stateful_callbacks": {
59
  "TrainerControl": {
 
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
+ "alpha": 0.41459555473064347,
75
+ "num_train_epochs": 5,
76
+ "temperature": 4
77
  }
78
  }
run-0/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b81f5de55f5e8a8af9f6b46510b501dfc71d3179122d526c4f056fedd070e010
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db7b0ba484d365a8232c614da6b35c1ca8564e014b51324208c27b61981f817d
3
  size 268290900
run-0/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb511dfed27a93f9022cf9613ca1369f6d0223cdc69fb471ea40d8da8227a380
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91a7ce88c184ba71db5dc4fa9fb784f1d4bdb6de5d88f8d28bb6d60f143ead33
3
  size 536643898
run-0/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11e744277c61f87520794334442fae36c5f9ff6e10cb79d4bfee5176ca7eafe2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e1264523e958cf7990dc5f42d876cc12129475c4603804cf66868aaf25c2c24
3
  size 1064
run-0/checkpoint-500/trainer_state.json CHANGED
@@ -10,25 +10,25 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5751612903225807,
14
- "eval_loss": 0.1956518292427063,
15
- "eval_runtime": 5.3591,
16
- "eval_samples_per_second": 578.453,
17
- "eval_steps_per_second": 12.129,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5248314738273621,
23
- "learning_rate": 1.550763701707098e-05,
24
- "loss": 0.3123,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
- "max_steps": 2226,
30
  "num_input_tokens_seen": 0,
31
- "num_train_epochs": 7,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
@@ -46,8 +46,8 @@
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
- "alpha": 0.5049198984839713,
50
- "num_train_epochs": 7,
51
- "temperature": 16
52
  }
53
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6083870967741936,
14
+ "eval_loss": 0.26111724972724915,
15
+ "eval_runtime": 5.3925,
16
+ "eval_samples_per_second": 574.869,
17
+ "eval_steps_per_second": 12.054,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.612690269947052,
23
+ "learning_rate": 1.371069182389937e-05,
24
+ "loss": 0.4067,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
+ "max_steps": 1590,
30
  "num_input_tokens_seen": 0,
31
+ "num_train_epochs": 5,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
 
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
+ "alpha": 0.41459555473064347,
50
+ "num_train_epochs": 5,
51
+ "temperature": 4
52
  }
53
  }
run-0/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:974ac49529ba17d5c7ed773c227ea8996054ebf15fdbbafbe2ab45abb5f14d2b
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dbf6901b1fd8b4b70b58bb5a6861520d7a2d5accf1885587865c5346ea0a506
3
  size 5304
runs/Nov12_03-50-03_fde6a76996dd/events.out.tfevents.1731384970.fde6a76996dd.16977.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d78fe2eea9fea9447c96f89abcf049db8759dc8f41e9add56d784e9792530b29
3
+ size 13848
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18278fc3ffb0f8a89a794464595c9b39421f549988787cb6690252ca15809610
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dbf6901b1fd8b4b70b58bb5a6861520d7a2d5accf1885587865c5346ea0a506
3
  size 5304