mamung commited on
Commit
f4f6162
·
verified ·
1 Parent(s): 15b825d

Training in progress, step 24, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83f95ac312a53b2b1e37a2a1a0e6bc5cddbc7bc08eca720057376c302fd1a8e9
3
  size 97728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1308ab5b004737cda37c6f011533f981173cee903cd2ccf6c1b76fed4bc8922e
3
  size 97728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0c8161e8e5681b86049a052bac0f569052ade09319433589f7aa50cceefee2f
3
  size 212298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f7d32e659796260eb80ee744578b61feb131752c58c91330bd88c49794e6421
3
  size 212298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2eee07d3fc4d8a0abcc37fc9f663b2748ef62d5b07c21e50a8324927531b82b1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9da8f5d493f0f752036f2ae2fae7ea8749d37cf1119ef3a2c45408ef763d8466
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:605d23f463d574da26fdafa2a2fc396d0b00160053ea75ca175c5c8e6f2990e2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4293e50fb3c9f4c5f80a712103de6c5deaef7eaac0675ada5b61f2e11e3e6a04
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5601750547045952,
5
  "eval_steps": 8,
6
- "global_step": 16,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -66,6 +66,35 @@
66
  "eval_samples_per_second": 134.683,
67
  "eval_steps_per_second": 68.036,
68
  "step": 16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  }
70
  ],
71
  "logging_steps": 3,
@@ -85,7 +114,7 @@
85
  "attributes": {}
86
  }
87
  },
88
- "total_flos": 6856254554112.0,
89
  "train_batch_size": 2,
90
  "trial_name": null,
91
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8402625820568927,
5
  "eval_steps": 8,
6
+ "global_step": 24,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
66
  "eval_samples_per_second": 134.683,
67
  "eval_steps_per_second": 68.036,
68
  "step": 16
69
+ },
70
+ {
71
+ "epoch": 0.6301969365426696,
72
+ "grad_norm": 0.12901803851127625,
73
+ "learning_rate": 0.00018,
74
+ "loss": 10.3699,
75
+ "step": 18
76
+ },
77
+ {
78
+ "epoch": 0.7352297592997812,
79
+ "grad_norm": 0.13586793839931488,
80
+ "learning_rate": 0.0001998867339183008,
81
+ "loss": 10.366,
82
+ "step": 21
83
+ },
84
+ {
85
+ "epoch": 0.8402625820568927,
86
+ "grad_norm": 0.18823161721229553,
87
+ "learning_rate": 0.00019819286972627066,
88
+ "loss": 10.3635,
89
+ "step": 24
90
+ },
91
+ {
92
+ "epoch": 0.8402625820568927,
93
+ "eval_loss": 10.358380317687988,
94
+ "eval_runtime": 0.7207,
95
+ "eval_samples_per_second": 134.583,
96
+ "eval_steps_per_second": 67.985,
97
+ "step": 24
98
  }
99
  ],
100
  "logging_steps": 3,
 
114
  "attributes": {}
115
  }
116
  },
117
+ "total_flos": 10291064340480.0,
118
  "train_batch_size": 2,
119
  "trial_name": null,
120
  "trial_params": null