antonpolishko committed on
Commit
b9907cc
·
verified ·
1 Parent(s): c6d8dd1

Model save

Browse files
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/polyagent/huggingface/runs/bm5sjqkj)
31
 
32
  This model was trained with SFT.
33
 
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/polyagent/huggingface/runs/vkmqfqba)
31
 
32
  This model was trained with SFT.
33
 
all_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 3.0,
3
  "total_flos": 7.187967826039144e+18,
4
  "train_loss": 1.5352961912832626,
5
- "train_runtime": 6906.6629,
6
  "train_samples": 95663,
7
- "train_samples_per_second": 10.159,
8
- "train_steps_per_second": 0.159
9
  }
 
2
  "epoch": 3.0,
3
  "total_flos": 7.187967826039144e+18,
4
  "train_loss": 1.5352961912832626,
5
+ "train_runtime": 6782.5225,
6
  "train_samples": 95663,
7
+ "train_samples_per_second": 10.345,
8
+ "train_steps_per_second": 0.162
9
  }
runs/Dec26_15-04-59_mia1-gpu-110/events.out.tfevents.1735225539.mia1-gpu-110.3949310.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d292a3c8ae6549f27f080bec078e2d7d53a775f85fbc9777ab9f3de1211f723c
3
- size 53072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5fbca0ac8860670dcea47250e8f7e7140d862e1a0bfd09b12bca1a327504fc9
3
+ size 53426
train_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 3.0,
3
  "total_flos": 7.187967826039144e+18,
4
  "train_loss": 1.5352961912832626,
5
- "train_runtime": 6906.6629,
6
  "train_samples": 95663,
7
- "train_samples_per_second": 10.159,
8
- "train_steps_per_second": 0.159
9
  }
 
2
  "epoch": 3.0,
3
  "total_flos": 7.187967826039144e+18,
4
  "train_loss": 1.5352961912832626,
5
+ "train_runtime": 6782.5225,
6
  "train_samples": 95663,
7
+ "train_samples_per_second": 10.345,
8
+ "train_steps_per_second": 0.162
9
  }
trainer_state.json CHANGED
@@ -437,10 +437,10 @@
437
  },
438
  {
439
  "epoch": 0.819672131147541,
440
- "eval_loss": 1.5333800315856934,
441
- "eval_runtime": 19.9646,
442
- "eval_samples_per_second": 35.463,
443
- "eval_steps_per_second": 1.152,
444
  "step": 300
445
  },
446
  {
@@ -865,10 +865,10 @@
865
  },
866
  {
867
  "epoch": 1.639344262295082,
868
- "eval_loss": 1.5181070566177368,
869
- "eval_runtime": 19.9478,
870
- "eval_samples_per_second": 35.493,
871
- "eval_steps_per_second": 1.153,
872
  "step": 600
873
  },
874
  {
@@ -1293,10 +1293,10 @@
1293
  },
1294
  {
1295
  "epoch": 2.459016393442623,
1296
- "eval_loss": 1.5161317586898804,
1297
- "eval_runtime": 19.9284,
1298
- "eval_samples_per_second": 35.527,
1299
- "eval_steps_per_second": 1.154,
1300
  "step": 900
1301
  },
1302
  {
@@ -1577,9 +1577,9 @@
1577
  "step": 1098,
1578
  "total_flos": 7.187967826039144e+18,
1579
  "train_loss": 1.5352961912832626,
1580
- "train_runtime": 6906.6629,
1581
- "train_samples_per_second": 10.159,
1582
- "train_steps_per_second": 0.159
1583
  }
1584
  ],
1585
  "logging_steps": 5,
 
437
  },
438
  {
439
  "epoch": 0.819672131147541,
440
+ "eval_loss": 1.533558964729309,
441
+ "eval_runtime": 19.6321,
442
+ "eval_samples_per_second": 36.063,
443
+ "eval_steps_per_second": 1.172,
444
  "step": 300
445
  },
446
  {
 
865
  },
866
  {
867
  "epoch": 1.639344262295082,
868
+ "eval_loss": 1.5182926654815674,
869
+ "eval_runtime": 19.6502,
870
+ "eval_samples_per_second": 36.03,
871
+ "eval_steps_per_second": 1.17,
872
  "step": 600
873
  },
874
  {
 
1293
  },
1294
  {
1295
  "epoch": 2.459016393442623,
1296
+ "eval_loss": 1.5163270235061646,
1297
+ "eval_runtime": 19.6134,
1298
+ "eval_samples_per_second": 36.098,
1299
+ "eval_steps_per_second": 1.173,
1300
  "step": 900
1301
  },
1302
  {
 
1577
  "step": 1098,
1578
  "total_flos": 7.187967826039144e+18,
1579
  "train_loss": 1.5352961912832626,
1580
+ "train_runtime": 6782.5225,
1581
+ "train_samples_per_second": 10.345,
1582
+ "train_steps_per_second": 0.162
1583
  }
1584
  ],
1585
  "logging_steps": 5,