Minbyul committed on
Commit
045c29f
·
verified ·
1 Parent(s): 3325974

Model save

Browse files
README.md CHANGED
@@ -2,15 +2,12 @@
2
  license: apache-2.0
3
  base_model: BioMistral/BioMistral-7B
4
  tags:
5
- - alignment-handbook
6
- - trl
7
- - sft
8
- - generated_from_trainer
9
  - trl
10
  - sft
 
11
  - generated_from_trainer
12
  datasets:
13
- - HuggingFaceH4/deita-10k-v0-sft
14
  model-index:
15
  - name: biomistral-7b-1e-6-wo-kqa_silver_wogold-iter-sft-step1_lr
16
  results: []
@@ -21,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  # biomistral-7b-1e-6-wo-kqa_silver_wogold-iter-sft-step1_lr
23
 
24
- This model is a fine-tuned version of [BioMistral/BioMistral-7B](https://huggingface.co/BioMistral/BioMistral-7B) on the HuggingFaceH4/deita-10k-v0-sft dataset.
25
  It achieves the following results on the evaluation set:
26
  - Loss: 1.8593
27
 
@@ -61,7 +58,7 @@ The following hyperparameters were used during training:
61
  | Training Loss | Epoch | Step | Validation Loss |
62
  |:-------------:|:-----:|:----:|:---------------:|
63
  | 1.7014 | 0.96 | 6 | 1.9007 |
64
- | 1.6285 | 1.92 | 12 | 1.8676 |
65
  | 1.5573 | 2.88 | 18 | 1.8593 |
66
 
67
 
 
2
  license: apache-2.0
3
  base_model: BioMistral/BioMistral-7B
4
  tags:
 
 
 
 
5
  - trl
6
  - sft
7
+ - alignment-handbook
8
  - generated_from_trainer
9
  datasets:
10
+ - generator
11
  model-index:
12
  - name: biomistral-7b-1e-6-wo-kqa_silver_wogold-iter-sft-step1_lr
13
  results: []
 
18
 
19
  # biomistral-7b-1e-6-wo-kqa_silver_wogold-iter-sft-step1_lr
20
 
21
+ This model is a fine-tuned version of [BioMistral/BioMistral-7B](https://huggingface.co/BioMistral/BioMistral-7B) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
  - Loss: 1.8593
24
 
 
58
  | Training Loss | Epoch | Step | Validation Loss |
59
  |:-------------:|:-----:|:----:|:---------------:|
60
  | 1.7014 | 0.96 | 6 | 1.9007 |
61
+ | 1.6286 | 1.92 | 12 | 1.8676 |
62
  | 1.5573 | 2.88 | 18 | 1.8593 |
63
 
64
 
all_results.json CHANGED
@@ -5,9 +5,9 @@
5
  "eval_samples": 904,
6
  "eval_samples_per_second": 11.397,
7
  "eval_steps_per_second": 0.76,
8
- "train_loss": 1.6118707987997267,
9
- "train_runtime": 322.7432,
10
  "train_samples": 4047,
11
- "train_samples_per_second": 3.625,
12
  "train_steps_per_second": 0.056
13
  }
 
5
  "eval_samples": 904,
6
  "eval_samples_per_second": 11.397,
7
  "eval_steps_per_second": 0.76,
8
+ "train_loss": 1.6118529703882005,
9
+ "train_runtime": 323.9524,
10
  "train_samples": 4047,
11
+ "train_samples_per_second": 3.612,
12
  "train_steps_per_second": 0.056
13
  }
config.json CHANGED
@@ -21,6 +21,6 @@
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.39.0.dev0",
24
- "use_cache": true,
25
  "vocab_size": 32000
26
  }
 
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "bfloat16",
23
  "transformers_version": "4.39.0.dev0",
24
+ "use_cache": false,
25
  "vocab_size": 32000
26
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31f127cd02a3bf51168e566309eedc16bd6f8a26d8411875db56b442610c3124
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8297138653b893c49719f137e7ab7ab967c7f84a67590b8926db3065d3ed8c97
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99f6030f49ed6dc82696a145e6f31c62037ba7ad1920a569c8fa2fd0d3988087
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1a7ef2aac167b812aed6e137d98a0bb018c58745a0bcce324c05385232dc108
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3074bb494885b3e4657c12c64650763105c56e73c183931df7bb9cde8f93419
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f85a7c4cd93f2e154c58c6533469d614beb02201e1425004b4bb84d1eb59a23
3
  size 4540516344
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.88,
3
- "train_loss": 1.6118707987997267,
4
- "train_runtime": 322.7432,
5
  "train_samples": 4047,
6
- "train_samples_per_second": 3.625,
7
  "train_steps_per_second": 0.056
8
  }
 
1
  {
2
  "epoch": 2.88,
3
+ "train_loss": 1.6118529703882005,
4
+ "train_runtime": 323.9524,
5
  "train_samples": 4047,
6
+ "train_samples_per_second": 3.612,
7
  "train_steps_per_second": 0.056
8
  }
trainer_state.json CHANGED
@@ -10,63 +10,63 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.16,
13
- "grad_norm": 10.593756006766313,
14
  "learning_rate": 5e-07,
15
  "loss": 1.7036,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.8,
20
- "grad_norm": 7.23312775238484,
21
  "learning_rate": 9.157348061512726e-07,
22
  "loss": 1.7014,
23
  "step": 5
24
  },
25
  {
26
  "epoch": 0.96,
27
- "eval_loss": 1.9007213115692139,
28
- "eval_runtime": 5.4737,
29
- "eval_samples_per_second": 10.961,
30
- "eval_steps_per_second": 0.731,
31
  "step": 6
32
  },
33
  {
34
  "epoch": 1.6,
35
- "grad_norm": 5.80431534123651,
36
  "learning_rate": 5e-07,
37
- "loss": 1.6285,
38
  "step": 10
39
  },
40
  {
41
  "epoch": 1.92,
42
- "eval_loss": 1.8675754070281982,
43
- "eval_runtime": 5.3012,
44
- "eval_samples_per_second": 11.318,
45
- "eval_steps_per_second": 0.755,
46
  "step": 12
47
  },
48
  {
49
  "epoch": 2.4,
50
- "grad_norm": 4.830213221606365,
51
  "learning_rate": 8.426519384872732e-08,
52
  "loss": 1.5573,
53
  "step": 15
54
  },
55
  {
56
  "epoch": 2.88,
57
- "eval_loss": 1.859323263168335,
58
- "eval_runtime": 5.2878,
59
- "eval_samples_per_second": 11.347,
60
- "eval_steps_per_second": 0.756,
61
  "step": 18
62
  },
63
  {
64
  "epoch": 2.88,
65
  "step": 18,
66
  "total_flos": 3716488888320.0,
67
- "train_loss": 1.6118707987997267,
68
- "train_runtime": 322.7432,
69
- "train_samples_per_second": 3.625,
70
  "train_steps_per_second": 0.056
71
  }
72
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.16,
13
+ "grad_norm": 10.590332717346035,
14
  "learning_rate": 5e-07,
15
  "loss": 1.7036,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.8,
20
+ "grad_norm": 7.214343866416259,
21
  "learning_rate": 9.157348061512726e-07,
22
  "loss": 1.7014,
23
  "step": 5
24
  },
25
  {
26
  "epoch": 0.96,
27
+ "eval_loss": 1.9007039070129395,
28
+ "eval_runtime": 5.4927,
29
+ "eval_samples_per_second": 10.924,
30
+ "eval_steps_per_second": 0.728,
31
  "step": 6
32
  },
33
  {
34
  "epoch": 1.6,
35
+ "grad_norm": 5.81180350509424,
36
  "learning_rate": 5e-07,
37
+ "loss": 1.6286,
38
  "step": 10
39
  },
40
  {
41
  "epoch": 1.92,
42
+ "eval_loss": 1.8676100969314575,
43
+ "eval_runtime": 5.3411,
44
+ "eval_samples_per_second": 11.234,
45
+ "eval_steps_per_second": 0.749,
46
  "step": 12
47
  },
48
  {
49
  "epoch": 2.4,
50
+ "grad_norm": 4.832124501533971,
51
  "learning_rate": 8.426519384872732e-08,
52
  "loss": 1.5573,
53
  "step": 15
54
  },
55
  {
56
  "epoch": 2.88,
57
+ "eval_loss": 1.859337568283081,
58
+ "eval_runtime": 5.2859,
59
+ "eval_samples_per_second": 11.351,
60
+ "eval_steps_per_second": 0.757,
61
  "step": 18
62
  },
63
  {
64
  "epoch": 2.88,
65
  "step": 18,
66
  "total_flos": 3716488888320.0,
67
+ "train_loss": 1.6118529703882005,
68
+ "train_runtime": 323.9524,
69
+ "train_samples_per_second": 3.612,
70
  "train_steps_per_second": 0.056
71
  }
72
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fef48fe19ee49be96461e1e68e3e6c8577ba3fa2893dcbbc1ab65e5bfc0ef3a5
3
  size 6328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3198a07998eb6e49380b77fcbaae1efc9d388db270fe16f6b81061e68888540b
3
  size 6328