error577 committed
Commit c9a47ae · verified · 1 Parent(s): 20cf361

End of training

Files changed (2)
  1. README.md +14 -14
  2. adapter_model.bin +1 -1
README.md CHANGED
@@ -42,19 +42,19 @@ deepspeed: null
  early_stopping_patience: null
  eval_max_new_tokens: 128
  eval_table_size: null
- evals_per_epoch: 2
+ eval_steps: 50
  flash_attention: true
  fp16: null
  fsdp: null
  fsdp_config: null
- gradient_accumulation_steps: 32
+ gradient_accumulation_steps: 8
  gradient_checkpointing: false
  group_by_length: false
  hub_model_id: error577/be3c53b4-2dbf-4a12-957b-9bf2e80845f8
  hub_repo: null
  hub_strategy: checkpoint
  hub_token: null
- learning_rate: 0.0001
+ learning_rate: 0.001
  load_in_4bit: true
  load_in_8bit: false
  local_rank: null
@@ -77,7 +77,7 @@ pad_to_sequence_len: true
  resume_from_checkpoint: null
  s2_attention: null
  sample_packing: false
- saves_per_epoch: 4
+ saves_per_epoch: 1
  sequence_len: 512
  special_tokens:
    pad_token: </s>
@@ -86,7 +86,7 @@ tf32: false
  tokenizer_type: AutoTokenizer
  train_on_inputs: false
  trust_remote_code: true
- val_set_size: 0.05
+ val_set_size: 0.005
  wandb_entity: null
  wandb_mode: online
  wandb_name: 838ffd28-d356-4c40-a584-abc51f2d4a95
@@ -105,7 +105,7 @@ xformers_attention: null

  This model is a fine-tuned version of [codellama/CodeLlama-7b-Instruct-hf](https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 2.5603
+ - Loss: 2.5279

  ## Model description

@@ -124,12 +124,12 @@ More information needed
  ### Training hyperparameters

  The following hyperparameters were used during training:
- - learning_rate: 0.0001
+ - learning_rate: 0.001
  - train_batch_size: 1
  - eval_batch_size: 1
  - seed: 42
- - gradient_accumulation_steps: 32
- - total_train_batch_size: 32
+ - gradient_accumulation_steps: 8
+ - total_train_batch_size: 8
  - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_steps: 10
@@ -137,11 +137,11 @@ The following hyperparameters were used during training:

  ### Training results

- | Training Loss | Epoch | Step | Validation Loss |
- |:-------------:|:------:|:----:|:---------------:|
- | 2.9505 | 0.0084 | 1 | 3.0549 |
- | 2.6023 | 0.4190 | 50 | 2.5784 |
- | 2.4252 | 0.8379 | 100 | 2.5603 |
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:----:|:---------------:|
+ | 2.7769 | 0.002 | 1 | 3.0125 |
+ | 2.4637 | 0.1 | 50 | 2.5838 |
+ | 2.1737 | 0.2 | 100 | 2.5279 |


  ### Framework versions
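
For context on the hyperparameter change above: with a per-device micro-batch of 1, the reported total_train_batch_size is just the product of train_batch_size and gradient_accumulation_steps, which is why it drops from 32 to 8 in lockstep with the accumulation steps. A minimal sketch of that arithmetic, assuming single-GPU training:

```python
# Effective (total) train batch size, as reported in the README.
# Values taken from this commit's diff; single-GPU training assumed.
train_batch_size = 1             # per-device micro-batch (unchanged)
gradient_accumulation_steps = 8  # new value in this commit (was 32)

total_train_batch_size = train_batch_size * gradient_accumulation_steps
print(total_train_batch_size)    # 8 (was 1 * 32 = 32 before this commit)
```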
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5904314606d4ef749802638e716fb731135b5cd3a3531a2adcc18e94e1f2a525
+ oid sha256:58668547fc92a09603fd5d5334cd1ed152385fcec7e51fc3e1c7f2ace842917c
  size 80115210
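
Not part of the commit, but a minimal usage sketch: the updated adapter_model.bin is a PEFT adapter that can be loaded on top of the 4-bit base model named in the config. The repo id and base model come from the diff; the transformers/peft calls are standard API, and the quantization details (compute dtype, device map) are assumptions, not values recorded in this commit.

```python
# Minimal sketch: load the adapter from this repo on top of the base model.
# Assumes transformers, peft, and bitsandbytes are installed; 4-bit loading
# mirrors load_in_4bit: true from the training config.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "codellama/CodeLlama-7b-Instruct-hf"
adapter_id = "error577/be3c53b4-2dbf-4a12-957b-9bf2e80845f8"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # assumption, not in the config
)
tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=bnb_config, device_map="auto"
)

# Attach the trained adapter weights (adapter_model.bin) from the Hub.
model = PeftModel.from_pretrained(base, adapter_id)
model.eval()
```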