givyboy committed
Commit
271a08b
1 Parent(s): 3b45618

End of training

README.md CHANGED
@@ -6,6 +6,7 @@ tags:
 model-index:
 - name: mental-health-companion
   results: []
+library_name: peft
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -14,6 +15,8 @@ should probably proofread and complete it, then remove this comment. -->
 # mental-health-companion
 
 This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.6625
 
 ## Model description
 
@@ -29,27 +32,48 @@ More information needed
 
 ## Training procedure
 
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: QuantizationMethod.BITS_AND_BYTES
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.0002
+- learning_rate: 5e-05
 - train_batch_size: 2
-- eval_batch_size: 8
+- eval_batch_size: 2
 - seed: 42
-- gradient_accumulation_steps: 32
-- total_train_batch_size: 64
+- gradient_accumulation_steps: 5
+- total_train_batch_size: 10
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
-- lr_scheduler_type: cosine
-- lr_scheduler_warmup_ratio: 0.05
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 50
 - num_epochs: 1
+- mixed_precision_training: Native AMP
 
 ### Training results
 
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 1.8698        | 0.17  | 100  | 1.8382          |
+| 1.8349        | 0.35  | 200  | 1.7864          |
+| 1.8077        | 0.52  | 300  | 1.7370          |
+| 1.7457        | 0.7   | 400  | 1.6964          |
+| 1.717         | 0.87  | 500  | 1.6625          |
 
 
 ### Framework versions
 
-- Transformers 4.31.0
+- PEFT 0.4.0
+- Transformers 4.38.0.dev0
 - Pytorch 2.1.2
 - Datasets 2.16.1
-- Tokenizers 0.13.3
+- Tokenizers 0.15.0
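
Two notes on the updated card. The new total_train_batch_size of 10 is simply train_batch_size × gradient_accumulation_steps (2 × 5). And since the card now declares `library_name: peft` plus a 4-bit `bitsandbytes` config, a minimal loading sketch follows; the repo id `givyboy/mental-health-companion` is inferred from the committer and model name, and `trust_remote_code` reflects how phi-2 was commonly loaded before native support landed in Transformers, so treat both as assumptions.

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Mirror the bitsandbytes settings listed in the card: 4-bit NF4 weights,
# double quantization, float16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

base = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,  # assumption: needed on older transformers releases
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")

# Attach the LoRA adapter weights from this repo on top of the 4-bit base.
model = PeftModel.from_pretrained(base, "givyboy/mental-health-companion")
```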
adapter_config.json CHANGED
@@ -7,14 +7,17 @@
   "init_lora_weights": true,
   "layers_pattern": null,
   "layers_to_transform": null,
-  "lora_alpha": 64,
+  "lora_alpha": 32,
   "lora_dropout": 0.05,
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 32,
+  "r": 16,
   "revision": null,
   "target_modules": [
-    "Wqkv",
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "dense",
     "fc1",
     "fc2"
   ],
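
This change halves both the LoRA rank and alpha (r 32 → 16, lora_alpha 64 → 32), keeping the effective scaling alpha/r at 2, and retargets the fused `Wqkv` projection to phi-2's split `q_proj`/`k_proj`/`v_proj`/`dense` module names. A sketch of the equivalent PEFT config (the `task_type` is an assumption, since adapter_config.json doesn't show it here):

```python
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,            # was 32; halved together with alpha
    lora_alpha=32,   # scaling factor alpha/r stays at 2
    lora_dropout=0.05,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "dense",  # split attention projections
        "fc1", "fc2",                           # MLP layers
    ],
    task_type="CAUSAL_LM",  # assumption: causal-LM fine-tune of phi-2
)
```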
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eee2d2a24bfbc2c28468fc429eb4017175280365380d82bf8393489ccca32efd
+size 94422368
special_tokens_map.json CHANGED
@@ -1,6 +1,24 @@
 {
-  "bos_token": "<|endoftext|>",
-  "eos_token": "<|endoftext|>",
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
   "pad_token": "<|endoftext|>",
-  "unk_token": "<|endoftext|>"
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
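
The change expands each special token from a bare string into the full serialized form of an added token; the extra keys control how the token is matched during tokenization. As a sketch, these dicts round-trip to `tokenizers.AddedToken` objects:

```python
from tokenizers import AddedToken

endoftext = AddedToken(
    "<|endoftext|>",
    lstrip=False,       # keep whitespace to the left intact when matching
    rstrip=False,       # same on the right
    normalized=False,   # match against the raw, un-normalized text
    single_word=False,  # may also match inside a longer word
)
```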
tokenizer.json CHANGED
@@ -1,7 +1,7 @@
 {
   "version": "1.0",
   "truncation": {
-    "direction": "Right",
+    "direction": "Left",
     "max_length": 512,
     "strategy": "LongestFirst",
     "stride": 0
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
 {
+  "add_eos_token": true,
   "add_prefix_space": false,
   "added_tokens_decoder": {
     "50256": {
@@ -318,6 +319,7 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "model_max_length": 2048,
+  "pad_token": "<|endoftext|>",
   "tokenizer_class": "CodeGenTokenizer",
   "unk_token": "<|endoftext|>"
 }
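
Two additions here: `add_eos_token: true` asks the tokenizer to append the EOS token to every encoded example (so the model learns where to stop), and a `pad_token` is defined so batches can be padded. Whether `CodeGenTokenizer` honors `add_eos_token` depends on the Transformers version, so this sketch shows a version-agnostic equivalent:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
tokenizer.pad_token = tokenizer.eos_token  # "<|endoftext|>", as in the config above

def encode(text: str) -> list[int]:
    # Fallback for add_eos_token: append EOS manually so every training
    # example ends with "<|endoftext|>".
    return tokenizer(text)["input_ids"] + [tokenizer.eos_token_id]
```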
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:caba37b5fb684255c15d6029caa91a05293b3e1ca865d6eac70b1e5166fcfae7
-size 4408
+oid sha256:67447bcf3f0b975bd643ce328706d370f11be3583ddcb44f841f1e29e3850472
+size 4664