pbaoo2705 committed on
Commit
8dd13a9
1 Parent(s): b4d5945

AdaLora applied

Browse files
README.md CHANGED
@@ -48,7 +48,7 @@ The following hyperparameters were used during training:
48
 
49
  ### Framework versions
50
 
51
- - Transformers 4.33.0
52
  - Pytorch 2.0.1+cu118
53
- - Datasets 2.14.4
54
  - Tokenizers 0.13.3
 
48
 
49
  ### Framework versions
50
 
51
+ - Transformers 4.33.1
52
  - Pytorch 2.0.1+cu118
53
+ - Datasets 2.14.5
54
  - Tokenizers 0.13.3
adapter_config.json CHANGED
@@ -1,17 +1,23 @@
1
  {
2
  "auto_mapping": null,
3
  "base_model_name_or_path": "ybelkada/falcon-7b-sharded-bf16",
 
 
4
  "bias": "none",
 
5
  "fan_in_fan_out": false,
6
  "inference_mode": true,
7
  "init_lora_weights": true,
 
8
  "layers_pattern": null,
9
  "layers_to_transform": null,
10
- "lora_alpha": 16,
11
  "lora_dropout": 0.1,
12
  "modules_to_save": null,
13
- "peft_type": "LORA",
14
- "r": 64,
 
 
15
  "revision": null,
16
  "target_modules": [
17
  "query_key_value",
@@ -19,5 +25,9 @@
19
  "dense_h_to_4h",
20
  "dense_4h_to_h"
21
  ],
22
- "task_type": "CAUSAL_LM"
 
 
 
 
23
  }
 
1
  {
2
  "auto_mapping": null,
3
  "base_model_name_or_path": "ybelkada/falcon-7b-sharded-bf16",
4
+ "beta1": 0.85,
5
+ "beta2": 0.85,
6
  "bias": "none",
7
+ "deltaT": 10,
8
  "fan_in_fan_out": false,
9
  "inference_mode": true,
10
  "init_lora_weights": true,
11
+ "init_r": 12,
12
  "layers_pattern": null,
13
  "layers_to_transform": null,
14
+ "lora_alpha": 32,
15
  "lora_dropout": 0.1,
16
  "modules_to_save": null,
17
+ "orth_reg_weight": 0.5,
18
+ "peft_type": "ADALORA",
19
+ "r": 8,
20
+ "rank_pattern": null,
21
  "revision": null,
22
  "target_modules": [
23
  "query_key_value",
 
25
  "dense_h_to_4h",
26
  "dense_4h_to_h"
27
  ],
28
+ "target_r": 8,
29
+ "task_type": "CAUSAL_LM",
30
+ "tfinal": 1000,
31
+ "tinit": 200,
32
+ "total_step": null
33
  }
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cecf2b811a1274593cea4ae150a33c2c605679fd9676f7ebcef176558c527cdb
3
- size 522284877
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c792a581359dc16d6699c36cb99520e650463ae363abf2ee5b58f8e35f22728b
3
+ size 98048461
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 512,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 2048,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
tokenizer_config.json CHANGED
@@ -2,6 +2,10 @@
2
  "add_prefix_space": false,
3
  "clean_up_tokenization_spaces": true,
4
  "eos_token": "<|endoftext|>",
 
5
  "model_max_length": 2048,
6
- "tokenizer_class": "PreTrainedTokenizerFast"
 
 
 
7
  }
 
2
  "add_prefix_space": false,
3
  "clean_up_tokenization_spaces": true,
4
  "eos_token": "<|endoftext|>",
5
+ "max_length": 512,
6
  "model_max_length": 2048,
7
+ "stride": 0,
8
+ "tokenizer_class": "PreTrainedTokenizerFast",
9
+ "truncation_side": "right",
10
+ "truncation_strategy": "longest_first"
11
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85e466f3ba9005647973c0664cd227f988b211d6fdab266efa245a21338bd9af
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b95e79169feae07ebaa19157ef4f9dcb71d781de175315415776e3b1e7aaf5e7
3
  size 4027