clean up
Browse files- README.md +8 -3
- checkpoint-1152/config.json +0 -26
- checkpoint-1152/generation_config.json +0 -6
- checkpoint-1152/optimizer.pt +0 -3
- checkpoint-1152/pytorch_model.bin +0 -3
- checkpoint-1152/rng_state_0.pth +0 -3
- checkpoint-1152/rng_state_1.pth +0 -3
- checkpoint-1152/rng_state_2.pth +0 -3
- checkpoint-1152/rng_state_3.pth +0 -3
- checkpoint-1152/scheduler.pt +0 -3
- checkpoint-1152/special_tokens_map.json +0 -5
- checkpoint-1152/tokenizer.json +0 -0
- checkpoint-1152/tokenizer_config.json +0 -10
- checkpoint-1152/trainer_state.json +0 -0
- checkpoint-1152/training_args.bin +0 -3
- checkpoint-1280/config.json +0 -26
- checkpoint-1280/generation_config.json +0 -6
- checkpoint-1280/optimizer.pt +0 -3
- checkpoint-1280/pytorch_model.bin +0 -3
- checkpoint-1280/rng_state_0.pth +0 -3
- checkpoint-1280/rng_state_1.pth +0 -3
- checkpoint-1280/rng_state_2.pth +0 -3
- checkpoint-1280/rng_state_3.pth +0 -3
- checkpoint-1280/scheduler.pt +0 -3
- checkpoint-1280/special_tokens_map.json +0 -5
- checkpoint-1280/tokenizer.json +0 -0
- checkpoint-1280/tokenizer_config.json +0 -10
- checkpoint-1280/trainer_state.json +0 -0
- checkpoint-1280/training_args.bin +0 -3
- generation_config.json +0 -6
- model.safetensors +0 -3
- trainer_state.json +0 -0
- training_args.bin +0 -3
README.md
CHANGED
@@ -1,15 +1,20 @@
|
|
1 |
---
|
2 |
datasets:
|
3 |
-
- ewof/koishi-instruct-metharme
|
4 |
---
|
5 |
-
|
|
|
|
|
6 |
## Base Model
|
|
|
7 |
Native fine-tune of togethercomputer/RedPajama-INCITE-Base-3B-v1
|
8 |
|
9 |
## Prompting
|
|
|
10 |
The current model version has been trained on prompts using three different roles, which are denoted by the following tokens: `<|system|>`, `<|user|>` and `<|model|>`.
|
11 |
|
12 |
The `<|system|>` prompt can be used to inject out-of-channel information behind the scenes, while the `<|user|>` prompt should be used to indicate user input. The `<|model|>` token should then be used to indicate that the model should generate a response. These tokens can occur multiple times and be chained together to form a conversation history.
|
13 |
|
14 |
## Benchmarks
|
15 |
-
|
|
|
|
1 |
---
|
2 |
datasets:
|
3 |
+
- ewof/koishi-instruct-metharme
|
4 |
---
|
5 |
+
|
6 |
+
trained on commit 33b25a5 of the linked dataset (lamini)
|
7 |
+
|
8 |
## Base Model
|
9 |
+
|
10 |
Native fine-tune of togethercomputer/RedPajama-INCITE-Base-3B-v1
|
11 |
|
12 |
## Prompting
|
13 |
+
|
14 |
The current model version has been trained on prompts using three different roles, which are denoted by the following tokens: `<|system|>`, `<|user|>` and `<|model|>`.
|
15 |
|
16 |
The `<|system|>` prompt can be used to inject out-of-channel information behind the scenes, while the `<|user|>` prompt should be used to indicate user input. The `<|model|>` token should then be used to indicate that the model should generate a response. These tokens can occur multiple times and be chained together to form a conversation history.
|
17 |
|
18 |
## Benchmarks
|
19 |
+
|
20 |
+
![koishi_instruct_3b_v2_benchmarks.png](https://media.discordapp.net/attachments/1108805965421228052/1110025834699427840/tDcBmTvnLLSTdOUEKyHZMfra.png)
|
checkpoint-1152/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_name_or_path": "togethercomputer/RedPajama-INCITE-Base-3B-v1",
|
3 |
-
"architectures": [
|
4 |
-
"GPTNeoXForCausalLM"
|
5 |
-
],
|
6 |
-
"bos_token_id": 0,
|
7 |
-
"classifier_dropout": 0.1,
|
8 |
-
"eos_token_id": 0,
|
9 |
-
"hidden_act": "gelu",
|
10 |
-
"hidden_size": 2560,
|
11 |
-
"initializer_range": 0.02,
|
12 |
-
"intermediate_size": 10240,
|
13 |
-
"layer_norm_eps": 1e-05,
|
14 |
-
"max_position_embeddings": 2048,
|
15 |
-
"model_type": "gpt_neox",
|
16 |
-
"num_attention_heads": 32,
|
17 |
-
"num_hidden_layers": 32,
|
18 |
-
"rotary_emb_base": 10000,
|
19 |
-
"rotary_pct": 1.0,
|
20 |
-
"tie_word_embeddings": false,
|
21 |
-
"torch_dtype": "bfloat16",
|
22 |
-
"transformers_version": "4.29.2",
|
23 |
-
"use_cache": true,
|
24 |
-
"use_parallel_residual": false,
|
25 |
-
"vocab_size": 50432
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
checkpoint-1152/generation_config.json
DELETED
@@ -1,6 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_from_model_config": true,
|
3 |
-
"bos_token_id": 0,
|
4 |
-
"eos_token_id": 0,
|
5 |
-
"transformers_version": "4.29.2"
|
6 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
checkpoint-1152/optimizer.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:9b25b8f0a1201a309f55820005ee38578885318a62130ec1b97b989e7abdadea
|
3 |
-
size 11103802425
|
|
|
|
|
|
|
|
checkpoint-1152/pytorch_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:3f475276b7157f00d019aaf75109e397bd986eb303968718156498b9a505ac9c
|
3 |
-
size 5686115609
|
|
|
|
|
|
|
|
checkpoint-1152/rng_state_0.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:4aa38064359163a436b682a76922629be0d47715cc93798f53c30584786df380
|
3 |
-
size 17655
|
|
|
|
|
|
|
|
checkpoint-1152/rng_state_1.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:386882d88441fb270dac4c0a02c6b12d9125a96b7fa06d4456448d9d1b9da975
|
3 |
-
size 17655
|
|
|
|
|
|
|
|
checkpoint-1152/rng_state_2.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:3c2119f0f2530fd325c172a94b131c42f068b4b61340e0b39e5188ebdebae961
|
3 |
-
size 17655
|
|
|
|
|
|
|
|
checkpoint-1152/rng_state_3.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:b265d61c7eae8db516afc3f2f413284853452f9b41a53ad49381bf25c87ed7a9
|
3 |
-
size 17655
|
|
|
|
|
|
|
|
checkpoint-1152/scheduler.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:4e37413fae3fcd5fe95558c91c09a103a88f23895d3432b1ae34f6a82c6f7cbb
|
3 |
-
size 627
|
|
|
|
|
|
|
|
checkpoint-1152/special_tokens_map.json
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bos_token": "<|endoftext|>",
|
3 |
-
"eos_token": "<|endoftext|>",
|
4 |
-
"unk_token": "<|endoftext|>"
|
5 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
checkpoint-1152/tokenizer.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-1152/tokenizer_config.json
DELETED
@@ -1,10 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"add_prefix_space": false,
|
3 |
-
"bos_token": "<|endoftext|>",
|
4 |
-
"clean_up_tokenization_spaces": true,
|
5 |
-
"eos_token": "<|endoftext|>",
|
6 |
-
"model_max_length": 2048,
|
7 |
-
"padding_side": "right",
|
8 |
-
"tokenizer_class": "GPTNeoXTokenizer",
|
9 |
-
"unk_token": "<|endoftext|>"
|
10 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
checkpoint-1152/trainer_state.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-1152/training_args.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8be3425ea525f1956ea7f5605ed90c71e428f1252e73ed5c2bc17a50d1e3c2e7
|
3 |
-
size 3963
|
|
|
|
|
|
|
|
checkpoint-1280/config.json
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_name_or_path": "togethercomputer/RedPajama-INCITE-Base-3B-v1",
|
3 |
-
"architectures": [
|
4 |
-
"GPTNeoXForCausalLM"
|
5 |
-
],
|
6 |
-
"bos_token_id": 0,
|
7 |
-
"classifier_dropout": 0.1,
|
8 |
-
"eos_token_id": 0,
|
9 |
-
"hidden_act": "gelu",
|
10 |
-
"hidden_size": 2560,
|
11 |
-
"initializer_range": 0.02,
|
12 |
-
"intermediate_size": 10240,
|
13 |
-
"layer_norm_eps": 1e-05,
|
14 |
-
"max_position_embeddings": 2048,
|
15 |
-
"model_type": "gpt_neox",
|
16 |
-
"num_attention_heads": 32,
|
17 |
-
"num_hidden_layers": 32,
|
18 |
-
"rotary_emb_base": 10000,
|
19 |
-
"rotary_pct": 1.0,
|
20 |
-
"tie_word_embeddings": false,
|
21 |
-
"torch_dtype": "bfloat16",
|
22 |
-
"transformers_version": "4.29.2",
|
23 |
-
"use_cache": true,
|
24 |
-
"use_parallel_residual": false,
|
25 |
-
"vocab_size": 50432
|
26 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
checkpoint-1280/generation_config.json
DELETED
@@ -1,6 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_from_model_config": true,
|
3 |
-
"bos_token_id": 0,
|
4 |
-
"eos_token_id": 0,
|
5 |
-
"transformers_version": "4.29.2"
|
6 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
checkpoint-1280/optimizer.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:fc188693373b13578ce9d267ef6764c0c25828da06e2718bc8232f1cb2b3258c
|
3 |
-
size 11103802425
|
|
|
|
|
|
|
|
checkpoint-1280/pytorch_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:c574ec6e80f7fa816231824d9516314a3e767ff2c16553b90f63c05b4525ff1f
|
3 |
-
size 5686115609
|
|
|
|
|
|
|
|
checkpoint-1280/rng_state_0.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:0224d29557651897de6e344d78bbb9b785843ad78a49f005c99eacfe7e07dea7
|
3 |
-
size 17655
|
|
|
|
|
|
|
|
checkpoint-1280/rng_state_1.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:15ed5ccba70f86cbd59f7bfb2752c652c43123154fc2f2e2615a588c8a797fce
|
3 |
-
size 17655
|
|
|
|
|
|
|
|
checkpoint-1280/rng_state_2.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:4e25f55f214706ee17b931bb7b10e6773ce251919d45f1744d962e3fc19fc7e8
|
3 |
-
size 17655
|
|
|
|
|
|
|
|
checkpoint-1280/rng_state_3.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:7f73f25b15884c097e03156b078a6a0d9264fdc7ac3e6463c53858c1241a5820
|
3 |
-
size 17655
|
|
|
|
|
|
|
|
checkpoint-1280/scheduler.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:edc59fa361c757fcde2a46aea7281b6c186e025947718063c5049653c82086f7
|
3 |
-
size 627
|
|
|
|
|
|
|
|
checkpoint-1280/special_tokens_map.json
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bos_token": "<|endoftext|>",
|
3 |
-
"eos_token": "<|endoftext|>",
|
4 |
-
"unk_token": "<|endoftext|>"
|
5 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
checkpoint-1280/tokenizer.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-1280/tokenizer_config.json
DELETED
@@ -1,10 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"add_prefix_space": false,
|
3 |
-
"bos_token": "<|endoftext|>",
|
4 |
-
"clean_up_tokenization_spaces": true,
|
5 |
-
"eos_token": "<|endoftext|>",
|
6 |
-
"model_max_length": 2048,
|
7 |
-
"padding_side": "right",
|
8 |
-
"tokenizer_class": "GPTNeoXTokenizer",
|
9 |
-
"unk_token": "<|endoftext|>"
|
10 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
checkpoint-1280/trainer_state.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-1280/training_args.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8be3425ea525f1956ea7f5605ed90c71e428f1252e73ed5c2bc17a50d1e3c2e7
|
3 |
-
size 3963
|
|
|
|
|
|
|
|
generation_config.json
DELETED
@@ -1,6 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_from_model_config": true,
|
3 |
-
"bos_token_id": 0,
|
4 |
-
"eos_token_id": 0,
|
5 |
-
"transformers_version": "4.29.2"
|
6 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model.safetensors
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:9501d11a8002a39d54b448386e3a64cdae1290a309e2f5b4c7bb0b12fde1a6e1
|
3 |
-
size 5686008384
|
|
|
|
|
|
|
|
trainer_state.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
training_args.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8be3425ea525f1956ea7f5605ed90c71e428f1252e73ed5c2bc17a50d1e3c2e7
|
3 |
-
size 3963
|
|
|
|
|
|
|
|