0Tick committed on
Commit 386ce94
Parent: 7fe18e4

Upload version with tags in posts shuffled before training


This model was trained on the same dataset, but the tags in each post were shuffled before being used for training.
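A minimal sketch of the shuffling step described above, assuming each post carries a whitespace-separated tag string (the actual dataset format and field names are assumptions, not taken from this repo):

import random

def shuffle_post_tags(tag_string: str) -> str:
    # Hypothetical format: tags separated by whitespace, as in
    # danbooru-style captions; the real field layout is an assumption.
    tags = tag_string.split()
    random.shuffle(tags)
    return " ".join(tags)

# Applied once per post before tokenization, so the model does not
# memorize one fixed tag order.
print(shuffle_post_tags("1girl solo blue_eyes long_hair smile"))

Shuffling like this can make the model less sensitive to tag order, which matters if users supply tags in arbitrary order at generation time.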

config.json CHANGED
@@ -40,7 +40,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.27.0.dev0",
+  "transformers_version": "4.31.0.dev0",
   "use_cache": true,
   "vocab_size": 50257
 }
merges.txt CHANGED
@@ -1,4 +1,4 @@
-#version: 0.2 - Trained by `huggingface/tokenizers`
+#version: 0.2
 Ġ t
 Ġ a
 h e
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6353ddc64072be10bc35861e3cc73e639debd29ddfd763ce379bd894004e84de
-size 333970169
+oid sha256:99e39f00ff01123155098e7ec9fdc36432ba403c5d4182a59f2affb76d05a828
+size 327674773
runs/Jun08_11-12-20_671aa6aed209/events.out.tfevents.1686222803.671aa6aed209.1153.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a2d97deabcf9a45ed754f0fcd585686c3b89b48c616d33e9205d00d3dfe9284
+size 6931
runs/Jun08_11-12-20_671aa6aed209/events.out.tfevents.1686232154.671aa6aed209.1153.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1a15fd04acb34741f690785349d2fdaa82f9627a84c18ff520b73509541313a
+size 411
tokenizer.json CHANGED
@@ -39,6 +39,7 @@
   "continuing_subword_prefix": "",
   "end_of_word_suffix": "",
   "fuse_unk": false,
+  "byte_fallback": false,
   "vocab": {
     "!": 0,
     "\"": 1,
tokenizer_config.json CHANGED
@@ -1,9 +1,9 @@
 {
   "add_prefix_space": false,
   "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "model_max_length": 1024,
-  "special_tokens_map_file": null,
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
 }