Safetensors
English
olmoe
Mixture of Experts
olmo
Muennighoff committed on
Commit
94619a7
1 Parent(s): f5c551e

Move over ckpt from no-load-balancing

Browse files
config.json CHANGED
@@ -22,7 +22,7 @@
22
  "pad_token_id": 1,
23
  "rope_scaling": null,
24
  "rope_theta": 10000.0,
25
- "router_aux_loss_coef": 0.001,
26
  "tie_word_embeddings": false,
27
  "torch_dtype": "bfloat16",
28
  "transformers_version": "4.44.0.dev0",
 
22
  "pad_token_id": 1,
23
  "rope_scaling": null,
24
  "rope_theta": 10000.0,
25
+ "router_aux_loss_coef": 0.01,
26
  "tie_word_embeddings": false,
27
  "torch_dtype": "bfloat16",
28
  "transformers_version": "4.44.0.dev0",
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edb91fc3570bc49c613773ec760eca6a17c7e0b8caba371882b820b66b1df49b
3
  size 4997744872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:660f006150a625e0b2f6df2bb1344a1d33ba2185fe92dccfffbf3f7494856c6b
3
  size 4997744872
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c67b73b712ea70ff68799c2ab20c1282c8abb829cfb34d97645c30e729fdbafe
3
  size 4997235176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5688cc530e68648e30a86f33a0a682c31e764e54b4feca21e866b9298a50e390
3
  size 4997235176
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd30f88d43bfc1e5d42b042543d552d98a1125bfbd0064be0c50cef097607da2
3
  size 3843741912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed7c22a0294829f52e8a6a66571c3acaeb1cc53b0c890075622b21f43ceebe59
3
  size 3843741912