Wauplin (HF staff) committed on
Commit 68d6937
1 Parent(s): 3a5aea0

Push model using huggingface_hub.

Files changed (3)
  1. README.md +13 -3
  2. config.json +31 -14
  3. model.safetensors +3 -0
README.md CHANGED
@@ -1,3 +1,13 @@
- ---
- license: apache-2.0
- ---
+ ---
+ library_name: mamba_ssm
+ tags:
+ - arXiv:2312.00752
+ - arXiv:2405.21060
+ - mamba2
+ - model_hub_mixin
+ - pytorch_model_hub_mixin
+ ---
+
+ This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
+ - Library: https://github.com/state-spaces/mamba
+ - Docs: [More Information Needed]
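
The README's note about the PyTorchModelHubMixin integration corresponds to a small amount of user code. Below is a minimal sketch, not taken from this repository: the `TinyModel` class, its arguments, and the repo id are hypothetical placeholders; only `PyTorchModelHubMixin`, `push_to_hub`, and `from_pretrained` are real huggingface_hub APIs.

```python
# Minimal sketch of the PyTorchModelHubMixin pattern (hypothetical model
# class and repo id; not code from this repository).
import torch.nn as nn
from huggingface_hub import PyTorchModelHubMixin


class TinyModel(nn.Module, PyTorchModelHubMixin):
    def __init__(self, d_model: int = 768):
        super().__init__()
        self.proj = nn.Linear(d_model, d_model)

    def forward(self, x):
        return self.proj(x)


model = TinyModel(d_model=768)
# The mixin serializes the __init__ kwargs to config.json and the weights to
# model.safetensors, then uploads both (the same file pair added by this commit).
model.push_to_hub("your-username/mamba2-demo")
# Anyone can later rebuild the model and reload the weights with:
reloaded = TinyModel.from_pretrained("your-username/mamba2-demo")
```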
config.json CHANGED
@@ -1,16 +1,33 @@
  {
- "d_model": 768,
- "d_intermediate": 0,
- "n_layer": 24,
- "vocab_size": 50277,
- "ssm_cfg": {
- "layer": "Mamba2"
- },
- "attn_layer_idx": [],
- "attn_cfg": {},
- "rms_norm": true,
- "residual_in_fp32": true,
- "fused_add_norm": true,
- "pad_vocab_size_multiple": 16,
- "tie_embeddings": true
+ "A_init_range": [
+ 1,
+ 16
+ ],
+ "D_has_hdim": false,
+ "bias": false,
+ "chunk_size": 256,
+ "conv_bias": true,
+ "conv_init": null,
+ "d_conv": 4,
+ "d_model": 768,
+ "d_ssm": null,
+ "d_state": 128,
+ "device": null,
+ "dt_init_floor": 0.0001,
+ "dt_limit": [
+ 0.0,
+ Infinity
+ ],
+ "dt_max": 0.1,
+ "dt_min": 0.001,
+ "dtype": null,
+ "expand": 2,
+ "headdim": 64,
+ "layer_idx": null,
+ "ngroups": 1,
+ "norm_before_gate": false,
+ "process_group": null,
+ "rmsnorm": true,
+ "sequence_parallel": true,
+ "use_mem_eff_path": true
  }
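
The new config.json reads like the keyword arguments of a single Mamba2 block from the mamba_ssm library rather than a full language-model config. A speculative sketch of rebuilding that module from the file is below; it assumes the JSON keys map one-to-one onto `Mamba2.__init__` (they appear to) and that a CUDA device is available for the fused kernels.

```python
# Sketch: instantiate a mamba_ssm Mamba2 block from this config.json.
# Assumes the keys match Mamba2.__init__ kwargs and that CUDA is available.
import json

import torch
from mamba_ssm import Mamba2

with open("config.json") as f:
    # Python's json module parses null -> None and Infinity -> float("inf").
    cfg = json.load(f)

block = Mamba2(**cfg).to("cuda")

x = torch.randn(1, 256, cfg["d_model"], device="cuda")  # (batch, seqlen, d_model)
y = block(x)  # same shape as the input
```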
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:90dd1091ae7b6acb34537e8b103a0f937becb92e0eb69256ff1e0f6430058d0d
+ size 15058808
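
The three lines above are a git-lfs pointer, not the weights themselves: the Hub stores the roughly 15 MB tensor file under the listed sha256 oid and serves it when the file is downloaded. Continuing the previous sketch, one way to fetch and load it is shown below; the repo id is a hypothetical placeholder, since this commit page does not name the repository.

```python
# Sketch: resolve the LFS pointer to the real weights and load them into the
# Mamba2 block built in the previous snippet. The repo id is a placeholder.
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

weights_path = hf_hub_download("your-username/mamba2-demo", "model.safetensors")
state_dict = load_file(weights_path)  # tensors totalling ~15 MB, per the pointer's size field
block.load_state_dict(state_dict)     # `block` is the Mamba2 module from the previous sketch
```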