Wauplin (HF staff) committed on
Commit 68d6937
1 Parent(s): 3a5aea0

Push model using huggingface_hub.

Files changed (3)
  1. README.md +13 -3
  2. config.json +31 -14
  3. model.safetensors +3 -0
README.md CHANGED
@@ -1,3 +1,13 @@
- ---
- license: apache-2.0
- ---
+ ---
+ library_name: mamba_ssm
+ tags:
+ - arXiv:2312.00752
+ - arXiv:2405.21060
+ - mamba2
+ - model_hub_mixin
+ - pytorch_model_hub_mixin
+ ---
+
+ This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
+ - Library: https://github.com/state-spaces/mamba
+ - Docs: [More Information Needed]
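
The README's note about the PyTorchModelHubMixin integration corresponds to a small amount of user code. Below is a minimal sketch, not taken from this repository: the `TinyModel` class, its arguments, and the repo id are hypothetical placeholders; only `PyTorchModelHubMixin`, `push_to_hub`, and `from_pretrained` are real huggingface_hub APIs.

```python
# Minimal sketch of the PyTorchModelHubMixin pattern (hypothetical model
# class and repo id; not code from this repository).
import torch.nn as nn
from huggingface_hub import PyTorchModelHubMixin


class TinyModel(nn.Module, PyTorchModelHubMixin):
    def __init__(self, d_model: int = 768):
        super().__init__()
        self.proj = nn.Linear(d_model, d_model)

    def forward(self, x):
        return self.proj(x)


model = TinyModel(d_model=768)
# The mixin serializes the __init__ kwargs to config.json and the weights to
# model.safetensors, then uploads both (the same file pair added by this commit).
model.push_to_hub("your-username/mamba2-demo")
# Anyone can later rebuild the model and reload the weights with:
reloaded = TinyModel.from_pretrained("your-username/mamba2-demo")
```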
config.json CHANGED
@@ -1,16 +1,33 @@
  {
- "d_model": 768,
- "d_intermediate": 0,
- "n_layer": 24,
- "vocab_size": 50277,
- "ssm_cfg": {
- "layer": "Mamba2"
- },
- "attn_layer_idx": [],
- "attn_cfg": {},
- "rms_norm": true,
- "residual_in_fp32": true,
- "fused_add_norm": true,
- "pad_vocab_size_multiple": 16,
- "tie_embeddings": true
+ "A_init_range": [
+ 1,
+ 16
+ ],
+ "D_has_hdim": false,
+ "bias": false,
+ "chunk_size": 256,
+ "conv_bias": true,
+ "conv_init": null,
+ "d_conv": 4,
+ "d_model": 768,
+ "d_ssm": null,
+ "d_state": 128,
+ "device": null,
+ "dt_init_floor": 0.0001,
+ "dt_limit": [
+ 0.0,
+ Infinity
+ ],
+ "dt_max": 0.1,
+ "dt_min": 0.001,
+ "dtype": null,
+ "expand": 2,
+ "headdim": 64,
+ "layer_idx": null,
+ "ngroups": 1,
+ "norm_before_gate": false,
+ "process_group": null,
+ "rmsnorm": true,
+ "sequence_parallel": true,
+ "use_mem_eff_path": true
  }
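
The new config.json reads like the keyword arguments of a single Mamba2 block from the mamba_ssm library rather than a full language-model config. A speculative sketch of rebuilding that module from the file is below; it assumes the JSON keys map one-to-one onto `Mamba2.__init__` (they appear to) and that a CUDA device is available for the fused kernels.

```python
# Sketch: instantiate a mamba_ssm Mamba2 block from this config.json.
# Assumes the keys match Mamba2.__init__ kwargs and that CUDA is available.
import json

import torch
from mamba_ssm import Mamba2

with open("config.json") as f:
    # Python's json module parses null -> None and Infinity -> float("inf").
    cfg = json.load(f)

block = Mamba2(**cfg).to("cuda")

x = torch.randn(1, 256, cfg["d_model"], device="cuda")  # (batch, seqlen, d_model)
y = block(x)  # same shape as the input
```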
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:90dd1091ae7b6acb34537e8b103a0f937becb92e0eb69256ff1e0f6430058d0d
+ size 15058808
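
The three lines above are a git-lfs pointer, not the weights themselves: the Hub stores the roughly 15 MB tensor file under the listed sha256 oid and serves it when the file is downloaded. Continuing the previous sketch, one way to fetch and load it is shown below; the repo id is a hypothetical placeholder, since this commit page does not name the repository.

```python
# Sketch: resolve the LFS pointer to the real weights and load them into the
# Mamba2 block built in the previous snippet. The repo id is a placeholder.
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

weights_path = hf_hub_download("your-username/mamba2-demo", "model.safetensors")
state_dict = load_file(weights_path)  # tensors totalling ~15 MB, per the pointer's size field
block.load_state_dict(state_dict)     # `block` is the Mamba2 module from the previous sketch
```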