Larger Pre-trained model

Files changed (3) hide show

README.md CHANGED Viewed

@@ -1,3 +1,9 @@
----
-license: apache-2.0
----

+---
+tags:
+- model_hub_mixin
+- pytorch_model_hub_mixin
+---
+This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
+- Library: [More Information Needed]
+- Docs: [More Information Needed]

config.json ADDED Viewed

+{
+  "d_model": null,
+  "device": "cpu",
+  "model_kwargs": {},
+  "model_name": "MOMENT",
+  "patch_len": 8,
+  "patch_stride_len": 8,
+  "seq_len": 512,
+  "t5_config": {
+    "architectures": [
+      "T5ForConditionalGeneration"
+    ],
+    "attn_implementation": null,
+    "d_ff": 2816,
+    "d_kv": 64,
+    "d_model": 1024,
+    "decoder_start_token_id": 0,
+    "dropout_rate": 0.1,
+    "eos_token_id": 1,
+    "feed_forward_proj": "gated-gelu",
+    "initializer_factor": 1.0,
+    "is_encoder_decoder": true,
+    "layer_norm_epsilon": 1e-06,
+    "model_type": "t5",
+    "n_positions": 512,
+    "num_decoder_layers": 24,
+    "num_heads": 16,
+    "num_layers": 24,
+    "output_past": true,
+    "pad_token_id": 0,
+    "relative_attention_max_distance": 128,
+    "relative_attention_num_buckets": 32,
+    "tie_word_embeddings": false,
+    "use_cache": true,
+    "vocab_size": 32128
+  },
+  "task_name": "reconstruction",
+  "transformer_backbone": "google/flan-t5-large",
+  "transformer_type": "encoder_only"
+}

model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:1585873fdec51e02e640e5ceee37fc7d841694ca960753939499f85cdd7a2c1d
+size 1436063888