Upload folder using huggingface_hub
Browse files- .gitattributes +2 -0
- gemma2-2b-it/_CHECKPOINT_METADATA +1 -0
- gemma2-2b-it/_METADATA +1 -0
- gemma2-2b-it/checkpoint +0 -0
- gemma2-2b-it/d/b5a4695f4be0a2f41ec1e25616ebd7e7 +0 -0
- gemma2-2b-it/descriptor/descriptor.pbtxt +1 -0
- gemma2-2b-it/manifest.ocdbt +0 -0
- gemma2-2b-it/ocdbt.process_0/d/834bb4bf1e3854eb09f6208c95c071b2 +3 -0
- gemma2-2b-it/ocdbt.process_0/d/bf69258061ae5f35eb7a5669fe6877d4 +3 -0
- gemma2-2b-it/ocdbt.process_0/d/fc20151969d7ca91ea9d8275bda0e219 +0 -0
- gemma2-2b-it/ocdbt.process_0/manifest.ocdbt +0 -0
- tokenizer.model +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
gemma2-2b-it/ocdbt.process_0/d/834bb4bf1e3854eb09f6208c95c071b2 filter=lfs diff=lfs merge=lfs -text
|
37 |
+
gemma2-2b-it/ocdbt.process_0/d/bf69258061ae5f35eb7a5669fe6877d4 filter=lfs diff=lfs merge=lfs -text
|
gemma2-2b-it/_CHECKPOINT_METADATA
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"init_timestamp_nsecs": 1722363011005331076, "commit_timestamp_nsecs": 1722363330099482227}
|
gemma2-2b-it/_METADATA
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"tree_metadata": {"('transformer/embedder', 'input_embedding')": {"key_metadata": [{"key": "transformer/embedder", "key_type": 2}, {"key": "input_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/final_norm', 'scale')": {"key_metadata": [{"key": "transformer/final_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_0/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_0/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_0/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_0/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_0/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_0/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_0/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_0/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_0/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_1/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_1/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_1/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_1/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_1/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_1/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_1/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_1/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_1/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_10/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_10/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_10/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_10/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_10/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_10/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_10/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_10/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_10/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_11/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_11/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_11/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_11/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_11/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_11/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_11/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_11/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_11/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_12/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_12/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_12/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_12/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_12/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_12/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_12/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_12/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_12/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_13/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_13/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_13/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_13/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_13/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_13/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_13/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_13/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_13/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_14/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_14/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_14/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_14/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_14/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_14/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_14/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_14/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_14/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_15/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_15/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_15/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_15/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_15/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_15/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_15/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_15/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_15/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_16/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_16/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_16/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_16/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_16/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_16/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_16/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_16/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_16/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_17/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_17/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_17/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_17/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_17/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_17/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_17/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_17/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_17/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_18/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_18/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_18/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_18/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_18/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_18/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_18/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_18/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_18/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_19/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_19/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_19/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_19/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_19/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_19/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_19/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_19/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_19/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_2/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_2/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_2/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_2/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_2/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_2/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_2/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_2/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_2/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_20/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_20/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_20/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_20/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_20/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_20/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_20/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_20/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_20/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_21/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_21/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_21/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_21/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_21/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_21/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_21/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_21/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_21/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_22/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_22/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_22/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_22/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_22/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_22/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_22/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_22/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_22/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_23/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_23/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_23/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_23/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_23/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_23/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_23/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_23/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_23/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_24/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_24/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_24/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_24/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_24/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_24/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_24/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_24/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_24/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_25/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_25/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_25/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_25/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_25/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_25/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_25/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_25/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_25/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_3/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_3/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_3/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_3/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_3/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_3/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_3/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_3/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_3/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_4/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_4/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_4/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_4/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_4/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_4/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_4/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_4/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_4/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_5/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_5/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_5/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_5/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_5/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_5/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_5/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_5/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_5/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_6/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_6/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_6/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_6/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_6/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_6/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_6/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_6/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_6/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_7/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_7/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_7/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_7/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_7/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_7/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_7/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_7/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_7/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_8/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_8/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_8/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_8/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_8/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_8/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_8/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_8/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_8/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_9/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_9/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_9/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_9/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_9/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_9/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_9/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_9/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_9/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}}, "use_zarr3": false}
|
gemma2-2b-it/checkpoint
ADDED
Binary file (23.1 kB). View file
|
|
gemma2-2b-it/d/b5a4695f4be0a2f41ec1e25616ebd7e7
ADDED
Binary file (2.72 kB). View file
|
|
gemma2-2b-it/descriptor/descriptor.pbtxt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
uuid: "fbbc2e3d-c57a-4e26-b4cc-5f447d84a641"
|
gemma2-2b-it/manifest.ocdbt
ADDED
Binary file (118 Bytes). View file
|
|
gemma2-2b-it/ocdbt.process_0/d/834bb4bf1e3854eb09f6208c95c071b2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:247ff6b4b123666d4a6b1eb64f8a4df3ca8a1428d21041ef6bb4eb50e3919d93
|
3 |
+
size 1826081806
|
gemma2-2b-it/ocdbt.process_0/d/bf69258061ae5f35eb7a5669fe6877d4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef438aa46d4c28a2afb32f4d5fcddf5ce13dc1d5f9f6283ff7fd1839f03b4985
|
3 |
+
size 2270987380
|
gemma2-2b-it/ocdbt.process_0/d/fc20151969d7ca91ea9d8275bda0e219
ADDED
Binary file (2.71 kB). View file
|
|
gemma2-2b-it/ocdbt.process_0/manifest.ocdbt
ADDED
Binary file (180 Bytes). View file
|
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2
|
3 |
+
size 4241003
|