felarof01 commited on
Commit
653a2d8
·
verified ·
1 Parent(s): b8506eb

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ gemma2-2b-it/ocdbt.process_0/d/834bb4bf1e3854eb09f6208c95c071b2 filter=lfs diff=lfs merge=lfs -text
37
+ gemma2-2b-it/ocdbt.process_0/d/bf69258061ae5f35eb7a5669fe6877d4 filter=lfs diff=lfs merge=lfs -text
gemma2-2b-it/_CHECKPOINT_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"init_timestamp_nsecs": 1722363011005331076, "commit_timestamp_nsecs": 1722363330099482227}
gemma2-2b-it/_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tree_metadata": {"('transformer/embedder', 'input_embedding')": {"key_metadata": [{"key": "transformer/embedder", "key_type": 2}, {"key": "input_embedding", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/final_norm', 'scale')": {"key_metadata": [{"key": "transformer/final_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_0/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_0/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_0/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_0/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_0/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_0/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_0/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_0/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_0/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_0/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_1/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_1/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_1/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_1/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_1/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_1/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_1/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_1/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_1/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_1/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_10/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_10/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_10/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_10/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_10/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_10/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_10/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_10/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_10/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_10/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_11/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_11/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_11/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_11/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_11/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_11/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_11/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_11/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_11/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_11/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_12/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_12/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_12/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_12/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_12/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_12/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_12/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_12/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_12/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_12/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_13/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_13/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_13/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_13/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_13/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_13/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_13/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_13/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_13/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_13/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_14/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_14/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_14/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_14/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_14/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_14/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_14/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_14/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_14/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_14/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_15/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_15/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_15/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_15/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_15/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_15/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_15/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_15/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_15/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_15/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_16/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_16/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_16/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_16/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_16/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_16/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_16/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_16/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_16/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_16/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_17/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_17/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_17/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_17/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_17/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_17/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_17/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_17/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_17/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_17/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_18/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_18/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_18/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_18/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_18/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_18/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_18/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_18/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_18/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_18/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_19/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_19/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_19/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_19/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_19/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_19/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_19/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_19/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_19/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_19/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_2/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_2/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_2/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_2/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_2/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_2/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_2/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_2/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_2/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_2/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_20/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_20/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_20/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_20/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_20/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_20/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_20/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_20/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_20/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_20/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_21/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_21/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_21/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_21/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_21/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_21/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_21/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_21/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_21/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_21/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_22/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_22/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_22/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_22/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_22/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_22/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_22/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_22/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_22/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_22/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_23/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_23/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_23/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_23/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_23/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_23/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_23/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_23/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_23/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_23/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_24/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_24/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_24/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_24/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_24/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_24/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_24/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_24/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_24/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_24/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_25/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_25/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_25/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_25/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_25/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_25/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_25/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_25/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_25/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_25/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_3/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_3/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_3/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_3/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_3/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_3/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_3/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_3/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_3/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_3/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_4/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_4/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_4/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_4/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_4/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_4/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_4/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_4/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_4/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_4/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_5/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_5/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_5/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_5/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_5/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_5/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_5/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_5/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_5/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_5/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_6/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_6/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_6/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_6/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_6/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_6/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_6/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_6/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_6/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_6/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_7/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_7/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_7/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_7/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_7/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_7/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_7/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_7/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_7/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_7/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_8/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_8/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_8/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_8/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_8/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_8/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_8/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_8/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_8/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_8/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/attn/attn_vec_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_9/attn/attn_vec_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/attn/kv_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_9/attn/kv_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/attn/q_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_9/attn/q_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/mlp/gating_einsum', 'w')": {"key_metadata": [{"key": "transformer/layer_9/mlp/gating_einsum", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/mlp/linear', 'w')": {"key_metadata": [{"key": "transformer/layer_9/mlp/linear", "key_type": 2}, {"key": "w", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/post_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_9/post_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/post_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_9/post_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/pre_attention_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_9/pre_attention_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}, "('transformer/layer_9/pre_ffw_norm', 'scale')": {"key_metadata": [{"key": "transformer/layer_9/pre_ffw_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "np.ndarray", "skip_deserialize": false}}}, "use_zarr3": false}
gemma2-2b-it/checkpoint ADDED
Binary file (23.1 kB). View file
 
gemma2-2b-it/d/b5a4695f4be0a2f41ec1e25616ebd7e7 ADDED
Binary file (2.72 kB). View file
 
gemma2-2b-it/descriptor/descriptor.pbtxt ADDED
@@ -0,0 +1 @@
 
 
1
+ uuid: "fbbc2e3d-c57a-4e26-b4cc-5f447d84a641"
gemma2-2b-it/manifest.ocdbt ADDED
Binary file (118 Bytes). View file
 
gemma2-2b-it/ocdbt.process_0/d/834bb4bf1e3854eb09f6208c95c071b2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247ff6b4b123666d4a6b1eb64f8a4df3ca8a1428d21041ef6bb4eb50e3919d93
3
+ size 1826081806
gemma2-2b-it/ocdbt.process_0/d/bf69258061ae5f35eb7a5669fe6877d4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef438aa46d4c28a2afb32f4d5fcddf5ce13dc1d5f9f6283ff7fd1839f03b4985
3
+ size 2270987380
gemma2-2b-it/ocdbt.process_0/d/fc20151969d7ca91ea9d8275bda0e219 ADDED
Binary file (2.71 kB). View file
 
gemma2-2b-it/ocdbt.process_0/manifest.ocdbt ADDED
Binary file (180 Bytes). View file
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2
3
+ size 4241003