philipp-zettl
committed on
Commit
•
41776c6
1
Parent(s):
ff925fd
Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +93 -0
- config.json +30 -0
- generation_config.json +7 -0
- merges.txt +0 -0
- model.layers.0.input_layernorm.weight +0 -0
- model.layers.0.post_attention_layernorm.weight +0 -0
- model.layers.1.input_layernorm.weight +0 -0
- model.layers.1.post_attention_layernorm.weight +0 -0
- model.layers.10.input_layernorm.weight +0 -0
- model.layers.10.post_attention_layernorm.weight +0 -0
- model.layers.11.input_layernorm.weight +0 -0
- model.layers.11.post_attention_layernorm.weight +0 -0
- model.layers.12.input_layernorm.weight +0 -0
- model.layers.12.post_attention_layernorm.weight +0 -0
- model.layers.13.input_layernorm.weight +0 -0
- model.layers.13.post_attention_layernorm.weight +0 -0
- model.layers.14.input_layernorm.weight +0 -0
- model.layers.14.post_attention_layernorm.weight +0 -0
- model.layers.15.input_layernorm.weight +0 -0
- model.layers.15.post_attention_layernorm.weight +0 -0
- model.layers.16.input_layernorm.weight +0 -0
- model.layers.16.post_attention_layernorm.weight +0 -0
- model.layers.17.input_layernorm.weight +0 -0
- model.layers.17.post_attention_layernorm.weight +0 -0
- model.layers.18.input_layernorm.weight +0 -0
- model.layers.18.post_attention_layernorm.weight +0 -0
- model.layers.19.input_layernorm.weight +0 -0
- model.layers.19.post_attention_layernorm.weight +0 -0
- model.layers.2.input_layernorm.weight +0 -0
- model.layers.2.post_attention_layernorm.weight +0 -0
- model.layers.20.input_layernorm.weight +0 -0
- model.layers.20.post_attention_layernorm.weight +0 -0
- model.layers.21.input_layernorm.weight +0 -0
- model.layers.21.post_attention_layernorm.weight +0 -0
- model.layers.22.input_layernorm.weight +0 -0
- model.layers.22.post_attention_layernorm.weight +0 -0
- model.layers.23.input_layernorm.weight +0 -0
- model.layers.23.post_attention_layernorm.weight +0 -0
- model.layers.3.input_layernorm.weight +0 -0
- model.layers.3.post_attention_layernorm.weight +0 -0
- model.layers.4.input_layernorm.weight +0 -0
- model.layers.4.post_attention_layernorm.weight +0 -0
- model.layers.5.input_layernorm.weight +0 -0
- model.layers.5.post_attention_layernorm.weight +0 -0
- model.layers.6.input_layernorm.weight +0 -0
- model.layers.6.post_attention_layernorm.weight +0 -0
- model.layers.7.input_layernorm.weight +0 -0
- model.layers.7.post_attention_layernorm.weight +0 -0
- model.layers.8.input_layernorm.weight +0 -0
- model.layers.8.post_attention_layernorm.weight +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,96 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
onnx__MatMul_5365 filter=lfs diff=lfs merge=lfs -text
|
37 |
+
onnx__MatMul_5335 filter=lfs diff=lfs merge=lfs -text
|
38 |
+
onnx__MatMul_5190 filter=lfs diff=lfs merge=lfs -text
|
39 |
+
onnx__MatMul_5244 filter=lfs diff=lfs merge=lfs -text
|
40 |
+
onnx__MatMul_5248 filter=lfs diff=lfs merge=lfs -text
|
41 |
+
onnx__MatMul_5598 filter=lfs diff=lfs merge=lfs -text
|
42 |
+
onnx__MatMul_5103 filter=lfs diff=lfs merge=lfs -text
|
43 |
+
onnx__MatMul_4930 filter=lfs diff=lfs merge=lfs -text
|
44 |
+
onnx__MatMul_5163 filter=lfs diff=lfs merge=lfs -text
|
45 |
+
onnx__MatMul_4954 filter=lfs diff=lfs merge=lfs -text
|
46 |
+
onnx__MatMul_5596 filter=lfs diff=lfs merge=lfs -text
|
47 |
+
onnx__MatMul_5278 filter=lfs diff=lfs merge=lfs -text
|
48 |
+
onnx__MatMul_5044 filter=lfs diff=lfs merge=lfs -text
|
49 |
+
onnx__MatMul_5597 filter=lfs diff=lfs merge=lfs -text
|
50 |
+
onnx__MatMul_4957 filter=lfs diff=lfs merge=lfs -text
|
51 |
+
onnx__MatMul_5336 filter=lfs diff=lfs merge=lfs -text
|
52 |
+
onnx__MatMul_5481 filter=lfs diff=lfs merge=lfs -text
|
53 |
+
onnx__MatMul_5221 filter=lfs diff=lfs merge=lfs -text
|
54 |
+
onnx__MatMul_4959 filter=lfs diff=lfs merge=lfs -text
|
55 |
+
onnx__MatMul_5250 filter=lfs diff=lfs merge=lfs -text
|
56 |
+
onnx__MatMul_5418 filter=lfs diff=lfs merge=lfs -text
|
57 |
+
onnx__MatMul_5186 filter=lfs diff=lfs merge=lfs -text
|
58 |
+
onnx__MatMul_5366 filter=lfs diff=lfs merge=lfs -text
|
59 |
+
onnx__MatMul_5277 filter=lfs diff=lfs merge=lfs -text
|
60 |
+
onnx__MatMul_5129 filter=lfs diff=lfs merge=lfs -text
|
61 |
+
onnx__MatMul_5015 filter=lfs diff=lfs merge=lfs -text
|
62 |
+
onnx__MatMul_5131 filter=lfs diff=lfs merge=lfs -text
|
63 |
+
onnx__MatMul_5043 filter=lfs diff=lfs merge=lfs -text
|
64 |
+
onnx__MatMul_5477 filter=lfs diff=lfs merge=lfs -text
|
65 |
+
onnx__MatMul_5247 filter=lfs diff=lfs merge=lfs -text
|
66 |
+
onnx__MatMul_5191 filter=lfs diff=lfs merge=lfs -text
|
67 |
+
onnx__MatMul_5074 filter=lfs diff=lfs merge=lfs -text
|
68 |
+
onnx__MatMul_5511 filter=lfs diff=lfs merge=lfs -text
|
69 |
+
onnx__MatMul_5274 filter=lfs diff=lfs merge=lfs -text
|
70 |
+
onnx__MatMul_5218 filter=lfs diff=lfs merge=lfs -text
|
71 |
+
onnx__MatMul_5534 filter=lfs diff=lfs merge=lfs -text
|
72 |
+
onnx__MatMul_5624 filter=lfs diff=lfs merge=lfs -text
|
73 |
+
onnx__MatMul_4984 filter=lfs diff=lfs merge=lfs -text
|
74 |
+
onnx__MatMul_5479 filter=lfs diff=lfs merge=lfs -text
|
75 |
+
onnx__MatMul_5189 filter=lfs diff=lfs merge=lfs -text
|
76 |
+
onnx__MatMul_5625 filter=lfs diff=lfs merge=lfs -text
|
77 |
+
onnx__MatMul_5623 filter=lfs diff=lfs merge=lfs -text
|
78 |
+
onnx__MatMul_5507 filter=lfs diff=lfs merge=lfs -text
|
79 |
+
onnx__MatMul_5482 filter=lfs diff=lfs merge=lfs -text
|
80 |
+
onnx__MatMul_4987 filter=lfs diff=lfs merge=lfs -text
|
81 |
+
onnx__MatMul_5279 filter=lfs diff=lfs merge=lfs -text
|
82 |
+
onnx__MatMul_5592 filter=lfs diff=lfs merge=lfs -text
|
83 |
+
onnx__MatMul_5451 filter=lfs diff=lfs merge=lfs -text
|
84 |
+
onnx__MatMul_5042 filter=lfs diff=lfs merge=lfs -text
|
85 |
+
onnx__MatMul_5161 filter=lfs diff=lfs merge=lfs -text
|
86 |
+
onnx__MatMul_5192 filter=lfs diff=lfs merge=lfs -text
|
87 |
+
onnx__MatMul_5099 filter=lfs diff=lfs merge=lfs -text
|
88 |
+
onnx__MatMul_5391 filter=lfs diff=lfs merge=lfs -text
|
89 |
+
onnx__MatMul_5422 filter=lfs diff=lfs merge=lfs -text
|
90 |
+
onnx__MatMul_5506 filter=lfs diff=lfs merge=lfs -text
|
91 |
+
onnx__MatMul_5361 filter=lfs diff=lfs merge=lfs -text
|
92 |
+
onnx__MatMul_5041 filter=lfs diff=lfs merge=lfs -text
|
93 |
+
onnx__MatMul_5564 filter=lfs diff=lfs merge=lfs -text
|
94 |
+
onnx__MatMul_5393 filter=lfs diff=lfs merge=lfs -text
|
95 |
+
onnx__MatMul_5104 filter=lfs diff=lfs merge=lfs -text
|
96 |
+
onnx__MatMul_5158 filter=lfs diff=lfs merge=lfs -text
|
97 |
+
onnx__MatMul_4989 filter=lfs diff=lfs merge=lfs -text
|
98 |
+
onnx__MatMul_5394 filter=lfs diff=lfs merge=lfs -text
|
99 |
+
onnx__MatMul_5447 filter=lfs diff=lfs merge=lfs -text
|
100 |
+
onnx__MatMul_5076 filter=lfs diff=lfs merge=lfs -text
|
101 |
+
onnx__MatMul_5568 filter=lfs diff=lfs merge=lfs -text
|
102 |
+
onnx__MatMul_5188 filter=lfs diff=lfs merge=lfs -text
|
103 |
+
onnx__MatMul_5539 filter=lfs diff=lfs merge=lfs -text
|
104 |
+
onnx__MatMul_5538 filter=lfs diff=lfs merge=lfs -text
|
105 |
+
onnx__MatMul_5159 filter=lfs diff=lfs merge=lfs -text
|
106 |
+
onnx__MatMul_5453 filter=lfs diff=lfs merge=lfs -text
|
107 |
+
onnx__MatMul_5478 filter=lfs diff=lfs merge=lfs -text
|
108 |
+
onnx__MatMul_4983 filter=lfs diff=lfs merge=lfs -text
|
109 |
+
onnx__MatMul_5334 filter=lfs diff=lfs merge=lfs -text
|
110 |
+
onnx__MatMul_5302 filter=lfs diff=lfs merge=lfs -text
|
111 |
+
onnx__MatMul_5362 filter=lfs diff=lfs merge=lfs -text
|
112 |
+
onnx__MatMul_5018 filter=lfs diff=lfs merge=lfs -text
|
113 |
+
onnx__MatMul_5508 filter=lfs diff=lfs merge=lfs -text
|
114 |
+
onnx__MatMul_5333 filter=lfs diff=lfs merge=lfs -text
|
115 |
+
onnx__MatMul_5275 filter=lfs diff=lfs merge=lfs -text
|
116 |
+
onnx__MatMul_5133 filter=lfs diff=lfs merge=lfs -text
|
117 |
+
onnx__MatMul_5105 filter=lfs diff=lfs merge=lfs -text
|
118 |
+
onnx__MatMul_4956 filter=lfs diff=lfs merge=lfs -text
|
119 |
+
onnx__MatMul_5249 filter=lfs diff=lfs merge=lfs -text
|
120 |
+
onnx__MatMul_5305 filter=lfs diff=lfs merge=lfs -text
|
121 |
+
onnx__MatMul_5102 filter=lfs diff=lfs merge=lfs -text
|
122 |
+
onnx__MatMul_5509 filter=lfs diff=lfs merge=lfs -text
|
123 |
+
onnx__MatMul_5128 filter=lfs diff=lfs merge=lfs -text
|
124 |
+
onnx__MatMul_5220 filter=lfs diff=lfs merge=lfs -text
|
125 |
+
onnx__MatMul_4960 filter=lfs diff=lfs merge=lfs -text
|
126 |
+
onnx__MatMul_5245 filter=lfs diff=lfs merge=lfs -text
|
127 |
+
onnx__MatMul_5017 filter=lfs diff=lfs merge=lfs -text
|
128 |
+
onnx__MatMul_5215 filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_attn_implementation_autoset": true,
|
3 |
+
"_name_or_path": "numind/NuExtract-1.5-smol",
|
4 |
+
"architectures": [
|
5 |
+
"LlamaForCausalLM"
|
6 |
+
],
|
7 |
+
"attention_bias": false,
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"bos_token_id": 0,
|
10 |
+
"eos_token_id": 0,
|
11 |
+
"head_dim": 64,
|
12 |
+
"hidden_act": "silu",
|
13 |
+
"hidden_size": 2048,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 8192,
|
16 |
+
"max_position_embeddings": 8192,
|
17 |
+
"mlp_bias": false,
|
18 |
+
"model_type": "llama",
|
19 |
+
"num_attention_heads": 32,
|
20 |
+
"num_hidden_layers": 24,
|
21 |
+
"num_key_value_heads": 32,
|
22 |
+
"pretraining_tp": 1,
|
23 |
+
"rms_norm_eps": 1e-05,
|
24 |
+
"rope_scaling": null,
|
25 |
+
"rope_theta": 130000,
|
26 |
+
"tie_word_embeddings": true,
|
27 |
+
"transformers_version": "4.46.2",
|
28 |
+
"use_cache": true,
|
29 |
+
"vocab_size": 49152
|
30 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 0,
|
4 |
+
"eos_token_id": 0,
|
5 |
+
"pad_token_id": 0,
|
6 |
+
"transformers_version": "4.46.2"
|
7 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.layers.0.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.0.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.1.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.1.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.10.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.10.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.11.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.11.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.12.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.12.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.13.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.13.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.14.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.14.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.15.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.15.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.16.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.16.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.17.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.17.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.18.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.18.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.19.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.19.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.2.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.2.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.20.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.20.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.21.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.21.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.22.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.22.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.23.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.23.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.3.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.3.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.4.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.4.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.5.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.5.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.6.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.6.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.7.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.7.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.8.input_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|
model.layers.8.post_attention_layernorm.weight
ADDED
Binary file (8.19 kB). View file
|
|