philipp-zettl committed
Commit 41776c6
1 Parent(s): ff925fd

Add files using upload-large-folder tool

This view is limited to 50 files because the commit contains too many changes.
Files changed (50):
  1. .gitattributes +93 -0
  2. config.json +30 -0
  3. generation_config.json +7 -0
  4. merges.txt +0 -0
  5. model.layers.0.input_layernorm.weight +0 -0
  6. model.layers.0.post_attention_layernorm.weight +0 -0
  7. model.layers.1.input_layernorm.weight +0 -0
  8. model.layers.1.post_attention_layernorm.weight +0 -0
  9. model.layers.10.input_layernorm.weight +0 -0
  10. model.layers.10.post_attention_layernorm.weight +0 -0
  11. model.layers.11.input_layernorm.weight +0 -0
  12. model.layers.11.post_attention_layernorm.weight +0 -0
  13. model.layers.12.input_layernorm.weight +0 -0
  14. model.layers.12.post_attention_layernorm.weight +0 -0
  15. model.layers.13.input_layernorm.weight +0 -0
  16. model.layers.13.post_attention_layernorm.weight +0 -0
  17. model.layers.14.input_layernorm.weight +0 -0
  18. model.layers.14.post_attention_layernorm.weight +0 -0
  19. model.layers.15.input_layernorm.weight +0 -0
  20. model.layers.15.post_attention_layernorm.weight +0 -0
  21. model.layers.16.input_layernorm.weight +0 -0
  22. model.layers.16.post_attention_layernorm.weight +0 -0
  23. model.layers.17.input_layernorm.weight +0 -0
  24. model.layers.17.post_attention_layernorm.weight +0 -0
  25. model.layers.18.input_layernorm.weight +0 -0
  26. model.layers.18.post_attention_layernorm.weight +0 -0
  27. model.layers.19.input_layernorm.weight +0 -0
  28. model.layers.19.post_attention_layernorm.weight +0 -0
  29. model.layers.2.input_layernorm.weight +0 -0
  30. model.layers.2.post_attention_layernorm.weight +0 -0
  31. model.layers.20.input_layernorm.weight +0 -0
  32. model.layers.20.post_attention_layernorm.weight +0 -0
  33. model.layers.21.input_layernorm.weight +0 -0
  34. model.layers.21.post_attention_layernorm.weight +0 -0
  35. model.layers.22.input_layernorm.weight +0 -0
  36. model.layers.22.post_attention_layernorm.weight +0 -0
  37. model.layers.23.input_layernorm.weight +0 -0
  38. model.layers.23.post_attention_layernorm.weight +0 -0
  39. model.layers.3.input_layernorm.weight +0 -0
  40. model.layers.3.post_attention_layernorm.weight +0 -0
  41. model.layers.4.input_layernorm.weight +0 -0
  42. model.layers.4.post_attention_layernorm.weight +0 -0
  43. model.layers.5.input_layernorm.weight +0 -0
  44. model.layers.5.post_attention_layernorm.weight +0 -0
  45. model.layers.6.input_layernorm.weight +0 -0
  46. model.layers.6.post_attention_layernorm.weight +0 -0
  47. model.layers.7.input_layernorm.weight +0 -0
  48. model.layers.7.post_attention_layernorm.weight +0 -0
  49. model.layers.8.input_layernorm.weight +0 -0
  50. model.layers.8.post_attention_layernorm.weight +0 -0
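
The commit message notes these files were pushed with Hugging Face's upload-large-folder tool, which is the usual way to publish repositories with many large binaries. A minimal sketch of that kind of upload via the huggingface_hub Python API; the repo id and local folder below are placeholders, not taken from this page:

    from huggingface_hub import HfApi

    # upload_large_folder splits the work into resumable chunked commits,
    # which is why repos with hundreds of large files are pushed with it.
    api = HfApi()
    api.upload_large_folder(
        repo_id="user/repo",     # placeholder: the actual repo id is not shown here
        repo_type="model",
        folder_path="./export",  # placeholder: local folder holding the files above
    )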
.gitattributes CHANGED
@@ -33,3 +33,96 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5365 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5335 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5190 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5244 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5248 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5598 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5103 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_4930 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5163 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_4954 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5596 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5278 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5044 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5597 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_4957 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5336 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5481 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5221 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_4959 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5250 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5418 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5186 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5366 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5277 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5129 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5015 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5131 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5043 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5477 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5247 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5191 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5074 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5511 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5274 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5218 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5534 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5624 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_4984 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5479 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5189 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5625 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5623 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5507 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5482 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_4987 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5279 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5592 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5451 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5042 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5161 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5192 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5099 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5391 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5422 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5506 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5361 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5041 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5564 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5393 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5104 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5158 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_4989 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5394 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5447 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5076 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5568 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5188 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5539 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5538 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5159 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5453 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5478 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_4983 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5334 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5302 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5362 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5018 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5508 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5333 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5275 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5133 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5105 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_4956 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5249 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5305 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5102 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5509 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5128 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5220 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_4960 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5245 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5017 filter=lfs diff=lfs merge=lfs -text
+ onnx__MatMul_5215 filter=lfs diff=lfs merge=lfs -text
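
The extensionless onnx__MatMul_* entries are ONNX external-data files: when a model is exported with its tensors stored externally, each large initializer is written to a separate file named after the tensor, and because those names carry no extension they must be listed one by one in .gitattributes for Git LFS to track them. A minimal loading sketch with onnxruntime, assuming the graph itself lives in a model.onnx file that falls outside this 50-file view:

    import onnxruntime as ort

    # ONNX Runtime resolves external-data tensors (onnx__MatMul_*, the raw
    # layernorm weight files, etc.) relative to the model file, so the whole
    # repository folder must be present locally.
    session = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])
    print([inp.name for inp in session.get_inputs()])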
config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "_attn_implementation_autoset": true,
+   "_name_or_path": "numind/NuExtract-1.5-smol",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "eos_token_id": 0,
+   "head_dim": 64,
+   "hidden_act": "silu",
+   "hidden_size": 2048,
+   "initializer_range": 0.02,
+   "intermediate_size": 8192,
+   "max_position_embeddings": 8192,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 24,
+   "num_key_value_heads": 32,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 130000,
+   "tie_word_embeddings": true,
+   "transformers_version": "4.46.2",
+   "use_cache": true,
+   "vocab_size": 49152
+ }
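
The config describes a 24-layer Llama-architecture model with a 2048-wide hidden state, full multi-head attention (num_key_value_heads equals num_attention_heads, so no GQA), and tied input/output embeddings, derived from numind/NuExtract-1.5-smol. The same configuration can be rebuilt programmatically; every value below is copied verbatim from the diff (head_dim as an explicit argument needs a reasonably recent transformers release):

    from transformers import LlamaConfig

    config = LlamaConfig(
        hidden_size=2048,
        intermediate_size=8192,
        num_hidden_layers=24,
        num_attention_heads=32,
        num_key_value_heads=32,   # equal to num_attention_heads: plain MHA
        head_dim=64,              # 2048 / 32
        hidden_act="silu",
        max_position_embeddings=8192,
        rms_norm_eps=1e-05,
        rope_theta=130000,
        tie_word_embeddings=True,
        vocab_size=49152,
        bos_token_id=0,
        eos_token_id=0,
    )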
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 0,
+   "eos_token_id": 0,
+   "pad_token_id": 0,
+   "transformers_version": "4.46.2"
+ }
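
generation_config.json mirrors the special-token ids from config.json, with token id 0 serving as BOS, EOS, and PAD at once. The equivalent in code:

    from transformers import GenerationConfig

    # Token id 0 does triple duty, as in the JSON above.
    gen_config = GenerationConfig(bos_token_id=0, eos_token_id=0, pad_token_id=0)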
merges.txt ADDED
The diff for this file is too large to render.
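
merges.txt is one half of a GPT-2-style byte-level BPE tokenizer; its companion vocab.json presumably falls outside the 50-file view. Assuming this repo reuses the tokenizer of the source model named in config.json, it can be loaded directly:

    from transformers import AutoTokenizer

    # Assumption: the tokenizer is unchanged from the base model.
    tok = AutoTokenizer.from_pretrained("numind/NuExtract-1.5-smol")
    print(tok.vocab_size)  # expected: 49152, matching config.json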
 
model.layers.0.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.0.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.1.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.1.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.10.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.10.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.11.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.11.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.12.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.12.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.13.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.13.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.14.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.14.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.15.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.15.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.16.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.16.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.17.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.17.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.18.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.18.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.19.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.19.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.2.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.2.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.20.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.20.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.21.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.21.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.22.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.22.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.23.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.23.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.3.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.3.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.4.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.4.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.5.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.5.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.6.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.6.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.7.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.7.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.8.input_layernorm.weight ADDED
Binary file (8.19 kB).

model.layers.8.post_attention_layernorm.weight ADDED
Binary file (8.19 kB).
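
Each of these weight files is 8.19 kB: exactly 2048 float32 values (2048 × 4 bytes), i.e., one RMSNorm scale vector per file, matching hidden_size = 2048 in config.json. Assuming the files are raw little-endian float32 buffers with no header, which is how ONNX external data is laid out, one can be inspected like this:

    import numpy as np

    # Assumption: raw little-endian float32, no header (ONNX external-data layout).
    w = np.fromfile("model.layers.0.input_layernorm.weight", dtype="<f4")
    print(w.shape)  # expected: (2048,)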