File size: 8,852 Bytes
882ad6b
0500801
882ad6b
9b11711
 
 
e7a3e8c
9b11711
763cb78
77747ed
d29d252
dffd22c
124bc08
763cb78
 
4900d82
066ec75
763cb78
e7a3e8c
763cb78
4c8feda
7a4d37c
124bc08
 
066ec75
dffd22c
5e283bc
3978f38
3b1bd74
4d733e3
124bc08
3b1bd74
 
066ec75
9b11711
 
7614503
22cec3a
e7a3e8c
7614503
bf7077a
9b11711
bf7077a
9b11711
9e7d98d
bf7077a
763cb78
6a2c2f5
adcdd21
124bc08
e7a3e8c
3978f38
9e7d98d
bf7077a
7614503
9b11711
f78bde8
124bc08
066ec75
dffd22c
5e283bc
124bc08
763cb78
878a23e
9b11711
124bc08
dffd22c
124bc08
bf7077a
18dc13d
cde4c0e
bf7077a
124bc08
 
e7a3e8c
763cb78
 
e7a3e8c
9b11711
124bc08
 
25558c6
3b1bd74
b1416f7
5905770
124bc08
763cb78
18dc13d
a3aca97
7614503
066ec75
9b11711
9e7d98d
bf7077a
b1416f7
9b11711
680c5b3
5e283bc
18dc13d
124bc08
9b11711
a3aca97
bf7077a
3b1bd74
5e283bc
9e7d98d
763cb78
3b1bd74
dffd22c
763cb78
066ec75
e7a3e8c
763cb78
124bc08
 
7614503
dffd22c
b1416f7
e7a3e8c
066ec75
dffd22c
3b1bd74
bf7077a
 
7614503
9b11711
dffd22c
e7a3e8c
 
066ec75
 
763cb78
bf7077a
066ec75
9b11711
5e283bc
e7a3e8c
124bc08
9b11711
124bc08
763cb78
066ec75
763cb78
9b11711
763cb78
e7a3e8c
b7c8840
3b1bd74
066ec75
bf7077a
680c5b3
9e7d98d
5905770
124bc08
763cb78
 
124bc08
8aafe12
763cb78
dffd22c
9e7d98d
b1416f7
7614503
124bc08
5e283bc
763cb78
066ec75
124bc08
64e2e1c
9b11711
763cb78
481f400
ef9535d
066ec75
763cb78
7614503
124bc08
9b11711
763cb78
3b1bd74
bf7077a
 
3b1bd74
124bc08
a3aca97
9e7d98d
5338e46
b1416f7
a3aca97
7614503
066ec75
124bc08
b1416f7
124bc08
 
615674d
b1416f7
 
3b1bd74
7614503
5e283bc
b1416f7
 
dffd22c
124bc08
d29d252
124bc08
5e283bc
e7a3e8c
763cb78
22cec3a
3978f38
763cb78
3b1bd74
763cb78
bf7077a
124bc08
 
3b1bd74
e7a3e8c
10ef4c6
b1416f7
3b1bd74
e7a3e8c
bf7077a
124bc08
7614503
5e283bc
763cb78
3b1bd74
124bc08
066ec75
763cb78
481f400
124bc08
763cb78
3978f38
f78bde8
b1416f7
124bc08
b1416f7
066ec75
763cb78
 
e7a3e8c
 
3b1bd74
8aafe12
481f400
 
763cb78
124bc08
b1416f7
118652f
763cb78
 
7614503
b1416f7
e7a3e8c
124bc08
9b11711
882ad6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
{
  "_name_or_path": "distributed/optimized-gpt2-1b",
  "activation_function": "gelu_new",
  "all_reduce_scores": {
    "0": "NON_PARTICIPATING",
    "1": "NON_PARTICIPATING",
    "10": "NON_PARTICIPATING",
    "100": "NON_PARTICIPATING",
    "101": "NON_PARTICIPATING",
    "102": "NON_PARTICIPATING",
    "103": "NON_PARTICIPATING",
    "104": "NON_PARTICIPATING",
    "105": "SUCCESS",
    "106": "NON_PARTICIPATING",
    "107": "NON_PARTICIPATING",
    "108": "NON_PARTICIPATING",
    "109": "NON_PARTICIPATING",
    "11": "NON_PARTICIPATING",
    "110": "NON_PARTICIPATING",
    "111": "NON_PARTICIPATING",
    "112": "NON_PARTICIPATING",
    "113": "NON_PARTICIPATING",
    "114": "NON_PARTICIPATING",
    "115": "SUCCESS",
    "116": "NON_PARTICIPATING",
    "117": "NON_PARTICIPATING",
    "118": "NON_PARTICIPATING",
    "119": "NON_PARTICIPATING",
    "12": "NON_PARTICIPATING",
    "120": "NON_PARTICIPATING",
    "121": "NON_PARTICIPATING",
    "122": "NON_PARTICIPATING",
    "123": "NON_PARTICIPATING",
    "124": "NON_PARTICIPATING",
    "125": "NON_PARTICIPATING",
    "126": "NON_PARTICIPATING",
    "127": "NON_PARTICIPATING",
    "128": "NON_PARTICIPATING",
    "129": "NON_PARTICIPATING",
    "13": "NON_PARTICIPATING",
    "130": "NON_PARTICIPATING",
    "131": "NON_PARTICIPATING",
    "132": "NON_PARTICIPATING",
    "133": "NON_PARTICIPATING",
    "134": "NON_PARTICIPATING",
    "135": "NON_PARTICIPATING",
    "136": "NON_PARTICIPATING",
    "137": "NON_PARTICIPATING",
    "138": "NON_PARTICIPATING",
    "139": "SUCCESS",
    "14": "NON_PARTICIPATING",
    "140": "NON_PARTICIPATING",
    "141": "NON_PARTICIPATING",
    "142": "NON_PARTICIPATING",
    "143": "NON_PARTICIPATING",
    "144": "NON_PARTICIPATING",
    "145": "NON_PARTICIPATING",
    "146": "SUCCESS",
    "147": "NON_PARTICIPATING",
    "148": "NON_PARTICIPATING",
    "149": "NON_PARTICIPATING",
    "15": "SUCCESS",
    "150": "NON_PARTICIPATING",
    "151": "NON_PARTICIPATING",
    "152": "NON_PARTICIPATING",
    "153": "SUCCESS",
    "154": "NON_PARTICIPATING",
    "155": "SUCCESS",
    "156": "NON_PARTICIPATING",
    "157": "NON_PARTICIPATING",
    "158": "NON_PARTICIPATING",
    "159": "NON_PARTICIPATING",
    "16": "SUCCESS",
    "160": "NON_PARTICIPATING",
    "161": "NON_PARTICIPATING",
    "162": "NON_PARTICIPATING",
    "163": "NON_PARTICIPATING",
    "164": "NON_PARTICIPATING",
    "165": "NON_PARTICIPATING",
    "166": "SUCCESS",
    "167": "NON_PARTICIPATING",
    "168": "NON_PARTICIPATING",
    "169": "SUCCESS",
    "17": "NON_PARTICIPATING",
    "170": "NON_PARTICIPATING",
    "171": "SUCCESS",
    "172": "NON_PARTICIPATING",
    "173": "NON_PARTICIPATING",
    "174": "NON_PARTICIPATING",
    "175": "NON_PARTICIPATING",
    "176": "NON_PARTICIPATING",
    "177": "NON_PARTICIPATING",
    "178": "NON_PARTICIPATING",
    "179": "NON_PARTICIPATING",
    "18": "NON_PARTICIPATING",
    "180": "NON_PARTICIPATING",
    "181": "NON_PARTICIPATING",
    "182": "NON_PARTICIPATING",
    "183": "NON_PARTICIPATING",
    "184": "NON_PARTICIPATING",
    "185": "NON_PARTICIPATING",
    "186": "NON_PARTICIPATING",
    "187": "NON_PARTICIPATING",
    "188": "NON_PARTICIPATING",
    "189": "NON_PARTICIPATING",
    "19": "NON_PARTICIPATING",
    "190": "NON_PARTICIPATING",
    "191": "NON_PARTICIPATING",
    "192": "NON_PARTICIPATING",
    "193": "NON_PARTICIPATING",
    "194": "NON_PARTICIPATING",
    "195": "NON_PARTICIPATING",
    "196": "NON_PARTICIPATING",
    "197": "SUCCESS",
    "198": "NON_PARTICIPATING",
    "199": "NON_PARTICIPATING",
    "2": "NON_PARTICIPATING",
    "20": "NON_PARTICIPATING",
    "200": "NON_PARTICIPATING",
    "201": "NON_PARTICIPATING",
    "202": "NON_PARTICIPATING",
    "203": "SUCCESS",
    "204": "NON_PARTICIPATING",
    "205": "NON_PARTICIPATING",
    "206": "NON_PARTICIPATING",
    "207": "NON_PARTICIPATING",
    "208": "NON_PARTICIPATING",
    "209": "NON_PARTICIPATING",
    "21": "NON_PARTICIPATING",
    "210": "NON_PARTICIPATING",
    "211": "NON_PARTICIPATING",
    "212": "NON_PARTICIPATING",
    "213": "NON_PARTICIPATING",
    "214": "NON_PARTICIPATING",
    "215": "NON_PARTICIPATING",
    "216": "NON_PARTICIPATING",
    "217": "NON_PARTICIPATING",
    "218": "SUCCESS",
    "219": "NON_PARTICIPATING",
    "22": "SUCCESS",
    "220": "NON_PARTICIPATING",
    "221": "NON_PARTICIPATING",
    "222": "NON_PARTICIPATING",
    "223": "NON_PARTICIPATING",
    "224": "NON_PARTICIPATING",
    "225": "NON_PARTICIPATING",
    "226": "NON_PARTICIPATING",
    "227": "NON_PARTICIPATING",
    "228": "NON_PARTICIPATING",
    "229": "NON_PARTICIPATING",
    "23": "NON_PARTICIPATING",
    "230": "NON_PARTICIPATING",
    "231": "NON_PARTICIPATING",
    "232": "NON_PARTICIPATING",
    "233": "NON_PARTICIPATING",
    "234": "NON_PARTICIPATING",
    "235": "NON_PARTICIPATING",
    "236": "NON_PARTICIPATING",
    "237": "NON_PARTICIPATING",
    "238": "NON_PARTICIPATING",
    "239": "NON_PARTICIPATING",
    "24": "NON_PARTICIPATING",
    "240": "NON_PARTICIPATING",
    "241": "SUCCESS",
    "242": "NON_PARTICIPATING",
    "243": "NON_PARTICIPATING",
    "244": "NON_PARTICIPATING",
    "245": "NON_PARTICIPATING",
    "246": "NON_PARTICIPATING",
    "247": "NON_PARTICIPATING",
    "248": "NON_PARTICIPATING",
    "249": "NON_PARTICIPATING",
    "25": "SUCCESS",
    "250": "NON_PARTICIPATING",
    "251": "NON_PARTICIPATING",
    "252": "NON_PARTICIPATING",
    "253": "NON_PARTICIPATING",
    "254": "NON_PARTICIPATING",
    "255": "NON_PARTICIPATING",
    "26": "NON_PARTICIPATING",
    "27": "NON_PARTICIPATING",
    "28": "NON_PARTICIPATING",
    "29": "NON_PARTICIPATING",
    "3": "NON_PARTICIPATING",
    "30": "NON_PARTICIPATING",
    "31": "NON_PARTICIPATING",
    "32": "NON_PARTICIPATING",
    "33": "NON_PARTICIPATING",
    "34": "NON_PARTICIPATING",
    "35": "NON_PARTICIPATING",
    "36": "NON_PARTICIPATING",
    "37": "SUCCESS",
    "38": "NON_PARTICIPATING",
    "39": "SUCCESS",
    "4": "SUCCESS",
    "40": "NON_PARTICIPATING",
    "41": "NON_PARTICIPATING",
    "42": "NON_PARTICIPATING",
    "43": "NON_PARTICIPATING",
    "44": "NON_PARTICIPATING",
    "45": "NON_PARTICIPATING",
    "46": "NON_PARTICIPATING",
    "47": "NON_PARTICIPATING",
    "48": "NON_PARTICIPATING",
    "49": "NON_PARTICIPATING",
    "5": "NON_PARTICIPATING",
    "50": "SUCCESS",
    "51": "NON_PARTICIPATING",
    "52": "NON_PARTICIPATING",
    "53": "NON_PARTICIPATING",
    "54": "NON_PARTICIPATING",
    "55": "NON_PARTICIPATING",
    "56": "NON_PARTICIPATING",
    "57": "SUCCESS",
    "58": "NON_PARTICIPATING",
    "59": "NON_PARTICIPATING",
    "6": "NON_PARTICIPATING",
    "60": "NON_PARTICIPATING",
    "61": "NON_PARTICIPATING",
    "62": "NON_PARTICIPATING",
    "63": "NON_PARTICIPATING",
    "64": "NON_PARTICIPATING",
    "65": "SUCCESS",
    "66": "NON_PARTICIPATING",
    "67": "NON_PARTICIPATING",
    "68": "SUCCESS",
    "69": "NON_PARTICIPATING",
    "7": "NON_PARTICIPATING",
    "70": "NON_PARTICIPATING",
    "71": "NON_PARTICIPATING",
    "72": "SUCCESS",
    "73": "SUCCESS",
    "74": "NON_PARTICIPATING",
    "75": "NON_PARTICIPATING",
    "76": "SUCCESS",
    "77": "NON_PARTICIPATING",
    "78": "NON_PARTICIPATING",
    "79": "NON_PARTICIPATING",
    "8": "NON_PARTICIPATING",
    "80": "SUCCESS",
    "81": "NON_PARTICIPATING",
    "82": "NON_PARTICIPATING",
    "83": "NON_PARTICIPATING",
    "84": "NON_PARTICIPATING",
    "85": "NON_PARTICIPATING",
    "86": "NON_PARTICIPATING",
    "87": "NON_PARTICIPATING",
    "88": "NON_PARTICIPATING",
    "89": "NON_PARTICIPATING",
    "9": "NON_PARTICIPATING",
    "90": "NON_PARTICIPATING",
    "91": "SUCCESS",
    "92": "NON_PARTICIPATING",
    "93": "NON_PARTICIPATING",
    "94": "NON_PARTICIPATING",
    "95": "NON_PARTICIPATING",
    "96": "NON_PARTICIPATING",
    "97": "NON_PARTICIPATING",
    "98": "NON_PARTICIPATING",
    "99": "SUCCESS"
  },
  "architectures": [
    "GPTOptim"
  ],
  "attn_pdrop": 0.1,
  "auto_map": {
    "AutoConfig": "distributed/optimized-gpt2-500m--configuration_gpt_optimized.GPTOptimConfig",
    "AutoModelForCausalLM": "distributed/optimized-gpt2-500m--modeling_gpt_optimized.GPTOptim"
  },
  "block_size": 1024,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt_optimized",
  "n_embd": 1280,
  "n_head": 32,
  "n_inner": null,
  "n_layer": 48,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "4.39.3",
  "use_cache": true,
  "vocab_size": 50257
}