{
"_name_or_path": "./output-small",
"activation_function": "gelu_new",
"architectures": [
"GPT2LMHeadModel"
],
"attn_pdrop": 0.1,
"bos_token_id": 50256,
"embd_pdrop": 0.1,
"eos_token_id": 50256,
"gradient_checkpointing": false,
"initializer_range": 0.02,
"layer_norm_epsilon": 1e-05,
"model_type": "gpt2",
"n_ctx": 1024,
"n_embd": 768,
"n_head": 12,
"n_inner": null,
"n_layer": 12,
"n_positions": 1024,
"reorder_and_upcast_attn": false,
"resid_pdrop": 0.1,
"scale_attn_by_inverse_layer_idx": false,
"scale_attn_weights": true,
"summary_activation": null,
"summary_first_dropout": 0.1,
"summary_proj_to_labels": true,
"summary_type": "cls_index",
"summary_use_proj": true,
"task_specific_params": {
"conversational": {
"do_sample": true,
"max_length": 1000,
"no_repeat_ngram_size": 4,
"temperature": 0.8,
"top_k": 100,
"top_p": 0.7
}
},
"torch_dtype": "float32",
"transformers_version": "4.20.1",
"use_cache": true,
"vocab_size": 50257
}