raygx committed on
Commit
047a25b
1 Parent(s): 2a58e25

Upload model

Browse files
Files changed (4)
  1. README.md +8 -6
  2. config.json +4 -8
  3. generation_config.json +3 -3
  4. tf_model.h5 +2 -2
README.md CHANGED
@@ -1,17 +1,19 @@
1
  ---
 
 
2
  tags:
3
  - generated_from_keras_callback
4
  model-index:
5
- - name: distilGPTBhai
6
  results: []
7
  ---
8
 
9
  <!-- This model card has been generated automatically according to the information Keras had access to. You should
10
  probably proofread and complete it, then remove this comment. -->
11
 
12
- # distilGPTBhai
13
 
14
- This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
 
17
 
@@ -32,8 +34,8 @@ More information needed
32
  ### Training hyperparameters
33
 
34
  The following hyperparameters were used during training:
35
- - optimizer: {'name': 'AdamWeightDecay', 'learning_rate': {'class_name': 'WarmUp', 'config': {'initial_learning_rate': 5e-05, 'decay_schedule_fn': {'class_name': 'PolynomialDecay', 'config': {'initial_learning_rate': 5e-05, 'decay_steps': 7623, 'end_learning_rate': 0.0, 'power': 1.0, 'cycle': False, 'name': None}, '__passive_serialization__': True}, 'warmup_steps': 1000, 'power': 1.0, 'name': None}}, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08, 'amsgrad': False, 'weight_decay_rate': 0.01}
36
- - training_precision: float32
37
 
38
  ### Training results
39
 
@@ -43,5 +45,5 @@ The following hyperparameters were used during training:
43
 
44
  - Transformers 4.31.0
45
  - TensorFlow 2.12.0
46
- - Datasets 2.13.1
47
  - Tokenizers 0.13.3
 
1
  ---
2
+ license: apache-2.0
3
+ base_model: distilgpt2
4
  tags:
5
  - generated_from_keras_callback
6
  model-index:
7
+ - name: distilGPT-Nepali
8
  results: []
9
  ---
10
 
11
  <!-- This model card has been generated automatically according to the information Keras had access to. You should
12
  probably proofread and complete it, then remove this comment. -->
13
 
14
+ # distilGPT-Nepali
15
 
16
+ This model is a fine-tuned version of [distilgpt2](https://huggingface.co/distilgpt2) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
 
19
 
 
34
  ### Training hyperparameters
35
 
36
  The following hyperparameters were used during training:
37
+ - optimizer: {'name': 'AdamWeightDecay', 'learning_rate': {'class_name': 'WarmUp', 'config': {'initial_learning_rate': 2e-05, 'decay_schedule_fn': {'class_name': 'PolynomialDecay', 'config': {'initial_learning_rate': 2e-05, 'decay_steps': 41670, 'end_learning_rate': 0.0, 'power': 1.0, 'cycle': False, 'name': None}, '__passive_serialization__': True}, 'warmup_steps': 1000, 'power': 1.0, 'name': None}}, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08, 'amsgrad': False, 'weight_decay_rate': 0.02}
38
+ - training_precision: mixed_bfloat16
39
 
40
  ### Training results
41
 
 
45
 
46
  - Transformers 4.31.0
47
  - TensorFlow 2.12.0
48
+ - Datasets 2.14.4
49
  - Tokenizers 0.13.3
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "raygx/distilGPTBhai",
3
  "_num_labels": 1,
4
  "activation_function": "gelu_new",
5
  "architectures": [
@@ -10,21 +10,17 @@
10
  "embd_pdrop": 0.1,
11
  "eos_token_id": 0,
12
  "id2label": {
13
- "0": "NEUTRAL",
14
- "1": "POSITIVE",
15
- "2": "NEGATIVE"
16
  },
17
  "initializer_range": 0.02,
18
  "label2id": {
19
- "NEGATIVE": 2,
20
- "NEUTRAL": 0,
21
- "POSITIVE": 1
22
  },
23
  "layer_norm_epsilon": 1e-05,
24
  "model_type": "gpt2",
25
  "n_ctx": 1024,
26
  "n_embd": 768,
27
- "n_head": 6,
28
  "n_inner": null,
29
  "n_layer": 6,
30
  "n_positions": 1024,
 
1
  {
2
+ "_name_or_path": "distilgpt2",
3
  "_num_labels": 1,
4
  "activation_function": "gelu_new",
5
  "architectures": [
 
10
  "embd_pdrop": 0.1,
11
  "eos_token_id": 0,
12
  "id2label": {
13
+ "0": "LABEL_0"
 
 
14
  },
15
  "initializer_range": 0.02,
16
  "label2id": {
17
+ "LABEL_0": 0
 
 
18
  },
19
  "layer_norm_epsilon": 1e-05,
20
  "model_type": "gpt2",
21
  "n_ctx": 1024,
22
  "n_embd": 768,
23
+ "n_head": 12,
24
  "n_inner": null,
25
  "n_layer": 6,
26
  "n_positions": 1024,
generation_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_from_model_config": true,
3
- "bos_token_id": 1,
4
- "eos_token_id": 2,
5
- "pad_token_id": 3,
6
  "transformers_version": "4.31.0"
7
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 0,
5
+ "pad_token_id": 50002,
6
  "transformers_version": "4.31.0"
7
  }
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9faeaaa945c5f711617c85af1c061d07631d8c1e33ca18435c8ec0a0320de85d
3
- size 326965184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45c7beec991e6a29d32863c136116dd37bed53440e955f44d098994a1f362d3d
3
+ size 326965192