wissamantoun committed
Commit 76ae46f
1 parent: 18d0924

Update run_pretraining.py and configuration_aragpt2.py

Files changed (2)
  1. README.md +19 -1
  2. configuration_aragpt2.py +1 -1
README.md CHANGED
@@ -86,7 +86,25 @@ python create_pretraining_data.py
 
 Finetuning:
 ```bash
-python3 run_pretraining.py \\r\n --input_file="gs://<GS_BUCKET>/pretraining_data/*" \\r\n --output_dir="gs://<GS_BUCKET>/pretraining_model/" \\r\n --config_file="config/small_hparams.json" \\r\n --batch_size=128 \\r\n --eval_batch_size=8 \\r\n --num_train_steps= \\r\n --num_warmup_steps= \\r\n --learning_rate= \\r\n --save_checkpoints_steps= \\r\n --max_seq_length=1024 \\r\n --max_eval_steps= \\r\n --optimizer="lamb" \\r\n --iterations_per_loop=5000 \\r\n --keep_checkpoint_max=10 \\r\n --use_tpu=True \\r\n --tpu_name=<TPU NAME> \\r\n --do_train=True \\r\n --do_eval=False
+python3 run_pretraining.py \
+ --input_file="gs://<GS_BUCKET>/pretraining_data/*" \
+ --output_dir="gs://<GS_BUCKET>/pretraining_model/" \
+ --config_file="config/small_hparams.json" \
+ --batch_size=128 \
+ --eval_batch_size=8 \
+ --num_train_steps= \
+ --num_warmup_steps= \
+ --learning_rate= \
+ --save_checkpoints_steps= \
+ --max_seq_length=1024 \
+ --max_eval_steps= \
+ --optimizer="lamb" \
+ --iterations_per_loop=5000 \
+ --keep_checkpoint_max=10 \
+ --use_tpu=True \
+ --tpu_name=<TPU NAME> \
+ --do_train=True \
+ --do_eval=False
 ```
 # Model Sizes
 
 
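The README change restores the backslash line continuations that had been flattened into literal `\r\n` text, so the finetuning command now renders and pastes correctly. For readers wiring this up, below is a minimal sketch of how the `--input_file` glob is typically expanded in BERT-style pretraining scripts; the use of `tf.io.gfile` here is an assumption for illustration, not code quoted from this repo's run_pretraining.py:

```python
# Minimal sketch, assuming run_pretraining.py expands comma-separated
# GCS glob patterns the way BERT-style scripts do (via tf.io.gfile).
import tensorflow as tf

# Replace <GS_BUCKET> with your actual bucket name before running.
input_file = "gs://<GS_BUCKET>/pretraining_data/*"

input_files = []
for pattern in input_file.split(","):
    # tf.io.gfile.glob resolves both local and gs:// paths.
    input_files.extend(tf.io.gfile.glob(pattern))

print(f"Found {len(input_files)} input shard(s)")
```
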
configuration_aragpt2.py CHANGED
@@ -131,7 +131,7 @@ class AraGPT2Config(PretrainedConfig):
         n_layer=12,
         n_head=12,
         n_inner=None,
-        activation_function="gelu",
+        activation_function="gelu_new",
         resid_pdrop=0.1,
         embd_pdrop=0.1,
         attn_pdrop=0.1,
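
For context on the one-line config fix: `gelu_new` is the tanh-approximated GELU that GPT-2 uses, while `gelu` is the exact erf-based form. A standalone sketch of the two variants, with illustrative function names:

```python
import math

def gelu(x: float) -> float:
    # Exact GELU: x * Phi(x), where Phi is the standard normal CDF.
    return 0.5 * x * (1.0 + math.erf(x / math.sqrt(2.0)))

def gelu_new(x: float) -> float:
    # GPT-2's tanh approximation ("gelu_new" in transformers).
    return 0.5 * x * (1.0 + math.tanh(
        math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)
    ))

# The two curves are close but not identical, so the config string
# must match the activation the checkpoint was trained with.
print(gelu(1.0), gelu_new(1.0))  # ~0.8413 vs ~0.8412
```

Loading a checkpoint under the wrong activation string raises no error and just silently degrades outputs, which is presumably why this one-line correction matters.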