fcakyon
committed on
Commit
•
a80d4fd
1
Parent(s):
cd4f2dc
initial commit
Browse files- README.md +70 -0
- added_tokens.json +1 -0
- config.json +29 -0
- experiment_config.yaml +107 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- spiece.model +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- training_args.bin +3 -0
README.md
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language: tr
|
3 |
+
datasets:
|
4 |
+
- tquad1
|
5 |
+
- tquad2
|
6 |
+
- xquad
|
7 |
+
tags:
|
8 |
+
- answer-extraction
|
9 |
+
- question-answering
|
10 |
+
- question-generation
|
11 |
+
- text-generation
|
12 |
+
- text2text-generation
|
13 |
+
license: cc-by-4.0
|
14 |
+
---
|
15 |
+
|
16 |
+
# mt5-base for Turkish Question Generation
|
17 |
+
Automated question generation and question answering using text-to-text transformers by OBSS AI.
|
18 |
+
|
19 |
+
```python
|
20 |
+
from core.api import GenerationAPI
|
21 |
+
generation_api = GenerationAPI(model_url_or_path='mt5-base-3task-highlight-combined3')
|
22 |
+
```
|
23 |
+
|
24 |
+
## Overview
|
25 |
+
**Language model:** mt5-base
|
26 |
+
**Language:** Turkish
|
27 |
+
**Downstream-task:** Extractive QA/QG, Answer Extraction
|
28 |
+
**Training data:** TQuADv2-train, TQuADv2-val, XQuAD.tr
|
29 |
+
**Code:** https://github.com/obss/turkish-question-generation
|
30 |
+
**Paper:** https://arxiv.org/abs/2111.06476
|
31 |
+
|
32 |
+
## Hyperparameters
|
33 |
+
```
|
34 |
+
batch_size = 256
|
35 |
+
n_epochs = 15
|
36 |
+
base_LM_model = "mt5-base"
|
37 |
+
max_source_length = 512
|
38 |
+
max_target_length = 64
|
39 |
+
learning_rate = 1.0e-3
|
40 |
+
task_list = ["qa", "qg", "ans_ext"]
|
41 |
+
qg_format = "highlight"
|
42 |
+
```
|
43 |
+
|
44 |
+
## Performance
|
45 |
+
Refer to [paper](https://arxiv.org/abs/2111.06476).
|
46 |
+
|
47 |
+
## Usage 🔥
|
48 |
+
```python
|
49 |
+
from core.api import GenerationAPI
|
50 |
+
generation_api = GenerationAPI('mt5-base-3task-highlight-combined3')
|
51 |
+
|
52 |
+
context = """
|
53 |
+
Bu modelin eğitiminde, Türkçe soru cevap verileri kullanılmıştır.
|
54 |
+
Paylaşılan model kullanılarak, Türkçe metinlerden otomatik olarak soru ve cevap
|
55 |
+
üretilebilir. Bu proje ile paylaşılan kaynak kodu ile Türkçe Soru Üretme
|
56 |
+
/ Soru Cevaplama konularında yeni akademik çalışmalar yapılabilir.
|
57 |
+
Projenin detaylarına paylaşılan Github ve Arxiv linklerinden ulaşılabilir.
|
58 |
+
"""
|
59 |
+
|
60 |
+
# a) Fully Automated Question Generation
|
61 |
+
generation_api(task='question-generation', context=context)
|
62 |
+
|
63 |
+
# b) Question Answering
|
64 |
+
question = "Bu model ne işe yarar?"
|
65 |
+
generation_api(task='question-answering', context=context, question=question)
|
66 |
+
|
67 |
+
# c) Answer Extraction
|
68 |
+
generation_api(task='answer-extraction', context=context)
|
69 |
+
```
|
70 |
+
|
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<sep>": 250100, "<hl>": 250101}
|
config.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "google/mt5-base",
|
3 |
+
"architectures": [
|
4 |
+
"MT5ForConditionalGeneration"
|
5 |
+
],
|
6 |
+
"d_ff": 2048,
|
7 |
+
"d_kv": 64,
|
8 |
+
"d_model": 768,
|
9 |
+
"decoder_start_token_id": 0,
|
10 |
+
"dropout_rate": 0.1,
|
11 |
+
"eos_token_id": 1,
|
12 |
+
"feed_forward_proj": "gated-gelu",
|
13 |
+
"initializer_factor": 1.0,
|
14 |
+
"is_encoder_decoder": true,
|
15 |
+
"layer_norm_epsilon": 1e-06,
|
16 |
+
"model_type": "mt5",
|
17 |
+
"num_decoder_layers": 12,
|
18 |
+
"num_heads": 12,
|
19 |
+
"num_layers": 12,
|
20 |
+
"output_past": true,
|
21 |
+
"pad_token_id": 0,
|
22 |
+
"relative_attention_num_buckets": 32,
|
23 |
+
"tie_word_embeddings": false,
|
24 |
+
"tokenizer_class": "T5Tokenizer",
|
25 |
+
"torch_dtype": "float32",
|
26 |
+
"transformers_version": "4.10.0.dev0",
|
27 |
+
"use_cache": true,
|
28 |
+
"vocab_size": 250102
|
29 |
+
}
|
experiment_config.yaml
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_n_gpu: 1
|
2 |
+
adafactor: false
|
3 |
+
adam_beta1: 0.9
|
4 |
+
adam_beta2: 0.999
|
5 |
+
adam_epsilon: 1.0e-08
|
6 |
+
cache_dir: null
|
7 |
+
dataloader_drop_last: false
|
8 |
+
dataloader_num_workers: 0
|
9 |
+
dataloader_pin_memory: true
|
10 |
+
ddp_find_unused_parameters: null
|
11 |
+
debug: []
|
12 |
+
deepspeed: null
|
13 |
+
disable_tqdm: false
|
14 |
+
do_eval: true
|
15 |
+
do_predict: false
|
16 |
+
do_train: true
|
17 |
+
eval_accumulation_steps: 1
|
18 |
+
eval_dataset_list:
|
19 |
+
- tquad2-valid
|
20 |
+
- xquad.tr
|
21 |
+
eval_steps: 300
|
22 |
+
evaluation_strategy: &id001 !!python/object/apply:transformers.trainer_utils.IntervalStrategy
|
23 |
+
- steps
|
24 |
+
fp16: false
|
25 |
+
fp16_backend: auto
|
26 |
+
fp16_full_eval: false
|
27 |
+
fp16_opt_level: O1
|
28 |
+
freeze_embeddings: false
|
29 |
+
gradient_accumulation_steps: 8
|
30 |
+
greater_is_better: null
|
31 |
+
group_by_length: false
|
32 |
+
ignore_data_skip: false
|
33 |
+
label_names: null
|
34 |
+
label_smoothing_factor: 0
|
35 |
+
learning_rate: 0.001
|
36 |
+
length_column_name: length
|
37 |
+
load_best_model_at_end: false
|
38 |
+
local_rank: -1
|
39 |
+
log_level: -1
|
40 |
+
log_level_replica: -1
|
41 |
+
log_on_each_node: true
|
42 |
+
logging_dir: runs/mt5-base/mt5base-3task-highlight-combined3/runs/Dec02_01-35-48_palamut3.yonetim
|
43 |
+
logging_first_step: false
|
44 |
+
logging_steps: 500
|
45 |
+
logging_strategy: *id001
|
46 |
+
lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
|
47 |
+
- linear
|
48 |
+
max_grad_norm: 1.0
|
49 |
+
max_source_length: 512
|
50 |
+
max_steps: -1
|
51 |
+
max_target_length: 64
|
52 |
+
metric_for_best_model: null
|
53 |
+
model_name_or_path: google/mt5-base
|
54 |
+
model_type: mt5
|
55 |
+
mp_parameters: ''
|
56 |
+
mt5_qg_format: highlight
|
57 |
+
mt5_task_list:
|
58 |
+
- qa
|
59 |
+
- qg
|
60 |
+
- ans_ext
|
61 |
+
neptune_api_token: null
|
62 |
+
neptune_project: null
|
63 |
+
neptune_run: null
|
64 |
+
no_cuda: false
|
65 |
+
num_train_epochs: 15
|
66 |
+
output_dir: runs/mt5-base/mt5base-3task-highlight-combined3
|
67 |
+
overwrite_output_dir: false
|
68 |
+
past_index: -1
|
69 |
+
per_device_eval_batch_size: 32
|
70 |
+
per_device_train_batch_size: 32
|
71 |
+
per_gpu_eval_batch_size: null
|
72 |
+
per_gpu_train_batch_size: null
|
73 |
+
prediction_loss_only: false
|
74 |
+
prepare_data: true
|
75 |
+
push_to_hub: false
|
76 |
+
push_to_hub_model_id: mt5base-3task-highlight-combined3
|
77 |
+
push_to_hub_organization: null
|
78 |
+
push_to_hub_token: null
|
79 |
+
remove_unused_columns: false
|
80 |
+
report_to:
|
81 |
+
- wandb
|
82 |
+
resume_from_checkpoint: null
|
83 |
+
run_name: mt5base-3task-highlight-combined3
|
84 |
+
save_on_each_node: false
|
85 |
+
save_steps: 500
|
86 |
+
save_strategy: *id001
|
87 |
+
save_total_limit: 1
|
88 |
+
seed: 42
|
89 |
+
sharded_ddp: []
|
90 |
+
skip_memory_metrics: true
|
91 |
+
tokenizer_path: tokenizers/mt5-base
|
92 |
+
tpu_metrics_debug: false
|
93 |
+
tpu_num_cores: null
|
94 |
+
train_dataset_list:
|
95 |
+
- tquad2-train
|
96 |
+
- tquad2-valid
|
97 |
+
- xquad.tr
|
98 |
+
train_file_path: data/train_data.pt
|
99 |
+
use_legacy_prediction_loop: false
|
100 |
+
valid_dataset_list:
|
101 |
+
- tquad2-valid
|
102 |
+
valid_file_path: data/valid_data.pt
|
103 |
+
wandb_id: null
|
104 |
+
wandb_project: turkish-qa-qg
|
105 |
+
warmup_ratio: 0.0
|
106 |
+
warmup_steps: 0
|
107 |
+
weight_decay: 0.0
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6294e5764270f9fabd04bc143eba0a9021ddbaea8d53e5965b98d3dd215c86c
|
3 |
+
size 2329645913
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
|
spiece.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
|
3 |
+
size 4309802
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 0, "additional_special_tokens": null, "special_tokens_map_file": "/home/patrick/.cache/torch/transformers/685ac0ca8568ec593a48b61b0a3c272beee9bc194a3c7241d15dcadb5f875e53.f76030f3ec1b96a8199b2593390c610e76ca8028ef3d24680000619ffb646276", "name_or_path": "tokenizers/mt5-base", "sp_model_kwargs": {}, "tokenizer_class": "T5Tokenizer"}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e70335cd934710e27d3c852faf3390b1a97b781da155fe2485387db8dfad1a6
|
3 |
+
size 3119
|