fcakyon committed on
Commit
a80d4fd
1 Parent(s): cd4f2dc

initial commit

Browse files
README.md ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: tr
3
+ datasets:
4
+ - tquad1
5
+ - tquad2
6
+ - xquad
7
+ tags:
8
+ - answer-extraction
9
+ - question-answering
10
+ - question-generation
11
+ - text-generation
12
+ - text2text-generation
13
+ license: cc-by-4.0
14
+ ---
15
+
16
+ # mt5-base for Turkish Question Generation
17
+ Automated question generation and question answering using text-to-text transformers by OBSS AI.
18
+
19
+ ```python
20
+ from core.api import GenerationAPI
21
+ generation_api = GenerationAPI(model_url_or_path='mt5-base-3task-highlight-combined3')
22
+ ```
23
+
24
+ ## Overview
25
+ **Language model:** mt5-base
26
+ **Language:** Turkish
27
+ **Downstream-task:** Extractive QA/QG, Answer Extraction
28
+ **Training data:** TQuADv2-train, TQuADv2-val, XQuAD.tr
29
+ **Code:** https://github.com/obss/turkish-question-generation
30
+ **Paper:** https://arxiv.org/abs/2111.06476
31
+
32
+ ## Hyperparameters
33
+ ```
34
+ batch_size = 256
35
+ n_epochs = 15
36
+ base_LM_model = "mt5-base"
37
+ max_source_length = 512
38
+ max_target_length = 64
39
+ learning_rate = 1.0e-3
40
+ task_list = ["qa", "qg", "ans_ext"]
41
+ qg_format = "highlight"
42
+ ```
43
+
44
+ ## Performance
45
+ Refer to [paper](https://arxiv.org/abs/2111.06476).
46
+
47
+ ## Usage 🔥
48
+ ```python
49
+ from core.api import GenerationAPI
50
+ generation_api = GenerationAPI('mt5-base-3task-highlight-combined3')
51
+
52
+ context = """
53
+ Bu modelin eğitiminde, Türkçe soru cevap verileri kullanılmıştır.
54
+ Paylaşılan model kullanılarak, Türkçe metinlerden otomatik olarak soru ve cevap
55
+ üretilebilir. Bu proje ile paylaşılan kaynak kodu ile Türkçe Soru Üretme
56
+ / Soru Cevaplama konularında yeni akademik çalışmalar yapılabilir.
57
+ Projenin detaylarına paylaşılan Github ve Arxiv linklerinden ulaşılabilir.
58
+ """
59
+
60
+ # a) Fully Automated Question Generation
61
+ generation_api(task='question-generation', context=context)
62
+
63
+ # b) Question Answering
64
+ question = "Bu model ne işe yarar?"
65
+ generation_api(task='question-answering', context=context, question=question)
66
+
67
+ # c) Answer Extraction
68
+ generation_api(task='answer-extraction', context=context)
69
+ ```
70
+
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<sep>": 250100, "<hl>": 250101}
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/mt5-base",
3
+ "architectures": [
4
+ "MT5ForConditionalGeneration"
5
+ ],
6
+ "d_ff": 2048,
7
+ "d_kv": 64,
8
+ "d_model": 768,
9
+ "decoder_start_token_id": 0,
10
+ "dropout_rate": 0.1,
11
+ "eos_token_id": 1,
12
+ "feed_forward_proj": "gated-gelu",
13
+ "initializer_factor": 1.0,
14
+ "is_encoder_decoder": true,
15
+ "layer_norm_epsilon": 1e-06,
16
+ "model_type": "mt5",
17
+ "num_decoder_layers": 12,
18
+ "num_heads": 12,
19
+ "num_layers": 12,
20
+ "output_past": true,
21
+ "pad_token_id": 0,
22
+ "relative_attention_num_buckets": 32,
23
+ "tie_word_embeddings": false,
24
+ "tokenizer_class": "T5Tokenizer",
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.10.0.dev0",
27
+ "use_cache": true,
28
+ "vocab_size": 250102
29
+ }
experiment_config.yaml ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _n_gpu: 1
2
+ adafactor: false
3
+ adam_beta1: 0.9
4
+ adam_beta2: 0.999
5
+ adam_epsilon: 1.0e-08
6
+ cache_dir: null
7
+ dataloader_drop_last: false
8
+ dataloader_num_workers: 0
9
+ dataloader_pin_memory: true
10
+ ddp_find_unused_parameters: null
11
+ debug: []
12
+ deepspeed: null
13
+ disable_tqdm: false
14
+ do_eval: true
15
+ do_predict: false
16
+ do_train: true
17
+ eval_accumulation_steps: 1
18
+ eval_dataset_list:
19
+ - tquad2-valid
20
+ - xquad.tr
21
+ eval_steps: 300
22
+ evaluation_strategy: &id001 !!python/object/apply:transformers.trainer_utils.IntervalStrategy
23
+ - steps
24
+ fp16: false
25
+ fp16_backend: auto
26
+ fp16_full_eval: false
27
+ fp16_opt_level: O1
28
+ freeze_embeddings: false
29
+ gradient_accumulation_steps: 8
30
+ greater_is_better: null
31
+ group_by_length: false
32
+ ignore_data_skip: false
33
+ label_names: null
34
+ label_smoothing_factor: 0
35
+ learning_rate: 0.001
36
+ length_column_name: length
37
+ load_best_model_at_end: false
38
+ local_rank: -1
39
+ log_level: -1
40
+ log_level_replica: -1
41
+ log_on_each_node: true
42
+ logging_dir: runs/mt5-base/mt5base-3task-highlight-combined3/runs/Dec02_01-35-48_palamut3.yonetim
43
+ logging_first_step: false
44
+ logging_steps: 500
45
+ logging_strategy: *id001
46
+ lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
47
+ - linear
48
+ max_grad_norm: 1.0
49
+ max_source_length: 512
50
+ max_steps: -1
51
+ max_target_length: 64
52
+ metric_for_best_model: null
53
+ model_name_or_path: google/mt5-base
54
+ model_type: mt5
55
+ mp_parameters: ''
56
+ mt5_qg_format: highlight
57
+ mt5_task_list:
58
+ - qa
59
+ - qg
60
+ - ans_ext
61
+ neptune_api_token: null
62
+ neptune_project: null
63
+ neptune_run: null
64
+ no_cuda: false
65
+ num_train_epochs: 15
66
+ output_dir: runs/mt5-base/mt5base-3task-highlight-combined3
67
+ overwrite_output_dir: false
68
+ past_index: -1
69
+ per_device_eval_batch_size: 32
70
+ per_device_train_batch_size: 32
71
+ per_gpu_eval_batch_size: null
72
+ per_gpu_train_batch_size: null
73
+ prediction_loss_only: false
74
+ prepare_data: true
75
+ push_to_hub: false
76
+ push_to_hub_model_id: mt5base-3task-highlight-combined3
77
+ push_to_hub_organization: null
78
+ push_to_hub_token: null
79
+ remove_unused_columns: false
80
+ report_to:
81
+ - wandb
82
+ resume_from_checkpoint: null
83
+ run_name: mt5base-3task-highlight-combined3
84
+ save_on_each_node: false
85
+ save_steps: 500
86
+ save_strategy: *id001
87
+ save_total_limit: 1
88
+ seed: 42
89
+ sharded_ddp: []
90
+ skip_memory_metrics: true
91
+ tokenizer_path: tokenizers/mt5-base
92
+ tpu_metrics_debug: false
93
+ tpu_num_cores: null
94
+ train_dataset_list:
95
+ - tquad2-train
96
+ - tquad2-valid
97
+ - xquad.tr
98
+ train_file_path: data/train_data.pt
99
+ use_legacy_prediction_loop: false
100
+ valid_dataset_list:
101
+ - tquad2-valid
102
+ valid_file_path: data/valid_data.pt
103
+ wandb_id: null
104
+ wandb_project: turkish-qa-qg
105
+ warmup_ratio: 0.0
106
+ warmup_steps: 0
107
+ weight_decay: 0.0
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6294e5764270f9fabd04bc143eba0a9021ddbaea8d53e5965b98d3dd215c86c
3
+ size 2329645913
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
3
+ size 4309802
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 0, "additional_special_tokens": null, "special_tokens_map_file": "/home/patrick/.cache/torch/transformers/685ac0ca8568ec593a48b61b0a3c272beee9bc194a3c7241d15dcadb5f875e53.f76030f3ec1b96a8199b2593390c610e76ca8028ef3d24680000619ffb646276", "name_or_path": "tokenizers/mt5-base", "sp_model_kwargs": {}, "tokenizer_class": "T5Tokenizer"}
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e70335cd934710e27d3c852faf3390b1a97b781da155fe2485387db8dfad1a6
3
+ size 3119