Merge branch 'main' of https://huggingface.co/pszemraj/t5-v1_1-base-finetuned-lfqa into main
README.md
ADDED
@@ -0,0 +1,83 @@
---
license: apache-2.0
language:
- en
tags:
- t5
- qa
- askscience
- lfqa
- information retrieval
datasets:
- vblagoje/lfqa
metrics:
- rouge
widget:
- text: "why aren't there more planets in our solar system?"
  example_title: "solar system"
- text: "question: what is a probability distribution? context: I am just learning about statistics."
  example_title: "probability distribution"
- text: "question: how does exercise help us lose weight? context: I started working out two weeks ago and already feel a lot better, and started to think about it and became deeply confused."
  example_title: "pumpen"
- text: "what is a neural network?"
  example_title: "deep learning"
- text: "How can computers understand human language?"
  example_title: "NLP"

inference:
  parameters:
    max_length: 64
    no_repeat_ngram_size: 2
    encoder_no_repeat_ngram_size: 4
    repetition_penalty: 3.51
    length_penalty: 0.8
    num_beams: 4
    early_stopping: True
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# checkpoints

This model is a fine-tuned version of [google/t5-v1_1-base](https://huggingface.co/google/t5-v1_1-base), trained for 2 epochs on the `vblagoje/lfqa` dataset. It allows a (_somewhat_) apples-to-apples comparison with [t5-base](https://huggingface.co/pszemraj/t5-base-askscience) fine-tuned on the standard ELI5 dataset.
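
A minimal usage sketch (an illustration, not taken from the original training code): load the checkpoint with the standard `transformers` seq2seq classes and pass the generation parameters listed in the `inference` section of this card. The `question: ... context: ...` prompt format follows the widget examples above.

```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "pszemraj/t5-v1_1-base-finetuned-lfqa"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# prompt format mirrors the widget examples: a question plus optional context
prompt = (
    "question: what is a probability distribution? "
    "context: I am just learning about statistics."
)
inputs = tokenizer(prompt, return_tensors="pt")

# generation parameters copied from the `inference` section of this card
outputs = model.generate(
    **inputs,
    max_length=64,
    no_repeat_ngram_size=2,
    encoder_no_repeat_ngram_size=4,
    repetition_penalty=3.51,
    length_penalty=0.8,
    num_beams=4,
    early_stopping=True,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```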

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training (reproduced as code after the list):
- learning_rate: 4e-05
- train_batch_size: 8
- eval_batch_size: 8
- seed: 42
- distributed_type: multi-GPU
- gradient_accumulation_steps: 2
- total_train_batch_size: 16
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: cosine
- num_epochs: 2
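
For reference, a sketch of equivalent `Seq2SeqTrainingArguments` (an assumption based on the list above and the training-metadata JSON attached to this commit; `output_dir` is illustrative):

```python
from transformers import Seq2SeqTrainingArguments

# values mirror the hyperparameter list above and the metadata JSON below
training_args = Seq2SeqTrainingArguments(
    output_dir="./checkpoints",     # illustrative path
    learning_rate=4e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,  # effective train batch size of 16 on one GPU
    num_train_epochs=2,
    lr_scheduler_type="cosine",
    weight_decay=0.01,
    seed=42,
    fp16=True,
    gradient_checkpointing=True,
    save_strategy="epoch",
    save_total_limit=1,
    logging_steps=15,
)
```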

### Training results

### Framework versions

- Transformers 4.16.2
- Pytorch 1.10.0+cu113
- Datasets 1.18.3
- Tokenizers 0.11.0
t5-v1_1-base-finetuned-lfqa_training_metadata.json
ADDED
@@ -0,0 +1 @@
{"output_dir": "/content/checkpoints/", "overwrite_output_dir": true, "do_train": false, "do_eval": false, "do_predict": false, "evaluation_strategy": "no", "prediction_loss_only": false, "per_device_train_batch_size": 8, "per_device_eval_batch_size": 8, "per_gpu_train_batch_size": "None", "per_gpu_eval_batch_size": "None", "gradient_accumulation_steps": 2, "eval_accumulation_steps": 2, "learning_rate": 4e-05, "weight_decay": 0.01, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08, "max_grad_norm": 1, "num_train_epochs": 2, "max_steps": -1, "lr_scheduler_type": "cosine", "warmup_ratio": 0, "warmup_steps": 0, "log_level": -1, "log_level_replica": -1, "log_on_each_node": true, "logging_dir": "/content/logs", "logging_strategy": "steps", "logging_first_step": false, "logging_steps": 15, "logging_nan_inf_filter": true, "save_strategy": "epoch", "save_steps": 500, "save_total_limit": 1, "save_on_each_node": false, "no_cuda": false, "seed": 42, "bf16": false, "fp16": true, "fp16_opt_level": "O1", "half_precision_backend": "amp", "bf16_full_eval": false, "fp16_full_eval": true, "tf32": "None", "local_rank": 0, "xpu_backend": "None", "tpu_num_cores": "None", "tpu_metrics_debug": false, "debug": "[]", "dataloader_drop_last": false, "eval_steps": "None", "dataloader_num_workers": 0, "past_index": -1, "run_name": "/content/checkpoints/", "disable_tqdm": false, "remove_unused_columns": true, "label_names": "None", "load_best_model_at_end": false, "metric_for_best_model": "None", "greater_is_better": "None", "ignore_data_skip": false, "sharded_ddp": "[]", "deepspeed": "/content/ds_config_zero2.json", "label_smoothing_factor": 0.0, "optim": "adamw_hf", "adafactor": false, "group_by_length": false, "length_column_name": "length", "report_to": "['tensorboard']", "ddp_find_unused_parameters": "None", "ddp_bucket_cap_mb": "None", "dataloader_pin_memory": false, "skip_memory_metrics": false, "use_legacy_prediction_loop": false, "push_to_hub": false, "resume_from_checkpoint": "None", "hub_model_id": "t5-v1_1-base-finetuned-lfqa", "hub_strategy": "end", "hub_token": "<HUB_TOKEN>", "gradient_checkpointing": true, "fp16_backend": "auto", "push_to_hub_model_id": "None", "push_to_hub_organization": "None", "push_to_hub_token": "<PUSH_TO_HUB_TOKEN>", "_n_gpu": 1, "mp_parameters": "", "sortish_sampler": false, "predict_with_generate": false, "generation_max_length": "None", "generation_num_beams": "None", "train_batch_size": 8, "eval_batch_size": 8, "configs_src": "t5-v1_1-base-finetuned-lfqa", "data_tag": "vblagoje/lfqa"}
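
The metadata file is a flat JSON dump of the `TrainingArguments` used for the run. A small sketch (assuming the `huggingface_hub` client is installed) for fetching and inspecting it:

```python
import json

from huggingface_hub import hf_hub_download

# download the metadata JSON from this repo and inspect a few training arguments
path = hf_hub_download(
    repo_id="pszemraj/t5-v1_1-base-finetuned-lfqa",
    filename="t5-v1_1-base-finetuned-lfqa_training_metadata.json",
)
with open(path) as f:
    meta = json.load(f)

print(meta["learning_rate"], meta["num_train_epochs"], meta["lr_scheduler_type"])
```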