Persian Poetry First Version
Browse files- README.md +86 -0
- added_tokens.json +1 -0
- config.json +38 -0
- merges.txt +0 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tf_model.h5 +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- vocab.json +0 -0
README.md
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language: fa
|
3 |
+
license: apache-2.0
|
4 |
+
widget:
|
5 |
+
- text: "<s>رودکی<|startoftext|>"
|
6 |
+
- text: "<s>فردوسی<|startoftext|>"
|
7 |
+
- text: "<s>خیام<|startoftext|>"
|
8 |
+
- text: "<s>عطار<|startoftext|>"
|
9 |
+
- text: "<s>نظامی<|startoftext|>"
|
10 |
+
---
|
11 |
+
|
12 |
+
# Persian Poet GPT2
|
13 |
+
|
14 |
+
## Poets
|
15 |
+
The model can generate poetry in the style of your favorite poet. To do so, enter one of the following lines as the input in the box on the right side, or follow the [fine-tuning notebook](https://colab.research.google.com/github/hooshvare/parsgpt/blob/master/notebooks/Persian_Poetry_FineTuning.ipynb).
|
16 |
+
|
17 |
+
<s>رودکی<|startoftext|>
|
18 |
+
<s>فردوسی<|startoftext|>
|
19 |
+
<s>کسایی<|startoftext|>
|
20 |
+
<s>ناصرخسرو<|startoftext|>
|
21 |
+
<s>منوچهری<|startoftext|>
|
22 |
+
<s>فرخی سیستانی<|startoftext|>
|
23 |
+
<s>مسعود سعد سلمان<|startoftext|>
|
24 |
+
<s>ابوسعید ابوالخیر<|startoftext|>
|
25 |
+
<s>باباطاهر<|startoftext|>
|
26 |
+
<s>فخرالدین اسعد گرگانی<|startoftext|>
|
27 |
+
<s>اسدی توسی<|startoftext|>
|
28 |
+
<s>هجویری<|startoftext|>
|
29 |
+
<s>خیام<|startoftext|>
|
30 |
+
<s>نظامی<|startoftext|>
|
31 |
+
<s>عطار<|startoftext|>
|
32 |
+
<s>سنایی<|startoftext|>
|
33 |
+
<s>خاقانی<|startoftext|>
|
34 |
+
<s>انوری<|startoftext|>
|
35 |
+
<s>عبدالواسع جبلی<|startoftext|>
|
36 |
+
<s>نصرالله منشی<|startoftext|>
|
37 |
+
<s>مهستی گنجوی<|startoftext|>
|
38 |
+
<s>باباافضل کاشانی<|startoftext|>
|
39 |
+
<s>مولوی<|startoftext|>
|
40 |
+
<s>سعدی<|startoftext|>
|
41 |
+
<s>خواجوی کرمانی<|startoftext|>
|
42 |
+
<s>عراقی<|startoftext|>
|
43 |
+
<s>سیف فرغانی<|startoftext|>
|
44 |
+
<s>حافظ<|startoftext|>
|
45 |
+
<s>اوحدی<|startoftext|>
|
46 |
+
<s>شیخ محمود شبستری<|startoftext|>
|
47 |
+
<s>عبید زاکانی<|startoftext|>
|
48 |
+
<s>امیرخسرو دهلوی<|startoftext|>
|
49 |
+
<s>سلمان ساوجی<|startoftext|>
|
50 |
+
<s>شاه نعمتالله ولی<|startoftext|>
|
51 |
+
<s>جامی<|startoftext|>
|
52 |
+
<s>هلالی جغتایی<|startoftext|>
|
53 |
+
<s>وحشی<|startoftext|>
|
54 |
+
<s>محتشم کاشانی<|startoftext|>
|
55 |
+
<s>شیخ بهایی<|startoftext|>
|
56 |
+
<s>عرفی<|startoftext|>
|
57 |
+
<s>رضیالدین آرتیمانی<|startoftext|>
|
58 |
+
<s>صائب تبریزی<|startoftext|>
|
59 |
+
<s>فیض کاشانی<|startoftext|>
|
60 |
+
<s>بیدل دهلوی<|startoftext|>
|
61 |
+
<s>هاتف اصفهانی<|startoftext|>
|
62 |
+
<s>فروغی بسطامی<|startoftext|>
|
63 |
+
<s>قاآنی<|startoftext|>
|
64 |
+
<s>ملا هادی سبزواری<|startoftext|>
|
65 |
+
<s>پروین اعتصامی<|startoftext|>
|
66 |
+
<s>ملکالشعرای بهار<|startoftext|>
|
67 |
+
<s>شهریار<|startoftext|>
|
68 |
+
<s>رهی معیری<|startoftext|>
|
69 |
+
<s>اقبال لاهوری<|startoftext|>
|
70 |
+
<s>خلیلالله خلیلی<|startoftext|>
|
71 |
+
<s>شاطرعباس صبوحی<|startoftext|>
|
72 |
+
<s>نیما یوشیج ( آوای آزاد )<|startoftext|>
|
73 |
+
<s>احمد شاملو<|startoftext|>
|
74 |
+
<s>سهراب سپهری<|startoftext|>
|
75 |
+
<s>فروغ فرخزاد<|startoftext|>
|
76 |
+
<s>سیمین بهبهانی<|startoftext|>
|
77 |
+
<s>مهدی اخوان ثالث<|startoftext|>
|
78 |
+
<s>محمدحسن بارق شفیعی<|startoftext|>
|
79 |
+
<s>شیون فومنی<|startoftext|>
|
80 |
+
<s>کامبیز صدیقی کسمایی<|startoftext|>
|
81 |
+
<s>بهرام سالکی<|startoftext|>
|
82 |
+
<s>عبدالقهّار عاصی<|startoftext|>
|
83 |
+
<s>اِ لیـــار (جبار محمدی )<|startoftext|>
|
84 |
+
|
85 |
+
## Questions?
|
86 |
+
Post a GitHub issue on the [ParsGPT2 Issues](https://github.com/hooshvare/parsgpt/issues) page.
|
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"[N]": 42000}
|
config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"architectures": [
|
4 |
+
"GPT2LMHeadModel"
|
5 |
+
],
|
6 |
+
"attn_pdrop": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"embd_pdrop": 0.1,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"gradient_checkpointing": false,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"layer_norm_epsilon": 1e-05,
|
13 |
+
"model_type": "gpt2",
|
14 |
+
"n_ctx": 1024,
|
15 |
+
"n_embd": 768,
|
16 |
+
"n_head": 12,
|
17 |
+
"n_inner": null,
|
18 |
+
"n_layer": 12,
|
19 |
+
"n_positions": 1024,
|
20 |
+
"pad_token_id": 1,
|
21 |
+
"resid_pdrop": 0.1,
|
22 |
+
"summary_activation": null,
|
23 |
+
"summary_first_dropout": 0.1,
|
24 |
+
"summary_proj_to_labels": true,
|
25 |
+
"summary_type": "cls_index",
|
26 |
+
"summary_use_proj": true,
|
27 |
+
"task_specific_params": {
|
28 |
+
"text-generation": {
|
29 |
+
"do_sample": true,
|
30 |
+
"max_length": 128,
|
31 |
+
"top_k": 50,
|
32 |
+
"top_p": 0.95
|
33 |
+
}
|
34 |
+
},
|
35 |
+
"transformers_version": "4.3.2",
|
36 |
+
"use_cache": true,
|
37 |
+
"vocab_size": 42001
|
38 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3959094249ce84930979cd767e25e744bd38241767bb75b28cad2d46315270b
|
3 |
+
size 485044128
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
|
tf_model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cca83b01814c811252ee43da4676158a292575df15ef3c1f97115aad4b9952e1
|
3 |
+
size 472571216
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "special_tokens_map_file": null, "errors": "replace", "pad_token": "<pad>", "name_or_path": "/content/gpt2"}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|