Update app.py
Different text generation model "distilgpt2"
app.py CHANGED
@@ -163,41 +163,59 @@
 # which could capture long-term dependencies in text. Finally, we came to transformers, whose decoder architecture became popular for generative models
 # used for generating text as an example.
 
-from transformers import GPT2LMHeadModel, GPT2Tokenizer
+# from transformers import GPT2LMHeadModel, GPT2Tokenizer
+# import gradio as grad
+
+# mdl = GPT2LMHeadModel.from_pretrained('gpt2')
+# gpt2_tkn = GPT2Tokenizer.from_pretrained('gpt2')
+
+# def generate(starting_text):
+#     tkn_ids = gpt2_tkn.encode(starting_text, return_tensors='pt')
+
+#     # When no specific parameter is given, the model performs a greedy search to find the next word, which entails selecting, from all of the
+#     # alternatives, the word that has the highest probability. This process is deterministic, which means that the resultant text is the same
+#     # as before if we use the same parameters.
+
+#     # The num_beams parameter does a beam search: it tracks the num_beams most probable sequences and, when it comes time to
+#     # choose, picks the one that has the highest overall probability.
+
+#     # The do_sample parameter selects the next word at random from the probability distribution.
+
+#     # The temperature parameter controls how greedy the generative model is.
+#     # If the temperature is low, the probabilities of sample classes other than the one with the highest log probability will be low.
+#     # As a result, the model will probably output the most correct text, but it will be rather monotonous and contain only a small amount of variation.
+#     # If the temperature is high, the model has a greater chance of outputting words other than those with the highest probability.
+#     # The generated text will feature a greater variety of topics, but there is also an increased likelihood that it will generate nonsense text and
+#     # contain grammatical errors.
+
+#     # With a lower temperature (1.5 --> 0.1), the output becomes less varied.
+#     gpt2_tensors = mdl.generate(tkn_ids, max_length=100, no_repeat_ngram_size=2, num_beams=3, do_sample=True, temperature=0.1)
+#     response = ""
+#     # response = gpt2_tensors
+#     for i, x in enumerate(gpt2_tensors):
+#         response = response + f"{i}: {gpt2_tkn.decode(x, skip_special_tokens=True)}"  # Decode tensors into text
+#     return gpt2_tensors, response
+
+# txt = grad.Textbox(lines=1, label="English", placeholder="English Text here")
+# out_tensors = grad.Textbox(lines=1, label="Generated Tensors")
+# out_text = grad.Textbox(lines=1, label="Generated Text")
+
+# grad.Interface(generate, inputs=txt, outputs=[out_tensors, out_text]).launch()
+
+#-----------------------------------------------------------------------------------
+# 9. Text Generation: different model "distilgpt2"
+
+from transformers import pipeline, set_seed
 import gradio as grad
 
-mdl = GPT2LMHeadModel.from_pretrained('gpt2')
-gpt2_tkn = GPT2Tokenizer.from_pretrained('gpt2')
+gpt2_pipe = pipeline('text-generation', model='distilgpt2')
+set_seed(42)
 
 def generate(starting_text):
-    tkn_ids = gpt2_tkn.encode(starting_text, return_tensors='pt')
-
-    # When no specific parameter is given, the model performs a greedy search to find the next word, which entails selecting, from all of the
-    # alternatives, the word that has the highest probability. This process is deterministic, which means that the resultant text is the same
-    # as before if we use the same parameters.
-
-    # The num_beams parameter does a beam search: it tracks the num_beams most probable sequences and, when it comes time to
-    # choose, picks the one that has the highest overall probability.
-
-    # The do_sample parameter selects the next word at random from the probability distribution.
-
-    # The temperature parameter controls how greedy the generative model is.
-    # If the temperature is low, the probabilities of sample classes other than the one with the highest log probability will be low.
-    # As a result, the model will probably output the most correct text, but it will be rather monotonous and contain only a small amount of variation.
-    # If the temperature is high, the model has a greater chance of outputting words other than those with the highest probability.
-    # The generated text will feature a greater variety of topics, but there is also an increased likelihood that it will generate nonsense text and
-    # contain grammatical errors.
-
-    # With a lower temperature (1.5 --> 0.1), the output becomes less varied.
-    gpt2_tensors = mdl.generate(tkn_ids, max_length=100, no_repeat_ngram_size=2, num_beams=3, do_sample=True, temperature=0.1)
-    response = ""
-    # response = gpt2_tensors
-    for i, x in enumerate(gpt2_tensors):
-        response = response + f"{i}: {gpt2_tkn.decode(x, skip_special_tokens=True)}"  # Decode tensors into text
-    return gpt2_tensors, response
+    response = gpt2_pipe(starting_text, max_length=20, num_return_sequences=5)
+    return response
 
 txt=grad.Textbox(lines=1, label="English", placeholder="English Text here")
-out_tensors = grad.Textbox(lines=1, label="Generated Tensors")
-out_text = grad.Textbox(lines=1, label="Generated Text")
+out = grad.Textbox(lines=1, label="Generated Text")
 
-grad.Interface(generate, inputs=txt, outputs=[out_tensors, out_text]).launch()
+grad.Interface(generate, inputs=txt, outputs=out).launch()
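
The comments in the removed block describe four decoding knobs: greedy search (the default), num_beams, do_sample, and temperature. As a side-by-side illustration, here is a minimal sketch assuming the same 'gpt2' checkpoint; the prompt and parameter values are illustrative assumptions of mine, not taken from this commit:

# Sketch: comparing the decoding strategies the comments describe.
# The prompt and parameter values below are illustrative assumptions.
from transformers import GPT2LMHeadModel, GPT2Tokenizer

mdl = GPT2LMHeadModel.from_pretrained('gpt2')
tkn = GPT2Tokenizer.from_pretrained('gpt2')
ids = tkn.encode("The weather today is", return_tensors='pt')

# Greedy search (default): deterministic, always picks the most probable next token.
greedy = mdl.generate(ids, max_length=30)

# Beam search: tracks num_beams candidate sequences, returns the most probable one.
beam = mdl.generate(ids, max_length=30, num_beams=3, no_repeat_ngram_size=2)

# Sampling: draws the next token from the distribution; a low temperature sharpens
# the distribution toward greedy behavior, a high one flattens it toward variety.
sampled = mdl.generate(ids, max_length=30, do_sample=True, temperature=0.7)

for tensors in (greedy, beam, sampled):
    print(tkn.decode(tensors[0], skip_special_tokens=True))

Running the sampling line repeatedly gives different text each time, while the greedy and beam lines are deterministic, which is what the comments mean by "the resultant text is the same as before if we use the same parameters."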
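
One note on the new generate(): a 'text-generation' pipeline returns a list of dicts such as [{'generated_text': '...'}, ...], and that raw list is what gets handed to the output Textbox. A possible refinement, my assumption rather than part of this commit, would be to join the five sequences into a single string, mirroring the numbering the old GPT-2 version used:

# Sketch: flattening the pipeline's list-of-dicts output into a single string for
# the Textbox. The "{i}: ..." format is carried over from the old version as an
# assumption; it is not in the commit.
def generate(starting_text):
    results = gpt2_pipe(starting_text, max_length=20, num_return_sequences=5)
    return "\n".join(f"{i}: {r['generated_text']}" for i, r in enumerate(results))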