run480 committed on
Commit
a533979
1 Parent(s): 3b6e14c

Update app.py


Different text generation model "distilgpt2"

Files changed (1)
  1. app.py +49 -31
app.py CHANGED
@@ -163,41 +163,59 @@
  # which could capture long-term dependencies in text. Finally, we came to transformers, whose decoder architecture became popular for generative models
  # used for generating text as an example.

- from transformers import GPT2LMHeadModel,GPT2Tokenizer
  import gradio as grad

- mdl = GPT2LMHeadModel.from_pretrained('gpt2')
- gpt2_tkn=GPT2Tokenizer.from_pretrained('gpt2')

  def generate(starting_text):
-     tkn_ids = gpt2_tkn.encode(starting_text, return_tensors = 'pt')
-
-     # When no generation parameters are specified, the model performs a greedy search for the next word: at each step it selects, from all of the
-     # alternatives, the word with the highest probability. This process is deterministic, so the resulting text is the same every time the same
-     # input and parameters are used.
-
-     # The num_beams parameter enables beam search: the model tracks the num_beams most probable sequences at each step and finally returns
-     # the one with the highest overall probability.
-
-     # The do_sample parameter makes the model sample the next word at random from the probability distribution instead of always picking the most likely one.
-
-     # The temperature parameter controls how greedy the generative model is.
-     # If the temperature is low, candidate words other than the one with the highest log probability become very unlikely to be sampled.
-     # As a result, the model will likely output the most plausible text, but it will be rather monotonous and contain only a small amount of variation.
-     # If the temperature is high, the model has a greater chance of outputting words other than those with the highest probability.
-     # The generated text will feature a greater variety of topics, but there is also an increased likelihood that it will generate nonsense and
-     # contain grammatical errors.
-
-     # With a lower temperature (1.5 -> 0.1), the output becomes less varied.
-     gpt2_tensors = mdl.generate(tkn_ids, max_length=100, no_repeat_ngram_size=2, num_beams=3, do_sample=True, temperature=0.1)
-     response=""
-     #response = gpt2_tensors
-     for i, x in enumerate(gpt2_tensors):
-         response=response+f"{i}: {gpt2_tkn.decode(x, skip_special_tokens=True)}" # Decode each output tensor into text
-     return gpt2_tensors, response

  txt=grad.Textbox(lines=1, label="English", placeholder="English Text here")
- out_tensors=grad.Textbox(lines=1, label="Generated Tensors")
- out_text=grad.Textbox(lines=1, label="Generated Text")

- grad.Interface(generate, inputs=txt, outputs=[out_tensors, out_text]).launch()
 
  # which could capture long-term dependencies in text. Finally, we came to transformers, whose decoder architecture became popular for generative models
  # used for generating text as an example.

+ # from transformers import GPT2LMHeadModel,GPT2Tokenizer
+ # import gradio as grad
+
+ # mdl = GPT2LMHeadModel.from_pretrained('gpt2')
+ # gpt2_tkn=GPT2Tokenizer.from_pretrained('gpt2')
+
+ # def generate(starting_text):
+ #     tkn_ids = gpt2_tkn.encode(starting_text, return_tensors = 'pt')
+
+ #     # When no generation parameters are specified, the model performs a greedy search for the next word: at each step it selects, from all of the
+ #     # alternatives, the word with the highest probability. This process is deterministic, so the resulting text is the same every time the same
+ #     # input and parameters are used.
+
+ #     # The num_beams parameter enables beam search: the model tracks the num_beams most probable sequences at each step and finally returns
+ #     # the one with the highest overall probability.
+
+ #     # The do_sample parameter makes the model sample the next word at random from the probability distribution instead of always picking the most likely one.
+
+ #     # The temperature parameter controls how greedy the generative model is.
+ #     # If the temperature is low, candidate words other than the one with the highest log probability become very unlikely to be sampled.
+ #     # As a result, the model will likely output the most plausible text, but it will be rather monotonous and contain only a small amount of variation.
+ #     # If the temperature is high, the model has a greater chance of outputting words other than those with the highest probability.
+ #     # The generated text will feature a greater variety of topics, but there is also an increased likelihood that it will generate nonsense and
+ #     # contain grammatical errors.
+
+ #     # With a lower temperature (1.5 -> 0.1), the output becomes less varied (see the runnable sketch after this diff).
+ #     gpt2_tensors = mdl.generate(tkn_ids, max_length=100, no_repeat_ngram_size=2, num_beams=3, do_sample=True, temperature=0.1)
+ #     response=""
+ #     #response = gpt2_tensors
+ #     for i, x in enumerate(gpt2_tensors):
+ #         response=response+f"{i}: {gpt2_tkn.decode(x, skip_special_tokens=True)}" # Decode each output tensor into text
+ #     return gpt2_tensors, response
+
+ # txt=grad.Textbox(lines=1, label="English", placeholder="English Text here")
+ # out_tensors=grad.Textbox(lines=1, label="Generated Tensors")
+ # out_text=grad.Textbox(lines=1, label="Generated Text")
+
+ # grad.Interface(generate, inputs=txt, outputs=[out_tensors, out_text]).launch()
+
+ #-----------------------------------------------------------------------------------
+ # 9. Text Generation: different model "distilgpt2"
+
+ from transformers import pipeline, set_seed
  import gradio as grad

+ gpt2_pipe = pipeline('text-generation', model='distilgpt2')
+ set_seed(42)

  def generate(starting_text):
+     response = gpt2_pipe(starting_text, max_length=20, num_return_sequences=5)
+     return response

  txt=grad.Textbox(lines=1, label="English", placeholder="English Text here")
+ out=grad.Textbox(lines=1, label="Generated Text")

+ grad.Interface(generate, inputs=txt, outputs=out).launch()
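
The commented-out block in the diff explains greedy search, beam search, do_sample, and temperature. Below is a minimal standalone sketch of those decoding strategies. It is not part of the commit: it assumes the same gpt2 checkpoint and a recent transformers release, and the prompt and parameter values are illustrative.

# Sketch: comparing the decoding strategies described in the commented-out code above.
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tkn = GPT2Tokenizer.from_pretrained('gpt2')
mdl = GPT2LMHeadModel.from_pretrained('gpt2')
ids = tkn.encode("The weather today is", return_tensors='pt')  # example prompt (assumption)

# Greedy search: deterministic, always picks the single most probable next token.
greedy = mdl.generate(ids, max_length=30)

# Beam search: keeps num_beams candidate sequences and returns the most probable one;
# no_repeat_ngram_size=2 bans any repeated bigram in the output.
beam = mdl.generate(ids, max_length=30, num_beams=3, no_repeat_ngram_size=2)

# Sampling with temperature: low values stay close to greedy, high values add variety.
sampled = mdl.generate(ids, max_length=30, do_sample=True, temperature=0.7)

for name, out in [("greedy", greedy), ("beam", beam), ("sampled", sampled)]:
    print(name, "->", tkn.decode(out[0], skip_special_tokens=True))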
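
The new generate function returns the pipeline output directly, which Gradio renders as its string form. For reference, a minimal sketch, again an illustration rather than part of the commit, of what the distilgpt2 text-generation pipeline returns and how it could be formatted:

# Sketch: the text-generation pipeline returns a list of dicts, one per sequence.
from transformers import pipeline, set_seed

gen = pipeline('text-generation', model='distilgpt2')
set_seed(42)  # fixed seed so the sampled outputs are reproducible

out = gen("Hello, I am", max_length=20, num_return_sequences=3)
# out looks like: [{'generated_text': 'Hello, I am ...'}, ...]
for i, seq in enumerate(out):
    print(f"{i}: {seq['generated_text']}")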