fillinblanks / app.py
Technozam's picture
Update app.py
0c8aed3
# NLTK helpers used for sentence splitting, word tokenising and POS tagging.
from nltk import pos_tag, word_tokenize, sent_tokenize
import nltk
# Fetch the tokenizer and tagger data at import time (runs on every start-up).
# NOTE(review): newer NLTK releases may look for differently named resources
# (e.g. 'punkt_tab') — confirm against the installed version.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
import re
import random
from collections import defaultdict
'''
Function to clean the input paragraph(s) and return sentence-wise tagged words.
Cleaning has to be done because '[' , ']' , '@'.. are tagged as nouns, and this program builds blanks from nouns only.
'''
def clean_and_tag(paragraph):
    """Clean *paragraph* and POS-tag it sentence by sentence.

    Every character other than ASCII letters, digits, space, '.' and '-'
    is removed, because stray symbols would otherwise be tagged as nouns
    and picked as blanks.

    Args:
        paragraph: The raw text (one or more paragraphs).

    Returns:
        A ``defaultdict`` keyed by cleaned sentence. Each value is a
        ``defaultdict(list)`` mapping a POS tag (e.g. ``'NN'``, ``'NNP'``)
        to the list of tokens in that sentence carrying the tag. Identical
        sentences share one entry (the last one tagged wins).
    """
    # Turn newlines/tabs into single spaces first. The character filter
    # below drops them outright, which used to glue the last word of one
    # line to the first word of the next.
    para = re.sub(r'\s+', ' ', paragraph)
    para = re.sub(r'[^a-zA-Z0-9 .-]', '', para)

    tags_dict_sentences = defaultdict(list)
    for sentence in sent_tokenize(para):
        # Group the tokens of this sentence by their POS tag; callers only
        # really need 'NNP' and 'NN'.
        tags_dict = defaultdict(list)
        for token, tag in pos_tag(word_tokenize(sentence)):
            tags_dict[tag].append(token)
        tags_dict_sentences[sentence] = tags_dict
    return tags_dict_sentences
'''
function to replace a word with a blank
'''
def replaceblank(word, sentence):
    """Return *sentence* with every occurrence of *word* replaced by a blank.

    Matching is case-insensitive. Whole-word occurrences are preferred so
    that blanking "net" does not also damage "cabinet"; only if the word
    never appears as a whole word is a plain substring replacement used.

    Args:
        word: The word to hide. An empty word leaves the sentence unchanged.
        sentence: The sentence to put the blank into.

    Returns:
        The sentence with the word replaced by a run of underscores.
    """
    blank = '________________'
    if not word:
        # An empty pattern matches between every character and would
        # scatter blanks through the whole sentence.
        return sentence

    escaped = re.escape(word)
    # Lookarounds instead of \b so words that start or end with a
    # non-word character (e.g. "U.S.") are still bounded correctly.
    whole_word = re.compile(r'(?<!\w)' + escaped + r'(?!\w)', re.IGNORECASE)
    blanked, hits = whole_word.subn(blank, sentence)
    if hits:
        return blanked

    # The tokenizer can hand back a token that is only part of a written
    # word; keep the previous substring behaviour for that case.
    return re.sub(escaped, blank, sentence, flags=re.IGNORECASE)
'''
Function to remove a word from a sentence and replace it with a blank, if possible.
As most geography, history and science books revolve around nouns, most
fill-in-the-blank answers are nouns.
If you want to teach children English, you can pick adjectives or verbs as well.
'''
def removeWord(sentence, tags_dict):
    """Blank out one randomly chosen noun of *sentence*.

    Proper nouns ('NNP') take priority; common nouns ('NN') are used only
    when the sentence has no 'NNP' entry at all.

    Args:
        sentence: The sentence text.
        tags_dict: Mapping of POS tag -> list of tokens for this sentence.

    Returns:
        ``(word, sentence, replaced)`` where *replaced* is the sentence
        with the word blanked, or ``(None, sentence, None)`` when no noun
        is available.
    """
    nothing_removed = (None, sentence, None)

    # Membership test (not indexing) so a defaultdict is not populated
    # with empty entries as a side effect.
    for tag in ('NNP', 'NN'):
        if tag in tags_dict:
            candidates = tags_dict[tag]
            break
    else:
        print("NN and NNP not found")
        return nothing_removed

    if len(candidates) == 0:
        print("words are empty")
        return nothing_removed

    # Random pick; with a per-noun score the best word could be chosen
    # here instead.
    chosen = random.choice(candidates)
    return (chosen, sentence, replaceblank(chosen, sentence))
'''
Function which combines everything and generates blanks.
Inputs are paragraph = the paragraph(s) you want to generate blanks from,
num_of_blanks = the number of blanks you want to create.
Right now, the function only creates blanks up to a limit. When the
number of blanks to be created is more than the total number of sentences, we would have to create
multiple blanks from the same sentence, so answers could be deduced easily. Whereas
if you don't care about that, the code can be extended a little to support that too.
Output is an array;
each entry in the array is an array again, whose 1st element is the sentence with the blank and
2nd element is the missing word.
'''
def create_blanks(paragraph, num_of_blanks):
    """Generate up to *num_of_blanks* fill-in-the-blank questions.

    At most one blank is made per sentence, so the number of questions
    can never exceed the number of sentences. A larger request is reduced
    to that limit instead of prompting on stdin: this function is called
    from web handlers where no console is attached, and the old prompt
    path ended up returning ``None`` to callers that iterate the result.

    Args:
        paragraph: The text to build questions from.
        num_of_blanks: The number of questions wanted.

    Returns:
        A list of ``[sentence_with_blank, missing_word]`` pairs. It may be
        shorter than requested (or empty) when not enough sentences
        contain a noun.
    """
    # Sentences mapped to their POS-tagged words.
    tags_dict_sentences = clean_and_tag(paragraph)
    rand_sents = list(tags_dict_sentences)

    possib_blanks = len(rand_sents)
    if num_of_blanks > possib_blanks:
        print("number of blanks requested is more than number of sentences, "
              "creating at most {} blanks".format(possib_blanks))
        num_of_blanks = possib_blanks

    # Shuffle so the questions do not follow the order of the text.
    random.shuffle(rand_sents)

    blanks_arr = []
    for candidate in rand_sents:
        if len(blanks_arr) >= num_of_blanks:
            break
        word, _, replaced = removeWord(candidate, tags_dict_sentences[candidate])
        # Sentences without a usable noun are skipped.
        if replaced is not None:
            blanks_arr.append([replaced, word])

    if len(blanks_arr) < num_of_blanks:
        print("sorry, couldn't form more than {} blanks".format(len(blanks_arr)))
    return blanks_arr
# Sample passage (the fable of the Lion and the Mouse) kept at module level.
text = """A Lion lay asleep in the forest, his great head resting on his paws. A timid little Mouse came upon him unexpectedly, and in her fright and haste to
get away, ran across the Lion's nose. Roused from his nap, the Lion laid his huge paw angrily on the tiny creature to kill her. "Spare me!" begged
the poor Mouse. "Please let me go and some day I will surely repay you." The Lion was much amused to think that a Mouse could ever help him. But he
was generous and finally let the Mouse go. Some days later, while stalking his prey in the forest, the Lion was caught in the toils of a hunter's
net. Unable to free himself, he filled the forest with his angry roaring. The Mouse knew the voice and quickly found the Lion struggling in the net.
Running to one of the great ropes that bound him, she gnawed it until it parted, and soon the Lion was free. "You laughed when I said I would repay
you," said the Mouse. "Now you see that even a Mouse can help a Lion." """
# Placeholder; this name is rebound to a gr.File component further down.
file =None
import gradio as gr
# Module-level default; this name is rebound to a gr.Slider component further down.
total = 5
def createdblanks_text(text, subject, total):
    """Gradio handler: build the question sheet HTML from pasted text.

    Args:
        text: The content to generate questions from.
        subject: The title entered by the user; only checked for being
            non-empty.
        total: The number of questions requested.

    Returns:
        An HTML string listing the questions followed by the answer key.

    Raises:
        gr.Error: If the text has fewer than 150 words, the title has no
            words, or no question could be generated.
    """
    words_text = len(re.findall(r'\w+', text))
    words_subject = len(re.findall(r'\w+', subject))
    if words_text < 150:
        raise gr.Error("Invalid Input (Words limit must be more than 150 words).")
    if words_subject < 1:
        raise gr.Error("Invalid Input (Title must be one or more than one word).")

    blanks = create_blanks(text, total)
    # create_blanks can come back with None or nothing at all; iterating
    # None used to surface as an opaque TypeError in the UI.
    if not blanks:
        raise gr.Error("Could not generate any questions from this content. Try a longer text or fewer questions.")

    parts = ["<b>Read the question carefully and fill in the blanks.</b><br/>"]
    for number, blank in enumerate(blanks, start=1):
        parts.append("<b> Q" + str(number) + ") " + blank[0] + "</b><br/>")
        parts.append("<br/>")
    parts.append("<b>Correct Answer Key</b><br/>")
    for number, blank in enumerate(blanks, start=1):
        parts.append("<b style='color:green;'>" + "Ans" + str(number) + ": " + blank[1] + "</b>")
        parts.append("<br/>")
    return "".join(parts)
# --- "Text" tab: the user pastes the content directly ---
import gradio as gr
context = gr.Textbox(lines=10, placeholder="Enter paragraph/content here...", label="Enter your content (words input must be more than 150 words).")
# Rebinds the module-level name `total` to the slider widget.
total = gr.Slider(1,10, value=1,step=1, label="Total Number Of Questions")
subject = gr.Textbox(placeholder="Enter subject/title here...", label="Enter your title (title must contain 1 word)")
# HTML component that displays the string returned by createdblanks_text.
output = gr.HTML( label="Question and Answers")
iface = gr.Interface(
fn=createdblanks_text,
# Order matches the handler signature (text, subject, total).
inputs=[context,subject, total],
outputs=output,
allow_flagging="never",flagging_options=["Save Data"])
# iface.launch(debug=True)
import glob
import os.path
import pandas as pd
# Module-level default; the handlers below take `total` as a parameter and
# this name is rebound to a gr.Slider component further down.
total=5
def createdblanks(text, subject, total):
    """Build the question sheet HTML for already validated text.

    Args:
        text: The content to generate questions from.
        subject: The title entered by the user (not used here).
        total: The number of questions requested.

    Returns:
        An HTML string listing the questions followed by the answer key.

    Raises:
        gr.Error: If no question could be generated.
    """
    blanks = create_blanks(text, total)
    # create_blanks can come back with None or nothing at all; iterating
    # None used to surface as an opaque TypeError in the UI.
    if not blanks:
        raise gr.Error("Could not generate any questions from this content. Try a longer text or fewer questions.")

    parts = ["<b>Read the question carefully and fill in the blanks.</b><br/>"]
    for number, blank in enumerate(blanks, start=1):
        parts.append("<b> Q" + str(number) + ") " + blank[0] + "</b><br/>")
        parts.append("<br/>")
    parts.append("<b>Correct Answer Key</b><br/>")
    for number, blank in enumerate(blanks, start=1):
        parts.append("<b style='color:green;'>" + "Ans" + str(number) + ": " + blank[1] + "</b>")
        parts.append("<br/>")
    return "".join(parts)
def filecreate(x, subject, total):
    """Gradio handler: build the question sheet HTML from an uploaded file.

    Args:
        x: The uploaded file as handed over by the gr.File input: an
            object with a ``name`` path attribute, a plain path string,
            or None when nothing was uploaded.
        subject: The title entered by the user; only checked for being
            non-empty.
        total: The number of questions requested.

    Returns:
        The HTML produced by ``createdblanks``.

    Raises:
        gr.Error: If no file was uploaded, the file has fewer than 150
            words, or the title has no words.
    """
    if x is None:
        # Without this guard a missing upload surfaced as AttributeError.
        raise gr.Error("Invalid Input (Please upload a file).")

    # NOTE(review): some Gradio versions appear to pass the path as a plain
    # string rather than a file wrapper — confirm; both are accepted here.
    path = getattr(x, "name", x)
    # Explicit encoding so the result does not depend on the host locale;
    # undecodable bytes are replaced rather than aborting the request.
    with open(path, encoding="utf-8", errors="replace") as fo:
        text = fo.read()

    words_text = len(re.findall(r'\w+', text))
    words_subject = len(re.findall(r'\w+', subject))
    if words_text < 150:
        raise gr.Error("Invalid Input (Words limit must be more than 150 words).")
    if words_subject < 1:
        raise gr.Error("Invalid Input (Title must be one or more than one word).")
    return createdblanks(text, subject, total)
# filecreate(file,2)
# --- "Upload File" tab: the content is read from an uploaded file ---
import gradio as gr
# Rebinds `context`: here it is the HTML output component of this tab.
context = gr.HTML(label="Text")
file = gr.File(label="Upload your file (File must contain more than 150 words).")
total = gr.Slider(1,10, value=1,step=1, label="Total Number Of Questions")
subject = gr.Textbox(placeholder="Enter subject/title here...", label="Enter your title (title must contain 1 word).")
fface = gr.Interface(
fn=filecreate,
# Order matches the handler signature (x, subject, total).
inputs=[file,subject,total],
outputs=context,
# css=".gradio-container {background-color: white}",
# theme="huggingface",
allow_flagging="never",flagging_options=["Save Data"])
# fface.launch(debug=True, show_api=False)
# Combine both interfaces into one tabbed app and start it at import time.
demo = gr.TabbedInterface([iface, fface], ["Text", "Upload File"], theme="huggingface")
demo.launch(debug=True, show_api=False)