Spaces:
Sleeping
Sleeping
from nltk import pos_tag, word_tokenize, sent_tokenize | |
import nltk | |
nltk.download('punkt') | |
nltk.download('averaged_perceptron_tagger') | |
import re | |
import random | |
from collections import defaultdict | |
'''
Function to clean the input paragraph(s) and return the tagged words of each sentence.
Cleaning is needed because symbols such as '[' , ']' and '@' get tagged as nouns,
and this program builds blanks from nouns only.
'''
def clean_and_tag(paragraph):
    '''
    Clean the paragraph(s) and group the words of every sentence by POS tag.

    paragraph: the raw text to process.
    Returns a defaultdict keyed by the cleaned sentence; each value is a
    defaultdict mapping a POS tag (e.g. 'NN', 'NNP') to the list of words in
    that sentence carrying the tag.
    '''
    # Turn every run of whitespace (newlines, tabs, ...) into one space first.
    # The character filter below keeps only the plain space, so a line break
    # would otherwise be deleted and glue two words together
    # ("haste to\nget away" -> "haste toget away").
    para = re.sub(r'\s+', ' ', paragraph)
    # Keep letters, digits, spaces, '.' and '-' only; stray symbols would be
    # tagged as nouns and end up as blanks.
    para = re.sub(r'[^a-zA-Z0-9 .-]', '', para)

    # dictionary for all sentences
    tags_dict_sentences = defaultdict(list)
    for sentence in sent_tokenize(para):
        # tag each word of the current sentence and bucket the words by tag
        # (only 'NNP' and 'NN' are used later on)
        tags_dict = defaultdict(list)
        for token, tag in pos_tag(word_tokenize(sentence)):
            tags_dict[tag].append(token)
        tags_dict_sentences[sentence] = tags_dict
    return tags_dict_sentences
''' | |
function to replace a word with a blank | |
''' | |
def replaceblank(word, sentence):
    '''
    Return sentence with every occurrence of word replaced by a blank.

    word: the word to hide (matched case-insensitively).
    sentence: the sentence to put the blank into.

    Whole-word occurrences are preferred so that hiding "net" does not also
    damage "Internet". If the word never occurs as a whole word, the plain
    substring replacement is used instead. An empty word leaves the sentence
    unchanged.
    '''
    blank = '________________'
    if not word:
        # An empty pattern matches between every character and would fill
        # the whole sentence with blanks.
        return sentence

    escaped = re.escape(word)
    # Lookarounds instead of \b so that words ending in '.' or '-' still match.
    whole_word = re.compile(r'(?<!\w)' + escaped + r'(?!\w)', re.IGNORECASE)
    replaced, hits = whole_word.subn(blank, sentence)
    if hits:
        return replaced
    # Fallback: the word only occurs inside longer words.
    return re.compile(escaped, re.IGNORECASE).sub(blank, sentence)
''' | |
function to basically remove a word and replace it with a blank if possible | |
As most of the geography, history, science books revolve around nouns, most fill in | |
the blanks are nouns. | |
if you want to teach children english, you can pick adjectives or verbs as well | |
''' | |
def removeWord(sentence, tags_dict):
    '''
    Pick a random noun of the sentence and hide it behind a blank.

    sentence: the sentence text.
    tags_dict: mapping of POS tag -> list of words of that sentence.
    Returns (word, sentence, sentence_with_blank); when no noun is available
    the result is (None, sentence, None).
    '''
    # proper nouns win over common nouns; the first tag present decides
    for noun_tag in ('NNP', 'NN'):
        if noun_tag in tags_dict:
            candidates = tags_dict[noun_tag]
            break
    else:
        print("NN and NNP not found")
        return (None, sentence, None)

    if len(candidates) == 0:
        print("words are empty")
        return (None, sentence, None)

    # random pick; with a score per noun the best one could be chosen instead
    chosen = random.choice(candidates)
    return (chosen, sentence, replaceblank(chosen, sentence))
'''
Function which combines everything and generates the blanks.
Inputs are paragraph = paragraph(s) you want to generate blanks from
num_of_blanks = number of blanks you want to create
Right now the function only creates blanks up to a limit. When the number of
blanks to be created is more than the total number of sentences, we would have
to create multiple blanks from the same sentence, so the answers could be
deduced easily. If you don't care about that, the code can be extended a
little to allow it.
Output is an array.
Each entry in the array is an array again, whose 1st element is the sentence
with the blank and whose 2nd element is the missing word.
'''
def _fill_blanks(tags_dict_sentences, num_of_blanks):
    '''Blank one noun in up to num_of_blanks sentences taken in random order.'''
    rand_sents = list(tags_dict_sentences)
    random.shuffle(rand_sents)

    blanks_arr = []
    for sent in rand_sents:
        if len(blanks_arr) >= num_of_blanks:
            break
        word, _, replaced = removeWord(sent, tags_dict_sentences[sent])
        # sentences without a usable noun are skipped
        if replaced is not None:
            blanks_arr.append([replaced, word])

    if len(blanks_arr) < num_of_blanks:
        print("sorry, couldn't form more than {} blanks".format(len(blanks_arr)))
    return blanks_arr


def create_blanks(paragraph, num_of_blanks):
    '''
    Generate fill-in-the-blank questions from the paragraph(s).

    paragraph: text to build the questions from.
    num_of_blanks: number of blanks wanted; at most one per sentence.

    Returns a list of [sentence_with_blank, missing_word] pairs. It may hold
    fewer entries than requested when some sentences contain no noun. When
    more blanks than sentences are requested, a smaller number is asked for on
    stdin; None is returned if that answer is not usable.
    '''
    # retrieve sentences whose words are tagged
    tags_dict_sentences = clean_and_tag(paragraph)
    # one blank per sentence, so this is the upper bound
    possib_blanks = len(tags_dict_sentences)

    # Asking for exactly as many blanks as there are sentences is fine; only
    # a larger request needs the interactive fallback.
    if num_of_blanks <= possib_blanks:
        return _fill_blanks(tags_dict_sentences, num_of_blanks)

    prompt = (
        "number of blanks you want to create are more\n"
        "than number of sentences, please give a number not more than {}.\n"
        "if you want to quit, just press any key other than integer and enter\n"
    ).format(possib_blanks)
    try:
        num_of_blanks = int(input(prompt))
    except (ValueError, EOFError, OSError):
        # non-integer answer, or no usable stdin (e.g. running behind a web UI)
        print('quittin :(')
        return None

    if num_of_blanks > possib_blanks:
        return None
    return _fill_blanks(tags_dict_sentences, num_of_blanks)
# Sample passage kept in the module; none of the functions visible in this
# file read this global (they all take their own `text` parameter).
text = """A Lion lay asleep in the forest, his great head resting on his paws. A timid little Mouse came upon him unexpectedly, and in her fright and haste to
get away, ran across the Lion's nose. Roused from his nap, the Lion laid his huge paw angrily on the tiny creature to kill her. "Spare me!" begged
the poor Mouse. "Please let me go and some day I will surely repay you." The Lion was much amused to think that a Mouse could ever help him. But he
was generous and finally let the Mouse go. Some days later, while stalking his prey in the forest, the Lion was caught in the toils of a hunter's
net. Unable to free himself, he filled the forest with his angry roaring. The Mouse knew the voice and quickly found the Lion struggling in the net.
Running to one of the great ropes that bound him, she gnawed it until it parted, and soon the Lion was free. "You laughed when I said I would repay
you," said the Mouse. "Now you see that even a Mouse can help a Lion." """
# Placeholder; the name is rebound to a gr.File component further down.
file =None
import gradio as gr
# Placeholder count; the name is rebound to a gr.Slider before the first
# interface is built.
total = 5
def createdblanks_text(text, subject, total):
    '''
    Validate the pasted text and title, then render the questions as HTML.

    text: the content to build blanks from (at least 150 words).
    subject: the title (at least one word).
    total: number of questions wanted.

    Returns an HTML string with the numbered questions followed by the
    answer key. Raises gr.Error when the input is too short or when the
    blanks could not be generated.
    '''
    words_text = len(re.findall(r'\w+', text))
    words_subject = len(re.findall(r'\w+', subject))
    if words_text < 150:
        raise gr.Error("Invalid Input (Words limit must be more than 150 words).")
    if words_subject < 1:
        raise gr.Error("Invalid Input (Title must be one or more than one word).")

    blanks = create_blanks(text, total)
    if blanks is None:
        # create_blanks gives up with None when more questions are requested
        # than it can serve; show that in the UI instead of failing on the
        # loops below.
        raise gr.Error("Could not create the requested number of blanks. Please choose fewer questions or provide more sentences.")

    parts = ["<b>Read the question carefully and fill in the blanks.</b><br/>"]
    for number, blank in enumerate(blanks, 1):
        parts.append("<b> Q{}) {}</b><br/><br/>".format(number, blank[0]))
    parts.append("<b>Correct Answer Key</b><br/>")
    for number, blank in enumerate(blanks, 1):
        parts.append("<b style='color:green;'>Ans{}: {}</b><br/>".format(number, blank[1]))
    return "".join(parts)
import gradio as gr

# Components and interface for pasting the content directly.
context = gr.Textbox(
    lines=10,
    placeholder="Enter paragraph/content here...",
    label="Enter your content (words input must be more than 150 words).",
)
total = gr.Slider(1, 10, value=1, step=1, label="Total Number Of Questions")
subject = gr.Textbox(
    placeholder="Enter subject/title here...",
    label="Enter your title (title must contain 1 word)",
)
output = gr.HTML(label="Question and Answers")
iface = gr.Interface(
    fn=createdblanks_text,
    inputs=[context, subject, total],
    outputs=output,
    allow_flagging="never",
    flagging_options=["Save Data"],
)
# iface.launch(debug=True)
import glob
import os.path
import pandas as pd

# `total` is rebound to a plain number here, after the slider above has
# already been handed to the interface.
total = 5
def createdblanks(text, subject, total):
    '''
    Render the fill-in-the-blank questions for text as HTML.

    text: the content to build blanks from.
    subject: the title; not used by this function.
    total: number of questions wanted.

    Returns an HTML string with the numbered questions followed by the
    answer key. Raises gr.Error when the blanks could not be generated.
    '''
    blanks = create_blanks(text, total)
    if blanks is None:
        # create_blanks gives up with None when more questions are requested
        # than it can serve; show that in the UI instead of failing on the
        # loops below.
        raise gr.Error("Could not create the requested number of blanks. Please choose fewer questions or provide more sentences.")

    parts = ["<b>Read the question carefully and fill in the blanks.</b><br/>"]
    for number, blank in enumerate(blanks, 1):
        parts.append("<b> Q{}) {}</b><br/><br/>".format(number, blank[0]))
    parts.append("<b>Correct Answer Key</b><br/>")
    for number, blank in enumerate(blanks, 1):
        parts.append("<b style='color:green;'>Ans{}: {}</b><br/>".format(number, blank[1]))
    return "".join(parts)
def filecreate(x, subject, total):
    '''
    Read the uploaded file, validate it and render the questions as HTML.

    x: the uploaded file as handed over by the gr.File input; either an
       object with a .name path attribute or a plain path string.
    subject: the title (at least one word).
    total: number of questions wanted.

    Returns the HTML produced by createdblanks. Raises gr.Error when no file
    was uploaded or when the input is too short.
    '''
    if x is None:
        # nothing uploaded; x.name would fail with AttributeError
        raise gr.Error("Invalid Input (Please upload a file).")

    # accept both a file object exposing .name and a bare path string
    path = getattr(x, "name", x)
    with open(path) as fo:
        text = fo.read()

    words_text = len(re.findall(r'\w+', text))
    words_subject = len(re.findall(r'\w+', subject))
    if words_text < 150:
        raise gr.Error("Invalid Input (Words limit must be more than 150 words).")
    if words_subject < 1:
        raise gr.Error("Invalid Input (Title must be one or more than one word).")
    return createdblanks(text, subject, total)
# filecreate(file,2) | |
import gradio as gr

# Components and interface for uploading a file. The names context, file,
# total and subject are rebound here; iface already holds the earlier
# components.
context = gr.HTML(label="Text")
file = gr.File(label="Upload your file (File must contain more than 150 words).")
total = gr.Slider(1, 10, value=1, step=1, label="Total Number Of Questions")
subject = gr.Textbox(
    placeholder="Enter subject/title here...",
    label="Enter your title (title must contain 1 word).",
)
fface = gr.Interface(
    fn=filecreate,
    inputs=[file, subject, total],
    outputs=context,
    # css=".gradio-container {background-color: white}",
    # theme="huggingface",
    allow_flagging="never",
    flagging_options=["Save Data"],
)
# fface.launch(debug=True, show_api=False)

# Both interfaces are served as tabs of one app.
demo = gr.TabbedInterface(
    [iface, fface],
    ["Text", "Upload File"],
    theme="huggingface",
)
demo.launch(debug=True, show_api=False)