"""Fill-in-the-blank question generator with a Gradio front end.

Sentences are POS-tagged with NLTK; one noun per selected sentence is replaced
by a blank, and the resulting questions plus an answer key are rendered as
HTML in a two-tab Gradio app (pasted text / uploaded file).

NOTE(review): this file was recovered from a copy in which the line structure
was collapsed and HTML-like spans had been stripped. Parts marked
"NOTE(review)" below were reconstructed from the surrounding code and must be
confirmed against the original source.
"""
import glob
import os.path
import random
import re
from collections import defaultdict

import gradio as gr
import nltk
import pandas as pd
from nltk import pos_tag, sent_tokenize, word_tokenize

# Models required by sent_tokenize / word_tokenize / pos_tag.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

BLANK = '________________'
MIN_TEXT_WORDS = 150
MIN_SUBJECT_WORDS = 1
# The recovered string literals contained raw newlines where markup had been
# stripped. The result is shown in a gr.HTML component, so an explicit HTML
# line break is used.
# NOTE(review): confirm the exact tags used by the original (bold tags around
# the question/answer labels were probably stripped as well).
LINE_BREAK = "<br>"


def clean_and_tag(paragraph):
    """Clean the input text and return POS-tagged words grouped per sentence.

    Cleaning is required because characters such as '[', ']' and '@' get
    tagged as nouns, and this program builds blanks from nouns only.

    Args:
        paragraph: One or more paragraphs of raw text.

    Returns:
        A defaultdict mapping each cleaned sentence to a defaultdict that maps
        a POS tag to the list of words carrying that tag in the sentence.
    """
    # Keep only letters, digits, spaces, full stops and hyphens.
    para = re.sub(r'[^a-zA-Z0-9 .-]', '', paragraph)

    tags_dict_sentences = defaultdict(list)
    for sentence in sent_tokenize(para):
        # Only 'NNP' and 'NN' are used downstream, but all tags are kept.
        tags_dict = defaultdict(list)
        for token, tag in pos_tag(word_tokenize(sentence)):
            tags_dict[tag].append(token)
        tags_dict_sentences[sentence] = tags_dict
    return tags_dict_sentences


def replaceblank(word, sentence):
    """Return `sentence` with every occurrence of `word` replaced by a blank.

    Matching is case-insensitive. Whole-word occurrences are preferred so that
    a short noun is not blanked out inside a longer word (e.g. "cat" inside
    "category"); if no whole-word occurrence exists, the plain substring match
    is used so a blank is still produced.
    """
    if not word:
        # An empty pattern would match at every position.
        return sentence
    escaped = re.escape(word)
    whole_word = re.compile(r'(?<!\w)' + escaped + r'(?!\w)', re.IGNORECASE)
    replaced, hits = whole_word.subn(BLANK, sentence)
    if hits:
        return replaced
    return re.compile(escaped, re.IGNORECASE).sub(BLANK, sentence)


def removeWord(sentence, tags_dict):
    """Pick a random noun from `sentence` and replace it with a blank.

    Most geography, history and science questions revolve around nouns, so
    only nouns are blanked; proper nouns ('NNP') are preferred over common
    nouns ('NN'). To teach English, adjectives or verbs could be used instead.

    Args:
        sentence: The cleaned sentence text.
        tags_dict: Mapping of POS tag -> list of words for this sentence.

    Returns:
        A tuple (word, sentence, replaced). `word` and `replaced` are None
        when the sentence has no usable noun.
    """
    if 'NNP' in tags_dict:
        words = tags_dict['NNP']
    elif 'NN' in tags_dict:
        words = tags_dict['NN']
    else:
        print("NN and NNP not found")
        return (None, sentence, None)

    if not words:
        print("words are empty")
        return (None, sentence, None)

    # Chosen at random; a per-word score could be used to pick the best one.
    word = random.choice(words)
    return (word, sentence, replaceblank(word, sentence))


def create_blanks(paragraph, num_of_blanks):
    """Generate up to `num_of_blanks` fill-in-the-blank questions.

    At most one blank is created per sentence (several blanks from the same
    sentence would make the answers easy to deduce), so the text must contain
    more sentences than the number of blanks requested.

    Args:
        paragraph: The paragraph(s) to generate blanks from.
        num_of_blanks: Number of blanks to create.

    Returns:
        A list of [sentence_with_blank, missing_word] pairs. It may be shorter
        than requested when some sentences contain no noun.
    """
    tags_dict_sentences = clean_and_tag(paragraph)

    # Blanks cannot outnumber the sentences.
    possib_blanks = len(tags_dict_sentences)
    if possib_blanks <= num_of_blanks:
        # NOTE(review): the original handling of this case was lost; an empty
        # list is returned so callers iterating over the result keep working.
        print("not enough sentences for the requested number of blanks")
        return []

    rand_sents = list(tags_dict_sentences)
    random.shuffle(rand_sents)

    # NOTE(review): loop reconstructed from the visible prefix
    # ("while prepared_blanks ...") and from how callers read the result
    # (blank[0] is the question text, blank[1] the answer).
    blanks_arr = []
    for sentence in rand_sents:
        if len(blanks_arr) >= num_of_blanks:
            break
        word, _, replaced = removeWord(sentence, tags_dict_sentences[sentence])
        if replaced is not None:
            blanks_arr.append([replaced, word])
    return blanks_arr


def _render_quiz(blanks):
    """Render [question, answer] pairs as an HTML question list + answer key."""
    parts = ["Read the question carefully and fill in the blanks." + LINE_BREAK]
    for number, blank in enumerate(blanks, start=1):
        parts.append(f" Q{number}) {blank[0]}{LINE_BREAK}")
        parts.append(LINE_BREAK)
    parts.append("Correct Answer Key" + LINE_BREAK)
    for number, blank in enumerate(blanks, start=1):
        parts.append(f"Ans{number}: {blank[1]}")
        parts.append(LINE_BREAK)
    return "".join(parts)


def _validate_inputs(text, subject):
    """Raise gr.Error when the text or the title is too short."""
    if len(re.findall(r'\w+', text)) < MIN_TEXT_WORDS:
        raise gr.Error("Invalid Input (Words limit must be more than 150 words).")
    if len(re.findall(r'\w+', subject)) < MIN_SUBJECT_WORDS:
        raise gr.Error("Invalid Input (Title must be one or more than one word).")


def createdblanks(text, subject, total):
    """Build the HTML quiz for `text` with `total` questions.

    `subject` is accepted for interface compatibility but is not used.
    """
    return _render_quiz(create_blanks(text, total))


def createdblanks_text(text, subject, total):
    """Gradio handler for the "Text" tab.

    NOTE(review): the head of this function was lost. The input validation is
    assumed to mirror `filecreate`, as the text box label announces the same
    150-word limit; the rendering matches the visible tail of the original.
    """
    _validate_inputs(text, subject)
    return createdblanks(text, subject, total)


def filecreate(x, subject, total):
    """Gradio handler for the "Upload File" tab.

    Args:
        x: The uploaded file object provided by gr.File (its `.name` is the
            path of the uploaded file).
        subject: Title entered by the user; must contain at least one word.
        total: Number of questions to generate.

    Returns:
        The HTML quiz generated from the file's content.

    Raises:
        gr.Error: If no file was uploaded or the inputs are too short.
    """
    if x is None:
        raise gr.Error("Invalid Input (No file uploaded).")
    with open(x.name, encoding="utf-8") as fo:
        text = fo.read()
    _validate_inputs(text, subject)
    return createdblanks(text, subject, total)


# --- "Text" tab --------------------------------------------------------------
context = gr.Textbox(
    lines=10,
    placeholder="Enter paragraph/content here...",
    label="Enter your content (words input must be more than 150 words).",
)
total = gr.Slider(1, 10, value=1, step=1, label="Total Number Of Questions")
subject = gr.Textbox(
    placeholder="Enter subject/title here...",
    label="Enter your title (title must contain 1 word)",
)
output = gr.HTML(label="Question and Answers")

iface = gr.Interface(
    fn=createdblanks_text,
    inputs=[context, subject, total],
    outputs=output,
    allow_flagging="never",
    flagging_options=["Save Data"],
)

# --- "Upload File" tab ---------------------------------------------------------
context = gr.HTML(label="Text")
file = gr.File(label="Upload your file (File must contain more than 150 words).")
total = gr.Slider(1, 10, value=1, step=1, label="Total Number Of Questions")
subject = gr.Textbox(
    placeholder="Enter subject/title here...",
    label="Enter your title (title must contain 1 word).",
)

fface = gr.Interface(
    fn=filecreate,
    inputs=[file, subject, total],
    outputs=context,
    allow_flagging="never",
    flagging_options=["Save Data"],
)

demo = gr.TabbedInterface([iface, fface], ["Text", "Upload File"], theme="huggingface")
demo.launch(debug=True, show_api=False)