run480's picture
Update app.py
ab9d3ba verified
raw
history blame
6.8 kB
# 1. The RoBERTa base model is used, fine-tuned using the SQuAD 2.0 dataset.
# It’s been trained on question-answer pairs, including unanswerable questions, for the task of question answering.
# from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
# import gradio as grad
# import ast
# mdl_name = "deepset/roberta-base-squad2"
# my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name)
# def answer_question(question,context):
# text= "{"+"'question': '"+question+"','context': '"+context+"'}"
# di=ast.literal_eval(text)
# response = my_pipeline(di)
# return response
# grad.Interface(answer_question, inputs=["text","text"], outputs="text").launch()
#---------------------------------------------------------------------------------
# 2. Same task, different model.
# from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
# import gradio as grad
# import ast
# mdl_name = "distilbert-base-cased-distilled-squad"
# my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name)
# def answer_question(question,context):
# text= "{"+"'question': '"+question+"','context': '"+context+"'}"
# di=ast.literal_eval(text)
# response = my_pipeline(di)
# return response
# grad.Interface(answer_question, inputs=["text","text"], outputs="text").launch()
#---------------------------------------------------------------------------------
# 3. Different task: language translation.
# from transformers import pipeline
# import gradio as grad
# First model translates English to German.
# mdl_name = "Helsinki-NLP/opus-mt-en-de"
# opus_translator = pipeline("translation", model=mdl_name)
# def translate(text):
# response = opus_translator(text)
# return response
# grad.Interface(translate, inputs=["text",], outputs="text").launch()
#----------------------------------------------------------------------------------
# 4. Language translation without pipeline API.
# Second model translates English to French.
# from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# import gradio as grad
# mdl_name = "Helsinki-NLP/opus-mt-en-fr"
# mdl = AutoModelForSeq2SeqLM.from_pretrained(mdl_name)
# my_tkn = AutoTokenizer.from_pretrained(mdl_name)
# def translate(text):
# inputs = my_tkn(text, return_tensors="pt")
# trans_output = mdl.generate(**inputs)
# response = my_tkn.decode(trans_output[0], skip_special_tokens=True)
# return response
# txt = grad.Textbox(lines=1, label="English", placeholder="English Text here")
# out = grad.Textbox(lines=1, label="French")
# grad.Interface(translate, inputs=txt, outputs=out).launch()
#-----------------------------------------------------------------------------------
# 5. Different task: abstractive summarization
# Abstractive summarization is more difficult than extractive summarization,
# which pulls key sentences from a document and combines them to form a “summary.”
# Because abstractive summarization involves paraphrasing words, it is also more time-consuming;
# however, it has the potential to produce a more polished and coherent summary.
# from transformers import PegasusForConditionalGeneration, PegasusTokenizer
# import gradio as grad
# mdl_name = "google/pegasus-xsum"
# pegasus_tkn = PegasusTokenizer.from_pretrained(mdl_name)
# mdl = PegasusForConditionalGeneration.from_pretrained(mdl_name)
# def summarize(text):
# tokens = pegasus_tkn(text, truncation=True, padding="longest", return_tensors="pt")
# txt_summary = mdl.generate(**tokens)
# response = pegasus_tkn.batch_decode(txt_summary, skip_special_tokens=True)
# return response
# txt = grad.Textbox(lines=10, label="English", placeholder="English Text here")
# out = grad.Textbox(lines=10, label="Summary")
# grad.Interface(summarize, inputs=txt, outputs=out).launch()
#------------------------------------------------------------------------------------------
# 6. Same model, tuned with some generation parameters: num_return_sequences=5, max_length=200, temperature=1.5, num_beams=10
# from transformers import PegasusForConditionalGeneration, PegasusTokenizer
# import gradio as grad
# mdl_name = "google/pegasus-xsum"
# pegasus_tkn = PegasusTokenizer.from_pretrained(mdl_name)
# mdl = PegasusForConditionalGeneration.from_pretrained(mdl_name)
# def summarize(text):
# tokens = pegasus_tkn(text, truncation=True, padding="longest", return_tensors="pt")
# translated_txt = mdl.generate(**tokens, num_return_sequences=5, max_length=200, temperature=1.5, num_beams=10)
# response = pegasus_tkn.batch_decode(translated_txt, skip_special_tokens=True)
# return response
# txt = grad.Textbox(lines=10, label="English", placeholder="English Text here")
# out = grad.Textbox(lines=10, label="Summary")
# grad.Interface(summarize, inputs=txt, outputs=out).launch()
#-----------------------------------------------------------------------------------
# 7. Zero-Shot Learning:
# Zero-shot learning, as the name implies, means using a pretrained model, trained on a certain set of data,
# on a different set of data that it has not seen during training. For example, this would mean taking
# a model from Hugging Face that was trained on a certain dataset and using it for inference on examples it has never seen before.
# Zero-shot classification implementations are most frequently found among transformer models.
# There are more than 60 transformer models in the Hugging Face library that support zero-shot classification.
# When we discuss zero-shot text classification, one additional thing springs to mind:
# few-shot classification, which is very similar to zero-shot classification.
# However, in contrast with zero-shot classification, few-shot classification makes use of a very small number of labeled samples during the training process.
# Implementations of few-shot classification methods can be found at OpenAI, where the GPT-3 classifier is a well-known example of a few-shot classifier.
from transformers import pipeline
import gradio as grad
# Zero-shot classification pipeline, built once at import time and reused by classify().
# The checkpoint is named explicitly (it is the task's default checkpoint) so the app
# does not depend on the library's implicit default, which transformers warns against.
zero_shot_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
def classify(text: str, labels: str):
    """Classify ``text`` against a comma-separated list of candidate labels.

    Args:
        text: The text to classify.
        labels: Candidate labels separated by commas, for example
            ``"software, politics, love, movies, emergency, advertisement, sports"``.
            Whitespace around each label is ignored and empty entries are dropped.

    Returns:
        Whatever ``zero_shot_classifier`` returns for ``text`` and the parsed labels.

    Raises:
        ValueError: If ``labels`` contains no non-empty label.
    """
    # Strip each entry and discard blanks so that input such as "sports, politics,"
    # does not produce " politics" or "" as candidate labels.
    stripped = (label.strip() for label in labels.split(","))
    candidate_labels = [label for label in stripped if label]
    if not candidate_labels:
        raise ValueError("At least one non-empty, comma-separated label is required.")
    return zero_shot_classifier(text, candidate_labels)
# Gradio components: one box for the input text, one for the candidate labels,
# and one for the classification result.
txt = grad.Textbox(label="English", placeholder="text to be classified", lines=1)
labels = grad.Textbox(label="Labels", placeholder="comma separated labels", lines=1)
out = grad.Textbox(label="Classification", lines=1)

# Wire the components to classify() and launch the interface.
grad.Interface(
    fn=classify,
    inputs=[txt, labels],
    outputs=out,
).launch()