run480 committed on
Commit
04e983e
1 Parent(s): 02a75ae

Update app.py

Browse files

New task: abstractive summarization.

Files changed (1) hide show
  1. app.py +69 -18
app.py CHANGED
@@ -1,13 +1,28 @@
1
- from transformers import AutoModel, AutoModelForSeq2SeqLM, AutoModelForQuestionAnswering, AutoTokenizer, pipeline
2
- import gradio as grad
3
- import ast
4
-
5
  # 1. The RoBERTa base model is used, fine-tuned using the SQuAD 2.0 dataset.
6
  # It’s been trained on question-answer pairs, including unanswerable questions, for the task of question answering.
 
 
 
 
 
7
  # mdl_name = "deepset/roberta-base-squad2"
8
  # my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name)
9
 
10
- # 2. Different model.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # mdl_name = "distilbert-base-cased-distilled-squad"
12
  # my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name)
13
 
@@ -19,7 +34,12 @@ import ast
19
 
20
  # grad.Interface(answer_question, inputs=["text","text"], outputs="text").launch()
21
 
 
22
  # 3. Different task: language translation.
 
 
 
 
23
  # First model translates English to German.
24
  # mdl_name = "Helsinki-NLP/opus-mt-en-de"
25
  # opus_translator = pipeline("translation", model=mdl_name)
@@ -30,18 +50,49 @@ import ast
30
 
31
  # grad.Interface(translate, inputs=["text",], outputs="text").launch()
32
 
 
33
  # 4. Language translation without pipeline API.
34
  # Second model translates English to French.
35
- mdl_name = "Helsinki-NLP/opus-mt-en-fr"
36
- mdl = AutoModelForSeq2SeqLM.from_pretrained(mdl_name)
37
- my_tkn = AutoTokenizer.from_pretrained(mdl_name)
38
-
39
- def translate(text):
40
- inputs = my_tkn(text, return_tensors="pt")
41
- trans_output = mdl.generate(**inputs)
42
- response = my_tkn.decode(trans_output[0], skip_special_tokens=True)
43
- return response
44
-
45
- txt=grad.Textbox(lines=1, label="English", placeholder="English Text here")
46
- out=grad.Textbox(lines=1, label="French")
47
- grad.Interface(translate, inputs=txt, outputs=out).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # 1. The RoBERTa base model is used, fine-tuned using the SQuAD 2.0 dataset.
2
  # It’s been trained on question-answer pairs, including unanswerable questions, for the task of question answering.
3
+
4
+ # from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
5
+ # import gradio as grad
6
+ # import ast
7
+
8
  # mdl_name = "deepset/roberta-base-squad2"
9
  # my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name)
10
 
11
+ # def answer_question(question,context):
12
+ # text= "{"+"'question': '"+question+"','context': '"+context+"'}"
13
+ # di=ast.literal_eval(text)
14
+ # response = my_pipeline(di)
15
+ # return response
16
+
17
+ # grad.Interface(answer_question, inputs=["text","text"], outputs="text").launch()
18
+
19
+ #---------------------------------------------------------------------------------
20
+ # 2. Same task, different model.
21
+
22
+ # from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
23
+ # import gradio as grad
24
+ # import ast
25
+
26
  # mdl_name = "distilbert-base-cased-distilled-squad"
27
  # my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name)
28
 
 
34
 
35
  # grad.Interface(answer_question, inputs=["text","text"], outputs="text").launch()
36
 
37
+ #---------------------------------------------------------------------------------
38
  # 3. Different task: language translation.
39
+
40
+ # from transformers import pipeline
41
+ # import gradio as grad
42
+
43
  # First model translates English to German.
44
  # mdl_name = "Helsinki-NLP/opus-mt-en-de"
45
  # opus_translator = pipeline("translation", model=mdl_name)
 
50
 
51
  # grad.Interface(translate, inputs=["text",], outputs="text").launch()
52
 
53
+ #----------------------------------------------------------------------------------
54
  # 4. Language translation without pipeline API.
55
  # Second model translates English to French.
56
+
57
+ # from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
58
+ # import gradio as grad
59
+
60
+ # mdl_name = "Helsinki-NLP/opus-mt-en-fr"
61
+ # mdl = AutoModelForSeq2SeqLM.from_pretrained(mdl_name)
62
+ # my_tkn = AutoTokenizer.from_pretrained(mdl_name)
63
+
64
+ # def translate(text):
65
+ # inputs = my_tkn(text, return_tensors="pt")
66
+ # trans_output = mdl.generate(**inputs)
67
+ # response = my_tkn.decode(trans_output[0], skip_special_tokens=True)
68
+ # return response
69
+
70
+ # txt = grad.Textbox(lines=1, label="English", placeholder="English Text here")
71
+ # out = grad.Textbox(lines=1, label="French")
72
+ # grad.Interface(translate, inputs=txt, outputs=out).launch()
73
+
74
+ #-----------------------------------------------------------------------------------
75
+ # 5. Different task: abstractive summarization
76
+ # Abstractive summarization is more difficult than extractive summarization,
77
+ # which pulls key sentences from a document and combines them to form a “summary.”
78
+ # Because abstractive summarization involves paraphrasing words, it is also more time-consuming;
79
+ # however, it has the potential to produce a more polished and coherent summary.
80
+
81
# Module setup for task 5 (abstractive summarization).
# Pegasus fine-tuned on the XSum dataset; from_pretrained downloads the
# tokenizer and model weights on first run (network I/O at import time).
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
import gradio as grad

mdl_name = "google/pegasus-xsum"
pegasus_tkn = PegasusTokenizer.from_pretrained(mdl_name)
mdl = PegasusForConditionalGeneration.from_pretrained(mdl_name)
def summarize(text):
    """Return an abstractive summary of *text* produced by the Pegasus model.

    Parameters
    ----------
    text : str
        The English source document to summarize.

    Returns
    -------
    str
        The generated summary (empty string if the model yields nothing).

    Relies on the module-level ``pegasus_tkn`` (tokenizer) and ``mdl``
    (PegasusForConditionalGeneration) being loaded before the Gradio
    interface invokes this function.
    """
    # Truncate over-long inputs to the model's maximum length; padding is a
    # no-op for a single document but keeps the tensors rectangular.
    tokens = pegasus_tkn(text, truncation=True, padding="longest", return_tensors="pt")
    txt_summary = mdl.generate(**tokens)
    # batch_decode returns a list with one entry per generated sequence.
    # Return the single summary *string* so the Gradio Textbox shows plain
    # text instead of a Python list repr like "['summary']".
    decoded = pegasus_tkn.batch_decode(txt_summary, skip_special_tokens=True)
    return decoded[0] if decoded else ""
94
# Wire the summarizer into a simple two-pane Gradio UI and start the server:
# a ten-line input box for the English source text, a ten-line output box
# for the generated summary.
grad.Interface(
    fn=summarize,
    inputs=grad.Textbox(lines=10, label="English", placeholder="English Text here"),
    outputs=grad.Textbox(lines=10, label="Summary"),
).launch()