run480 committed on
Commit
04e983e
1 Parent(s): 02a75ae

Update app.py

Browse files

New task: abstractive summarization.

Files changed (1) hide show
  1. app.py +69 -18
app.py CHANGED
@@ -1,13 +1,28 @@
1
- from transformers import AutoModel, AutoModelForSeq2SeqLM, AutoModelForQuestionAnswering, AutoTokenizer, pipeline
2
- import gradio as grad
3
- import ast
4
-
5
  # 1. The RoBERTa base model is used, fine-tuned using the SQuAD 2.0 dataset.
6
  # It’s been trained on question-answer pairs, including unanswerable questions, for the task of question answering.
 
 
 
 
 
7
  # mdl_name = "deepset/roberta-base-squad2"
8
  # my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name)
9
 
10
- # 2. Different model.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # mdl_name = "distilbert-base-cased-distilled-squad"
12
  # my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name)
13
 
@@ -19,7 +34,12 @@ import ast
19
 
20
  # grad.Interface(answer_question, inputs=["text","text"], outputs="text").launch()
21
 
 
22
  # 3. Different task: language translation.
 
 
 
 
23
  # First model translates English to German.
24
  # mdl_name = "Helsinki-NLP/opus-mt-en-de"
25
  # opus_translator = pipeline("translation", model=mdl_name)
@@ -30,18 +50,49 @@ import ast
30
 
31
  # grad.Interface(translate, inputs=["text",], outputs="text").launch()
32
 
 
33
  # 4. Language translation without pipeline API.
34
  # Second model translates English to French.
35
- mdl_name = "Helsinki-NLP/opus-mt-en-fr"
36
- mdl = AutoModelForSeq2SeqLM.from_pretrained(mdl_name)
37
- my_tkn = AutoTokenizer.from_pretrained(mdl_name)
38
-
39
- def translate(text):
40
- inputs = my_tkn(text, return_tensors="pt")
41
- trans_output = mdl.generate(**inputs)
42
- response = my_tkn.decode(trans_output[0], skip_special_tokens=True)
43
- return response
44
-
45
- txt=grad.Textbox(lines=1, label="English", placeholder="English Text here")
46
- out=grad.Textbox(lines=1, label="French")
47
- grad.Interface(translate, inputs=txt, outputs=out).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # 1. The RoBERTa base model is used, fine-tuned using the SQuAD 2.0 dataset.
2
  # It’s been trained on question-answer pairs, including unanswerable questions, for the task of question answering.
3
+
4
+ # from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
5
+ # import gradio as grad
6
+ # import ast
7
+
8
  # mdl_name = "deepset/roberta-base-squad2"
9
  # my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name)
10
 
11
+ # def answer_question(question,context):
12
+ # text= "{"+"'question': '"+question+"','context': '"+context+"'}"
13
+ # di=ast.literal_eval(text)
14
+ # response = my_pipeline(di)
15
+ # return response
16
+
17
+ # grad.Interface(answer_question, inputs=["text","text"], outputs="text").launch()
18
+
19
+ #---------------------------------------------------------------------------------
20
+ # 2. Same task, different model.
21
+
22
+ # from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
23
+ # import gradio as grad
24
+ # import ast
25
+
26
  # mdl_name = "distilbert-base-cased-distilled-squad"
27
  # my_pipeline = pipeline('question-answering', model=mdl_name, tokenizer=mdl_name)
28
 
 
34
 
35
  # grad.Interface(answer_question, inputs=["text","text"], outputs="text").launch()
36
 
37
+ #---------------------------------------------------------------------------------
38
  # 3. Different task: language translation.
39
+
40
+ # from transformers import pipeline
41
+ # import gradio as grad
42
+
43
  # First model translates English to German.
44
  # mdl_name = "Helsinki-NLP/opus-mt-en-de"
45
  # opus_translator = pipeline("translation", model=mdl_name)
 
50
 
51
  # grad.Interface(translate, inputs=["text",], outputs="text").launch()
52
 
53
+ #----------------------------------------------------------------------------------
54
  # 4. Language translation without pipeline API.
55
  # Second model translates English to French.
56
+
57
+ # from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
58
+ # import gradio as grad
59
+
60
+ # mdl_name = "Helsinki-NLP/opus-mt-en-fr"
61
+ # mdl = AutoModelForSeq2SeqLM.from_pretrained(mdl_name)
62
+ # my_tkn = AutoTokenizer.from_pretrained(mdl_name)
63
+
64
+ # def translate(text):
65
+ # inputs = my_tkn(text, return_tensors="pt")
66
+ # trans_output = mdl.generate(**inputs)
67
+ # response = my_tkn.decode(trans_output[0], skip_special_tokens=True)
68
+ # return response
69
+
70
+ # txt = grad.Textbox(lines=1, label="English", placeholder="English Text here")
71
+ # out = grad.Textbox(lines=1, label="French")
72
+ # grad.Interface(translate, inputs=txt, outputs=out).launch()
73
+
74
+ #-----------------------------------------------------------------------------------
75
+ # 5. Different task: abstractive summarization
76
+ # Abstractive summarization is more difficult than extractive summarization,
77
+ # which pulls key sentences from a document and combines them to form a “summary.”
78
+ # Because abstractive summarization involves paraphrasing words, it is also more time-consuming;
79
+ # however, it has the potential to produce a more polished and coherent summary.
80
+
81
# Module setup for task 5 (abstractive summarization).
# Pegasus fine-tuned on the XSum dataset; from_pretrained downloads the
# tokenizer and model weights on first run (network I/O at import time).
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
import gradio as grad

mdl_name = "google/pegasus-xsum"
pegasus_tkn = PegasusTokenizer.from_pretrained(mdl_name)
mdl = PegasusForConditionalGeneration.from_pretrained(mdl_name)
def summarize(text):
    """Return an abstractive summary of *text* produced by the Pegasus model.

    Parameters
    ----------
    text : str
        The English source document to summarize.

    Returns
    -------
    str
        The generated summary (empty string if the model yields nothing).

    Relies on the module-level ``pegasus_tkn`` (tokenizer) and ``mdl``
    (PegasusForConditionalGeneration) being loaded before the Gradio
    interface invokes this function.
    """
    # Truncate over-long inputs to the model's maximum length; padding is a
    # no-op for a single document but keeps the tensors rectangular.
    tokens = pegasus_tkn(text, truncation=True, padding="longest", return_tensors="pt")
    txt_summary = mdl.generate(**tokens)
    # batch_decode returns a list with one entry per generated sequence.
    # Return the single summary *string* so the Gradio Textbox shows plain
    # text instead of a Python list repr like "['summary']".
    decoded = pegasus_tkn.batch_decode(txt_summary, skip_special_tokens=True)
    return decoded[0] if decoded else ""
94
# Wire the summarizer into a simple two-pane Gradio UI and start the server:
# a ten-line input box for the English source text, a ten-line output box
# for the generated summary.
grad.Interface(
    fn=summarize,
    inputs=grad.Textbox(lines=10, label="English", placeholder="English Text here"),
    outputs=grad.Textbox(lines=10, label="Summary"),
).launch()