oceankim's picture
Update app.py
9794d50 verified
raw
history blame
4.46 kB
import gradio
import torch
from transformers import pipeline
from transformers import BertForSequenceClassification, BertTokenizer
# Load the FinBERT tokenizer and sequence-classification model from the
# 'ProsusAI/finbert' checkpoint once, at import time; the functions below
# read these module-level objects directly.
tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')
# Summarization pipeline backed by 't5-base'; called by summarize_sentences.
summarizer = pipeline('summarization', model='t5-base')
# NOTE(review): this list is not referenced anywhere in the visible code, and
# its order differs from the index-to-label mapping applied to the argmax
# further down (0 -> positive, 1 -> negative, otherwise neutral). Confirm the
# intended order before indexing model outputs with it.
classifier_emotions = ['positive', 'neutral', 'negative']
# Alternative emotion-classifier configuration, left commented out.
# classifier_model_name = 'bhadresh-savani/distilbert-base-uncased-emotion'
# classifier_emotions = ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise']
def summarize_sentences(sentences_by_emotion, min_length, max_length):
    """
    Print one summary line per non-empty emotion bucket.

    For every key whose sentence collection is non-empty, the sentences are
    joined with single spaces, passed to the module-level ``summarizer`` and
    the first returned summary is printed, prefixed by the upper-cased key.

    Args:
        sentences_by_emotion (dict): Maps a label to a collection of sentences.
        min_length (int): Forwarded to the summarizer as ``min_length``.
        max_length (int): Forwarded to the summarizer as ``max_length``.
    Returns:
        None. The summaries are written to standard output only.
    """
    for emotion, sentences in sentences_by_emotion.items():
        # Nothing to summarize for an empty bucket.
        if len(sentences) == 0:
            continue
        joined = ' '.join(sentences)
        result = summarizer(joined, min_length=min_length, max_length=max_length)
        summary_text = result[0]['summary_text']
        print(f"{emotion.upper()}: {summary_text}\n")
def chunk_text_to_window_size_and_predict_proba(
    input_ids, attention_mask, total_len, window_length=510, max_chunks=25
):
    """
    Classify a long token sequence window by window.

    The token ids are cut into consecutive windows of at most
    ``window_length`` tokens. Each window is wrapped in the tokenizer's
    [CLS] / [SEP] ids, run through the module-level ``model`` with gradient
    tracking disabled, and the softmax of the first model output is collected.

    Args:
        input_ids (List[int]): Token ids of the input text, without special
            tokens (they are added per window here).
        attention_mask (List[int]): Attention mask aligned with input_ids.
        total_len (int): Number of tokens to cover; windowing stops once a
            window reaches this position.
        window_length (int): Maximum tokens per window before the two special
            tokens are added. The default of 510 presumably keeps each window
            within a 512-token model limit -- confirm for other checkpoints.
        max_chunks (int): Upper bound on the number of windows processed;
            tokens beyond ``window_length * max_chunks`` are ignored.
    Returns:
        proba_list (List[torch.Tensor]): One softmax tensor per window, in
            order. At least one window is always processed, so an empty input
            yields a single prediction for the bare [CLS][SEP] pair.
    Raises:
        ValueError: If window_length or max_chunks is not positive.
    """
    if window_length <= 0 or max_chunks <= 0:
        raise ValueError('window_length and max_chunks must be positive')

    # Ask the tokenizer for the special-token ids instead of hard-coding them.
    cls_id = tokenizer.cls_token_id
    sep_id = tokenizer.sep_token_id

    proba_list = []
    print(f'Total Length: {total_len}')

    # max(total_len, 1) keeps the "always run once" behaviour for empty input;
    # slicing the range enforces the cap on the number of windows.
    window_starts = range(0, max(total_len, 1), window_length)[:max_chunks]
    for count, start in enumerate(window_starts, start=1):
        end = start + window_length
        print(f'Start: {start}')
        print(f'End: {end}')

        # Slicing past the end of a list is safe, so the last window is
        # simply shorter than window_length.
        input_ids_chunk = [cls_id] + input_ids[start:end] + [sep_id]
        attention_mask_chunk = [1] + attention_mask[start:end] + [1]

        # Build integer tensors directly; going through torch.Tensor would
        # round-trip the ids via float32.
        input_dict = {
            'input_ids': torch.tensor([input_ids_chunk], dtype=torch.long),
            'attention_mask': torch.tensor([attention_mask_chunk], dtype=torch.int),
        }

        # Inference only: without no_grad every returned tensor would keep
        # its autograd graph (and the activations behind it) alive.
        with torch.no_grad():
            outputs = model(**input_dict)
            probabilities = torch.nn.functional.softmax(outputs[0], dim=-1)

        decoded = tokenizer.decode(input_ids_chunk)
        print(f'Loop Count: {count}')
        print("########:", decoded, ":##############")
        print("########:", probabilities, ":##############")
        proba_list.append(probabilities)

    return proba_list
def get_mean_from_proba(proba_list):
    """
    Average the per-chunk class probabilities into a single distribution.

    Args:
        proba_list (List[torch.Tensor]): Probability tensors, one per chunk.
            Each is expected to have shape (1, num_classes); the reshape
            below fails for any other leading dimension.
    Returns:
        mean (torch.Tensor): Tensor of shape (num_classes,) holding the mean
            probability of each class across all chunks.
    Raises:
        RuntimeError: Propagated from torch when proba_list is empty or the
            tensors cannot be stacked / reshaped.
    """
    # Pure post-processing: no gradients are needed for the average.
    with torch.no_grad():
        # (num_chunks, 1, num_classes)
        stacked = torch.stack(proba_list)
        # Drop the singleton batch axis. reshape replaces the deprecated
        # non-in-place Tensor.resize, which emits a warning on every call.
        stacked = stacked.reshape(stacked.shape[0], stacked.shape[2])
        # Average over the chunk axis.
        mean = stacked.mean(dim=0)
    return mean
def my_inference_function(text):
    """
    Classify the overall sentiment of a piece of text.

    The text is tokenized, scored chunk by chunk, the chunk probabilities are
    averaged, and the index of the highest mean probability is mapped to a
    label string.

    NOTE(review): the ``def`` line and the tokenization step were absent from
    the reviewed text (the body had ``return`` statements outside any function
    and read an undefined ``tokens``). Both were reconstructed from the
    ``fn = my_inference_function`` / ``inputs = "text"`` wiring of the Gradio
    interface and from how the chunking helper consumes its arguments --
    confirm against the original file.

    Args:
        text (str): Raw input text.
    Returns:
        str: "Positive Sentiment", "Negative Sentiment" or "Neutral".
    """
    # Special tokens are left out here because the chunking helper adds
    # [CLS]/[SEP] around every window itself.
    tokens = tokenizer(text, add_special_tokens=False)
    input_ids = tokens['input_ids']
    attention_mask = tokens['attention_mask']
    total_len = len(input_ids)

    proba_list = chunk_text_to_window_size_and_predict_proba(
        input_ids, attention_mask, total_len
    )
    mean = get_mean_from_proba(proba_list)

    # Index of the most probable class: 0 -> positive, 1 -> negative,
    # anything else -> neutral.
    sentiment = torch.argmax(mean).item()
    if sentiment == 0:
        return "Positive Sentiment"
    elif sentiment == 1:
        return "Negative Sentiment"
    else:
        return "Neutral"
# Wrap the inference function in a Gradio interface with one text input and
# one text output.
gr_interface = gradio.Interface(fn=my_inference_function, inputs="text", outputs="text")
# Start the interface.
gr_interface.launch()