File size: 4,462 Bytes
29c5704 b3301a4 37fc349 47f38d5 37fc349 b3301a4 37fc349 47f38d5 37fc349 062e6a2 b9a198e 47f38d5 b9a198e 47f38d5 b9a198e 37fc349 b9a198e 37fc349 aa47040 6469f49 37fc349 ffe7a35 37fc349 9eb6d05 37fc349 6469f49 37fc349 b9a198e 215e416 aa47040 b9a198e 37fc349 567a866 37fc349 dcabcbb 286dd08 37fc349 71e2445 b9a198e 37fc349 71e2445 37fc349 e483bf1 37fc349 29c5704 7af1a44 a11ee17 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
import gradio
import torch
from transformers import pipeline
from transformers import BertForSequenceClassification, BertTokenizer
# FinBERT tokenizer/model pair for financial sentiment classification
# (downloaded from the Hugging Face hub on first run).
tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')
# t5-base summarization pipeline used by summarize_sentences().
summarizer = pipeline('summarization', model='t5-base')
# NOTE(review): this list is unused in the visible code, and its order
# (positive, neutral, negative) does not match the index->label mapping
# applied to the model output further down (0=positive, 1=negative,
# 2=neutral) — confirm before relying on it.
classifier_emotions = ['positive', 'neutral', 'negative']
# classifier_model_name = 'bhadresh-savani/distilbert-base-uncased-emotion'
# classifier_emotions = ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise']
def summarize_sentences(sentences_by_emotion, min_length, max_length):
    """Summarize the sentences collected under each emotion and print each summary.

    Args:
        sentences_by_emotion (dict[str, list[str]]): Maps an emotion label to
            the list of sentences classified under it.
        min_length (int): Minimum summary length passed to the summarizer.
        max_length (int): Maximum summary length passed to the summarizer.

    Returns:
        dict[str, str]: Emotion label -> generated summary text, with entries
        only for emotions that had at least one sentence.  (Previously the
        function returned None; callers that ignore the return value are
        unaffected.)
    """
    summaries = {}
    # Iterate items() directly instead of keys() plus a second lookup, and
    # skip empty lists via truthiness rather than len(...) != 0.
    for emotion, sentences in sentences_by_emotion.items():
        if not sentences:
            continue
        text = ' '.join(sentences)
        # `summarizer` is the module-level t5-base summarization pipeline.
        summary = summarizer(text, min_length=min_length, max_length=max_length)
        summary_text = summary[0]['summary_text']
        print(f"{emotion.upper()}: {summary_text}\n")
        summaries[emotion] = summary_text
    return summaries
def chunk_text_to_window_size_and_predict_proba(input_ids, attention_mask, total_len,
                                                window_length=510, max_chunks=25):
    """Split tokenized text into windows and predict class probabilities per window.

    Each window of at most `window_length` tokens is wrapped with BERT's
    [CLS] (101) / [SEP] (102) special token ids, run through the module-level
    `model`, and softmaxed into a probability distribution.

    Args:
        input_ids (List[int]): Token ids of the full input text (without
            special tokens; they are added per chunk here).
        attention_mask (List[int]): Attention mask aligned with `input_ids`.
        total_len (int): Total length of `input_ids`.
        window_length (int): Tokens per chunk before adding [CLS]/[SEP];
            the default 510 (+2 specials) fits BERT's 512-token limit.
            (New parameter, defaults to the previously hard-coded value.)
        max_chunks (int): Safety cap on the number of chunks processed.
            (New parameter, defaults to the previously hard-coded 25.)

    Returns:
        proba_list (List[torch.Tensor]): One (1, num_classes) probability
        tensor per chunk.
    """
    proba_list = []
    start = 0
    loop = True
    count = 1
    print(f'Total Length: {total_len}')
    while loop:
        end = start + window_length
        # Stop after this iteration once we reach the end of the input or the
        # chunk cap; the final (possibly short) chunk is still processed.
        if (end >= total_len) or (count >= max_chunks):
            loop = False
        print(f'Start: {start}')
        print(f'End: {end}')
        # 1 => slice out this window (slicing past the end is safe in Python).
        input_ids_chunk = input_ids[start:end]
        attention_mask_chunk = attention_mask[start:end]
        # 2 => wrap with [CLS] ... [SEP] so each chunk looks like a full sequence.
        input_ids_chunk = [101] + input_ids_chunk + [102]
        attention_mask_chunk = [1] + attention_mask_chunk + [1]
        # 3 => convert to tensors with a batch dimension of 1.
        input_dict = {
            'input_ids': torch.Tensor([input_ids_chunk]).long(),
            'attention_mask': torch.Tensor([attention_mask_chunk]).int()
        }
        # Inference only: no_grad avoids building the autograd graph, saving
        # memory for long inputs (the original ran the model with grad on).
        with torch.no_grad():
            outputs = model(**input_dict)
            probabilities = torch.nn.functional.softmax(outputs[0], dim=-1)
        decoded = tokenizer.decode(input_ids_chunk)
        print(f'Loop Count: {count}')
        count = count + 1
        print("########:", decoded, ":##############")
        print("########:", probabilities, ":##############")
        proba_list.append(probabilities)
        start = end
    return proba_list
def get_mean_from_proba(proba_list):
    """Average the per-chunk class probabilities into one distribution.

    Args:
        proba_list (List[torch.Tensor]): One (1, num_classes) probability
            tensor per chunk, as produced by
            chunk_text_to_window_size_and_predict_proba.

    Returns:
        mean (torch.Tensor): 1-D tensor of shape (num_classes,) — the mean
        probability per class across all chunks.

    Raises:
        RuntimeError: If `proba_list` is empty (torch.cat of zero tensors).
    """
    # Ensures that gradients are not computed, saving memory.
    with torch.no_grad():
        # Concatenating the (1, C) chunk tensors along dim 0 yields the
        # (N, C) matrix directly.  This replaces the original
        # stack-then-Tensor.resize() dance: Tensor.resize() is deprecated
        # and can silently reinterpret storage, whereas cat is the
        # idiomatic, shape-safe equivalent here.
        stacked = torch.cat(proba_list, dim=0)
        # Mean along the zeroth (chunk) dimension.
        mean = stacked.mean(dim=0)
    return mean
# NOTE(review): the lines below read a `tokens` variable and use bare
# `return` statements, so they must be the body of a function — presumably
# something like `def my_inference_function(text)` (the name the Gradio
# interface below binds to) whose `def` line and tokenization step are
# missing from this view.  As written, this is not valid at module level;
# restore the enclosing function before running.
input_ids = tokens['input_ids']
total_len = len(input_ids)
attention_mask = tokens['attention_mask']
proba_list = chunk_text_to_window_size_and_predict_proba(input_ids, attention_mask, total_len )
mean = get_mean_from_proba(proba_list)
# argmax over the mean distribution picks the most probable class index.
sentiment = torch.argmax(mean).item()
# Index-to-label mapping here follows ProsusAI/finbert's published label
# order (0=positive, 1=negative, 2=neutral) — note this disagrees with the
# unused `classifier_emotions` list defined near the top of the file.
if sentiment == 0:
    return "Positive Sentiment"
elif sentiment == 1:
    return "Negative Sentiment"
else:
    return "Neutral"
# Wire the inference function into a minimal Gradio text-in / text-out UI.
# NOTE(review): `my_inference_function` is not defined anywhere in this
# view — presumably it is the function whose body appears above; confirm
# it exists before launch, otherwise this raises NameError.
gr_interface = gradio.Interface(
fn = my_inference_function,
inputs = "text",
outputs = "text"
)
# Starts the local Gradio server (blocking call).
gr_interface.launch()
|