Himanshusingh committed · Commit b9a198e · 1 parent: e483bf1

Update app.py
app.py CHANGED
@@ -9,16 +9,18 @@ tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
 
 model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')
 
-
+summarizer = pipeline('summarization', model='t5-base')
 
+classifier_emotions = ['positive', 'neutral', 'negative']
 # classifier_model_name = 'bhadresh-savani/distilbert-base-uncased-emotion'
 # classifier_emotions = ['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise']
 
-def
-
-
-
-
+def summarize_sentences(sentences_by_emotion, min_length, max_length):
+    for k in sentences_by_emotion.keys():
+        if (len(sentences_by_emotion[k])!=0):
+            text = ' '.join(sentences_by_emotion[k])
+            summary = summarizer(text, min_length=min_length, max_length=max_length)
+            print(f"{k.upper()}: {summary[0]['summary_text']}\n")
 
 
 def chunk_text_to_window_size_and_predict_proba(input_ids, attention_mask, total_len):
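For context, the new summarize_sentences helper expects a dict mapping each label in classifier_emotions to a list of sentences, and prints one t5-base summary per non-empty bucket. A minimal usage sketch (the sample sentences and length limits are illustrative, not from the repo):

    # Hypothetical buckets, e.g. produced by the FinBERT classification step
    sentences_by_emotion = {
        'positive': ['Revenue grew 12% year over year.', 'Margins expanded in Q3.'],
        'neutral': ['The filing covers fiscal year 2022.'],
        'negative': [],  # empty buckets are skipped by the len() check
    }

    summarize_sentences(sentences_by_emotion, min_length=5, max_length=30)

Note that the added summarizer line assumes pipeline is already imported (from transformers import pipeline); the import is not part of this hunk.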
@@ -36,7 +38,7 @@ def chunk_text_to_window_size_and_predict_proba(input_ids, attention_mask, total_len):
         proba_list (List[torch.Tensor]): List of probability tensors for each chunk.
     """
     proba_list = []
-
+
     start = 0
     window_length = 510
 
@@ -64,6 +66,9 @@ def chunk_text_to_window_size_and_predict_proba(input_ids, attention_mask, total_len):
         }
 
         outputs = model(**input_dict)
+
+        decoded = tokenizer.decode(input_ids_chunk)
+        print("########:", decoded , ":##############")
 
         probabilities = torch.nn.functional.softmax(outputs[0], dim = -1)
         proba_list.append(probabilities)
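The added decode/print pair is a debugging aid: it reconstructs each token chunk as text before the probabilities are computed. Since the diff only shows fragments of chunk_text_to_window_size_and_predict_proba, here is a sketch of how such a 510-token windowing loop typically fits together; the loop structure and chunk assembly are assumptions built around the visible names (start, window_length, input_dict, proba_list), not the file's exact body:

    import torch

    # tokenizer and model are the FinBERT objects loaded at the top of app.py
    def predict_proba_in_windows(input_ids, attention_mask, total_len):
        proba_list = []
        start = 0
        window_length = 510  # 510 tokens + [CLS] + [SEP] = BERT's 512-token limit

        while start < total_len:
            end = min(start + window_length, total_len)

            # Re-add the special tokens FinBERT expects around each chunk
            input_ids_chunk = [tokenizer.cls_token_id] + input_ids[start:end] + [tokenizer.sep_token_id]
            attention_mask_chunk = [1] + attention_mask[start:end] + [1]

            input_dict = {
                'input_ids': torch.tensor([input_ids_chunk]).long(),
                'attention_mask': torch.tensor([attention_mask_chunk]).long(),
            }

            outputs = model(**input_dict)
            probabilities = torch.nn.functional.softmax(outputs[0], dim=-1)
            proba_list.append(probabilities)
            start = end

        return proba_list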
@@ -115,6 +120,7 @@ def my_inference_function(sec_text):
     """
     tokens = tokenizer.encode_plus(sec_text, add_special_tokens=False)
 
+
     input_ids = tokens['input_ids']
     total_len = len(input_ids)
     attention_mask = tokens['attention_mask']
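This last hunk only adds a blank line, but it sits in my_inference_function, which deliberately tokenizes with add_special_tokens=False so that the windowing code can insert its own [CLS]/[SEP] per chunk. A small sketch of what that flag changes (sample text is illustrative; exact token counts depend on the tokenizer):

    sample = 'Quarterly revenue exceeded guidance.'

    with_special = tokenizer.encode_plus(sample)  # default: adds [CLS] ... [SEP]
    without_special = tokenizer.encode_plus(sample, add_special_tokens=False)

    # The two encodings differ by exactly the two special tokens
    assert len(with_special['input_ids']) == len(without_special['input_ids']) + 2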