File size: 7,665 Bytes
32664e6
 
 
3920fee
 
32664e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import requests
import streamlit as st
import torch
import joblib
import tensorflow as tf
from transformers import AutoTokenizer, LEDForConditionalGeneration
from tensorflow.keras.models import load_model
from transformers import TFBertForSequenceClassification, BertTokenizer
# Configure the browser tab title, tab icon and wide layout for the app.
# NOTE(review): Streamlit documents that set_page_config must run before any
# other Streamlit command, which is presumably why it sits above the remaining
# imports — confirm before reordering.
st.set_page_config(page_title="Summarization&tweet_analysis", page_icon="📈",layout="wide")
# CSS injected into the page: hides the element with id "MainMenu", hides the
# <footer> element, and uses footer:after to display a copyright line instead.
# NOTE(review): '#' does not start a comment in CSS, so the
# "#background-color: red;" line is presumably dropped by browsers as an
# invalid declaration — confirm that is the intent.
hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {	
	visibility: hidden;
	
	}
footer:after {
	content:'©2023 Sravathi AI Technology. All rights reserved.'; 
	visibility: visible;
	display: block;
	position: relative;
	#background-color: red;
	padding: 5px;
	top: 2px;
}
            </style>
            """
# unsafe_allow_html=True is passed so the raw <style> block is emitted as HTML.
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
import pandas as pd
import time
import sys
import pickle
#from stqdm import stqdm
import base64
#from tensorflow.keras.preprocessing.text import Tokenizer
#from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import json
import os
import re

#from tensorflow.keras.models import load_model
#st.write("API examples - Dermatophagoides, Miconazole, neomycin,Iothalamate")

#background_image = sys.path[1]+"/streamlit_datafile_links/audience-1853662_960_720.jpg"  # Path to your background image
def add_bg_from_local(image_file):
    """Use a local image file as the background of the Streamlit app.

    The file is read in binary mode, base64-encoded and embedded as a
    ``data:`` URI inside a ``<style>`` rule that targets ``.stApp``.

    Args:
        image_file: Path of the image file to embed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import mimetypes  # local import: only this helper needs it

    # Pick the MIME type from the file extension so PNG/GIF/etc. are labelled
    # correctly; fall back to JPEG (the registered name for .jpg images) when
    # the extension is unknown or is not an image type.
    mime_type, _ = mimetypes.guess_type(str(image_file))
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # A separate name for the handle keeps the path parameter intact.
    with open(image_file, "rb") as handle:
        encoded_string = base64.b64encode(handle.read()).decode()

    st.markdown(
    f"""
    <style>
    .stApp {{
        background-image: url(data:{mime_type};base64,{encoded_string});
        background-size: cover
    }}
    </style>
    """,
    unsafe_allow_html=True
    )
#add_bg_from_local(background_image)  

#@st.cache
# Render the page heading above the two-column layout.
st.header('Summarization & tweet_analysis')
def convert_df(df):
    """Serialize *df* to CSV and return the result as UTF-8 encoded bytes.

    The index column is left out of the CSV output. The function itself does
    no caching; every call performs the conversion again.
    """
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
def preprocess(text):
    """Mask user mentions and links in *text*.

    The text is split on single spaces; every token that starts with ``@``
    and is longer than one character becomes ``@user``, and every token that
    starts with ``http`` becomes ``http``. Tokens are joined back with single
    spaces, so the original spacing is kept.
    """
    cleaned = []
    for token in text.split(" "):
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        elif token.startswith('http'):
            token = 'http'
        cleaned.append(token)
    return " ".join(cleaned)


def _summarize(text):
    """Summarize *text* with the allenai/PRIMERA-multinews model on CPU.

    Returns:
        The decoded summaries as returned by ``tokenizer.batch_decode``.
    """
    # NOTE(review): tokenizer and model are loaded again on every submit;
    # consider caching them if the installed Streamlit version supports it.
    tokenizer = AutoTokenizer.from_pretrained('allenai/PRIMERA-multinews')
    model = LEDForConditionalGeneration.from_pretrained('allenai/PRIMERA-multinews')
    device = "cpu"
    model.to(device)

    # Bound both lengths by the model's position-embedding sizes so that
    # over-long input is truncated (and generation stops) instead of running
    # past the embedding tables.
    inputs = tokenizer(
        text,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=model.config.max_encoder_position_embeddings,
    )
    inputs = inputs.to(device)
    outputs = model.generate(
        **inputs,
        max_length=model.config.max_decoder_position_embeddings,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


def _classify_item(text):
    """Predict the item label of *text* with the fine-tuned BERT checkpoint.

    Loads the Keras checkpoint and the fitted label encoder from the current
    working directory, scores the text, and maps the highest-scoring class
    index back to its label.
    """
    # The checkpoint wraps a Hugging Face layer, so Keras needs it registered
    # as a custom object to deserialize the model.
    custom_objects = {"TFBertForSequenceClassification": TFBertForSequenceClassification}
    best_model = load_model('best_model_checkpoint_val_acc_0.8697_epoch_03.h5', custom_objects=custom_objects)

    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    encodings = tokenizer(text, padding=True, truncation=True, return_tensors='tf')
    dataset = tf.data.Dataset.from_tensor_slices(dict(encodings))

    probabilities = best_model.predict(dataset.batch(8))
    class_index = np.argmax(probabilities, axis=1)

    # NOTE(review): joblib.load unpickles the file; only load encoder files
    # that come from a trusted source.
    label_encoder = joblib.load('label_encoder.joblib')
    return label_encoder.inverse_transform(class_index)[0]


def predict_pret(text):
    """Predict the sentiment of *text* with the cardiffnlp RoBERTa model.

    Returns:
        A ``(label, score)`` tuple for the highest-scoring sentiment class.
    """
    from scipy.special import softmax
    from transformers import AutoConfig, AutoModelForSequenceClassification

    MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    config = AutoConfig.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)

    encoded_input = tokenizer(preprocess(text), return_tensors='pt')
    output = model(**encoded_input)
    scores = softmax(output[0][0].detach().numpy())

    # Index of the highest score (last element of the ascending argsort).
    top = np.argsort(scores)[::-1][0]
    return config.id2label[top], scores[top]


col1, col2 = st.columns([4, 1])
# Placeholders initialised to 0; nothing in this script reads them.
result_csv_batch_sql = result_csv_batch_fail = result_csv_batch = result_csv4 = result_csv3 = result_csv1 = result_csv2 = 0

with col1:
    models = st.selectbox(
    'Select the option',
    ('summarization_model1','tweet_analysis' ))
    # Both options emit the same empty markdown element below the selector.
    st.markdown("")
    with st.form("form1"):
        hide_label = """
        <style>
            .css-9ycgxx {
                display: none;
            }
        </style>
        """
        text_data = st.text_input('Enter the text')
        st.markdown(hide_label, unsafe_allow_html=True)
        submitted = st.form_submit_button("Submit")
        if submitted:
            if not text_data.strip():
                # Skip the expensive model loading when nothing was entered.
                st.warning('Enter the required inputs', icon="⚠️")
            elif models == 'summarization_model1':
                st.write(_summarize(text_data))
                st.success('Prediction done successfully!', icon="✅")
            else:
                st.write("Item name: ", _classify_item(text_data))

                label, _score = predict_pret(text_data)
                st.write("Sentiment is: ", label)

                st.success('Prediction done successfully!', icon="✅")

# Emit 30 '##' markdown elements after the columns (presumably as bottom
# padding for the page).
for _ in range(30):
    st.markdown('##')