# File size: 7,665 Bytes
# 32664e6 3920fee 32664e6
# (Line-number gutter 1-187 from the web file viewer omitted.)
import requests
import streamlit as st
import torch
import joblib
import tensorflow as tf
from transformers import AutoTokenizer, LEDForConditionalGeneration
from tensorflow.keras.models import load_model
from transformers import TFBertForSequenceClassification, BertTokenizer
# CSS injected into the page: it hides the element with id `MainMenu` and the
# `footer` element, then re-adds a visible copyright line through `footer:after`.
# NOTE(review): `#background-color: red;` is not CSS comment syntax; presumably
# it was meant to disable that declaration - confirm before relying on it.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {
visibility: hidden;
}
footer:after {
content:'©2023 Sravathi AI Technology. All rights reserved.';
visibility: visible;
display: block;
position: relative;
#background-color: red;
padding: 5px;
top: 2px;
}
</style>
"""

# Page-level settings (browser tab title, icon and wide layout). This is still
# the first Streamlit call in the script; only a plain string is defined above.
st.set_page_config(
    page_title="Summarization&tweet_analysis",
    page_icon="📈",
    layout="wide",
)

# Raw HTML must be explicitly allowed for the <style> element to be emitted.
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
import pandas as pd
import time
import sys
import pickle
#from stqdm import stqdm
import base64
#from tensorflow.keras.preprocessing.text import Tokenizer
#from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import json
import os
import re
#from tensorflow.keras.models import load_model
#st.write("API examples - Dermatophagoides, Miconazole, neomycin,Iothalamate")
#background_image = sys.path[1]+"/streamlit_datafile_links/audience-1853662_960_720.jpg" # Path to your background image
def add_bg_from_local(image_file):
    """Use a local image file as the background of the Streamlit app.

    The file is read in binary mode, base64-encoded and embedded as a
    ``data:`` URL inside a ``<style>`` rule for ``.stApp`` that is emitted
    through ``st.markdown``.

    Args:
        image_file: Path of the image file to embed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Imported locally so this function does not require a new file-level import.
    import mimetypes

    # Derive the media type from the file name instead of hard-coding "jpg",
    # so PNG/GIF/WebP backgrounds are labelled correctly. Fall back to JPEG
    # when the extension is unknown or does not map to an image type.
    mime_type, _encoding = mimetypes.guess_type(str(image_file))
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # A separate handle name avoids rebinding the `image_file` parameter.
    with open(image_file, "rb") as handle:
        encoded_string = base64.b64encode(handle.read()).decode()

    st.markdown(
        f"""
<style>
.stApp {{
background-image: url(data:{mime_type};base64,{encoded_string});
background-size: cover
}}
</style>
""",
        unsafe_allow_html=True
    )
#add_bg_from_local(background_image)
# NOTE(review): the disabled decorator below sits directly above st.header,
# not above a function definition; presumably it was meant for convert_df -
# confirm before re-enabling it.
#@st.cache
# Heading displayed by the app.
st.header('Summarization & tweet_analysis')
def convert_df(df):
    """Serialise *df* to CSV text without the index and return it as UTF-8 bytes.

    NOTE: no caching decorator is applied to this function, so the conversion
    is recomputed on every call.
    """
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
# ---------------------------------------------------------------------------
# Model identifiers and artefact paths used by the two modes below.
# ---------------------------------------------------------------------------
SUMMARIZATION_MODEL = 'allenai/PRIMERA-multinews'
TWEET_CLASSIFIER_CHECKPOINT = 'best_model_checkpoint_val_acc_0.8697_epoch_03.h5'
TWEET_CLASSIFIER_TOKENIZER = 'bert-base-uncased'
LABEL_ENCODER_PATH = 'label_encoder.joblib'
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# CSS emitted inside the form; it sets `display: none` on `.css-9ycgxx`.
hide_label = """
<style>
.css-9ycgxx {
display: none;
}
</style>
"""


def _summarize(text):
    """Summarise *text* with the PRIMERA checkpoint on the CPU.

    Args:
        text: The document text to summarise.

    Returns:
        The list of decoded summaries produced by ``tokenizer.batch_decode``.
    """
    # NOTE: tokenizer and model are loaded on every call, i.e. on every form
    # submission; nothing is cached between Streamlit reruns.
    tokenizer = AutoTokenizer.from_pretrained(SUMMARIZATION_MODEL)
    model = LEDForConditionalGeneration.from_pretrained(SUMMARIZATION_MODEL)
    device = "cpu"
    model.to(device)

    # Cap input and output lengths at the sizes declared in the model config.
    # The previous limit of 1000000 effectively disabled truncation, so a long
    # input (or a long generation) could index past the position embeddings.
    inputs = tokenizer(
        text,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=model.config.max_encoder_position_embeddings,
    ).to(device)
    outputs = model.generate(
        **inputs,
        max_length=model.config.max_decoder_position_embeddings,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)


def _classify_tweet(text):
    """Predict the item label of *text* with the fine-tuned Keras checkpoint.

    Args:
        text: The tweet text to classify.

    Returns:
        The first label returned by the label encoder's ``inverse_transform``.
    """
    # The checkpoint references TFBertForSequenceClassification, so Keras
    # needs it registered as a custom object when deserialising.
    custom_objects = {"TFBertForSequenceClassification": TFBertForSequenceClassification}
    best_model = load_model(TWEET_CLASSIFIER_CHECKPOINT, custom_objects=custom_objects)

    tokenizer = BertTokenizer.from_pretrained(TWEET_CLASSIFIER_TOKENIZER)
    encodings = tokenizer(text, padding=True, truncation=True, return_tensors='tf')
    dataset = tf.data.Dataset.from_tensor_slices(dict(encodings))

    # Assumes the model outputs one row of class scores per input, as the
    # argmax over axis 1 implies - TODO confirm against the training code.
    predictions_probabilities = best_model.predict(dataset.batch(8))
    index_arg = np.argmax(predictions_probabilities, axis=1)

    # SECURITY: joblib.load unpickles the file; only load a trusted artefact.
    label_encoder = joblib.load(LABEL_ENCODER_PATH)
    return label_encoder.inverse_transform(index_arg)[0]


def preprocess(text):
    """Replace @-mentions with '@user' and 'http...' tokens with 'http'.

    The text is split on single spaces and re-joined with single spaces.
    """
    new_text = []
    for token in text.split(" "):
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        elif token.startswith('http'):
            token = 'http'
        new_text.append(token)
    return " ".join(new_text)


def predict_pret(text):
    """Return ``(label, score)`` of the top sentiment class for *text*.

    Args:
        text: The raw tweet text; it is passed through ``preprocess`` first.

    Returns:
        A tuple of the ``config.id2label`` entry for the highest-scoring class
        and that class's softmax score.
    """
    from scipy.special import softmax
    from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    config = AutoConfig.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)

    # Truncate so over-long input cannot exceed the model's position limit.
    # NOTE(review): 512 is the usual RoBERTa-base limit - confirm for this
    # checkpoint.
    encoded_input = tokenizer(
        preprocess(text), return_tensors='pt', truncation=True, max_length=512
    )
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(**encoded_input)
    scores = softmax(output[0][0].detach().numpy())

    # Last index of the ascending argsort is the highest-scoring class.
    best = int(np.argsort(scores)[-1])
    return config.id2label[best], scores[best]


# ---------------------------------------------------------------------------
# Page flow: mode selector and text form in the wide left column.
# ---------------------------------------------------------------------------
col1, col2 = st.columns([4,1])
# Nothing in the visible script reads these names; they are kept as-is.
result_csv_batch_sql = result_csv_batch_fail=result_csv_batch=result_csv4=result_csv3=result_csv1=result_csv2=0
with col1:
    models = st.selectbox(
        'Select the option',
        ('summarization_model1','tweet_analysis' ))
    # Both branches of the former if/else emitted the same empty markdown.
    st.markdown("")
    with st.form("form1"):
        text_data = st.text_input('Enter the text')
        st.markdown(hide_label, unsafe_allow_html=True)
        submitted = st.form_submit_button("Submit")
        if submitted:
            if not text_data.strip():
                # Do not run the models on empty or whitespace-only input.
                st.warning('Enter the required inputs', icon="⚠️")
            elif models == 'summarization_model1':
                st.write(_summarize(text_data))
                st.success('Prediction done successfully!', icon="✅")
            else:
                st.write("Item name: ", _classify_tweet(text_data))
                sentiment_label, _sentiment_score = predict_pret(text_data)
                st.write("Sentiment is: ", sentiment_label)
                st.success('Prediction done successfully!', icon="✅")
# Disabled error handler, preserved verbatim as an unused string literal.
# NOTE(review): the condition `'NoneType' or 'not defined' in str(e)` is always
# truthy because the non-empty string 'NoneType' is evaluated on its own; fix
# that before re-enabling this handler.
_='''
except Exception as e:
if 'NoneType' or 'not defined' in str(e):
st.warning('Enter the required inputs', icon="⚠️")
else:
st.warning(str(e), icon="⚠️")
'''

# Emit the markdown string '##' thirty times; presumably this acts as vertical
# padding at the bottom of the page.
for _row in range(30):
    st.markdown('##')