"""Streamlit "Explore" page: EDA visuals for the raw and cleaned tweet data.

Renders three charts:

1. A word cloud of the raw ``safe_text`` column of ``datasets/Train.csv``.
2. A treemap of the 20 most frequent words in the ``clean_tweet`` column of
   ``datasets/clean_copy.csv``.
3. A scatter plot of how many tweets have each word count.
"""

import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as pe
from wordcloud import WordCloud, STOPWORDS
from nltk import FreqDist

st.title("Welcome To The Explore Page: ")
st.markdown("On this page you will be able to see some EDA Visuals")

# Load the raw training data and the pre-cleaned copy.
data = pd.read_csv("datasets/Train.csv")
clean_data = pd.read_csv("datasets/clean_copy.csv")
clean_data = clean_data.dropna()

# Word cloud for the raw (unclean) tweets.
# Missing tweets are skipped so that str.join never receives a float NaN.
raw_tweets = data["safe_text"].dropna().astype(str)
unclean_words = " ".join(raw_tweets)
wc = WordCloud(stopwords=STOPWORDS).generate(unclean_words)

# Build the figure explicitly and hand it to Streamlit. Calling st.pyplot()
# with no argument relies on matplotlib's global figure, which is the
# deprecated usage the old 'deprecation.showPyplotGlobalUse' option silenced.
wc_fig, wc_ax = plt.subplots(figsize=(5, 10))
wc_ax.set_title("Most common Words in unclean Dataset")
wc_ax.imshow(wc)
st.pyplot(wc_fig)

# Treemap of the 20 most common words in the cleaned tweets.
# Joining then splitting flattens the column into one corpus of words.
clean_words = " ".join(clean_data["clean_tweet"].astype(str)).split()
freq_words = pd.DataFrame(
    FreqDist(clean_words).most_common(20),
    columns=["word", "count"],
)
fig = pe.treemap(
    data_frame=freq_words,
    path=["word"],
    values="count",
    title="Top 20 Most Frequent Words After Cleaning",
)
st.plotly_chart(fig)

# Tweet lengths, measured in space-separated tokens.
# The vectorised form leaves NaN for missing tweets instead of raising.
data["tweet_length"] = data["safe_text"].str.split(" ").str.len()

# Name both output columns explicitly: a bare value_counts().reset_index()
# produces different column names before and after pandas 2.0.
words = (
    data["tweet_length"]
    .value_counts()  # NaN lengths are dropped here by default
    .rename_axis("tweet_length")
    .reset_index(name="count")
)
# Lengths become floats when any tweet was missing; restore integer labels.
words["tweet_length"] = words["tweet_length"].astype(int)

fig_2 = pe.scatter(
    data_frame=words,
    x="tweet_length",
    y="count",
    size="count",
    color="tweet_length",
    title="Tweet Lengths",
)
st.plotly_chart(fig_2)