import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as pe
from wordcloud import WordCloud, STOPWORDS
from nltk import FreqDist
st.title("Welcome to the Explore Page")
st.markdown("On this page you will be able to explore some EDA visuals.")
## loading the raw and cleaned datasets
data = pd.read_csv("datasets/Train.csv")
clean_data = pd.read_csv("datasets/clean_copy.csv")
clean_data = clean_data.dropna()
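
# The cleaning that produced clean_copy.csv is not shown in this script; the
# sketch below is an assumption about what a typical tweet-cleaning step looks
# like (lowercasing, stripping URLs, mentions, and punctuation). The
# clean_tweet helper name is illustrative, not part of the original pipeline.
import re

def clean_tweet(text):
    text = text.lower()
    text = re.sub(r"http\S+|www\.\S+", "", text)  # drop URLs
    text = re.sub(r"@\w+", "", text)              # drop @mentions
    text = re.sub(r"[^a-z\s]", "", text)          # keep letters and whitespace only
    return " ".join(text.split())                 # collapse repeated whitespace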
## plotting a word cloud for the unclean dataset
unclean_words = " ".join(data["safe_text"])
wc = WordCloud(stopwords=STOPWORDS).generate(unclean_words)
fig_wc, ax = plt.subplots(figsize=(10, 5))
ax.set_title("Most Common Words in the Unclean Dataset")
ax.imshow(wc, interpolation="bilinear")
ax.axis("off")  # hide the axis ticks around the image
st.pyplot(fig_wc)  # pass the figure explicitly instead of relying on the deprecated global pyplot
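
# The word cloud above is rebuilt on every Streamlit rerun; caching the rendered
# image is one way to avoid that. A minimal sketch, assuming Streamlit >= 1.18
# (where st.cache_data exists); the build_wordcloud name is illustrative.
@st.cache_data
def build_wordcloud(text):
    # to_array() returns a plain NumPy image, which st.cache_data can pickle
    return WordCloud(stopwords=STOPWORDS).generate(text).to_array()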
## top words in the cleaned tweets, shown as a treemap
clean_words = " ".join(clean_data["clean_tweet"]).split()  # flatten the column into a corpus of words
freq_words = pd.DataFrame(FreqDist(clean_words).most_common(20), columns=["word", "count"])
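# FreqDist is the only NLTK call in this script; a plain-pandas equivalent is a
# one-liner with value_counts(). A minimal sketch, assuming the clean_words list
# built above; the freq_words_pd name is illustrative, not part of the original.
freq_words_pd = (pd.Series(clean_words).value_counts()
                 .head(20).rename_axis("word").reset_index(name="count"))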
fig = pe.treemap(data_frame=freq_words, path=["word"], values="count", title="Top 20 Most Frequent Words After Cleaning")
st.plotly_chart(fig)
## getting the tweet lengths
data["tweet_length"] = data["safe_text"].str.split().str.len()
words = data["tweet_length"].value_counts().reset_index()
words.columns = ["tweet_length", "count"]  # name the columns explicitly; reset_index output differs across pandas versions
fig_2 = pe.scatter(data_frame=words, x="tweet_length", y="count", size="count", color="tweet_length", title="Tweet Lengths")
st.plotly_chart(fig_2)
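
# The same distribution can be drawn straight from the raw column with a
# histogram, skipping the manual value_counts step. A sketch, assuming the
# tweet_length column built above; left commented out so it does not add a
# third chart to the page.
# fig_3 = pe.histogram(data_frame=data, x="tweet_length", title="Tweet Length Distribution")
# st.plotly_chart(fig_3)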