Spaces:
Runtime error
Runtime error
File size: 7,026 Bytes
f6c6a41 7f4632e f6c6a41 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
import streamlit as st
import preprocessor
import helper
import matplotlib.pyplot as plt
import seaborn as sns
# Slice colours used by the "Most Common Words" donut chart.
_PIE_COLORS = [
    '#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231',
    '#911eb4', '#46f0f0', '#f032e6', '#bcf60c', '#fabebe',
    '#008080', '#e6beff', '#9a6324', '#fffac8', '#aaffc3',
    '#808000', '#ffd8b1', '#808080', 'lightgreen', 'lightblue',
]


def _show_figure(fig):
    """Render *fig* in the Streamlit page, then close it.

    Figures created through ``pyplot`` stay registered until they are closed;
    closing after rendering keeps them from piling up on every rerun.
    """
    st.pyplot(fig)
    plt.close(fig)


def _render_top_statistics(selected_user, df):
    """Show the four headline counters returned by ``helper.fetch_stats``."""
    num_messages, words, num_media_messages, num_links = helper.fetch_stats(selected_user, df)
    st.title("Top Statistics")
    stats = (
        ("Total Messages", num_messages),
        ("Total Words", words),
        ("Media shared", num_media_messages),
        ("Links shared", num_links),
    )
    for column, (label, value) in zip(st.columns(4), stats):
        with column:
            st.header(label)
            st.title(value)


def _render_timelines(selected_user, df):
    """Plot the monthly and the daily message timelines as line charts."""
    st.title("Monthly Timeline")
    timeline = helper.monthly_timeline(selected_user, df)
    fig = plt.figure()
    sns.set_style('darkgrid')
    sns.lineplot(x=timeline['time'], y=timeline['message'], color='red')
    plt.xticks(rotation='vertical')
    _show_figure(fig)

    st.title("Daily Timeline")
    daily_timeline = helper.daily_timeline(selected_user, df)
    fig = plt.figure(figsize=(10, 3))
    sns.set_style('whitegrid')
    sns.lineplot(x=daily_timeline['only_date'], y=daily_timeline['message'], color='purple')
    plt.xticks(rotation='vertical')
    plt.xlabel("date")
    _show_figure(fig)


def _render_activity_maps(selected_user, df):
    """Show busiest-day and busiest-month bar charts plus the weekly heatmap."""
    st.title('Activity Map')
    col1, col2 = st.columns(2)
    with col1:
        st.header("Most busy day")
        busy_day = helper.week_activity_map(selected_user, df)
        fig = plt.figure()
        sns.set_style('ticks')
        pal = sns.cubehelix_palette(start=2, rot=0, dark=0.5, light=0.9, reverse=True)
        sns.barplot(x=busy_day.index, y=busy_day.values, palette=pal)
        plt.ylabel("messages")
        plt.xticks(rotation='vertical')
        _show_figure(fig)
    with col2:
        st.header("Most busy month")
        busy_month = helper.month_activity_map(selected_user, df)
        fig = plt.figure()
        sns.set_style('ticks')
        pal = sns.cubehelix_palette(start=0, rot=0, dark=0.2, light=0.9, reverse=True)
        sns.barplot(x=busy_month.index, y=busy_month.values, palette=pal)
        plt.ylabel("messages")
        plt.xticks(rotation='vertical')
        _show_figure(fig)

    st.title("Weekly Activity Map")
    user_heatmap = helper.activity_heatmap(selected_user, df)
    fig = plt.figure(figsize=(13, 4))
    cmap = sns.color_palette("viridis", as_cmap=True)
    sns.heatmap(user_heatmap, cmap=cmap, square=True)
    _show_figure(fig)


def _render_most_active_users(df):
    """Show a bar chart and a table of the most active users in the chat."""
    st.title('Most active users')
    x, new_df = helper.most_busy_users(df)
    fig = plt.figure()
    col1, col2 = st.columns(2)
    with col1:
        pal = sns.color_palette("cubehelix")
        sns.barplot(x=x.index, y=x.values, palette=pal)
        plt.xticks(rotation='vertical')
        plt.ylabel('messages')
        _show_figure(fig)
    with col2:
        st.dataframe(new_df)


def _render_word_analysis(selected_user, df):
    """Show the word cloud and the most-common-words donut chart."""
    st.title('WordCloud')
    df_wc = helper.create_wordcloud(selected_user, df)
    fig, ax = plt.subplots()
    ax.imshow(df_wc)
    _show_figure(fig)

    st.title('Most Common Words')
    most_common_df = helper.most_common_words(selected_user, df)
    fig = plt.figure()
    # Column 0 is used for the slice labels and column 1 for the slice sizes.
    plt.pie(most_common_df[1], labels=most_common_df[0], colors=_PIE_COLORS,
            autopct='%0.1f%%', pctdistance=0.9, labeldistance=1, rotatelabels=270,
            startangle=180, counterclock=False)
    # A white disc over the centre turns the pie into a donut.
    centre_circle = plt.Circle((0, 0), 0.50, fc='white')
    fig.gca().add_artist(centre_circle)
    _show_figure(fig)


def _render_emoji_analysis(selected_user, df):
    """Show the emoji table and a pie of the top five rows, if any exist."""
    emoji_df = helper.emoji_helper(selected_user, df)
    if not emoji_df.shape[0]:
        return
    st.title("Emoji Analysis")
    col1, col2 = st.columns(2)
    with col1:
        st.dataframe(emoji_df)
    with col2:
        # Scope the emoji-capable font to this chart only. The figure must be
        # rendered inside the context because the generic "monospace" family
        # is resolved when the figure is drawn.
        emoji_font = {"font.monospace": ["Segoe UI Emoji"], "font.family": "monospace"}
        with plt.rc_context(emoji_font):
            fig, ax = plt.subplots()
            ax.pie(emoji_df[1].head(5), labels=emoji_df[0].head(5), autopct="%0.2f")
            _show_figure(fig)


def _render_birth_dates(df):
    """Show the table returned by ``helper.birth_dates`` when it has rows."""
    birth_data = helper.birth_dates(df)
    if birth_data.shape[0]:
        st.title("Birth dates of some users.")
        st.dataframe(birth_data)


def _render_sentiment_analysis(selected_user, df):
    """Show the sentiment bar chart for a single selected user."""
    st.title("Sentiment Analysis")
    sentiment_data, number = helper.sentiment_analysis(selected_user, df)
    fig = plt.figure()
    sns.set_style('ticks')
    pal = sns.cubehelix_palette(start=0.5, rot=0, dark=0.2, light=0.9, reverse=True)
    sns.barplot(x=sentiment_data.index, y=sentiment_data.values, palette=pal)
    plt.xticks(rotation='vertical')
    _show_figure(fig)
    st.header(f"Based on random {number} messages.")
    st.text("Note : Sentiment Analysis give good results if messages \nare in hinglish (hindi or english or both).")


def main():
    """Run the WhatsApp chat analyzer Streamlit page.

    The sidebar takes an uploaded chat export and a user selection. Once the
    "Show Analysis" button is pressed, the page renders statistics, timelines,
    activity maps, word and emoji breakdowns, and then either the group-wide
    sections (for "Overall") or sentiment analysis (for a single user).
    """
    st.sidebar.title("Whatsapp Chat Analyzer")
    uploaded_file = st.sidebar.file_uploader("Choose a file")
    if uploaded_file is None:
        return

    try:
        data = uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        # Report an unreadable upload in the page instead of crashing the app.
        st.error("Could not read the uploaded file: it is not valid UTF-8 text.")
        return
    df = preprocessor.preprocess(data)

    # Filter rather than list.remove(): a chat without any 'group_notification'
    # rows would otherwise raise ValueError here.
    user_list = sorted(
        user for user in df['user'].unique().tolist() if user != 'group_notification'
    )
    user_list.insert(0, "Overall")
    selected_user = st.sidebar.selectbox("Show analysis wrt", user_list)

    if not st.sidebar.button("Show Analysis"):
        return

    _render_top_statistics(selected_user, df)
    _render_timelines(selected_user, df)
    _render_activity_maps(selected_user, df)
    if selected_user == 'Overall':
        _render_most_active_users(df)
    _render_word_analysis(selected_user, df)
    _render_emoji_analysis(selected_user, df)
    if selected_user == 'Overall':
        _render_birth_dates(df)
    else:
        _render_sentiment_analysis(selected_user, df)
# Start the app only when this file is executed as a script, so importing the
# module does not render the page as a side effect.
if __name__ == "__main__":
    main()