File size: 7,026 Bytes
f6c6a41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f4632e
 
f6c6a41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import streamlit as st
import preprocessor
import helper
import matplotlib.pyplot as plt
import seaborn as sns



# Slice colours for the "Most Common Words" donut chart.
_COMMON_WORD_COLORS = [
    '#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231',
    '#911eb4', '#46f0f0', '#f032e6', '#bcf60c', '#fabebe',
    '#008080', '#e6beff', '#9a6324', '#fffac8', '#aaffc3',
    '#808000', '#ffd8b1', '#808080', 'lightgreen', 'lightblue',
]


def _show_figure(fig):
    """Render *fig* in the Streamlit app, then close it.

    Figures created through ``pyplot`` stay registered until they are closed.
    This script creates a new figure for every chart each time the analysis is
    shown, so each figure is released as soon as it has been rendered.
    """
    st.pyplot(fig)
    plt.close(fig)


def _build_user_options(df):
    """Return the sidebar choices: "Overall" followed by the sorted users.

    The 'group_notification' entry is dropped when present. Filtering (instead
    of ``list.remove``) avoids a ``ValueError`` for chats without that entry.
    """
    users = [name for name in df['user'].unique().tolist()
             if name != 'group_notification']
    users.sort()
    return ["Overall", *users]


def _bar_chart(series, palette, ylabel=None, style=None):
    """Plot *series* (index on x, values on y) as bars with vertical x ticks.

    *style*, when given, is applied via ``sns.set_style`` after the figure is
    created; *ylabel*, when given, labels the y axis.
    """
    fig = plt.figure()
    if style is not None:
        sns.set_style(style)
    sns.barplot(x=series.index, y=series.values, palette=palette)
    if ylabel is not None:
        plt.ylabel(ylabel)
    plt.xticks(rotation='vertical')
    _show_figure(fig)


def _render_top_stats(selected_user, df):
    """Show the four headline counters returned by ``helper.fetch_stats``."""
    num_messages, words, num_media_messages, num_links = helper.fetch_stats(selected_user, df)
    st.title("Top Statistics")
    headings = ("Total Messages", "Total Words", "Media shared", "Links shared")
    values = (num_messages, words, num_media_messages, num_links)
    for column, heading, value in zip(st.columns(4), headings, values):
        with column:
            st.header(heading)
            st.title(value)


def _render_timelines(selected_user, df):
    """Draw the monthly and daily message-count line charts."""
    st.title("Monthly Timeline")
    timeline = helper.monthly_timeline(selected_user, df)
    fig = plt.figure()
    sns.set_style('darkgrid')
    sns.lineplot(x=timeline['time'], y=timeline['message'], color='red')
    plt.xticks(rotation='vertical')
    _show_figure(fig)

    st.title("Daily Timeline")
    daily_timeline = helper.daily_timeline(selected_user, df)
    fig = plt.figure(figsize=(10, 3))
    sns.set_style('whitegrid')
    sns.lineplot(x=daily_timeline['only_date'], y=daily_timeline['message'], color='purple')
    plt.xticks(rotation='vertical')
    plt.xlabel("date")
    _show_figure(fig)


def _render_activity_maps(selected_user, df):
    """Draw the busiest-day / busiest-month bar charts and the weekly heatmap."""
    st.title('Activity Map')
    day_col, month_col = st.columns(2)

    with day_col:
        st.header("Most busy day")
        busy_day = helper.week_activity_map(selected_user, df)
        _bar_chart(
            busy_day,
            sns.cubehelix_palette(start=2, rot=0, dark=0.5, light=0.9, reverse=True),
            ylabel="messages",
            style='ticks',
        )

    with month_col:
        st.header("Most busy month")
        busy_month = helper.month_activity_map(selected_user, df)
        _bar_chart(
            busy_month,
            sns.cubehelix_palette(start=0, rot=0, dark=0.2, light=0.9, reverse=True),
            ylabel="messages",
            style='ticks',
        )

    st.title("Weekly Activity Map")
    user_heatmap = helper.activity_heatmap(selected_user, df)
    fig = plt.figure(figsize=(13, 4))
    cmap = sns.color_palette("viridis", as_cmap=True)
    sns.heatmap(user_heatmap, cmap=cmap, square=True)
    _show_figure(fig)


def _render_most_active_users(df):
    """Show a bar chart and a table from ``helper.most_busy_users``."""
    st.title('Most active users')
    x, new_df = helper.most_busy_users(df)
    chart_col, table_col = st.columns(2)
    with chart_col:
        _bar_chart(x, sns.color_palette("cubehelix"), ylabel='messages')
    with table_col:
        st.dataframe(new_df)


def _render_wordcloud(selected_user, df):
    """Display the image produced by ``helper.create_wordcloud``."""
    st.title('WordCloud')
    df_wc = helper.create_wordcloud(selected_user, df)
    fig, ax = plt.subplots()
    ax.imshow(df_wc)
    _show_figure(fig)


def _render_common_words(selected_user, df):
    """Draw the most common words as a donut chart.

    Column 0 of the helper's result is used for labels and column 1 for sizes.
    """
    st.title('Most Common Words')
    most_common_df = helper.most_common_words(selected_user, df)
    # Same emptiness check as the emoji section: with no rows there is nothing
    # to plot, and indexing columns 0/1 may fail.
    if not most_common_df.shape[0]:
        return

    fig = plt.figure()
    plt.pie(most_common_df[1], labels=most_common_df[0], colors=_COMMON_WORD_COLORS,
            autopct='%0.1f%%', pctdistance=0.9, labeldistance=1, rotatelabels=270,
            startangle=180, counterclock=False)
    # A white disc over the centre turns the pie into a donut.
    centre_circle = plt.Circle((0, 0), 0.50, fc='white')
    fig.gca().add_artist(centre_circle)
    _show_figure(fig)


def _render_emoji_analysis(selected_user, df):
    """Show the emoji table and a pie chart of the (up to) five first rows."""
    emoji_df = helper.emoji_helper(selected_user, df)
    if not emoji_df.shape[0]:
        return

    st.title("Emoji Analysis")
    table_col, chart_col = st.columns(2)
    with table_col:
        st.dataframe(emoji_df)
    with chart_col:
        top_rows = emoji_df.head(5)
        # The emoji font override is scoped to this chart (creation and
        # rendering) so it no longer leaks into other charts or later reruns.
        emoji_font = {"font.monospace": ["Segoe UI Emoji"], "font.family": "monospace"}
        with plt.rc_context(emoji_font):
            fig, ax = plt.subplots()
            ax.pie(top_rows[1], labels=top_rows[0], autopct="%0.2f")
            _show_figure(fig)


def _render_birth_dates(df):
    """Show the table from ``helper.birth_dates`` when it has any rows."""
    birth_data = helper.birth_dates(df)
    if birth_data.shape[0]:
        st.title("Birth dates of some users.")
        st.dataframe(birth_data)


def _render_sentiment(selected_user, df):
    """Draw the sentiment bar chart for a single user, with its caveat."""
    st.title("Sentiment Analysis")
    sentiment_data, number = helper.sentiment_analysis(selected_user, df)
    _bar_chart(
        sentiment_data,
        sns.cubehelix_palette(start=0.5, rot=0, dark=0.2, light=0.9, reverse=True),
        style='ticks',
    )
    st.header("Based on random " + str(number) + " messages.")
    st.text("Note : Sentiment Analysis give good results if messages \nare in hinglish (hindi or english or both).")


def main():
    """Run the Streamlit WhatsApp chat analyzer page.

    The sidebar offers a file uploader; the uploaded bytes are decoded as UTF-8
    and passed to ``preprocessor.preprocess``. After a user (or "Overall") is
    chosen and "Show Analysis" is pressed, the page renders, in order: top
    statistics, monthly/daily timelines, activity maps, most active users
    (Overall only), word cloud, most common words, emoji analysis, birth dates
    (Overall only) and sentiment analysis (single user only).

    Returns:
        None. All output goes to the Streamlit page.
    """
    st.sidebar.title("Whatsapp Chat Analyzer")
    uploaded_file = st.sidebar.file_uploader("Choose a file")
    if uploaded_file is None:
        return

    try:
        data = uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        # Report an unreadable upload in the UI instead of crashing the script.
        st.sidebar.error("Could not read the file: expected a UTF-8 encoded text export.")
        return
    df = preprocessor.preprocess(data)

    selected_user = st.sidebar.selectbox("Show analysis wrt", _build_user_options(df))
    if not st.sidebar.button("Show Analysis"):
        return

    is_overall = selected_user == 'Overall'

    _render_top_stats(selected_user, df)
    _render_timelines(selected_user, df)
    _render_activity_maps(selected_user, df)
    if is_overall:
        _render_most_active_users(df)
    _render_wordcloud(selected_user, df)
    _render_common_words(selected_user, df)
    _render_emoji_analysis(selected_user, df)
    if is_overall:
        _render_birth_dates(df)
    else:
        _render_sentiment(selected_user, df)
# Streamlit executes the script as ``__main__``, so the guard keeps
# ``streamlit run`` working while letting the module be imported without
# immediately rendering the app.
if __name__ == "__main__":
    main()