Sa-m committed on
Commit
6542f5d
1 Parent(s): aa5087f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -15
app.py CHANGED
@@ -142,42 +142,93 @@ def fDistance(text2Party):
142
  mem[x[0]]=x[1]
143
  return normalize(mem)
144
 
145
def fDistancePlot(text2Party,plotN=20):
    '''
    Most frequent words visualisation.

    text2Party: text that is tokenized with word_tokenize.
    plotN: number of top-ranked tokens passed to FreqDist.plot.

    Returns whatever FreqDist.plot returns.
    '''
    word_tokens_party = word_tokenize(text2Party)  # Tokenizing
    fdistance = FreqDist(word_tokens_party)
    # Honour the caller's plotN; the count used to be hard-coded to 20,
    # which made the parameter a no-op.
    return fdistance.plot(plotN)
 
 
 
152
 
153
 
154
 
155
- ## UI INTERFACE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
def analysis(Manifesto,Search):
    '''
    Run the manifesto pipeline and search it for a term.

    Manifesto: passed to Parsing, whose output goes through clean_text and
        then Preprocess.
    Search: term handed to concordance and then marked in the result.

    Returns a tuple (fdist_Party, searChRes): the output of fDistance on the
    preprocessed text, and the concordance result in which every occurrence
    of Search has U+0332 inserted between its characters.
    '''
    text_Party = Preprocess(clean_text(Parsing(Manifesto)))
    fdist_Party = fDistance(text_Party)

    searchRes = concordance(text_Party, Search)
    # NOTE(review): the value returned by clean() is overwritten by the next
    # statement, which reads the uncleaned searchRes -- confirm whether the
    # cleaned text was meant to be used instead.
    searChRes = clean(searchRes)
    marked_term = "\u0332".join(Search)
    searChRes = searchRes.replace(Search, marked_term)
    return fdist_Party, searChRes
167
 
168
 
169
# Input components: the search term and the uploaded manifesto file.
Search_txt = gr.inputs.Textbox()
filePdf = gr.inputs.File()

# Output components, listed in the order analysis() returns its values.
text = gr.outputs.Textbox(label='SEARCHED OUTPUT')
mfw = gr.outputs.Label(label="Most Relevant topics in manifesto")

# Build the interface around analysis() and start serving it.
gr.Interface(
    fn=analysis,
    inputs=[filePdf, Search_txt],
    outputs=[mfw, text],
    title='Manifesto Analysis',
).launch(debug=False, share=True)
175
-
176
-
177
-
178
-
179
-
180
-
181
 
182
 
183
 
 
142
  mem[x[0]]=x[1]
143
  return normalize(mem)
144
 
145
def fDistancePlot(text2Party,plotN=30):
    '''
    Most frequent words visualization, saved to 'distplot.png'.

    text2Party: text that is tokenized with word_tokenize.
    plotN: number of top-ranked tokens passed to FreqDist.plot.

    Returns None; the plot is written to 'distplot.png' in the current
    working directory.
    '''
    word_tokens_party = word_tokenize(text2Party)  # Tokenizing
    fdistance = FreqDist(word_tokens_party)
    fig = plt.figure(figsize=(4,6))
    try:
        fdistance.plot(plotN)
        plt.savefig('distplot.png')
    finally:
        # Close the figure instead of only clearing it: plt.clf() leaves the
        # figure registered with pyplot, so every call leaked one figure.
        plt.close(fig)
155
 
156
 
157
 
158
def getSubjectivity(text):
    '''Return the subjectivity value of TextBlob's sentiment for text.'''
    sentiment = TextBlob(text).sentiment
    return sentiment.subjectivity
160
+
161
def getPolarity(text):
    '''Return the polarity value of TextBlob's sentiment for text.'''
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity
164
+
165
+
166
def getAnalysis(score):
    '''
    Label a numeric score by its sign.

    Returns 'Negative' for a score below zero, 'Neutral' for a score equal
    to zero, and 'Positive' for anything else.
    '''
    if score < 0:
        return 'Negative'
    return 'Neutral' if score == 0 else 'Positive'
173
+
174
+
175
 
176
def _save_label_counts_chart(labels, path, title=None):
    '''Save a bar chart of the value counts of a pandas Series to path.'''
    fig = plt.figure(figsize=(4,6))
    try:
        labels.value_counts().plot(kind ='bar')
        if title is not None:
            # Decorate after the figure exists; set earlier, these calls
            # would land on a different (implicit) figure.
            plt.title(title)
            plt.xlabel('Sentiment')
            plt.ylabel('Counts')
        plt.savefig(path)
    finally:
        # Close rather than clear so figures do not pile up across calls.
        plt.close(fig)


def _read_image_rgb(path):
    '''Read an image with OpenCV and return it in RGB channel order.'''
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None; pass that
        # through unchanged instead of failing inside cvtColor.
        return None
    # OpenCV decodes colour images as BGR; convert for RGB consumers.
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)


def analysis(Manifesto,Search):
    '''
    Run the manifesto pipeline, render the charts and search for a term.

    Manifesto: passed to Parsing, whose output goes through clean_text and
        then Preprocess.
    Search: term handed to concordance and then marked in the result.

    Side effects: writes 'sentimentAnalysis.png', 'sentimentAnalysis2.png',
    'wordcloud.png' and (through fDistancePlot) 'distplot.png' in the current
    working directory.

    Returns a tuple
    (searChRes, fdist_Party, distplot image, polarity chart image,
     word cloud image, subjectivity chart image).
    '''
    raw_party = Parsing(Manifesto)
    text_Party=clean_text(raw_party)
    text_Party= Preprocess(text_Party)

    # One row per line of the parsed text, scored with TextBlob.
    df = pd.DataFrame(raw_party.split('\n'), columns=['Content'])
    df['Subjectivity'] = df['Content'].apply(getSubjectivity)
    df['Polarity'] = df['Content'].apply(getPolarity)
    df['Analysis on Polarity'] = df['Polarity'].apply(getAnalysis)
    # NOTE(review): getAnalysis labels by sign; confirm that these labels are
    # the intended buckets for subjectivity scores.
    df['Analysis on Subjectivity'] = df['Subjectivity'].apply(getAnalysis)

    _save_label_counts_chart(df['Analysis on Polarity'], 'sentimentAnalysis.png',
                             title='Sentiment Analysis')
    _save_label_counts_chart(df['Analysis on Subjectivity'], 'sentimentAnalysis2.png')

    wordcloud = WordCloud(max_words=2000, background_color="white",mode="RGB").generate(text_Party)
    fig = plt.figure(figsize=(4,3))
    try:
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        plt.savefig('wordcloud.png')
    finally:
        plt.close(fig)

    fdist_Party=fDistance(text_Party)
    fDistancePlot(text_Party)

    # Read the charts back from the same paths they were saved to; the
    # previous '../' prefix pointed at a different directory.
    img1=_read_image_rgb('sentimentAnalysis.png')
    img2=_read_image_rgb('wordcloud.png')
    img3=_read_image_rgb('sentimentAnalysis2.png')
    img4=_read_image_rgb('distplot.png')

    searchRes=concordance(text_Party,Search)
    # NOTE(review): the value returned by clean() is overwritten by the next
    # statement, which reads the uncleaned searchRes -- confirm whether the
    # cleaned text was meant to be used instead.
    searChRes=clean(searchRes)
    # Insert U+0332 between the characters of every occurrence of Search.
    searChRes=searchRes.replace(Search,"\u0332".join(Search))
    return searChRes,fdist_Party,img4,img1,img2,img3
218
 
219
 
220
# Input components: the search term and the uploaded manifesto file.
Search_txt = gr.inputs.Textbox()
filePdf = gr.inputs.File()

# Output components; the outputs list below follows the order in which
# analysis() returns its values.
text = gr.outputs.Textbox(label='SEARCHED OUTPUT')
mfw = gr.outputs.Label(label="Most Relevant Topics")
# mfw2=gr.outputs.Image(label="Most Relevant Topics Plot")
plot1 = gr.outputs.Image(label='Sentiment Analysis')
plot2 = gr.outputs.Image(label='Word Cloud')
plot3 = gr.outputs.Image(label='Subjectivity')
plot4 = gr.outputs.Image(label='Frequency Distribution')

# Build the interface around analysis() and start serving it.
io = gr.Interface(
    fn=analysis,
    inputs=[filePdf, Search_txt],
    outputs=[text, mfw, plot4, plot1, plot2, plot3],
    title='Manifesto Analysis',
)
io.launch(debug=False, share=True)
232
 
233
 
234