Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -114,6 +114,33 @@ def concordance(text_Party,strng):
|
|
114 |
s=result.getvalue().splitlines()
|
115 |
return result.getvalue()
|
116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
def normalize(d, target=1.0):
|
119 |
raw = sum(d.values())
|
@@ -356,8 +383,8 @@ def analysis(Manifesto,Search):
|
|
356 |
fdist_Party=fDistance(text_Party)
|
357 |
img4=fDistancePlot(text_Party)
|
358 |
img5=DispersionPlot(text_Party)
|
359 |
-
|
360 |
-
searchRes=
|
361 |
searChRes=clean(searchRes)
|
362 |
searChRes=searchRes.replace(Search,"\u0332".join(Search))
|
363 |
return searChRes,fdist_Party,img1,img2,img3,img4,img5
|
@@ -373,7 +400,7 @@ plot3=gr.outputs.Image(label='Word Cloud')
|
|
373 |
plot4=gr.outputs.Image(label='Frequency Distribution')
|
374 |
plot5=gr.outputs.Image(label='Dispersion Plot')
|
375 |
|
376 |
-
io=gr.Interface(fn=analysis, inputs=[filePdf,Search_txt], outputs=[text,mfw,plot1,plot2,plot3,plot4,plot5], title='Manifesto Analysis',examples=[['manifestos/Bjp_Manifesto_2019.pdf','modi'],['manifestos/AAP_Manifesto_2019.pdf','delhi'],['manifestos/Congress_Manifesto_2019.pdf','safety']],theme='peach')
|
377 |
io.launch(debug=True,share=False)
|
378 |
#,examples=[['./Bjp_Manifesto_2019.pdf','india'],['./AAP_Manifesto_2019.pdf',],['./Congress_Manifesto_2019.pdf',]]
|
379 |
#allow_screenshot=False, allow_flagging="never",
|
|
|
114 |
s=result.getvalue().splitlines()
|
115 |
return result.getvalue()
|
116 |
|
117 |
+
def get_all_phases_containing_tar_wrd(target_word, tar_passage, left_margin = 10, right_margin = 10):
    """
    Collect every phrase of tar_passage that contains target_word.

    Workaround for keeping the kind of output that nltk's concordance view
    produces, as a value that can be returned to the caller.

    Args:
        target_word (str): word to look up; tokens are indexed by their
            lower-cased form.
        tar_passage (str): raw text to search. Note the argument order:
            the word comes first, the passage second.
        left_margin (int): number of tokens (words/punctuation) kept before
            the target word; clamped at the beginning of the text.
        right_margin (int): the window ends at offset + right_margin, so it
            holds the target token plus up to right_margin - 1 tokens after it.

    Returns:
        str: the matching phrases joined with '. '. Every token inside a
        phrase is followed by a single space.
    """
    # Tokenize the passage and wrap the tokens in an nltk Text.
    tokens = nltk.word_tokenize(tar_passage)
    text = nltk.Text(tokens)

    # Index every token position under its lower-cased form.
    index = nltk.ConcordanceIndex(text.tokens, key=lambda s: s.lower())

    # One token window per occurrence. The left edge honours left_margin
    # (previously a hard-coded 5) and never goes below the start of the text.
    windows = [
        text.tokens[max(offset - left_margin, 0):offset + right_margin]
        for offset in index.offsets(target_word)
    ]

    # NOTE(review): "Y" is stripped from every token and the last match is
    # dropped ([:-1]); both are kept from the original -- confirm the intent.
    phrases = [
        ''.join(token.replace("Y", "") + ' ' for token in window)
        for window in windows
    ][:-1]
    return '. '.join(phrases)
|
143 |
+
|
144 |
|
145 |
def normalize(d, target=1.0):
|
146 |
raw = sum(d.values())
|
|
|
383 |
fdist_Party=fDistance(text_Party)
|
384 |
img4=fDistancePlot(text_Party)
|
385 |
img5=DispersionPlot(text_Party)
|
386 |
+
#concordance(text_Party,Search)
|
387 |
+
searchRes=get_all_phases_containing_tar_wrd(text_Party,Search)
|
388 |
searChRes=clean(searchRes)
|
389 |
searChRes=searchRes.replace(Search,"\u0332".join(Search))
|
390 |
return searChRes,fdist_Party,img1,img2,img3,img4,img5
|
|
|
400 |
plot4=gr.outputs.Image(label='Frequency Distribution')
|
401 |
plot5=gr.outputs.Image(label='Dispersion Plot')
|
402 |
|
403 |
+
io=gr.Interface(fn=analysis, inputs=[filePdf,Search_txt], outputs=[text,mfw,plot1,plot2,plot3,plot4,plot5], title='Manifesto Analysis',examples=[['manifestos/Bjp_Manifesto_2019.pdf','modi'],['manifestos/AAP_Manifesto_2019.pdf','delhi'],['manifestos/Congress_Manifesto_2019.pdf','safety']],theme='dark-peach')
|
404 |
io.launch(debug=True,share=False)
|
405 |
#,examples=[['./Bjp_Manifesto_2019.pdf','india'],['./AAP_Manifesto_2019.pdf',],['./Congress_Manifesto_2019.pdf',]]
|
406 |
#allow_screenshot=False, allow_flagging="never",
|