# File size: 1,874 Bytes
# f9933ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import spacy
import streamlit as st

def main():
    """Render the Turkish-language summarizer page.

    Shows a title and a text area; once the button is pressed, the entered
    text is passed to ``summarize`` and the result is written under a
    subheader.

    Note: a later ``def main()`` in this module rebinds the name, so this
    version is not the one reached by the ``__main__`` guard.
    """
    st.title("Metin Özetleyici")
    entered_text = st.text_area("Metninizi buraya girin:")

    # Nothing more to draw until the user asks for a summary.
    if not st.button("Özetle"):
        return

    # Compute the summary before emitting the heading, as the page expects.
    result = summarize(entered_text)
    st.subheader("Özet:")
    st.write(result)

# Read the stopword list once at import time. The context manager closes the
# file as soon as its contents are in memory instead of leaving the handle
# open for the lifetime of the process.
with open("stopwords.txt", "r", encoding="utf8") as stopwordstxt:
    stopwords = stopwordstxt.read()

# spaCy pipeline shared by summarize(). The "tr_core_news_trf" package must be
# installed separately (presumably the Turkish transformer model - confirm).
nlp = spacy.load("tr_core_news_trf")
def main():
    """Render the Streamlit summarizer page.

    Draws the title, a text area and a "Summarize" button. When the button is
    pressed with non-blank text, the text is passed to ``summarize`` and the
    result is shown under a "Summary:" subheader. Blank input produces a
    warning instead of a summary.
    """
    st.title("Text Summarizer")
    document = st.text_area("Enter the document:")

    # Nothing more to draw until the user asks for a summary.
    if not st.button("Summarize"):
        return

    # Blank input contains nothing to summarize; prompt the user instead of
    # running the NLP pipeline on it.
    if not document or not document.strip():
        st.warning("Please enter some text to summarize.")
        return

    summary = summarize(document)
    st.subheader("Summary:")
    st.write(summary)

def summarize(document, max_sentences=7, max_sentence_words=30):
    """Build an extractive summary from the highest-scoring sentences.

    Every token that is not a stopword, punctuation or whitespace is counted
    (case-insensitively) and weighted by its count relative to the most
    frequent token. A sentence's score is the sum of the weights of its
    tokens; the best-scoring sentences are joined with single spaces.

    Args:
        document: Raw text to summarize.
        max_sentences: Upper bound on the number of sentences returned.
        max_sentence_words: Sentences with this many or more space-separated
            words are never selected.

    Returns:
        The selected sentences, highest score first, joined by spaces. An
        empty string when the text contains nothing that can be scored.
    """
    from heapq import nlargest

    # ``stopwords`` holds the raw text of stopwords.txt. Using ``in`` directly
    # on that string is a substring test, so any token that happens to be a
    # fragment of the file would be dropped. Split it into whole entries
    # first (assumes whitespace- or comma-separated entries - confirm against
    # the file) and compare exact, lowercased words.
    stop_entries = stopwords
    if isinstance(stop_entries, str):
        stop_entries = stop_entries.replace(",", " ").split()
    stop_set = {entry.lower() for entry in stop_entries}

    docx = nlp(document)

    # Count words under their lowercased form so the lookup during sentence
    # scoring (which also lowercases) finds capitalised words too.
    # NOTE: str.lower() is not locale-aware, so Turkish dotted/dotless I may
    # not fold the way a Turkish reader expects.
    word_frequencies = {}
    for token in docx:
        if token.is_space or token.is_punct:
            continue
        key = token.text.lower()
        if key in stop_set:
            continue
        word_frequencies[key] = word_frequencies.get(key, 0) + 1

    # Empty or stopword-only input: nothing to rank, and max() would raise.
    if not word_frequencies:
        return ""

    maximum_frequency = max(word_frequencies.values())
    weights = {
        key: count / maximum_frequency
        for key, count in word_frequencies.items()
    }

    sentence_scores = {}
    for sent in docx.sents:
        # The length limit depends only on the sentence, so test it once
        # rather than once per token.
        if len(sent.text.split(" ")) >= max_sentence_words:
            continue
        matched = [
            weights[key]
            for key in (token.text.lower() for token in sent)
            if key in weights
        ]
        # Sentences without any weighted word are not candidates.
        if matched:
            sentence_scores[sent] = sum(matched)

    summarized_sentences = nlargest(
        max_sentences, sentence_scores, key=sentence_scores.get
    )
    return " ".join(sent.text for sent in summarized_sentences)
# Script entry point: build the page only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()