File size: 2,170 Bytes
074115a
 
 
 
 
 
 
 
 
 
 
 
 
 
5ea6ee6
 
 
 
 
 
 
 
2020d9c
c6e9f07
 
c04970e
c6e9f07
 
c04970e
a9605ff
 
 
c6e9f07
 
a9605ff
 
 
 
 
 
 
 
 
 
c04970e
c6e9f07
c04970e
5ea6ee6
c6e9f07
074115a
 
 
 
4e1a32f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import streamlit as st
from keybert import KeyBERT

# Create a KeyBERT instance.
# Streamlit re-executes this script from the top on every widget interaction,
# so constructing the model directly would rebuild it on each rerun. When the
# installed Streamlit provides st.cache_resource, the instance is created once
# and reused; otherwise we fall back to building it directly.
def _load_kw_model():
    """Build the KeyBERT model with its default settings."""
    return KeyBERT()


_cache_resource = getattr(st, "cache_resource", None)
kw_model = _cache_resource(_load_kw_model)() if _cache_resource else _load_kw_model()

# Define the Streamlit app
def main():
    """Render the keyword-extraction page and run KeyBERT on demand.

    Draws the input widgets (document text, stop-word and MMR toggles,
    result count, n-gram bounds, MMR diversity) and, once the
    "Extract Keywords" button is pressed, calls the module-level
    ``kw_model`` and writes each ``(keyword, score)`` pair to the page.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.title("Keyword Extraction")
    st.write("Enter your document below:")

    # Widgets are declared in display order; Streamlit lays them out top-down.
    doc = st.text_area("Document")
    remove_stopwords = st.checkbox("Remove Stopwords", value=True)
    apply_mmr = st.checkbox("Apply Maximal Marginal Relevance (MMR)", value=True)
    num_results = st.slider("Number of Results", min_value=1, max_value=30, value=5, step=1)
    min_ngram = st.number_input("Minimum N-gram", value=1, min_value=1, max_value=10, step=1)
    max_ngram = st.number_input("Maximum N-gram", value=3, min_value=1, max_value=10, step=1)
    diversity = st.slider("Keyword Diversity (MMR)", min_value=0.0, max_value=1.0, value=0.2, step=0.1, format="%.1f")

    # Nothing to do until the user explicitly asks for extraction.
    if not st.button("Extract Keywords"):
        return

    if not doc or not doc.strip():
        st.warning("Please enter a document before extracting keywords.")
        return

    # An inverted range would make the underlying vectorizer raise, so reject
    # it here with a readable message instead of a stack trace.
    if min_ngram > max_ngram:
        st.error("Minimum N-gram must not be greater than Maximum N-gram.")
        return

    # top_n must be passed explicitly: KeyBERT defaults to 5 results, which
    # previously capped the output no matter what the slider requested.
    # diversity is only consulted by KeyBERT when use_mmr is true, so a single
    # call covers both the MMR and non-MMR cases.
    keywords = kw_model.extract_keywords(
        doc,
        keyphrase_ngram_range=(int(min_ngram), int(max_ngram)),
        stop_words='english' if remove_stopwords else None,
        top_n=num_results,
        use_mmr=apply_mmr,
        diversity=diversity,
    )

    if not keywords:
        st.info("No keywords could be extracted from this document.")
        return

    # Defensive cap; with top_n set this should already hold.
    selected_keywords = keywords[:num_results]

    st.write(f"Top {num_results} Keywords:")
    for keyword, score in selected_keywords:
        st.write(f"- {keyword} (Score: {score})")

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()