|
import streamlit as st |
|
from keybert import KeyBERT |
|
|
|
|
|
# Streamlit re-executes this script on every widget interaction, so the model
# is built through a cached factory instead of being re-created on each rerun.
# `st.cache_resource` is looked up defensively: Streamlit releases that lack it
# fall back to an uncached load, which matches the previous behaviour.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_keyword_model():
    """Return the KeyBERT instance shared by all extraction requests."""
    return KeyBERT()


kw_model = _load_keyword_model()
|
|
|
|
|
def main():
    """Render the keyword-extraction page and run KeyBERT on request.

    The page collects a document plus extraction options (stop-word removal,
    MMR on/off, number of results, n-gram range, MMR diversity). When the
    "Extract Keywords" button is pressed, the options are validated, the
    module-level ``kw_model`` is queried once, and each returned
    ``(keyword, score)`` pair is written to the page.
    """
    st.title("Keyword Extraction")
    st.write("Enter your document below:")

    # --- Input widgets -----------------------------------------------------
    doc = st.text_area("Document")
    remove_stopwords = st.checkbox("Remove Stopwords", value=True)
    apply_mmr = st.checkbox("Apply Maximal Marginal Relevance (MMR)", value=True)
    num_results = st.slider("Number of Results", min_value=1, max_value=30, value=5, step=1)
    min_ngram = st.number_input("Minimum N-gram", value=1, min_value=1, max_value=10, step=1)
    max_ngram = st.number_input("Maximum N-gram", value=3, min_value=1, max_value=10, step=1)
    diversity = st.slider(
        "Keyword Diversity (MMR)",
        min_value=0.0,
        max_value=1.0,
        value=0.2,
        step=0.1,
        format="%.1f",
    )

    # Nothing else to do until the user explicitly asks for extraction.
    if not st.button("Extract Keywords"):
        return

    # Guard against inputs that cannot produce a meaningful result.
    if not doc.strip():
        st.warning("Please enter a document before extracting keywords.")
        return
    if min_ngram > max_ngram:
        st.error("Minimum N-gram must not be greater than Maximum N-gram.")
        return

    # Options shared by both the MMR and the plain extraction modes.
    # `top_n` must be passed explicitly: KeyBERT's default is 5, so without it
    # the "Number of Results" slider could never yield more than five keywords.
    extract_options = {
        "keyphrase_ngram_range": (min_ngram, max_ngram),
        "stop_words": "english" if remove_stopwords else None,
        "top_n": num_results,
    }
    if apply_mmr:
        extract_options["use_mmr"] = True
        extract_options["diversity"] = diversity

    keywords = kw_model.extract_keywords(doc, **extract_options)

    # Defensive cap in case the model returns more entries than requested.
    selected_keywords = keywords[:num_results]

    st.write(f"Top {num_results} Keywords:")
    for keyword, score in selected_keywords:
        st.write(f"- {keyword} (Score: {score})")
|
|
|
|
|
# Script entry point: build the page only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|