Spaces:

kaisugi
/

academic-paraphraser

Runtime error

App Files Files Community

kaisugi committed on Feb 13, 2023

Commit

7db6000

1 Parent(s): e1a3f25

update

Browse files

Files changed (1) hide show

app.py +15 -5

app.py CHANGED Viewed

@@ -62,7 +62,7 @@ def load_sentence_embeddings_and_index():
 @st.cache(allow_output_mutation=True)
-def get_retrieval_results(index, input_text, top_k, model, tokenizer, sentence_df):
     with torch.no_grad():
         inputs = tokenizer.encode_plus(
             input_text,
@@ -78,11 +78,19 @@ def get_retrieval_results(index, input_text, top_k, model, tokenizer, sentence_d
     _, ids = index.search(x=np.array([query_embeddings]), k=top_k)
     retrieved_sentences = []
     for id in ids[0]:
         retrieved_sentences.append(sentence_df.loc[id, "sentence"])
-    return pd.DataFrame({"sentences": retrieved_sentences})
 if __name__ == "__main__":
@@ -93,9 +101,11 @@ if __name__ == "__main__":
     st.markdown("## AI-based Paraphrasing for Academic Writing")
-    input_text = st.text_area("text input", "Model have good results.", placeholder="Write something here...")
-    top_k = st.number_input('top_k', min_value=1, value=10, step=1)
     if st.button('search'):
-        df = get_retrieval_results(index, input_text, top_k, model, tokenizer, sentence_df)
         st.table(df)

 @st.cache(allow_output_mutation=True)
+def get_retrieval_results(index, input_text, top_k, model, tokenizer, sentence_df, exclude_word_list):
     with torch.no_grad():
         inputs = tokenizer.encode_plus(
             input_text,
     _, ids = index.search(x=np.array([query_embeddings]), k=top_k)
     retrieved_sentences = []
+    retrieved_paper_id = []
     for id in ids[0]:
         retrieved_sentences.append(sentence_df.loc[id, "sentence"])
+        retrieved_paper_id.append(f"https://aclanthology.org/{sentence_df.loc[id, 'file_id']}")
+    all_df = pd.DataFrame({"sentence": retrieved_sentences, "source link": retrieved_paper_id})
+    if len(exclude_word_list) == 0:
+        return all_df
+    else:
+        exclude_word_list_regex = '|'.join(exclude_word_list)
+        return all_df[~all_df["sentence"].str.contains(exclude_word_list_regex)]
 if __name__ == "__main__":
     st.markdown("## AI-based Paraphrasing for Academic Writing")
+    input_text = st.text_area("text input", "We saw difference in the results between A and B.", placeholder="Write something here...")
+    top_k = st.number_input('top_k (upperbound)', min_value=1, value=30, step=1)
+    input_words = st.text_input("exclude words (comma separated)", "see, saw")
     if st.button('search'):
+        exclude_word_list = [s.strip() for s in input_words.split(",") if s.strip() != ""]
+        df = get_retrieval_results(index, input_text, top_k, model, tokenizer, sentence_df, exclude_word_list)
         st.table(df)