Spaces:

GroNLP
/

agalma

Sleeping

Mark7549 committed on Mar 22, 2024

Commit

51778ca

1 Parent(s): 317c2f1

updated autocomplete for nearest neighbours

Files changed (3) hide show

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import streamlit as st
 from streamlit_option_menu import option_menu
 from word2vec import *
 import pandas as pd
 st.set_page_config(page_title="Ancient Greek Word2Vec", layout="centered")
@@ -13,14 +14,14 @@ active_tab = option_menu(None, ["Nearest neighbours", "Cosine similarity", "3D g
 if active_tab == "Nearest neighbours":
     st.write("### TO DO: add description of function")
     col1, col2 = st.columns(2)
-    with open('corpora/compass_filtered.txt', 'r') as file:
-        all_words = file.read().split()
-        all_words = sorted(all_words)
     with st.container():
         with col1:
-            # word = st.text_input("Enter a word", placeholder="πατήρ")
-            word = st.multiselect("Enter a word", all_words, default=["πατήρ"])
         with col2:
             time_slice = st.selectbox("Time slice", ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"])

 from streamlit_option_menu import option_menu
 from word2vec import *
 import pandas as pd
+from autocomplete import *
 st.set_page_config(page_title="Ancient Greek Word2Vec", layout="centered")
 if active_tab == "Nearest neighbours":
     st.write("### TO DO: add description of function")
     col1, col2 = st.columns(2)
+    # Load the compressed word list
+    compressed_word_list_filename = 'corpora/compass_filtered.pkl.gz'
+    all_words = load_compressed_word_list(compressed_word_list_filename)
     with st.container():
         with col1:
+            word = st.multiselect("Enter a word", all_words)
         with col2:
             time_slice = st.selectbox("Time slice", ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"])

autocomplete.py ADDED Viewed

+import pickle
+import gzip
+def get_unique_words(corpus_filename):
+    """
+    Get a list of unique words from a corpus file
+
+    The file is opened as UTF-8 text and read line by line. Each line is
+    stripped and split on whitespace, and the resulting tokens are
+    collected in a set so that duplicates are dropped.
+
+    corpus_filename: path of the text file to read.
+
+    Returns a list of the distinct tokens. The list is built directly
+    from a set, so its order is unspecified; sort the result if a stable
+    order is needed.
+    """
+    unique_words = set()
+    with open(corpus_filename, 'r', encoding='utf-8') as file:
+        for line in file:
+            words = line.strip().split()
+            unique_words.update(words)
+    return list(unique_words)
+def save_compressed_word_list(words, filename):
+    """
+    Save a list of words to a compressed file
+
+    The object is serialised with pickle and written through gzip to a
+    file opened in binary write mode.
+
+    words: the object to store (presumably a list of strings; any
+        picklable object is accepted by the code as written).
+    filename: path of the gzip file to write.
+
+    Returns nothing.
+    """
+    with gzip.open(filename, 'wb') as file:
+        pickle.dump(words, file)
+def load_compressed_word_list(filename):
+    """
+    Load a list of words from a compressed file
+
+    The file is opened through gzip in binary read mode and its content
+    is deserialised with pickle.
+
+    filename: path of the gzip-compressed pickle file to read.
+
+    Returns whatever object was pickled into the file (presumably the
+    word list written by save_compressed_word_list).
+
+    NOTE(review): unpickling can execute arbitrary code, so this must
+    only be used on files from a trusted source.
+    """
+    with gzip.open(filename, 'rb') as file:
+        return pickle.load(file)
+def get_autocomplete(input_word=" ", all_words=" "):
+    """
+    Get a list of words that start with the input word
+
+    input_word: prefix to match; defaults to a single space.
+    all_words: iterable of candidate strings; defaults to a single-space
+        string, in which case the iteration runs over its characters.
+
+    Returns a new list with every element of all_words for which
+    startswith(input_word) is true, in the order they are iterated.
+    The comparison is an exact, case-sensitive prefix test.
+    """
+    return [word for word in all_words if word.startswith(input_word)]

corpora/compass_filtered.pkl.gz ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:a02f7e61330045219098cda092f3edb23650decc70d60db6be2de267e8c7a925
+size 163769