File size: 943 Bytes
51778ca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import pickle
import gzip
def get_unique_words(corpus_filename):
    """
    Get a sorted list of the unique words in a corpus file.

    The file is read as UTF-8 text, one line at a time, and every
    whitespace-separated token counts as a word. Tokens are compared
    exactly (case-sensitive, punctuation kept).

    Args:
        corpus_filename: Path of the text file to read.

    Returns:
        A list containing each distinct token once, in ascending
        lexicographic order. An empty or whitespace-only file yields [].

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    unique_words = set()
    with open(corpus_filename, 'r', encoding='utf-8') as file:
        for line in file:
            # split() with no separator already ignores leading/trailing
            # whitespace (including the newline), so no strip() is needed.
            unique_words.update(line.split())
    # Set iteration order for strings varies between interpreter runs (hash
    # randomisation); sorting makes the result reproducible.
    return sorted(unique_words)
def save_compressed_word_list(words, filename):
    """
    Pickle a word list and write it to a gzip-compressed file.

    Args:
        words: The object to store (normally a list of strings).
        filename: Destination path; the file is opened in 'wb' mode.
    """
    with gzip.open(filename, 'wb') as archive:
        # Stream the pickle straight into the gzip writer.
        pickler = pickle.Pickler(archive)
        pickler.dump(words)
def load_compressed_word_list(filename):
    """
    Read back a word list stored as a gzip-compressed pickle.

    Args:
        filename: Path of the gzip file to read.

    Returns:
        The object that was pickled into the file.

    Note:
        Unpickling can execute arbitrary code, so only load files that
        come from a trusted source.
    """
    with gzip.open(filename, 'rb') as archive:
        words = pickle.load(archive)
    return words
def get_autocomplete(input_word=" ", all_words=" "):
    """
    Collect the entries of all_words that begin with input_word.

    Matching is an exact, case-sensitive prefix test, and results keep the
    order in which they appear in all_words.

    Args:
        input_word: Prefix to look for. Defaults to a single space.
        all_words: Iterable of candidate strings. Defaults to a single
            space; note that a plain string is iterated character by
            character.

    Returns:
        A new list of the matching entries (empty if nothing matches).
    """
    matches = []
    for candidate in all_words:
        if candidate.startswith(input_word):
            matches.append(candidate)
    return matches