File size: 6,090 Bytes
0d0f07a 14c3a4f 51778ca 7b3478d 0d0f07a 51778ca 317c2f1 0d0f07a d24cb74 0d0f07a 14c3a4f 0d0f07a fcfa1a6 14c3a4f 0d0f07a 14c3a4f 0d0f07a 14c3a4f 169869e b24ad56 169869e 14c3a4f 169869e 14c3a4f 169869e fcfa1a6 b24ad56 14c3a4f bdf0a5e 0d0f07a bdf0a5e 0d0f07a bdf0a5e 0d0f07a 7b3478d 0d0f07a 7b3478d 0d0f07a 7b3478d 0d0f07a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
import streamlit as st
from streamlit_option_menu import option_menu
from word2vec import *
import pandas as pd
from autocomplete import *
from vector_graph import *
from plots import *
# Page-level configuration: browser tab title and a centred layout.
st.set_page_config(page_title="Ancient Greek Word2Vec", layout="centered")

# Horizontal navigation menu; the first entry is selected by default.
tab_labels = ["Nearest neighbours", "Cosine similarity", "3D graph", 'Dictionary']
active_tab = option_menu(
    None,
    tab_labels,
    menu_icon="cast",
    default_index=0,
    orientation="horizontal",
)
# Labels offered by every time-slice selector in the app.
TIME_SLICES = ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"]

# Path handed to load_compressed_word_list() to populate the word pickers.
WORD_LIST_PATH = 'corpora/compass_filtered.pkl.gz'

# Time slices whose model-name prefix is not simply the lower-cased label.
MODEL_PREFIX_OVERRIDES = {
    'Hellenistic': 'hellen',
    'Early Roman': 'early_roman',
    'Late Roman': 'late_roman',
}


def _pick_word_and_time_slice(all_words):
    """Render the word picker and the time-slice selector side by side.

    Args:
        all_words: Options offered by the word picker.

    Returns:
        A ``(word, time_slice)`` tuple. ``word`` is the single selected
        word, or ``None`` while nothing is selected; ``time_slice`` is the
        label chosen in the selector.
    """
    word_col, slice_col = st.columns(2)
    with word_col:
        # A multiselect capped at one selection is used as the word picker;
        # it yields a (possibly empty) list, so unwrap it below.
        selection = st.multiselect("Enter a word", all_words, max_selections=1)
    with slice_col:
        time_slice = st.selectbox("Time slice", TIME_SLICES)
    return (selection[0] if selection else None), time_slice


# Nearest neighbours tab
if active_tab == "Nearest neighbours":
    st.write("### TO DO: add description of function")

    all_words = load_compressed_word_list(WORD_LIST_PATH)
    word, time_slice = _pick_word_and_time_slice(all_words)

    with st.container():
        models = st.multiselect(
            "Select models to search for neighbours",
            TIME_SLICES,
        )
        n = st.slider("Number of neighbours", 1, 50, 15)
        nearest_neighbours_button = st.button("Find nearest neighbours")

    # If the button to calculate nearest neighbours is clicked
    if nearest_neighbours_button:
        # Rewrite the time-slice label to a model name: Archaic -> archaic_cbow
        model_prefix = MODEL_PREFIX_OVERRIDES.get(time_slice, time_slice)
        model_name = model_prefix.lower() + "_cbow"

        # Check if all fields are filled in. An unselected word is rejected
        # here so it never reaches the model lookup. The `== False`
        # comparison is kept on purpose: the helper's return contract is not
        # visible from this file, and a None result must not count as a
        # failed validation.
        if not word or validate_nearest_neighbours(word, model_name, n, models) == False:
            st.error('Please fill in all fields')
        else:
            # Rewrite models to list of all loaded models
            loaded_models = load_selected_models(models)
            nearest_neighbours = get_nearest_neighbours(word, model_name, n, loaded_models)

            # Number the result rows from 1 instead of pandas' default 0.
            df = pd.DataFrame(
                nearest_neighbours,
                columns=["Word", "Time slice", "Similarity"],
                index=range(1, len(nearest_neighbours) + 1),
            )
            st.table(df)

            # Store content in a temporary file and read it back as bytes
            # for the download button.
            # NOTE(review): the file is not removed here -- confirm that
            # store_df_in_temp_file() takes care of cleanup.
            tmp_file = store_df_in_temp_file(df)
            with open(tmp_file, "rb") as file:
                file_byte = file.read()

            # Create download button
            st.download_button(
                "Download results",
                data=file_byte,
                file_name=f'nearest_neighbours_{word}_{model_name}.xlsx',
                mime='application/octet-stream',
            )

# Cosine similarity tab
elif active_tab == "Cosine similarity":
    col1, col2 = st.columns(2)
    col3, col4 = st.columns(2)

    with st.container():
        with col1:
            word_1 = st.text_input("Enter a word", placeholder="πατήρ")
        with col2:
            time_slice_1 = st.selectbox("Time slice word 1", TIME_SLICES)

    with st.container():
        with col3:
            word_2 = st.text_input("Enter a word", placeholder="μήτηρ")
        with col4:
            time_slice_2 = st.selectbox("Time slice word 2", TIME_SLICES)

    # Create button for calculating cosine similarity
    cosine_similarity_button = st.button("Calculate cosine similarity")

    # If the button is clicked, execute calculation
    if cosine_similarity_button:
        # The placeholders are only hints; an untouched input is an empty
        # string, which cannot be looked up in a model.
        if not word_1 or not word_2:
            st.error('Please fill in all fields')
        else:
            cosine_similarity_score = get_cosine_similarity(
                word_1, time_slice_1, word_2, time_slice_2
            )
            st.write(cosine_similarity_score)

# 3D graph tab
elif active_tab == "3D graph":
    all_words = load_compressed_word_list(WORD_LIST_PATH)
    word, time_slice = _pick_word_and_time_slice(all_words)

    with st.container():
        n = st.slider("Number of words", 1, 50, 15)
        graph_button = st.button("Create 3D graph")

    if graph_button:
        # Without this guard an unselected word would be passed on to the
        # vector lookup and the plot.
        if not word:
            st.error('Please fill in all fields')
        else:
            time_slice_model = convert_time_name_to_model(time_slice)
            nearest_neighbours_vectors = get_nearest_neighbours_vectors(
                word, time_slice_model, n
            )
            st.dataframe(nearest_neighbours_vectors)

            fig, df = make_3d_plot4(nearest_neighbours_vectors, word, time_slice_model)
            st.dataframe(df)
            st.plotly_chart(fig)

# Dictionary tab
elif active_tab == "Dictionary":
    with st.container():
        st.write("Dictionary tab")
|