File size: 6,090 Bytes
0d0f07a
 
 
14c3a4f
51778ca
7b3478d
 
0d0f07a
 
 
 
 
 
 
 
 
 
 
51778ca
 
 
 
317c2f1
0d0f07a
 
d24cb74
 
 
0d0f07a
 
14c3a4f
0d0f07a
fcfa1a6
 
 
 
14c3a4f
0d0f07a
 
 
14c3a4f
0d0f07a
14c3a4f
 
169869e
 
 
 
 
 
 
b24ad56
169869e
14c3a4f
 
169869e
14c3a4f
 
169869e
 
 
 
 
fcfa1a6
 
 
 
b24ad56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14c3a4f
bdf0a5e
0d0f07a
 
bdf0a5e
 
0d0f07a
bdf0a5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d0f07a
 
 
7b3478d
 
 
 
 
 
0d0f07a
7b3478d
 
 
 
 
 
 
 
 
0d0f07a
7b3478d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d0f07a
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import streamlit as st
from streamlit_option_menu import option_menu
from word2vec import *
import pandas as pd
from autocomplete import *
from vector_graph import *
from plots import *

# Set the browser tab title and use the centered page layout.
st.set_page_config(
    layout="centered",
    page_title="Ancient Greek Word2Vec",
)

# Horizontal navigation menu; the label of the selected entry is stored in
# ``active_tab`` and decides which tab gets rendered further down.
tab_labels = ["Nearest neighbours", "Cosine similarity", "3D graph", "Dictionary"]
active_tab = option_menu(
    None,
    tab_labels,
    menu_icon="cast",
    default_index=0,
    orientation="horizontal",
)

# ---------------------------------------------------------------------------
# Tab rendering: exactly one branch below runs per script execution, chosen by
# the menu label held in ``active_tab``.
# ---------------------------------------------------------------------------

# Time-slice labels shown in the selectors, mapped to the model names that the
# nearest-neighbours tab passes to ``validate_nearest_neighbours`` and
# ``get_nearest_neighbours`` (e.g. "Archaic" -> "archaic_cbow").
TIME_SLICE_MODEL_NAMES = {
    "Archaic": "archaic_cbow",
    "Classical": "classical_cbow",
    "Hellenistic": "hellen_cbow",
    "Early Roman": "early_roman_cbow",
    "Late Roman": "late_roman_cbow",
}
TIME_SLICE_LABELS = list(TIME_SLICE_MODEL_NAMES)

# Compressed word list that feeds the word pickers.
WORD_LIST_PATH = 'corpora/compass_filtered.pkl.gz'

# Message shown whenever a required input is missing.
MISSING_INPUT_MESSAGE = 'Please fill in all fields'

# Nearest neighbours tab
if active_tab == "Nearest neighbours":
    st.write("### TO DO: add description of function")
    col1, col2 = st.columns(2)

    # Load the compressed word list
    all_words = load_compressed_word_list(WORD_LIST_PATH)

    with st.container():
        with col1:
            # A multiselect capped at one selection is used as the word picker.
            # It returns a list, so unwrap the single entry when one is chosen;
            # with nothing chosen ``word`` stays an empty list, which is what
            # gets handed to the validation helper below.
            word = st.multiselect("Enter a word", all_words, max_selections=1)
            if word:
                word = word[0]

        with col2:
            time_slice = st.selectbox("Time slice", TIME_SLICE_LABELS)

        models = st.multiselect(
            "Select models to search for neighbours",
            TIME_SLICE_LABELS,
        )
        n = st.slider("Number of neighbours", 1, 50, 15)

        nearest_neighbours_button = st.button("Find nearest neighbours")

        # If the button to calculate nearest neighbours is clicked
        if nearest_neighbours_button:

            # Rewrite the time-slice label to its model name: Archaic -> archaic_cbow
            time_slice = TIME_SLICE_MODEL_NAMES[time_slice]

            # Check if all fields are filled in.
            # NOTE(review): the explicit ``== False`` is kept on purpose; the
            # return contract of validate_nearest_neighbours is not visible
            # here, and ``not ...`` would also reject a ``None`` return.
            if validate_nearest_neighbours(word, time_slice, n, models) == False:
                st.error(MISSING_INPUT_MESSAGE)
            else:
                # Rewrite models to list of all loaded models
                models = load_selected_models(models)

                nearest_neighbours = get_nearest_neighbours(word, time_slice, n, models)

                # Number the result rows from 1 instead of pandas' default 0.
                df = pd.DataFrame(
                    nearest_neighbours,
                    columns=["Word", "Time slice", "Similarity"],
                    index=range(1, len(nearest_neighbours) + 1),
                )
                st.table(df)

                # Store content in a temporary file
                tmp_file = store_df_in_temp_file(df)

                # Read the temporary file back as bytes; the handle is closed
                # before the download button is built.
                with open(tmp_file, "rb") as file:
                    file_byte = file.read()

                # Create download button
                st.download_button(
                    "Download results",
                    data=file_byte,
                    file_name=f'nearest_neighbours_{word}_{time_slice}.xlsx',
                    mime='application/octet-stream',
                )

# Cosine similarity tab
elif active_tab == "Cosine similarity":
    col1, col2 = st.columns(2)
    col3, col4 = st.columns(2)
    with st.container():
        with col1:
            word_1 = st.text_input("Enter a word", placeholder="πατήρ")

        with col2:
            time_slice_1 = st.selectbox("Time slice word 1", TIME_SLICE_LABELS)

    with st.container():
        with col3:
            word_2 = st.text_input("Enter a word", placeholder="μήτηρ")

        with col4:
            time_slice_2 = st.selectbox("Time slice word 2", TIME_SLICE_LABELS)

    # Create button for calculating cosine similarity
    cosine_similarity_button = st.button("Calculate cosine similarity")

    # If the button is clicked, execute calculation
    if cosine_similarity_button:
        # Drop surrounding whitespace so a blank or whitespace-only field is
        # reported as missing instead of being sent to the similarity helper.
        word_1 = word_1.strip()
        word_2 = word_2.strip()

        if not word_1 or not word_2:
            st.error(MISSING_INPUT_MESSAGE)
        else:
            # The time-slice labels are passed through unchanged, as before.
            cosine_similarity_score = get_cosine_similarity(word_1, time_slice_1, word_2, time_slice_2)
            st.write(cosine_similarity_score)

# 3D graph tab
elif active_tab == "3D graph":
    col1, col2 = st.columns(2)

    # Load compressed word list
    all_words = load_compressed_word_list(WORD_LIST_PATH)

    with st.container():
        with col1:
            # Same single-selection picker as on the nearest-neighbours tab;
            # ``word`` is an empty list until something is chosen.
            word = st.multiselect("Enter a word", all_words, max_selections=1)
            if word:
                word = word[0]

        with col2:
            time_slice = st.selectbox("Time slice", TIME_SLICE_LABELS)

        n = st.slider("Number of words", 1, 50, 15)

        graph_button = st.button("Create 3D graph")

        if graph_button:
            if not word:
                # Without this guard an empty list would be passed on as the
                # target word to the vector and plotting helpers.
                st.error(MISSING_INPUT_MESSAGE)
            else:
                time_slice_model = convert_time_name_to_model(time_slice)
                nearest_neighbours_vectors = get_nearest_neighbours_vectors(word, time_slice_model, n)
                # nearest_neighbours_3d_vectors = create_3d_vectors(word, time_slice_model, nearest_neighbours_vectors)
                st.dataframe(nearest_neighbours_vectors)
                # new_3d_vectors = nearest_neighbours_to_pca_vectors(word, time_slice, nearest_neighbours_vectors)
                # st.dataframe(new_3d_vectors)

                # make_3d_plot4 returns the figure together with a dataframe,
                # and both are displayed.
                fig, df = make_3d_plot4(nearest_neighbours_vectors, word, time_slice_model)

                st.dataframe(df)

                st.plotly_chart(fig)

# Dictionary tab
elif active_tab == "Dictionary":
    with st.container():
        st.write("Dictionary tab")