# Hugging Face Space: GroNLP/agalma (status: Sleeping)
# File size: 6,090 Bytes

# Commit hashes captured from the web view (apparently per-line commit
# annotations; not part of the program):
# 0d0f07a, 14c3a4f, 51778ca, 7b3478d, 317c2f1, d24cb74, fcfa1a6, 169869e,
# b24ad56, bdf0a5e

import streamlit as st
from streamlit_option_menu import option_menu
from word2vec import *
import pandas as pd
from autocomplete import *
from vector_graph import *
from plots import *

# Page setup: title and centered layout.
st.set_page_config(layout="centered", page_title="Ancient Greek Word2Vec")

# Horizontal menu; the selected label is what the tab branches compare against.
tab_labels = ["Nearest neighbours", "Cosine similarity", "3D graph", "Dictionary"]
active_tab = option_menu(
    None,
    tab_labels,
    orientation="horizontal",
    default_index=0,
    menu_icon="cast",
)

# Tab dispatch: render the widgets (and, once its button is clicked, the
# results) of whichever tab is currently selected in the horizontal menu.

# Options shared by the tabs, defined once so the tabs cannot drift apart.
TIME_SLICE_LABELS = ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"]
WORD_LIST_PATH = 'corpora/compass_filtered.pkl.gz'
MISSING_INPUT_MESSAGE = 'Please fill in all fields'

# Nearest neighbours tab
if active_tab == "Nearest neighbours":
    st.write("### TO DO: add description of function")
    col1, col2 = st.columns(2)

    # Load the compressed word list
    all_words = load_compressed_word_list(WORD_LIST_PATH)

    with st.container():
        with col1:
            # The multiselect yields a list (capped at one entry); unwrap it,
            # using None while nothing has been selected.
            selected_words = st.multiselect("Enter a word", all_words, max_selections=1)
            word = selected_words[0] if selected_words else None

        with col2:
            time_slice = st.selectbox("Time slice", TIME_SLICE_LABELS)

        models = st.multiselect(
            "Select models to search for neighbours",
            TIME_SLICE_LABELS,
        )
        n = st.slider("Number of neighbours", 1, 50, 15)

        nearest_neighbours_button = st.button("Find nearest neighbours")

        # If the button to calculate nearest neighbours is clicked
        if nearest_neighbours_button:

            # Rewrite timeslices to model names: Archaic -> archaic_cbow
            model_name_stems = {
                'Hellenistic': 'hellen',
                'Early Roman': 'early_roman',
                'Late Roman': 'late_roman',
            }
            time_slice = model_name_stems.get(time_slice, time_slice).lower() + "_cbow"

            # Check if all fields are filled in. An empty word selection is
            # rejected here directly; the helper's verdict is still compared
            # against False explicitly, as before, so only an explicit False
            # from it counts as a failure.
            if not word or validate_nearest_neighbours(word, time_slice, n, models) == False:
                st.error(MISSING_INPUT_MESSAGE)
            else:
                # Rewrite models to list of all loaded models
                models = load_selected_models(models)

                nearest_neighbours = get_nearest_neighbours(word, time_slice, n, models)

                # Number the rows from 1 so the table reads as a ranking
                df = pd.DataFrame(
                    nearest_neighbours,
                    columns=["Word", "Time slice", "Similarity"],
                    index=range(1, len(nearest_neighbours) + 1),
                )
                st.table(df)

                # Store content in a temporary file
                tmp_file = store_df_in_temp_file(df)

                # Read the file fully, then release the handle before the
                # download widget is built from the in-memory bytes.
                with open(tmp_file, "rb") as file:
                    file_bytes = file.read()

                # Create download button
                st.download_button(
                    "Download results",
                    data=file_bytes,
                    file_name=f'nearest_neighbours_{word}_{time_slice}.xlsx',
                    mime='application/octet-stream',
                )

# Cosine similarity tab
elif active_tab == "Cosine similarity":
    col1, col2 = st.columns(2)
    col3, col4 = st.columns(2)
    with st.container():
        with col1:
            word_1 = st.text_input("Enter a word", placeholder="πατήρ")

        with col2:
            time_slice_1 = st.selectbox("Time slice word 1", TIME_SLICE_LABELS)

    with st.container():
        with col3:
            word_2 = st.text_input("Enter a word", placeholder="μήτηρ")

        with col4:
            time_slice_2 = st.selectbox("Time slice word 2", TIME_SLICE_LABELS)

    # Create button for calculating cosine similarity
    cosine_similarity_button = st.button("Calculate cosine similarity")

    # If the button is clicked, execute calculation
    if cosine_similarity_button:
        # The placeholders are only hints; an untouched field is empty, so
        # refuse to compute until both words have been typed in.
        if not (word_1.strip() and word_2.strip()):
            st.error(MISSING_INPUT_MESSAGE)
        else:
            # NOTE(review): the display labels ("Archaic", ...) are passed
            # through unchanged here, unlike the other tabs, which convert
            # them to model names first - confirm the helper expects labels.
            cosine_similarity_score = get_cosine_similarity(word_1, time_slice_1, word_2, time_slice_2)
            st.write(cosine_similarity_score)

# 3D graph tab
elif active_tab == "3D graph":
    col1, col2 = st.columns(2)

    # Load compressed word list
    all_words = load_compressed_word_list(WORD_LIST_PATH)

    with st.container():
        with col1:
            # Same unwrapping as in the nearest neighbours tab: a list of at
            # most one entry becomes a plain word, or None when empty.
            selected_words = st.multiselect("Enter a word", all_words, max_selections=1)
            word = selected_words[0] if selected_words else None

        with col2:
            time_slice = st.selectbox("Time slice", TIME_SLICE_LABELS)

        n = st.slider("Number of words", 1, 50, 15)

        graph_button = st.button("Create 3D graph")

        if graph_button:
            # Without a selected word there is nothing to plot; report it
            # instead of handing an empty selection to the vector helpers.
            if not word:
                st.error(MISSING_INPUT_MESSAGE)
            else:
                time_slice_model = convert_time_name_to_model(time_slice)
                nearest_neighbours_vectors = get_nearest_neighbours_vectors(word, time_slice_model, n)
                st.dataframe(nearest_neighbours_vectors)

                fig, df = make_3d_plot4(nearest_neighbours_vectors, word, time_slice_model)

                st.dataframe(df)

                st.plotly_chart(fig)

# Dictionary tab
elif active_tab == "Dictionary":
    with st.container():
        st.write("Dictionary tab")