"""Streamlit demo: match free-text input to the closest of a few gold words.

The input is embedded as the average of the GloVe vectors of its words and
compared against each gold word's vector by cosine similarity.
"""

import pickle

import numpy as np
import numpy.linalg as la
import streamlit as st
# import streamlit_analytics

# Vector size used when it cannot be inferred from the embeddings mapping.
_DEFAULT_EMBEDDING_DIM = 50


# Compute Cosine Similarity
def cosine_similarity(x, y):
    """Return the cosine similarity of two equal-length vectors.

    Args:
        x: First vector (anything ``np.array`` accepts).
        y: Second vector, same length as ``x``.

    Returns:
        ``dot(x, y) / (norm(x) * norm(y))``, or ``0.0`` when either vector
        has zero norm, where the ratio is undefined.
    """
    x_arr = np.array(x)
    y_arr = np.array(y)
    norm_product = la.norm(x_arr) * la.norm(y_arr)
    if norm_product == 0:
        # Avoid 0/0 -> NaN (and a RuntimeWarning) for all-zero vectors.
        return 0.0
    return np.dot(x_arr, y_arr) / norm_product


# Function to Load Glove Embeddings
def load_glove_embeddings(glove_path="Data/embeddings.pkl"):
    """Load and return the pickled embeddings object stored at ``glove_path``.

    Args:
        glove_path: Path of the pickle file to read.

    Returns:
        Whatever object the pickle contains; the rest of this script uses it
        as a mapping from word to vector.

    Raises:
        OSError: If the file cannot be opened.
    """
    # SECURITY: unpickling can execute arbitrary code; only load trusted files.
    with open(glove_path, "rb") as f:
        return pickle.load(f)


def _embedding_dim(embeddings_dict):
    """Return the size of the first stored vector, or the default if empty."""
    first_vector = next(iter(embeddings_dict.values()), None)
    if first_vector is None:
        return _DEFAULT_EMBEDDING_DIM
    return np.size(first_vector)


# Get Averaged Glove Embedding of a sentence
def averaged_glove_embeddings(sentence, embeddings_dict):
    """Return the mean embedding of the words of ``sentence`` found in the mapping.

    Args:
        sentence: Text to embed; it is split on any run of whitespace.
        embeddings_dict: Mapping from word to embedding vector.

    Returns:
        The element-wise average of the vectors of all recognised words, or
        an all-zero vector when no word is recognised.
    """
    glove_embedding = np.zeros(_embedding_dim(embeddings_dict))
    count_words = 0
    for word in sentence.split():
        # Prefer the token as typed; otherwise try its lowercase form so that
        # capitalised input can still match a lowercase vocabulary entry.
        key = word if word in embeddings_dict else word.lower()
        if key in embeddings_dict:
            glove_embedding += embeddings_dict[key]
            count_words += 1
    # max(..., 1) keeps the zero vector unchanged when nothing matched.
    return glove_embedding / max(count_words, 1)


# Load glove embeddings
# NOTE(review): Streamlit re-executes the script on each interaction, so the
# pickle is re-read every time; consider Streamlit's caching decorator if the
# installed version provides one.
glove_embeddings = load_glove_embeddings()

# Gold standard words to search from
gold_words = ["flower", "mountain", "tree", "car", "building"]

# Text Search
# with streamlit_analytics.track():
st.title("Search Based Retrieval Demo")
st.subheader("Pass in an input word or even a sentence (e.g. jasmine or mount adams)")
text_search = st.text_input("", value="")

# Find closest word to an input word
if text_search:
    input_embedding = averaged_glove_embeddings(text_search, glove_embeddings)

    if not np.any(input_embedding):
        # No input word was recognised: every similarity would be meaningless,
        # so say so instead of presenting an arbitrary gold word as the answer.
        st.warning("None of the words in your input are in my vocabulary, so I cannot find a closest word.")
    else:
        cosine_sim = {
            index: cosine_similarity(input_embedding, glove_embeddings[word])
            for index, word in enumerate(gold_words)
        }
        print(cosine_sim)

        # max() returns the first best entry, i.e. the lowest index on ties.
        best_index = max(cosine_sim, key=cosine_sim.get)

        st.write("(My search uses glove embeddings)")
        st.write("Closest word I have between flower, mountain, tree, car and building for your input is: ")
        st.subheader(gold_words[best_index])

    st.write("")
    st.write("Demo developed by Dr. Karthik Mohan")