# search-test / test_search_bar.py
# Shreemit's picture
# Upload 6 files
# 412b7ab
import streamlit as st
import numpy as np
import numpy.linalg as la
import pickle
#import streamlit_analytics
# Compute Cosine Similarity
def cosine_similarity(x, y):
    """Return the cosine similarity between two vectors.

    Args:
        x: First vector (any sequence or array accepted by ``np.asarray``).
        y: Second vector, same length as ``x``.

    Returns:
        ``dot(x, y) / (||x|| * ||y||)``. When either vector has zero norm the
        cosine is mathematically undefined; ``0.0`` is returned in that case
        instead of dividing by zero and propagating NaN to the caller.
    """
    x_arr = np.asarray(x)
    y_arr = np.asarray(y)
    norm_product = la.norm(x_arr) * la.norm(y_arr)
    if norm_product == 0:
        # A zero vector has no direction, so treat it as "no similarity".
        return 0.0
    return np.dot(x_arr, y_arr) / norm_product
# Function to Load Glove Embeddings
def load_glove_embeddings(glove_path="Data/embeddings.pkl"):
    """Unpickle and return the object stored at ``glove_path``.

    Args:
        glove_path: Path to a pickle file; opened in binary read mode.

    Returns:
        Whatever object the pickle file contains (the rest of this script
        uses it as a word -> vector mapping).

    Note:
        ``pickle.load`` can execute arbitrary code while loading, so only
        point this at files from a trusted source.
    """
    with open(glove_path, "rb") as pickle_file:
        return pickle.load(pickle_file)
# Get Averaged Glove Embedding of a sentence
def averaged_glove_embeddings(sentence, embeddings_dict):
    """Return the mean embedding of the words in ``sentence``.

    Args:
        sentence: Input text. It is split on any run of whitespace.
        embeddings_dict: Mapping from word to its embedding vector.

    Returns:
        A float NumPy array holding the element-wise average of the vectors
        of all words found in ``embeddings_dict``. A word that is not found
        verbatim is retried in lowercase before being skipped. If no word is
        found, a zero vector is returned whose shape matches the vectors in
        ``embeddings_dict`` (or length 50 when the mapping is empty).
    """
    total = None
    matched = 0
    for token in sentence.split():
        if token in embeddings_dict:
            key = token
        elif token.lower() in embeddings_dict:
            # Fall back to the lowercase form so "Jasmine" matches "jasmine".
            key = token.lower()
        else:
            continue
        vector = np.asarray(embeddings_dict[key], dtype=float)
        total = vector.copy() if total is None else total + vector
        matched += 1

    if total is None:
        # Nothing matched: size the zero vector from the vocabulary instead
        # of assuming a fixed dimension; 50 is kept only as a last resort.
        sample = next(iter(embeddings_dict.values()), None)
        return np.zeros(50) if sample is None else np.zeros(np.shape(sample))
    return total / matched
# Load glove embeddings
glove_embeddings = load_glove_embeddings()

# Gold standard words to search from
gold_words = ["flower", "mountain", "tree", "car", "building"]

# Text Search
#with streamlit_analytics.track():
st.title("Search Based Retrieval Demo")
st.subheader("Pass in an input word or even a sentence (e.g. jasmine or mount adams)")
text_search = st.text_input("", value="")

# Find closest word to an input word
if text_search:
    input_embedding = averaged_glove_embeddings(text_search, glove_embeddings)

    if not np.any(input_embedding):
        # An all-zero embedding means no input word was found in the
        # vocabulary; cosine similarity is undefined for it, so report that
        # instead of presenting an arbitrary gold word as the "closest".
        st.write("None of the words in your input are in my vocabulary, so I cannot find a closest word.")
    else:
        # Similarity of the input to every gold word, keyed by the word's
        # position in gold_words.
        cosine_sim = {
            index: cosine_similarity(input_embedding, glove_embeddings[word])
            for index, word in enumerate(gold_words)
        }
        # Debug output; goes to the process's stdout, not to the page.
        print(cosine_sim)

        # Index of the highest-scoring gold word (first one wins on ties).
        best_index = max(cosine_sim, key=cosine_sim.get)

        st.write("(My search uses glove embeddings)")
        st.write("Closest word I have between flower, mountain, tree, car and building for your input is: ")
        st.subheader(gold_words[best_index])

    # NOTE(review): the original indentation was lost; these two lines are
    # assumed to belong to the `if text_search:` block -- confirm.
    st.write("")
    st.write("Demo developed by Dr. Karthik Mohan")