File size: 1,944 Bytes
412b7ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import streamlit as st
import numpy as np
import numpy.linalg as la
import pickle 
#import streamlit_analytics


# Compute Cosine Similarity
def cosine_similarity(x,y):
    """Return the cosine similarity between two vectors.

    Args:
        x: First vector (any array-like of numbers).
        y: Second vector, same length as ``x``.

    Returns:
        ``dot(x, y) / (||x|| * ||y||)``. When either vector has zero norm
        the ratio is mathematically undefined (0/0); ``0.0`` is returned in
        that case instead of NaN so callers can safely sort/compare results.
    """
    x_arr = np.asarray(x)
    y_arr = np.asarray(y)

    norm_product = la.norm(x_arr) * la.norm(y_arr)
    if norm_product == 0:
        # Zero vector on either side: avoid 0/0 -> NaN (and the NumPy warning).
        return 0.0

    return np.dot(x_arr, y_arr) / norm_product


# Function to Load Glove Embeddings
def load_glove_embeddings(glove_path="Data/embeddings.pkl"):
    """Unpickle and return the embeddings object stored at ``glove_path``.

    Args:
        glove_path: Path to the pickle file to read.

    Returns:
        Whatever object the pickle file contains (the rest of this script
        uses it as a word -> vector mapping).
    """
    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at files from a trusted source.
    with open(glove_path, mode="rb") as pickle_file:
        return pickle.load(pickle_file)

# Get Averaged Glove Embedding of a sentence
def averaged_glove_embeddings(sentence, embeddings_dict):
    """Return the mean embedding of the words of ``sentence`` found in ``embeddings_dict``.

    Args:
        sentence: Input text; it is split on any run of whitespace.
        embeddings_dict: Mapping from word to its embedding vector.

    Returns:
        A float NumPy array holding the element-wise average of the vectors
        of all words present in ``embeddings_dict``. Words that are missing
        are ignored. If no word matches, an all-zero vector is returned; its
        shape follows the vectors stored in ``embeddings_dict`` (falling back
        to 50 dimensions when the mapping is empty).
    """
    # split() with no argument also handles tabs, newlines and repeated spaces.
    matched = [embeddings_dict[word] for word in sentence.split()
               if word in embeddings_dict]

    if matched:
        shape = np.shape(matched[0])
    else:
        # Nothing matched: size the zero vector like the stored embeddings so
        # the result stays comparable with them.
        values = getattr(embeddings_dict, "values", None)
        sample = next(iter(values()), None) if callable(values) else None
        shape = np.shape(sample) if sample is not None else (50,)

    glove_embedding = np.zeros(shape)
    for vector in matched:
        glove_embedding += vector

    # max(..., 1) keeps the no-match case a plain zero vector (no 0/0).
    return glove_embedding/max(len(matched),1)

# Embedding table used by every lookup below (read from the default pickle path)
glove_embeddings = load_glove_embeddings()

# Candidate words the search answer is picked from
gold_words = [
    "flower",
    "mountain",
    "tree",
    "car",
    "building",
]

# Page header and free-text query box
# (streamlit_analytics tracking is currently disabled)
st.title("Search Based Retrieval Demo")
st.subheader("Pass in an input word or even a sentence (e.g. jasmine or mount adams)")
text_search = st.text_input(label="", value="")


# Find the gold-standard word closest to the user's input
if text_search:
    input_embedding = averaged_glove_embeddings(text_search, glove_embeddings)

    if not np.any(input_embedding):
        # An all-zero embedding means no input word was found in the
        # vocabulary. Cosine similarity is undefined for a zero vector, so
        # any "closest word" shown here would be arbitrary.
        st.warning("None of the words in your input were found in the embeddings vocabulary. Please try a different word or sentence.")
    else:
        # Similarity of the input to each candidate, keyed by candidate index
        cosine_sim = {
            index: cosine_similarity(input_embedding, glove_embeddings[word])
            for index, word in enumerate(gold_words)
        }

        print(cosine_sim)
        # max() returns the first best-scoring index, matching a stable
        # descending sort followed by taking element 0.
        best_index = max(cosine_sim, key=cosine_sim.get)

        st.write("(My search uses glove embeddings)")
        st.write("Closest word I have between flower, mountain, tree, car and building for your input is: ")
        st.subheader(gold_words[best_index])
        st.write("")
        st.write("Demo developed by Dr. Karthik Mohan")