Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -18,100 +18,97 @@ load_dotenv()
|
|
18 |
MODEL = "text-embedding-ada-002"
|
19 |
st.set_page_config(page_title="Visual Embeddings and Similarity", page_icon="🤖", layout="wide")
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
st.write("Please enter your OpenAI API Key and Nomic Token in the sidebar")
|
116 |
-
if __name__ == "__main__":
|
117 |
-
main()
|
|
|
18 |
MODEL = "text-embedding-ada-002"
st.set_page_config(page_title="Visual Embeddings and Similarity", page_icon="🤖", layout="wide")

# Sidebar: let the user supply (or override) the OpenAI API key and the Nomic
# token. Each field is pre-filled from the matching environment variable when
# that variable is set.
st.sidebar.title("Credentials")
st.sidebar.write("OpenAI API Key")
openai_api_key = st.sidebar.text_input(
    "Enter your OpenAI API Key",
    value=os.getenv("OPENAI_API_KEY"),
    type="password",  # mask the secret instead of echoing it in plain text
)
st.sidebar.write("Nomic Token")
nomic_token = st.sidebar.text_input(
    "Enter your Nomic Token",
    value=os.getenv("NOMIC_TOKEN"),
    type="password",
)

# Configure the clients from what the sidebar returned, not from the
# environment alone; otherwise a key typed by the user is silently ignored.
openai.api_key = openai_api_key or None
# Only log in to Nomic when a token is actually available, so the script does
# not call nomic.login with a missing/empty value while the page first loads.
if nomic_token:
    nomic.login(nomic_token)
|
30 |
+
|
31 |
+
# Dataset: read the review CSV, keeping six columns selected by position.
# NOTE(review): the original note lists them as ProductId, Score, Summary,
# Text, n_tokens, embedding — confirm against the CSV header.
datafile_path = "food_review.csv"
df = pd.read_csv(
    datafile_path,
    usecols=[0, 1, 3, 5, 7, 8],
)

# Page heading followed by the loaded table.
st.title("Visual Embeddings and Similarity")
st.write("Amazon food reviews dataset")
st.write(df)

# Search form: one free-text query plus a submit button. `question` and
# `btn` are read by the search handler further down the script.
st.write("Search similarity")
form = st.form('Embeddings')
question = form.text_input(
    label="Enter a sentence to search for semantic similarity",
    value="I love this soup",
)
btn = form.form_submit_button(label="Run")
|
43 |
+
|
44 |
+
def _normalize_distances(distances):
    """Rescale *distances* linearly so the minimum maps to 0 and the maximum to 1.

    Returns all zeros when every value is identical, instead of the 0/0 = NaN
    that the plain min-max formula would produce.
    """
    lowest = distances.min()
    span = distances.max() - lowest
    if span == 0:
        return distances * 0.0
    return (distances - lowest) / span


def _link_from_project_text(project_text):
    """Return the stripped text after the first colon, or '' if there is no colon."""
    _, _, link = project_text.partition(':')
    return link.strip()


if btn:
    # Run only when both credentials are non-empty. A truthiness test also
    # rejects the empty string, which an `is not None` check lets through.
    if openai_api_key and nomic_token:
        with st.spinner("Loading"):
            # Embed the query with the same model name the module declares.
            search_term_vector = np.array(get_embedding(question, engine=MODEL))

            # Parse the stored embedding strings once; the resulting matrix is
            # reused for distances, t-SNE, similarities and the Nomic upload.
            matrix = np.array(df.embedding.apply(literal_eval).to_list())

            # Euclidean distance of every row to the query vector.
            df['distance_to_search_term'] = np.linalg.norm(matrix - search_term_vector, axis=1)

            # Normalize the distances to the range 0-1 for coloring.
            df['normalized_distance'] = _normalize_distances(df['distance_to_search_term'])

            # 2D visualization: project the embeddings with t-SNE.
            tsne = TSNE(n_components=2, perplexity=15, random_state=42, init='random', learning_rate=200)
            vis_dims = tsne.fit_transform(matrix)

            colors = cm.rainbow(df['normalized_distance'])

            # Draw on a dedicated figure rather than pyplot's implicit global
            # one, so points from earlier runs are not drawn again.
            fig, ax = plt.subplots()
            ax.scatter(vis_dims[:, 0], vis_dims[:, 1], color=colors, alpha=0.3)
            ax.set_title("Similarity to search term visualized in language using t-SNE")

            # Cosine similarity of each stored embedding to the query vector.
            df["similarities"] = [cosine_similarity(row, search_term_vector) for row in matrix]

            st.title("Visual embedding of the search term and the 20 most similar sentences")
            # Two columns: the plot on the left, the top-20 table on the right.
            col1, col2 = st.columns(2)
            col1.pyplot(fig)
            plt.close(fig)  # release the figure once it has been rendered
            col2.write(df[['similarities','Text']].sort_values("similarities", ascending=False).head(20))

            # NOTE(review): "mappping" is misspelled in this heading; the
            # displayed text is left unchanged here.
            st.title("Nomic mappping embeddings")
            # The upload takes the embeddings separately from the per-row
            # records, so drop the column before building the records.
            df = df.drop('embedding', axis=1)
            df = df.rename(columns={'Unnamed: 0': 'id'})

            data = df.to_dict('records')
            project = atlas.map_embeddings(embeddings=matrix, data=data,
                                           id_field='id',
                                           colorable_fields=['Score'])
            # Show the project's string form, then take whatever follows the
            # first colon as the link to embed.
            # NOTE(review): this relies on the format of str(project) — confirm
            # against the installed nomic version.
            project_str = str(project)

            st.text(project_str)
            project_link = _link_from_project_text(project_str)

            # Create an iframe with the URL and show it with Streamlit; skip it
            # when no link could be extracted.
            if project_link:
                st.markdown(f'<iframe src="{project_link}" width="100%" height="600px"></iframe>', unsafe_allow_html=True)
    else:
        st.write("Please enter your OpenAI API Key and Nomic Token in the sidebar")
|
|
|
|
|
|