Runtime error
Runtime error
Browse files
@@ -18,100 +18,97 @@ load_dotenv()
18 |
MODEL = "text-embedding-ada-002"
19 |
st.set_page_config(page_title="Visual Embeddings and Similarity", page_icon="🤖", layout="wide")
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
st.write("Please enter your OpenAI API Key and Nomic Token in the sidebar")
116 |
if __name__ == "__main__":
117 |
18 |
MODEL = "text-embedding-ada-002"
19 |
st.set_page_config(page_title="Visual Embeddings and Similarity", page_icon="🤖", layout="wide")
20 |
21 |
# sidebar with openai api key and nomic token
22 |
23 |
st.sidebar.write("OpenAI API Key")
24 |
openai_api_key = st.sidebar.text_input("Enter your OpenAI API Key", value=os.getenv("OPENAI_API_KEY"))
25 |
st.sidebar.write("Nomic Token")
26 |
nomic_token = st.sidebar.text_input("Enter your Nomic Token", value=os.getenv("NOMIC_TOKEN"))
27 |
28 |
openai.api_key = os.getenv("OPENAI_API_KEY")
29 |
30 |
31 |
# get data
32 |
datafile_path = "food_review.csv"
33 |
# show only columns ProductId, Score, Summary, Text, n_tokens, embedding
34 |
df = pd.read_csv(datafile_path, usecols=[0,1,3, 5, 7, 8])
35 |
st.title("Visual Embeddings and Similarity")
36 |
st.write("Amazon food reviews dataset")
37 |
38 |
39 |
st.write("Search similarity")
40 |
form = st.form('Embeddings')
41 |
question = form.text_input("Enter a sentence to search for semantic similarity", value="I love this soup")
42 |
btn = form.form_submit_button("Run")
43 |
44 |
if btn:
45 |
# if the OpenAI API key is not None and the Nomic token is not None
46 |
if openai_api_key is not None and nomic_token is not None:
47 |
with st.spinner("Loading"):
48 |
search_term_vector = get_embedding(question, engine="text-embedding-ada-002")
49 |
search_term_vector = np.array(search_term_vector)
50 |
51 |
matrix = np.array(df.embedding.apply(literal_eval).to_list())
52 |
53 |
# Compute distances to the search_term_vector
54 |
distances = np.linalg.norm(matrix - search_term_vector, axis=1)
55 |
df['distance_to_search_term'] = distances
56 |
57 |
# Normalize the distances to range 0-1 for coloring
58 |
df['normalized_distance'] = (df['distance_to_search_term'] - df['distance_to_search_term'].min()) / (df['distance_to_search_term'].max() - df['distance_to_search_term'].min())
59 |
60 |
# 2D visualization
61 |
# Create a t-SNE model and transform the data
62 |
tsne = TSNE(n_components=2, perplexity=15, random_state=42, init='random', learning_rate=200)
63 |
vis_dims = tsne.fit_transform(matrix)
64 |
65 |
colors = cm.rainbow(df['normalized_distance'])
66 |
x = [x for x,y in vis_dims]
67 |
y = [y for x,y in vis_dims]
68 |
69 |
# Plot points with colors corresponding to their distance from search_term_vector
70 |
plt.scatter(x, y, color=colors, alpha=0.3)
71 |
72 |
# Set title and plot
73 |
plt.title("Similarity to search term visualized in language using t-SNE")
74 |
75 |
76 |
# Convert 'embedding' column to numpy arrays
77 |
df['embedding'] = df['embedding'].apply(lambda x: np.array(literal_eval(x)))
78 |
df["similarities"] = df['embedding'].apply(lambda x: cosine_similarity(x, search_term_vector))
79 |
80 |
st.title("Visual embedding of the search term and the 20 most similar sentences")
81 |
#create two columns
82 |
col1, col2 = st.columns(2)
83 |
84 |
#show st.plot in col1
85 |
86 |
87 |
88 |
# show df in col2, but only the similarities and Text columns
89 |
col2.write(df[['similarities','Text']].sort_values("similarities", ascending=False).head(20))
90 |
91 |
# Convert to a list of lists of floats
92 |
st.title("Nomic mappping embeddings")
93 |
embeddings = np.array(df.embedding.to_list())
94 |
df = df.drop('embedding', axis=1)
95 |
df = df.rename(columns={'Unnamed: 0': 'id'})
96 |
97 |
data = df.to_dict('records')
98 |
project = atlas.map_embeddings(embeddings=embeddings, data=data,
99 |
100 |
101 |
# Convert project to a string before getting link information
102 |
project_str = str(project)
103 |
104 |
105 |
# Split the project string at the colon and take the second part (index 1)
106 |
project_link = project_str.split(':', 1)[1]
107 |
108 |
# Trim any leading or trailing whitespace
109 |
project_link = project_link.strip()
110 |
111 |
# Create an iframe with the URL and display it with Streamlit
112 |
st.markdown(f'<iframe src="{project_link}" width="100%" height="600px"></iframe>', unsafe_allow_html=True)
113 |
114 |
st.write("Please enter your OpenAI API Key and Nomic Token in the sidebar")