agalma / plots.py
Mark7549's picture
Sped up the 3D plot creation by using precomputed vectors stored in the ./3d_models directory
88d7eed
raw
history blame
1.8 kB
import streamlit as st
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import umap
import pandas as pd
from word2vec import *
from sklearn.preprocessing import StandardScaler
import plotly.express as px
from sklearn.manifold import TSNE
def make_3d_plot_tSNE(vectors_list, target_word, time_slice_model):
    """
    Build a 3D Plotly scatter plot of the nearest neighbours of a word.

    No dimensionality reduction happens here: the 3D coordinates are read
    from the pickled file ``./3d_models/{time_slice_model}.model`` and only
    looked up per word. (Presumably that file was produced offline with
    t-SNE, as the function name suggests -- confirm against the script
    that writes it.)

    Args:
        vectors_list: Iterable of 4-tuples
            ``(word, model_name, vector, cosine_sim)``. Only ``word`` and
            ``cosine_sim`` are used; the other two items are ignored.
        target_word: Word the neighbours belong to; used only in the
            figure title.
        time_slice_model: Name of the time slice; selects which file in
            ``./3d_models`` is read.

    Returns:
        A ``(fig, df)`` tuple: the Plotly figure, and the DataFrame it was
        built from, with columns ``word``, ``3d_vector`` and ``cosine_sim``,
        sorted by ``cosine_sim`` in descending order.

    Raises:
        FileNotFoundError: If the coordinates file for the time slice does
            not exist.
        KeyError: If a word in ``vectors_list`` has no entry in the
            coordinates file.
    """
    # Imported locally so this function does not rely on `pickle` leaking
    # into the module namespace through `from word2vec import *`.
    import pickle

    # The payload is an iterable of (word, vector) pairs; index it by word.
    # NOTE(security): pickle.load can execute arbitrary code. This is only
    # acceptable because the file is a locally generated project artefact;
    # never point this at user-supplied files.
    with open(f'./3d_models/{time_slice_model}.model', 'rb') as f:
        coords_by_word = dict(pickle.load(f))

    # Keep only the requested neighbours, pairing each with its precomputed
    # coordinates and its cosine similarity.
    rows = [
        (neighbour, coords_by_word[neighbour], cosine_sim)
        for neighbour, _, _, cosine_sim in vectors_list
    ]

    df = pd.DataFrame(rows, columns=['word', '3d_vector', 'cosine_sim'])
    df = df.sort_values(by='cosine_sim', ascending=False)

    # Split the coordinate vectors into one Series per axis. They are passed
    # to Plotly directly so the returned DataFrame keeps its three columns.
    x = df['3d_vector'].apply(lambda v: v[0])
    y = df['3d_vector'].apply(lambda v: v[1])
    z = df['3d_vector'].apply(lambda v: v[2])

    fig = px.scatter_3d(
        df,
        x=x,
        y=y,
        z=z,
        text='word',
        color='cosine_sim',
        color_continuous_scale='Reds',
    )
    fig.update_traces(marker=dict(size=5))
    fig.update_layout(title=f'3D plot of nearest neighbours to {target_word}')

    return fig, df