|
import pandas as pd |
|
from word2vec import * |
|
import plotly.express as px |
|
import pickle |
|
|
|
|
|
def make_3d_plot_tSNE(vectors_list, target_word, time_slice_model):
    """
    Create a 3D Plotly scatter plot of the nearest neighbours of a target word.

    The 3D coordinates are not computed in this function: they are read from
    the pickled file ``./3d_models/{time_slice_model}.model``, which must
    contain an iterable of ``(word, vector)`` pairs (presumably produced
    beforehand with t-SNE, as the function name suggests -- confirm against
    the code that writes these files). Only the words and cosine similarities
    from ``vectors_list`` are used; the model names and high-dimensional
    vectors it carries are ignored.

    vectors_list: iterable of tuples containing (word, model_name, vector, cosine_sim)
        - word: the word in the model
        - model_name: the name of the model (unused here)
        - vector: the high-dimensional vector representation of the word
          (unused here)
        - cosine_sim: the cosine similarity of the word to the target word

    target_word: the word whose nearest neighbours are plotted; it is only
        used in the figure title

    time_slice_model: the time slice model name, used to locate the pickled
        3D vector file

    Return: a tuple containing:
        - fig: the Plotly 3D scatter plot figure
        - df: a pandas DataFrame with columns 'word', '3d_vector' and
          'cosine_sim', sorted by 'cosine_sim' in descending order

    Raises:
        KeyError: if a word from vectors_list has no entry in the 3D model file.
    """
    # NOTE(security): pickle.load can execute arbitrary code while loading.
    # Only call this with model names that resolve to trusted, locally
    # generated files.
    with open(f'./3d_models/{time_slice_model}.model', 'rb') as f:
        word_vector_pairs = pickle.load(f)

    # Map each word to its 3D coordinates; a later duplicate overrides an
    # earlier one.
    coords_by_word = {model_word: coords for model_word, coords in word_vector_pairs}

    rows = []
    for neighbour, _, _, cosine_sim in vectors_list:
        try:
            coords = coords_by_word[neighbour]
        except KeyError:
            # Same exception type as a plain lookup, but say which word and
            # which model are out of sync.
            raise KeyError(
                f'word {neighbour!r} has no 3D vector in model {time_slice_model!r}'
            ) from None
        rows.append((neighbour, coords, cosine_sim))

    df = pd.DataFrame(rows, columns=['word', '3d_vector', 'cosine_sim'])
    df = df.sort_values(by='cosine_sim', ascending=False)

    # Split the stored 3D vectors into one series per axis; they share the
    # index of the sorted frame, so they stay aligned with its rows.
    x = df['3d_vector'].apply(lambda v: v[0])
    y = df['3d_vector'].apply(lambda v: v[1])
    z = df['3d_vector'].apply(lambda v: v[2])

    fig = px.scatter_3d(df, x=x, y=y, z=z, text='word', color='cosine_sim', color_continuous_scale='Reds')
    fig.update_traces(marker=dict(size=5))
    fig.update_layout(title=f'3D plot of nearest neighbours to {target_word}')

    return fig, df
|
|
|
|