import streamlit as st
import torch
import numpy as np

import views
from resources import load_corrector, load_data, load_model_and_tokenizer, reduce_embeddings

# Fall back to CPU when no GPU is available
use_cpu = not torch.cuda.is_available()
device = "cpu" if use_cpu else "cuda"

# Load the dataset, the embedding model/tokenizer, and the inversion corrector
df = load_data()
encoder, tokenizer = load_model_and_tokenizer(device)
corrector = load_corrector()


# Cache the precomputed embeddings since they are stored locally and are large
@st.cache_data
def load_embeddings():
    return np.load("syac-title-embeddings.npy")


embeddings = load_embeddings()
# Project the embeddings to 2D for plotting; the fitted reducer is kept for inverse transforms
vectors_2d, reducer = reduce_embeddings(embeddings)


def sidebar():
    st.sidebar.title("About this app")
    st.sidebar.markdown(
        "This app is intended to give a more intuitive and interactive understanding of "
        "sequence embeddings (e.g. sentence embeddings) through interactive plots and "
        "operations on these embeddings, with a focus on embedding inversion.\n"
        "We explore sequence embedding inversion using the method described in "
        "[Morris et al., 2023](https://arxiv.org/abs/2310.06816), as well as dimensionality "
        "reduction transforms and their inverse transforms, and their effect on embedding inversion."
    )


sidebar()

tab1, tab2 = st.tabs(["plot", "diffs"])

with tab1:
    views.plot(
        df=df,
        embeddings=embeddings,
        vectors_2d=vectors_2d,
        reducer=reducer,
        corrector=corrector,
    )

with tab2:
    views.diffs(embeddings, corrector, encoder, tokenizer)