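"""Streamlit app for interactively exploring sequence (e.g. sentence) embeddings
and embedding inversion on titles from the Reddit SYAC dataset.

Run it with Streamlit (the filename here is a placeholder for this script):

    streamlit run app.py
"""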

import numpy as np
import streamlit as st
import torch

import views
from resources import load_corrector, load_data, load_model_and_tokenizer, reduce_embeddings

# Prefer the GPU when available, otherwise fall back to CPU.
use_cpu = not torch.cuda.is_available()
device = "cpu" if use_cpu else "cuda"

# Load the dataset and the embedding model/tokenizer once at startup.
df = load_data()
encoder, tokenizer = load_model_and_tokenizer(device)
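# load_corrector presumably returns the vec2text corrector model of
# Morris et al., 2023, which performs the embedding inversion used in the views.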
corrector = load_corrector()


@st.cache_data
def load_embeddings():
    # Load the precomputed SYAC title embeddings from disk; st.cache_data keeps
    # the array in memory across Streamlit reruns.
    return np.load("syac-title-embeddings.npy")


embeddings = load_embeddings()
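
# A minimal sanity check (a sketch: it assumes the precomputed .npy file holds
# exactly one embedding per row of the loaded dataset split).
assert embeddings.shape[0] == len(df), "embeddings are out of sync with the dataset"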

# Reduce the embeddings to 2D for plotting; the fitted reducer is also kept so
# the plot view can apply its inverse transform.
vectors_2d, reducer = reduce_embeddings(embeddings)


def sidebar():
    st.sidebar.title("About this app")
    st.sidebar.markdown(
        "This app is intended to give a more intuitive, interactive understanding of sequence embeddings "
        "(e.g. sentence embeddings) through plots and operations on these embeddings, with a focus on embedding inversion.\n"
        "We explore sequence embedding inversion using the method described in "
        "[Morris et al., 2023](https://arxiv.org/abs/2310.06816), as well as dimensionality reduction "
        "transforms and their inverse transforms, and the effect these have on embedding inversion."
    )
    st.sidebar.markdown(
        "### The Dataset\n"
        "The dataset in use is the train split of the Reddit SYAC dataset "
        "([Heiervang, 2022](https://www.duo.uio.no/handle/10852/96578)), which contains the titles of clickbait articles."
    )


sidebar()

tab1, tab2 = st.tabs(["plot", "diffs"])
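
# "plot" presumably shows the 2D-reduced embeddings (with the reducer and
# corrector available for inverse transforms and inversion); "diffs" compares
# embeddings using the encoder, tokenizer, and corrector.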

with tab1:
    views.plot(
        df=df,
        embeddings=embeddings,
        vectors_2d=vectors_2d,
        reducer=reducer,
        corrector=corrector,
        device=device,
    )

with tab2:
    views.diffs(embeddings, corrector, encoder, tokenizer)