import streamlit as st
import torch
import numpy as np
import views
from resources import load_corrector, load_data, load_model_and_tokenizer, reduce_embeddings
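# resources.py is a local helper module; judging from the import names, it provides
# loaders for the dataset, the encoder/tokenizer pair, the inversion corrector, and
# the dimensionality reduction.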
# Prefer CPU when CUDA is unavailable; the app currently forces CPU either way.
use_cpu = not torch.cuda.is_available()
# device = "cpu" if use_cpu else "cuda"
device = "cpu"
df = load_data()
encoder, tokenizer = load_model_and_tokenizer(device)
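# The corrector drives the embedding inversion described in Morris et al., 2023
# (referenced in the sidebar text below); presumably a vec2text-style corrector model.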
corrector = load_corrector()
# Cache the precomputed embeddings, since they are stored locally and are large.
@st.cache_data
def load_embeddings():
    return np.load("syac-title-embeddings.npy")


embeddings = load_embeddings()
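# reduce_embeddings presumably fits a 2D reducer on the embeddings and returns both the
# projected points and the fitted reducer, so the plot view can also invert the projection.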
vectors_2d, reducer = reduce_embeddings(embeddings)
def sidebar():
    st.sidebar.title("About this app")
    st.sidebar.markdown(
        "This app is intended to give a more intuitive, interactive understanding of "
        "sequence embeddings (e.g. sentence embeddings) through interactive plots and "
        "operations on these embeddings, with a focus on embedding inversion. "
        "We explore sequence embedding inversion using the method described in "
        "[Morris et al., 2023](https://arxiv.org/abs/2310.06816), as well as "
        "dimensionality reduction transforms and inverse transforms and their effect "
        "on embedding inversion."
    )
    st.sidebar.markdown(
        "### The Dataset\n"
        "The dataset in use is the train split of the Reddit SYAC dataset "
        "([Heiervang, 2022](https://www.duo.uio.no/handle/10852/96578)), which contains "
        "the titles of clickbait articles."
    )
sidebar()
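# Two tabs: an interactive plot of the (reduced) embeddings, and a view for
# inspecting diffs between embeddings.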
tab1, tab2 = st.tabs(["plot", "diffs"])

with tab1:
    views.plot(
        df=df,
        embeddings=embeddings,
        vectors_2d=vectors_2d,
        reducer=reducer,
        corrector=corrector,
        device=device,
    )

with tab2:
    views.diffs(embeddings, corrector, encoder, tokenizer)