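# NOTE(review): the original first lines here ("Spaces: / Sleeping / Sleeping")
# were status text from the web page this file was captured from, not code.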
import os
import warnings

import altair as alt
import numpy as np
import pandas as pd
import streamlit as st
from PIL import Image
from sentence_transformers import util

from embeddings.embeddings import load_model
# Silence every Python warning for the whole app run.
warnings.filterwarnings('ignore')

# Page title and icon shown by Streamlit for this app.
st.set_page_config(page_title="Sinhala Embedding Space", page_icon=":bar_chart:")

# cluster PNG file
# Opened once at start-up; displayed later on the "Clustering Results" tab.
image = Image.open('plots/clusters.png')
# Load data
# NOTE: caching is left disabled, as in the original (decorator commented out).
# @st.cache_data
def load_data(path=r"data/top_cluster_dataset.csv"):
    """Read the clustered-headline dataset into a DataFrame.

    Args:
        path: CSV location (or any file-like object accepted by
            ``pandas.read_csv``). Defaults to the dataset path used by the app.

    Returns:
        A ``pandas.DataFrame`` in which ``Headline`` and ``labels`` are read
        as strings and ``x`` / ``y`` as ``float64``.
    """
    # Force ``labels`` to str so cluster ids are not parsed as numbers.
    column_types = {
        'Headline': str,
        'x': np.float64,
        'y': np.float64,
        'labels': str,
    }
    return pd.read_csv(path, dtype=column_types)
# Dataset shared by both tabs (headline text, 2-D coordinates, cluster label).
chart_data = load_data()

# Create a Streamlit app
# Define tabs
tabs = ["Clustering Results","Sentences Similarity"]
# The sidebar radio decides which of the two views is rendered below.
selected_tab = st.sidebar.radio("Select a Tab", tabs)
def get_altair_chart():
    """Build the interactive scatter plot of the module-level ``chart_data``.

    Each row is drawn as a circle at (``x``, ``y``), coloured by its
    ``labels`` value, with the ``Headline`` text shown as the tooltip.

    Returns:
        The Altair chart with ``.interactive()`` applied.
    """
    scatter = alt.Chart(chart_data).mark_circle(size=60)
    encoded = scatter.encode(x='x', y='y', color='labels', tooltip=['Headline'])
    return encoded.interactive()
# Main content
# NOTE(review): the captured source had lost all indentation; the nesting below
# is the most plausible reconstruction (in particular, the sample-sentence loop
# is placed inside the "no sentences entered yet" branch). Confirm against the
# running app.

_SAMPLE_SIZE = 10  # rows shown in the sample-sentence list and cluster preview


def _sample_headlines(frame, n=_SAMPLE_SIZE, random_state=None):
    """Return up to ``n`` randomly chosen ``Headline`` values as a list.

    The request is capped at the number of available rows because
    ``Series.sample`` raises ``ValueError`` when asked for more rows than
    exist (without replacement).
    """
    headlines = frame['Headline']
    return headlines.sample(min(n, len(headlines)), random_state=random_state).tolist()


def _cluster_preview(frame, label, n=_SAMPLE_SIZE):
    """Return up to ``n`` random headlines whose ``labels`` equals ``label``.

    Uses exact equality rather than a substring/regex match, so choosing
    label "1" does not also pull in "10", "11", ...
    """
    members = frame[frame['labels'] == label]
    picked = members.sample(min(n, len(members)))
    return picked[['Headline']].reset_index(drop=True)


if selected_tab == "Sentences Similarity":
    # Fixed seed: the default sample list is the same on every rerun until
    # the user asks for a new one.
    sample_sentences = _sample_headlines(chart_data, random_state=1)

    st.title("Calculate Sentences Similarity")

    # select model to use dropdown
    st.subheader("Select a model to use")
    model_list = ["Ransaka/SinhalaRoberta","keshan/SinhalaBERTo"]
    selected_model = st.selectbox("Select Model", model_list)
    model = load_model(selected_model)

    sentence1 = st.text_input("Enter Sentence 1", "")
    sentence2 = st.text_input("Enter Sentence 2", "")

    if sentence1 and sentence2:
        # add button to calculate similarity
        if st.button("Calculate Similarity"):
            with st.spinner('Calculating Similarity...'):
                # Calculate similarity
                embedding1 = model.encode(sentence1)
                embedding2 = model.encode(sentence2)
                # Take the single pairwise score and convert it to a plain
                # float so the comparisons and ":.3f" formatting below do
                # not depend on tensor behaviour.
                similarity = float(util.pytorch_cos_sim(embedding1, embedding2)[0][0])
                if similarity > 0.7:
                    st.success(f"Sentences are similar (Score: {similarity:.3f})")
                elif similarity > 0.5:
                    st.warning(f"Sentences are somewhat similar (Score: {similarity:.3f})")
                else:
                    st.error(f"Sentences are not similar (Score: {similarity:.3f})")
    else:
        st.write("Enter two sentences to calculate similarity. Or start with sample sentences below.")
        # change radio button to randomize sentences and show sample sentences
        if st.button("Randomize Sentences"):
            # Unseeded draw: a fresh selection for this rerun.
            sample_sentences = _sample_headlines(chart_data)
        for sentence in sample_sentences:
            # show sample sentences in small font
            st.write(sentence)

elif selected_tab == "Clustering Results":
    st.title("Clustering Results")

    # Display PNG image
    st.subheader("Full Clustering Results")
    st.image(image, use_column_width=False, caption='Static PNG File',width=750)

    # with st.spinner('Loading Interactive Results...'):
    # Display Altair chart
    st.subheader("Interactive Chart")
    chart = get_altair_chart()
    st.altair_chart(chart, use_container_width=True)

    # Dropdown functionality to update DataFrame
    st.subheader("Select a cluster")
    unique_clusters = chart_data['labels'].unique().tolist()
    selected_value = st.selectbox("Select Value", unique_clusters)

    # Filter and display results based on selected cluster
    if selected_value:
        filtered_data = _cluster_preview(chart_data, selected_value)
        st.dataframe(filtered_data,width=750)
    else:
        st.write("Select a cluster to display results.")