Spaces:
Sleeping
Sleeping
File size: 2,640 Bytes
333cd19 f0ad92c 333cd19 f0ad92c 333cd19 f0ad92c 333cd19 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import pickle
import pandas as pd
import streamlit as st
from datasets import Dataset
import model
from utils import check_columns, count_labels, get_download_link
# Main function to run the Streamlit app
def main():
    """Render the SetFit few-shot training page.

    Flow: offer a sample CSV for download, accept a user-uploaded CSV with
    'text' and 'label' columns, encode labels to integer ids, train a SetFit
    model via ``model.run_setfit_training``, and persist the id->label map
    next to the saved checkpoint for the validation page to use.
    """
    st.title("Few Shot Learning Demo using SetFit")
    st.write("Prepare CSV file with text and label header, here is the sample file")
    df = pd.read_csv("data/sample.csv")
    # Display a link to download the sample file
    st.markdown(get_download_link(df), unsafe_allow_html=True)

    # NOTE(review): assumes another page/entry point has already set
    # st.session_state.key; raises AttributeError otherwise — confirm.
    session_id = st.session_state.key

    uploaded_file = st.file_uploader("Choose a CSV file to upload", type="csv")
    # Guard clause: nothing to do until a file is uploaded.
    if uploaded_file is None:
        return

    df = pd.read_csv(uploaded_file)
    # Guard clause: the uploaded file must have 'text' and 'label' columns.
    if not check_columns(df):
        st.error("File must have 'text' and 'label' columns.")
        return

    # Display the uploaded DataFrame and per-class instance counts.
    st.write(df)
    label_counts = count_labels(df)
    st.write(f"Number of instances of each label class: {label_counts}")

    # Fix: sort the unique labels before assigning integer ids. Iterating a
    # raw set is nondeterministic across interpreter runs (string hash
    # randomization), so the original label->id mapping could differ between
    # sessions for the same CSV. Sorting makes the encoding reproducible.
    labels = sorted(set(df["label"].tolist()))
    label_map = {label: idx for idx, label in enumerate(labels)}
    df["label"] = df["label"].map(label_map)
    dataset = Dataset.from_pandas(df)

    model_name = st.text_input("Input the model name")
    pretrained_model_options = ["all-MiniLM-L6-v2", "paraphrase-MiniLM-L3-v2"]
    pretrained_model = st.selectbox(
        "Select a pretrained model", options=pretrained_model_options
    )

    # Train only when the user explicitly presses the button.
    if st.button("Train"):
        with st.spinner("Training model..."):
            # NOTE(review): trailing 1, 10 are positional training
            # hyperparameters (presumably iterations/epochs) — confirm
            # against model.run_setfit_training's signature.
            model_path = model.run_setfit_training(
                session_id,
                pretrained_model,
                model_name,
                dataset,
                1,
                10,
            )
        st.write(f"Model checkpoint saved {model_path.split('/')[-1]}")
        # Invert the mapping (id -> original label) and persist it beside
        # the checkpoint so the validation page can decode predictions.
        label_map = {v: k for k, v in label_map.items()}
        with open(f"{model_path}/label.pkl", "wb") as f:
            pickle.dump(label_map, f)
        st.write("Training Finished")
        st.write("Go to Validation Page")
|