File size: 2,640 Bytes
333cd19
 
 
 
 
 
 
f0ad92c
333cd19
 
 
 
 
 
f0ad92c
 
 
 
 
 
 
333cd19
f0ad92c
333cd19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import pickle

import pandas as pd
import streamlit as st
from datasets import Dataset

import model
from utils import check_columns, count_labels, get_download_link

# Main function to run the Streamlit app
def main():
    """Render the SetFit few-shot training page.

    The page offers the bundled sample CSV for download and accepts a CSV
    upload. If the upload passes ``check_columns``, the data and its label
    counts are displayed, the ``label`` column is encoded as integers, and a
    "Train" button is shown. Pressing it calls ``model.run_setfit_training``
    and pickles the index-to-label mapping as ``label.pkl`` inside the
    returned model path. If the upload fails the column check, an error is
    shown instead.
    """
    # Set app title
    st.title("Few Shot Learning Demo using SetFit")

    st.write("Prepare CSV file with text and label header, here is the sample file")

    # Display a link to download the sample file
    sample_df = pd.read_csv("data/sample.csv")
    st.markdown(get_download_link(sample_df), unsafe_allow_html=True)

    # NOTE(review): assumes `key` was stored in the session state before this
    # page runs (not visible here); attribute access raises if it is missing.
    session_id = st.session_state.key

    # Create file uploader; nothing else to render until a file is provided
    uploaded_file = st.file_uploader("Choose a CSV file to upload", type="csv")
    if uploaded_file is None:
        return

    # Read CSV file into pandas DataFrame and validate its columns
    df = pd.read_csv(uploaded_file)
    if not check_columns(df):
        st.error("File must have 'text' and 'label' columns.")
        return

    # Display DataFrame as a table
    st.write(df)

    # Calculate the number of instances of each label class
    label_counts = count_labels(df)
    st.write(f"Number of instances of each label class: {label_counts}")

    # Assign indices in order of first appearance. Enumerating a set would
    # make the encoding depend on hash ordering, which is not stable across
    # interpreter runs for string labels.
    unique_labels = list(dict.fromkeys(df["label"].tolist()))
    label_map = {label: idx for idx, label in enumerate(unique_labels)}

    df["label"] = df["label"].map(label_map)
    dataset = Dataset.from_pandas(df)

    model_name = st.text_input("Input the model name")

    pretrained_model_options = ["all-MiniLM-L6-v2", "paraphrase-MiniLM-L3-v2"]
    pretrained_model = st.selectbox(
        "Select a pretrained model", options=pretrained_model_options
    )

    # Training only starts once the user presses the button
    if not st.button("Train"):
        return

    with st.spinner("Training model..."):
        # NOTE(review): the trailing 1 and 10 are passed positionally; their
        # meaning is defined by run_setfit_training -- confirm there.
        model_path = model.run_setfit_training(
            session_id,
            pretrained_model,
            model_name,
            dataset,
            1,
            10,
        )

    st.write(f"Model checkpoint saved {os.path.basename(model_path)}")

    # Persist the inverse mapping (index -> original label) next to the model
    index_to_label = {idx: label for label, idx in label_map.items()}
    with open(os.path.join(model_path, "label.pkl"), "wb") as f:
        pickle.dump(index_to_label, f)

    st.write("Training Finished")
    st.write("Go to Validation Page")