nileshhanotia committed on
Commit 59782fa
1 Parent(s): c8ab462

Update app.py

Files changed (1):
  1. app.py +21 -2
app.py CHANGED
@@ -1,12 +1,13 @@
+
 import os
 import json
+import random
 import streamlit as st
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
 from datasets import Dataset
 import torch
 from huggingface_hub import Repository, HfFolder
 import subprocess
-from sklearn.model_selection import train_test_split
 
 # Authenticate Hugging Face Hub
 hf_token = st.secrets["HF_TOKEN"]
@@ -33,6 +34,19 @@ def load_data(file_path):
         st.error(f"Error loading dataset: {str(e)}")
         return None
 
+@st.cache_data
+def load_data(file_path):
+    if not os.path.exists(file_path):
+        st.error(f"File not found: {file_path}")
+        return None
+    try:
+        with open(file_path, 'r') as f:
+            data = json.load(f)
+        return data
+    except Exception as e:
+        st.error(f"Error loading dataset: {str(e)}")
+        return None
+
 @st.cache_resource
 def initialize_model_and_tokenizer(model_name, num_labels):
     try:
@@ -60,6 +74,11 @@ def create_dataset(data, tokenizer, max_length):
     })
     return dataset
 
+def split_data(data, test_size=0.2):
+    random.shuffle(data)
+    split_index = int(len(data) * (1 - test_size))
+    return data[:split_index], data[split_index:]
+
 def main():
     st.title("Appointment Classification Model Training")
 
@@ -88,7 +107,7 @@ def main():
 
     st.write("Preparing dataset...")
    # Split the data into train and evaluation sets
-    train_data, eval_data = train_test_split(data, test_size=0.2, random_state=42)
+    train_data, eval_data = split_data(data)
 
     train_dataset = create_dataset(train_data, tokenizer, max_length)
     eval_dataset = create_dataset(eval_data, tokenizer, max_length)
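Note on the split change: the removed sklearn call, train_test_split(data, test_size=0.2, random_state=42), was deterministic, while the new split_data shuffles with an unseeded random.shuffle and mutates the caller's list in place, so the train/eval split differs on every rerun. A minimal seeded variant (a sketch, not part of this commit) could look like:

    def split_data(data, test_size=0.2, seed=42):
        # Shuffle a copy so the caller's list is untouched; seeding the RNG
        # reproduces the determinism of random_state=42 in the removed call.
        shuffled = list(data)
        random.Random(seed).shuffle(shuffled)
        split_index = int(len(shuffled) * (1 - test_size))
        return shuffled[:split_index], shuffled[split_index:]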
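Note on the caching decorators: this commit leaves two module-level definitions of load_data, and in Python the later def wins, so the @st.cache_data-decorated version is the one main() actually calls. Streamlit's st.cache_data is intended for serializable return values such as the parsed JSON here, while st.cache_resource (already used on initialize_model_and_tokenizer) keeps a single shared instance of heavy, non-serializable objects like models. A sketch of the distinction, with hypothetical function names:

    import json
    import streamlit as st
    from transformers import AutoModelForSequenceClassification

    @st.cache_data          # keyed on the arguments; value is copied per rerun
    def load_records(file_path):
        with open(file_path, 'r') as f:
            return json.load(f)

    @st.cache_resource      # one shared object across reruns and sessions
    def get_model(model_name, num_labels):
        return AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=num_labels)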