nileshhanotia committed
Commit 3caf963
Parent: c899f78

Update app.py

Files changed (1)
  1. app.py +19 -15
app.py CHANGED
@@ -5,16 +5,21 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingA
 from transformers import DataCollatorForLanguageModeling
 import torch
 from huggingface_hub import Repository, HfFolder
+import subprocess
 
 # Authenticate Hugging Face Hub
 hf_token = st.secrets["HF_TOKEN"] # Store your token in the Hugging Face Space Secrets
 HfFolder.save_token(hf_token)
 
-import os
-
 # Set Git user identity
-os.system('git config user.email "[email protected]"')
-os.system('git config user.name "Nilesh"')
+def set_git_config():
+    try:
+        subprocess.run(['git', 'config', 'user.email', '[email protected]'], check=True)
+        subprocess.run(['git', 'config', 'user.name', 'Nilesh'], check=True)
+    except subprocess.CalledProcessError as e:
+        st.error(f"Git configuration error: {str(e)}")
+
+set_git_config()
 
 @st.cache_data
 def load_data(file_path):
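The hunk above swaps os.system for subprocess.run with check=True inside a try/except. A minimal sketch of the behavior that error handling relies on (the git invocation here is only an illustration, not part of the commit):

```python
import subprocess

# check=True makes subprocess.run raise CalledProcessError on a non-zero exit
# status; os.system would only return the status code silently.
try:
    subprocess.run(["git", "config", "user.name"], check=True,
                   capture_output=True, text=True)
except subprocess.CalledProcessError as e:
    print(f"git config exited with status {e.returncode}")
```

In the committed version the same exception is surfaced to the Streamlit UI via st.error.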
@@ -64,6 +69,16 @@ def create_dataset(data, tokenizer, max_length):
     })
     return inputs
 
+class SimpleDataset(torch.utils.data.Dataset):
+    def __init__(self, encodings):
+        self.encodings = encodings
+
+    def __getitem__(self, idx):
+        return {key: val[idx] for key, val in self.encodings[idx].items()}
+
+    def __len__(self):
+        return len(self.encodings)
+
 def main():
     st.title("Model Training with Streamlit")
 
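With SimpleDataset now defined at module level, a quick smoke test can exercise it outside main(). The input below is hypothetical; __getitem__ indexes twice (self.encodings[idx], then val[idx]), so each element must be a dict whose values can be indexed again at the same position:

```python
# Hypothetical single-example input shaped to satisfy the double indexing above.
example = {"input_ids": [[101, 2023, 102]], "attention_mask": [[1, 1, 1]]}
ds = SimpleDataset([example])

print(len(ds))  # 1
print(ds[0])    # {'input_ids': [101, 2023, 102], 'attention_mask': [1, 1, 1]}
```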
@@ -94,17 +109,6 @@ def main():
         st.write("Tokenizing dataset...")
         tokenized_dataset = create_dataset(data, tokenizer, max_length)
 
-        # Convert tokenized_dataset to a torch Dataset
-        class SimpleDataset(torch.utils.data.Dataset):
-            def __init__(self, encodings):
-                self.encodings = encodings
-
-            def __getitem__(self, idx):
-                return {key: val[idx] for key, val in self.encodings[idx].items()}
-
-            def __len__(self):
-                return len(self.encodings)
-
         dataset = SimpleDataset(tokenized_dataset)
 
         # Define training arguments
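For context, a sketch of how the module-level SimpleDataset and the DataCollatorForLanguageModeling import at the top of app.py would typically be wired into the Trainer; model, tokenizer, tokenized_dataset, and the output_dir value are assumptions, not taken from this commit:

```python
from transformers import DataCollatorForLanguageModeling, Trainer, TrainingArguments

# mlm=False gives plain causal-LM labels, matching the AutoModelForCausalLM import.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,                                  # assumed loaded earlier in main()
    args=TrainingArguments(output_dir="./results", num_train_epochs=1),
    train_dataset=SimpleDataset(tokenized_dataset),
    data_collator=data_collator,
)
trainer.train()
```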