nileshhanotia committed on
Commit
4270cfb
1 Parent(s): cae9fb8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
3
+ from datasets import load_dataset
4
+ import os
5
+ from transformers import set_seed
6
+
# Streamlit App title
st.title("Fine-Tune Mixtral 8x7B Model")

# Hugging Face Hub repository id of the base Mixtral 8x7B checkpoint.
# Hub ids are namespaced as "<organisation>/<model>"; a bare "mistral-8x7b"
# does not resolve to a repository, so loading it would always fail.
model_name = "mistralai/Mixtral-8x7B-v0.1"

# Access the Hugging Face token from Streamlit secrets
token = st.secrets["HF_TOKEN"]


@st.cache_resource
def _load_tokenizer_and_model(repo_id, hf_token):
    """Load the tokenizer and causal-LM weights for ``repo_id``.

    Streamlit re-executes this script on every widget interaction (including
    the "Start Fine-Tuning" button click). Caching the loaded objects keeps
    the checkpoint from being reloaded on each rerun.

    NOTE(review): the cached model object is shared between reruns, so after
    fine-tuning it holds the updated weights -- confirm that is the intent.

    Args:
        repo_id: Hugging Face Hub repository id of the checkpoint.
        hf_token: Hub access token used to authenticate the download.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # ``token=`` is the current keyword; ``use_auth_token=`` is deprecated.
    loaded_tokenizer = AutoTokenizer.from_pretrained(repo_id, token=hf_token)
    loaded_model = AutoModelForCausalLM.from_pretrained(repo_id, token=hf_token)
    return loaded_tokenizer, loaded_model


# Load the tokenizer and model
try:
    tokenizer, model = _load_tokenizer_and_model(model_name, token)
    st.write("Model and tokenizer loaded successfully!")
except Exception as e:
    st.error(f"An error occurred while loading the model: {e}")
    # Every later step needs `tokenizer` and `model`; end this script run
    # here instead of failing further down with a NameError.
    st.stop()

# Load the dataset from the existing file
dataset_path = "training_data.json"

try:
    # The JSON file is exposed as the "train" split of the returned dataset dict
    dataset = load_dataset("json", data_files={"train": dataset_path})
    st.write("Dataset loaded successfully!")
except Exception as e:
    st.error(f"An error occurred while loading the dataset: {e}")
    # Tokenization and training both need `dataset`; end this script run
    # here instead of failing further down with a NameError.
    st.stop()

# Tokenize the dataset
def preprocess_function(examples):
    """Tokenize a batch of examples and attach causal-LM training labels.

    Args:
        examples: A batch mapping whose ``"prompt"`` entry is the list of
            texts to tokenize (the script calls this through
            ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer output (truncated / padded to 128 tokens) with an
        additional ``"labels"`` entry. Labels are a copy of ``input_ids`` with
        padded positions replaced by -100 so they are ignored by the loss.
    """
    encoded = tokenizer(
        examples["prompt"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )
    # Without a "labels" column the model returns no loss and the Trainer
    # cannot train. The attention mask (not the pad token id) decides what is
    # padding, so a real EOS token is still learned even if pad == EOS.
    encoded["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
    ]
    return encoded

try:
    # padding="max_length" requires a padding token. If the loaded tokenizer
    # does not define one, reuse the end-of-sequence token so padding works.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    tokenized_dataset = dataset["train"].map(preprocess_function, batched=True)
    st.write("Dataset tokenized successfully!")
except Exception as e:
    st.error(f"An error occurred while tokenizing the dataset: {e}")
    # The Trainer needs `tokenized_dataset`; end this script run here
    # instead of failing further down with a NameError.
    st.stop()

# Training arguments for fine-tuning.
# No evaluation schedule is requested: the Trainer below receives only a
# training split, and asking for per-epoch evaluation without an evaluation
# dataset makes the Trainer raise instead of training.
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=1,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Initialize the Trainer for fine-tuning
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# Button to start fine-tuning
if st.button("Start Fine-Tuning"):
    with st.spinner("Fine-tuning in progress..."):
        try:
            trainer.train()
            st.success("Fine-tuning completed!")
            # Save the fine-tuned model together with its tokenizer so the
            # output directory can be loaded on its own later.
            model.save_pretrained("./fine_tuned_model")
            tokenizer.save_pretrained("./fine_tuned_model")
            st.write("Fine-tuned model saved!")
        except Exception as e:
            st.error(f"An error occurred during fine-tuning: {e}")