Sebastien De Greef committed on
Commit
6baccb3
·
1 Parent(s): 4af8a78

adds "gradio" to the requirements.txt and handle buttons up to training

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +278 -18
  3. requirements.txt +4 -2
  4. unsloth.png +0 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv/*
app.py CHANGED
@@ -1,32 +1,292 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
def process_input(model_name, checkbox1, checkbox2, text1, text2):
    """Summarise the submitted form values as a single multi-line string.

    Each value is rendered as ``"<label>: <value>"`` and the five lines are
    joined with newlines, in the order model, checkboxes, text fields.
    """
    labelled_values = (
        ("Model", model_name),
        ("Checkbox 1", checkbox1),
        ("Checkbox 2", checkbox2),
        ("Text Field 1", text1),
        ("Text Field 2", text2),
    )
    return "\n".join(f"{label}: {value}" for label, value in labelled_values)
6
-
 
 
7
  # Dropdown options
8
- model_options = ["Model A", "Model B", "Model C"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  # Create the Gradio interface
11
  with gr.Blocks() as demo:
12
- gr.Markdown("## Gradio Interface Example")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- # Dropdown for model_name
15
- model_name = gr.Dropdown(choices=model_options, label="Select Model")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- # Checkboxes
18
- checkbox1 = gr.Checkbox(label="Checkbox 1")
19
- checkbox2 = gr.Checkbox(label="Checkbox 2")
20
 
21
- # Text fields
22
- text1 = gr.Textbox(label="Text Field 1")
23
- text2 = gr.Textbox(label="Text Field 2")
 
 
 
 
 
 
 
24
 
 
25
  # Output
26
- output = gr.Textbox(label="Output")
27
 
28
- # Button to submit and process the input
29
- submit_btn = gr.Button("Submit")
30
- submit_btn.click(process_input, inputs=[model_name, checkbox1, checkbox2, text1, text2], outputs=output)
31
 
32
  demo.launch()
 
1
  import gradio as gr
2
+ from huggingface_hub import HfApi
3
+ from unsloth import FastLanguageModel
4
+ from trl import SFTTrainer
5
+ from transformers import TrainingArguments, TrainerCallback
6
+ from unsloth import is_bfloat16_supported
7
+ import torch
8
+ from datasets import load_dataset
9
+ import logging
10
+ from io import StringIO
11
+ import time
12
+ import asyncio
13
+ # Create a string stream to capture log messages
14
+ log_stream = StringIO()
15
 
16
+ # Configure logging to use the string stream
17
+ logging.basicConfig(stream=log_stream, level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
18
+ logger = logging.getLogger(__name__)
19
+ log_contents = log_stream.getvalue()
20
+ print(log_contents)
21
+ logger.debug('This is a debug message')
22
  # Dropdown options
23
+ model_options = [
24
+ "unsloth/mistral-7b-v0.3-bnb-4bit", # New Mistral v3 2x faster!
25
+ "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
26
+ "unsloth/llama-3-8b-bnb-4bit", # Llama-3 15 trillion tokens model 2x faster!
27
+ "unsloth/llama-3-8b-Instruct-bnb-4bit",
28
+ "unsloth/llama-3-70b-bnb-4bit",
29
+ "unsloth/Phi-3-mini-4k-instruct", # Phi-3 2x faster!
30
+ "unsloth/Phi-3-medium-4k-instruct",
31
+ "unsloth/mistral-7b-bnb-4bit",
32
+ "unsloth/gemma-2-9b-bnb-4bit",
33
+ "unsloth/gemma-2-27b-bnb-4bit", # Gemma 2x faster!
34
+ ]
35
+ gpu_stats = torch.cuda.get_device_properties(0)
36
+ start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
37
+ max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
38
+
39
+ model=None
40
+ tokenizer = None
41
+ dataset = None
42
+ max_seq_length = 2048
43
+
44
class PrinterCallback(TrainerCallback):
    """Trainer callback that prints training logs and mirrors step progress to the UI.

    Args:
        progress: The progress tracker handed to the Gradio event handler
            (a ``gr.Progress`` instance in this file).
    """

    # Last global step observed; the class-level default is kept so the
    # attribute exists before the first step completes.
    step = 0

    def __init__(self, progress):
        self.progress = progress

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Print the log record on the local main process, without ``total_flos``."""
        if logs is None:
            # ``logs`` defaults to None; popping from it would raise AttributeError.
            return
        logs.pop("total_flos", None)
        if state.is_local_process_zero:
            print(logs)

    def on_step_end(self, args, state, control, **kwargs):
        """Record the finished step and push it to the progress tracker."""
        if not state.is_local_process_zero:
            return
        self.step = state.global_step
        if self.progress is not None:
            # NOTE(review): Progress.update() appears to only advance an iterable
            # previously wrapped with Progress.tqdm(); none is registered here, so
            # the tracker is called directly with (current, total) instead.
            self.progress(
                (self.step, state.max_steps),
                desc=f"Training step {self.step}/{state.max_steps}",
            )
        print("**Step ", state.global_step)
57
+
58
+
59
+
60
def formatting_prompts_func(examples, prompt):
    """Render a batch of records into training text using ``prompt``.

    Args:
        examples: Batched mapping with parallel ``"instruction"``, ``"input"``
            and ``"output"`` sequences.
        prompt: ``str.format`` template with three positional placeholders,
            filled with instruction, input and output in that order.

    Returns:
        ``{"text": [...]}`` with one formatted string per record, each
        terminated by the tokenizer's EOS token.

    Raises:
        RuntimeError: If the module-level ``tokenizer`` has not been set yet.
    """
    if tokenizer is None:
        raise RuntimeError(
            "Tokenizer is not loaded; load a model before formatting the dataset."
        )
    # The EOS token must be appended, otherwise generation never learns to stop.
    eos_token = tokenizer.eos_token
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    texts = [
        prompt.format(instruction, context, response) + eos_token
        for instruction, context, response in rows
    ]
    return {"text": texts}
72
+
73
+
74
def load_model(initial_model_name, load_in_4bit, max_sequence_length):
    """Load the selected base model and tokenizer into the module-level globals.

    Args:
        initial_model_name: Model identifier passed to
            ``FastLanguageModel.from_pretrained``.
        load_in_4bit: Whether to request 4-bit loading.
        max_sequence_length: Maximum sequence length chosen in the UI.

    Returns:
        A status message followed by five ``gr.update`` payloads, in the order
        expected by the ``load_btn.click`` output list.

    Raises:
        gr.Error: If no model name was provided.
    """
    global model, tokenizer, max_seq_length
    if not initial_model_name:
        raise gr.Error("Select a base model before loading.")

    # UI numeric widgets may deliver floats; the sequence length must be an int.
    max_sequence_length = int(max_sequence_length)
    dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+

    loaded_model, loaded_tokenizer = FastLanguageModel.from_pretrained(
        model_name=initial_model_name,
        max_seq_length=max_sequence_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
        # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
    )
    # Publish the globals only after the load succeeded, so a failed load
    # cannot leave a sequence length that does not match the loaded model.
    model, tokenizer = loaded_model, loaded_tokenizer
    max_seq_length = max_sequence_length

    # Dump everything captured by the in-memory log stream so far.
    print(log_stream.getvalue())
    return (
        f"Model {initial_model_name} loaded, using {max_sequence_length} as max sequence length.",
        gr.update(visible=True, interactive=True),
        gr.update(interactive=True),
        gr.update(interactive=False),
        gr.update(interactive=False),
        gr.update(interactive=False),
    )
88
+
89
def load_data(dataset_name, data_template_style, data_template):
    """Load the ``train`` split of a dataset and format it with the prompt template.

    Args:
        dataset_name: Dataset identifier passed to ``load_dataset``.
        data_template_style: Template style selected in the UI (currently unused).
        data_template: ``str.format`` template handed to
            ``formatting_prompts_func`` for every batch.

    Returns:
        A status message followed by two ``gr.update`` payloads.

    Raises:
        gr.Error: If no model (and therefore no tokenizer) has been loaded;
            formatting needs the tokenizer's EOS token.
    """
    global dataset
    if tokenizer is None:
        raise gr.Error("Load a model before loading the dataset.")

    raw_split = load_dataset(dataset_name, split="train")
    # Assign the global only once formatting succeeded, so a failed map never
    # leaves an unformatted dataset (without the "text" column) behind.
    dataset = raw_split.map(
        lambda examples: formatting_prompts_func(examples, data_template),
        batched=True,
    )
    return (
        f"Data loaded {len(dataset)} records loaded.",
        gr.update(visible=True, interactive=True),
        gr.update(visible=True, interactive=True),
    )
94
+
95
+
96
+
97
+
98
async def train_model(model_name: str, lora_r: int, lora_alpha: int, lora_dropout: float, per_device_train_batch_size: int, warmup_steps: int, max_steps: int,
                      gradient_accumulation_steps: int, logging_steps: int, log_to_tensorboard: bool, optim, learning_rate, weight_decay, lr_scheduler_type, seed: int, output_dir, progress= gr.Progress()):
    """Attach LoRA adapters to the loaded model and run supervised fine-tuning.

    Uses the module-level ``model``, ``tokenizer``, ``dataset`` and
    ``max_seq_length``; the adapted model replaces the global ``model``.

    Args:
        model_name: Name of the fine-tuned model (only echoed in the debug print).
        lora_r, lora_alpha, lora_dropout: LoRA hyperparameters.
        per_device_train_batch_size, warmup_steps, max_steps,
        gradient_accumulation_steps, logging_steps: Trainer step settings.
        log_to_tensorboard: Report to TensorBoard when true, to nothing otherwise.
        optim: Optimizer name from the UI.
        learning_rate, weight_decay, lr_scheduler_type: Optimisation settings.
        seed: Seed used for both LoRA initialisation and training.
        output_dir: Directory passed to ``TrainingArguments``.
        progress: Gradio progress tracker forwarded to ``PrinterCallback``.

    Returns:
        A status message followed by three ``gr.update`` payloads.

    Raises:
        gr.Error: If no model or no dataset has been loaded yet.
    """
    global model, tokenizer
    print(f"$$$ Training model {model_name} with {lora_r} R, {lora_alpha} alpha, {lora_dropout} dropout, {per_device_train_batch_size} per device train batch size, {warmup_steps} warmup steps, {max_steps} max steps, {gradient_accumulation_steps} gradient accumulation steps, {logging_steps} logging steps, {log_to_tensorboard} log to tensorboard, {optim} optimizer, {learning_rate} learning rate, {weight_decay} weight decay, {lr_scheduler_type} lr scheduler type, {seed} seed, {output_dir} output dir")

    if model is None or tokenizer is None:
        raise gr.Error("Load a base model before training.")
    if dataset is None:
        raise gr.Error("Load a dataset before training.")

    # Numeric UI fields may arrive as floats depending on the Gradio version;
    # these settings must be integers for LoRA and the trainer.
    lora_r = int(lora_r)
    lora_alpha = int(lora_alpha)
    per_device_train_batch_size = int(per_device_train_batch_size)
    warmup_steps = int(warmup_steps)
    max_steps = int(max_steps)
    gradient_accumulation_steps = int(gradient_accumulation_steps)
    logging_steps = int(logging_steps)
    iseed = int(seed)

    # NOTE(review): the UI offers "adamw", which is presumably not a valid
    # transformers optimizer name; map it to the stock torch AdamW - confirm.
    optim_aliases = {"adamw": "adamw_torch"}
    optim = optim_aliases.get(optim, optim)

    # NOTE(review): calling this again on an already adapted model (second
    # click on Train) is not guarded here - confirm how unsloth handles it.
    model = FastLanguageModel.get_peft_model(
        model,
        r=lora_r,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        bias="none",
        use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
        random_state=iseed,
        use_rslora=False,  # We support rank stabilized LoRA
        loftq_config=None,  # And LoftQ
    )

    use_bf16 = is_bfloat16_supported()
    training_args = TrainingArguments(
        per_device_train_batch_size=per_device_train_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        warmup_steps=warmup_steps,
        max_steps=max_steps,  # previously hard-coded to 60, ignoring the UI value
        learning_rate=learning_rate,
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=logging_steps,
        optim=optim,  # previously hard-coded to "adamw_8bit"
        weight_decay=weight_decay,
        lr_scheduler_type=lr_scheduler_type,  # previously hard-coded to "linear"
        seed=iseed,
        # The string "none" disables reporting; passing None is presumably
        # treated as "use the default integrations" by transformers - confirm.
        report_to="tensorboard" if log_to_tensorboard else "none",
        output_dir=output_dir,
    )

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        dataset_num_proc=2,
        packing=False,  # Can make training 5x faster for short sequences.
        callbacks=[PrinterCallback(progress)],
        args=training_args,
    )
    trainer.train()
    return "Model training",gr.update(visible=True, interactive=False), gr.update(visible=True, interactive=True), gr.update(interactive=True)
145
+
146
def save_model():
    """Placeholder save handler.

    Returns a fixed status message followed by three ``gr.update`` payloads;
    nothing is written to disk.
    """
    status = "Model saved"
    first_update = gr.update(visible=True, interactive=True)
    second_update = gr.update(visible=True, interactive=False)
    third_update = gr.update(interactive=False)
    return status, first_update, second_update, third_update
148
+
149
 
150
  # Create the Gradio interface
151
  with gr.Blocks() as demo:
152
+ with gr.Column():
153
+ gr.Image("unsloth.png", width="300px", interactive=False, show_download_button=False, show_label=False)
154
+ with gr.Column():
155
+ gr.Markdown(f"**GPU Information:** {gpu_stats.name} ({max_memory} GB)")
156
+ with gr.Tab("Base Model Parameters"):
157
+
158
+ with gr.Row():
159
+ initial_model_name = gr.Dropdown(choices=model_options, label="Select Base Model", allow_custom_value=True)
160
+ load_in_4bit = gr.Checkbox(label="Load 4bit model", value=True)
161
+
162
+ gr.Markdown("### Target Model Parameters")
163
+ with gr.Row():
164
+ max_sequence_length = gr.Slider(minimum=128, value=512, step=64, maximum=128*1024, interactive=True, label="Max Sequence Length")
165
+ load_btn = gr.Button("Load")
166
+ output = gr.Textbox(label="Model Load Status", value="Model not loaded", interactive=False)
167
+ gr.Markdown("---")
168
+
169
+ with gr.Tab("Data Preparation"):
170
+ with gr.Row():
171
+ dataset_name = gr.Textbox(label="Dataset Name", value="yahma/alpaca-cleaned")
172
+ data_template_style = gr.Dropdown(label="Template", choices=["alpaca","custom"], value="alpaca", allow_custom_value=True)
173
+ with gr.Row():
174
+ data_tempalte = gr.TextArea(label="Data Template", value="""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
175
+
176
+ ### Instruction:
177
+ {}
178
+
179
+ ### Input:
180
+ {}
181
+
182
+ ### Response:
183
+ {}""")
184
+ gr.Markdown("---")
185
+ output_load_data = gr.Textbox(label="Data Load Status", value="Data not loaded", interactive=False)
186
+ load_data_btn = gr.Button("Load Dataset", interactive=True)
187
+ load_data_btn.click(load_data, inputs=[dataset_name, data_template_style, data_tempalte], outputs=[output_load_data, load_data_btn])
188
+
189
+ with gr.Tab("Fine-Tuning"):
190
+ gr.Markdown("""### Fine-Tuned Model Parameters""")
191
+ with gr.Row():
192
+ model_name = gr.Textbox(label="Model Name", value=initial_model_name.value, interactive=True)
193
+
194
+ gr.Markdown("""### Lora Parameters""")
195
+
196
+ with gr.Row():
197
+ lora_r = gr.Number(label="R", value=16, interactive=True)
198
+ lora_alpha = gr.Number(label="Lora Alpha", value=16, interactive=True)
199
+ lora_dropout = gr.Number(label="Lora Dropout", value=0.1, interactive=True)
200
+
201
+ gr.Markdown("---")
202
+ gr.Markdown("""### Training Parameters""")
203
+ with gr.Row():
204
+ with gr.Column():
205
+ with gr.Row():
206
+ per_device_train_batch_size = gr.Number(label="Per Device Train Batch Size", value=2, interactive=True)
207
+ warmup_steps = gr.Number(label="Warmup Steps", value=5, interactive=True)
208
+ max_steps = gr.Number(label="Max Steps", value=60, interactive=True)
209
+ gradient_accumulation_steps = gr.Number(label="Gradient Accumulation Steps", value=4, interactive=True)
210
+ with gr.Row():
211
+ logging_steps = gr.Number(label="Logging Steps", value=1, interactive=True)
212
+ log_to_tensorboard = gr.Checkbox(label="Log to Tensorboard", value=True, interactive=True)
213
+
214
+ with gr.Row():
215
+ optim = gr.Dropdown(choices=["adamw_8bit", "adamw", "sgd"], label="Optimizer", value="adamw_8bit")
216
+ learning_rate = gr.Number(label="Learning Rate", value=2e-4, interactive=True)
217
+
218
+ with gr.Row():
219
+ weight_decay = gr.Number(label="Weight Decay", value=0.01, interactive=True)
220
+ lr_scheduler_type = gr.Dropdown(choices=["linear", "cosine", "constant"], label="LR Scheduler Type", value="linear")
221
+ gr.Markdown("---")
222
+
223
+ with gr.Row():
224
+ seed = gr.Number(label="Seed", value=3407, interactive=True)
225
+ output_dir = gr.Textbox(label="Output Directory", value="outputs", interactive=True)
226
+ gr.Markdown("---")
227
+
228
+ train_output = gr.Textbox(label="Training Status", value="Model not trained", interactive=False)
229
+ train_btn = gr.Button("Train", visible=True)
230
+ train_btn.click(train_model, inputs=[model_name, lora_r, lora_alpha, lora_dropout, per_device_train_batch_size, warmup_steps, max_steps, gradient_accumulation_steps, logging_steps, log_to_tensorboard, optim, learning_rate, weight_decay, lr_scheduler_type, seed, output_dir], outputs=[train_output, train_btn])
231
+
232
+ with gr.Tab("Save & Push Options"):
233
+
234
+
235
+
236
+ with gr.Row():
237
+ gr.Markdown("### Merging Options")
238
+ with gr.Column():
239
+ merge_16bit = gr.Checkbox(label="Merge to 16bit", value=False, interactive=True)
240
+ merge_4bit = gr.Checkbox(label="Merge to 4bit", value=False, interactive=True)
241
+ just_lora = gr.Checkbox(label="Just LoRA Adapter", value=False, interactive=True)
242
+ gr.Markdown("---")
243
+
244
+ with gr.Row():
245
+ gr.Markdown("### GGUF Options")
246
+ with gr.Column():
247
+ merge_16bit = gr.Checkbox(label="Quantize to f16", value=False, interactive=True)
248
+ merge_16bit = gr.Checkbox(label="Quantize to 8bit (Q8_0)", value=False, interactive=True)
249
+ merge_16bit = gr.Checkbox(label="Quantize to 4bit (q4_k_m)", value=False, interactive=True)
250
+ with gr.Column():
251
+ merge_custom = gr.Checkbox(label="Custom", value=False, interactive=True)
252
+ merge_custom_value = gr.Textbox(label="", value="Q5_K", interactive=True)
253
+ gr.Markdown("---")
254
 
255
+ with gr.Row():
256
+ gr.Markdown("### Hugging Face Hub Options")
257
+ push_to_hub = gr.Checkbox(label="Push to Hub", value=False, interactive=True)
258
+ with gr.Column():
259
+ hub_model_name = gr.Textbox(label="Hub Model Name", value=f"username/model_name", interactive=True)
260
+ hub_token = gr.Textbox(label="Hub Token", interactive=True, type="password")
261
+ ollama_pub_key = gr.Button("HuggingFace Access Token")
262
+ gr.Markdown("---")
263
+
264
+ with gr.Row():
265
+ gr.Markdown("### Ollama options")
266
+ with gr.Column():
267
+ ollama_create_local = gr.Checkbox(label="Create in Ollama (local)", value=False, interactive=True)
268
+ ollama_push_to_hub = gr.Checkbox(label="Push to Ollama", value=False, interactive=True)
269
+ with gr.Column():
270
+ ollama_model_name = gr.Textbox(label="Ollama Model Name", value="user/model_name")
271
+ ollama_pub_key = gr.Button("Ollama Pub Key")
272
+ gr.Markdown("---")
273
 
 
 
 
274
 
275
+ with gr.Tab("Inference"):
276
+ with gr.Row():
277
+ gr.Textbox(label="Input Text", lines=4, value="""\
278
+ Continue the fibonnaci sequence.
279
+ # instruction
280
+ 1, 1, 2, 3, 5, 8
281
+ # input
282
+ """, interactive=True)
283
+ gr.Textbox(label="Output Text", lines=4, value="""\
284
+ """, interactive=False)
285
 
286
+ inference_button = gr.Button("Inference", visible=False, interactive=False)
287
  # Output
 
288
 
289
+ # Button click events
290
+ load_btn.click(load_model, inputs=[initial_model_name, load_in_4bit, max_sequence_length], outputs=[output, load_btn, train_btn, initial_model_name, load_in_4bit, max_sequence_length])
 
291
 
292
  demo.launch()
requirements.txt CHANGED
@@ -1,6 +1,8 @@
1
  unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
2
- xformers
3
  trl<0.9.0
4
  peft
5
  accelerate
6
- bitsandbytes
 
 
 
1
  unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
2
+ xformers<0.0.27
3
  trl<0.9.0
4
  peft
5
  accelerate
6
+ bitsandbytes
7
+ gradio
8
+ tensorboard
unsloth.png ADDED