myr1-2

Running on Zero

App Files Files Community

wuhp committed on 3 days ago

Commit

b446d41

verified ·

1 Parent(s): 5755412

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -27

app.py CHANGED Viewed

@@ -1,52 +1,73 @@
 import gradio as gr
 import spaces
 import torch
 from transformers import (
     AutoConfig,
     AutoTokenizer,
     AutoModelForCausalLM,
-    pipeline
 )
-# 1) Decorate the GPU-dependent function(s) with spaces.GPU
-@spaces.GPU(duration=60)  # default is 60s, can increase if needed
-def load_pipeline():  # builds and returns a "text-generation" pipeline for wuhp/myr1
-    # -- load config, tokenizer & model from the "myr1" subfolder of wuhp/myr1 --
     config = AutoConfig.from_pretrained("wuhp/myr1", subfolder="myr1", trust_remote_code=True)
     tokenizer = AutoTokenizer.from_pretrained("wuhp/myr1", subfolder="myr1", trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
         "wuhp/myr1",
         subfolder="myr1",
         config=config,
-        torch_dtype=torch.float16,  # half precision
         device_map="auto",
         trust_remote_code=True
     )
-    # optional: load generation config if you have generation_config.json
-    text_pipeline = pipeline(  # wraps the loaded model and tokenizer for generation
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer
-    )
-    return text_pipeline
-# Build the pipeline once at import time and keep it in a module-level global
-text_pipeline = load_pipeline()
-def predict(prompt, max_new_tokens=64):  # returns the generated text for the given prompt
-    outputs = text_pipeline(  # sampled generation through the module-level pipeline
-        prompt, max_new_tokens=int(max_new_tokens), do_sample=True, temperature=0.7
     )
-    return outputs[0]["generated_text"]  # "generated_text" field of the first result
-# 2) Build the Gradio app: prompt box + token-count slider -> predict -> output box
-with gr.Blocks() as demo:
-    gr.Markdown("## My LLM Inference (ZeroGPU)")
-    prompt = gr.Textbox(label="Prompt")
-    max_nt = gr.Slider(1, 200, value=64, step=1, label="Max New Tokens")  # range 1-200, default 64
-    output = gr.Textbox(label="Generated Text")
-    btn = gr.Button("Generate")
-    btn.click(fn=predict, inputs=[prompt, max_nt], outputs=output)  # input order matches predict's parameters
 demo.launch()

 import gradio as gr
 import spaces
 import torch
+from transformers import Trainer, TrainingArguments
+from datasets import load_dataset
 from transformers import (
     AutoConfig,
     AutoTokenizer,
     AutoModelForCausalLM,
+    DataCollatorForLanguageModeling,
 )
+@spaces.GPU(duration=600)  # 10 minutes
+def run_finetuning():
+    """Fine-tune wuhp/myr1 for one epoch on 1,000 Magpie reasoning examples.
+
+    Loads the dataset, renders each row as a `User: ... / Assistant: ...`
+    string, tokenizes it (truncated to 512 tokens), trains with the Hugging
+    Face `Trainer`, and saves the model and tokenizer to `finetuned_model`.
+
+    Returns:
+        str: The status message `Finetuning done!`.
+    """
+    # Load dataset
+    ds = load_dataset("Magpie-Align/Magpie-Reasoning-V2-250K-CoT-Deepseek-R1-Llama-70B")
+    # Keep only the first 1000 rows; a larger subset will likely time out
+    ds_small = ds["train"].select(range(1000))
+    # Format example:
+    def format_row(ex):
+        return {"text": f"User: {ex['instruction']}\nAssistant: {ex['response']}"}
+    ds_small = ds_small.map(format_row)
+    # Load config/tokenizer/model with trust_remote_code
     config = AutoConfig.from_pretrained("wuhp/myr1", subfolder="myr1", trust_remote_code=True)
     tokenizer = AutoTokenizer.from_pretrained("wuhp/myr1", subfolder="myr1", trust_remote_code=True)
+    # The collator pads every batch, which fails when the tokenizer defines no
+    # pad token; fall back to EOS in that case.
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
     model = AutoModelForCausalLM.from_pretrained(
         "wuhp/myr1",
         subfolder="myr1",
         config=config,
+        # Keep the weights in fp32: fp16=True below turns on mixed precision,
+        # and its gradient scaler refuses to unscale gradients of fp16
+        # parameters ("Attempting to unscale FP16 gradients.").
+        torch_dtype=torch.float32,
         device_map="auto",
         trust_remote_code=True
     )
+    # Tokenize
+    def tokenize(ex):
+        return tokenizer(ex["text"], truncation=True, max_length=512)
+    ds_small = ds_small.map(tokenize, batched=True)
+    ds_small.set_format("torch")
+    # mlm=False selects causal-LM collation (labels are built from input_ids)
+    collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
+    # Trainer
+    args = TrainingArguments(
+        output_dir="finetuned_model",
+        num_train_epochs=1,
+        per_device_train_batch_size=1,
+        logging_steps=5,
+        fp16=True,
+        save_strategy="no",
     )
+    trainer = Trainer(
+        model=model,
+        args=args,
+        train_dataset=ds_small,
+        data_collator=collator,
+    )
+    trainer.train()
+    # Save (no intermediate checkpoints are written, so this is the only copy)
+    trainer.save_model("finetuned_model")
+    tokenizer.save_pretrained("finetuned_model")
+    return "Finetuning done!"
+# Gradio UI: a single button that runs run_finetuning and shows its status string
+with gr.Blocks() as demo:
+    btn = gr.Button("Run Finetuning (10 min max!)")
+    status = gr.Textbox(label="Status")  # receives run_finetuning's return value
+    btn.click(fn=run_finetuning, inputs=None, outputs=status)  # no inputs; output goes to the status box
 demo.launch()