wuhp committed · verified
Commit eccd8f6 · Parent: 0b10a3a

Update app.py

Files changed (1): app.py +234 -27
app.py CHANGED
@@ -1,33 +1,240 @@
-import gradio as gr
-import spaces
-import transformers
-# Load *only* your model's interface.
-#
-# The original snippet loaded three models:
-#   demo = gr.load(name="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", src=transformers_gradio.registry)
-#   demo = gr.load(name="deepseek-ai/DeepSeek-R1", src=transformers_gradio.registry)
-#   demo = gr.load(name="deepseek-ai/DeepSeek-R1-Zero", src=transformers_gradio.registry)
-#
-# But we want the same UI, using *your* model from Hugging Face, so we make a single gr.load(...) call:
-#
-# IMPORTANT:
-# 1) "name" should be the exact repository you want to load.
-# 2) If your UI code was stored as a "Space" with an 'app.py' or 'api' in your "wuhp/myr1" repo,
-#    this approach should pull that same Gradio interface.
-# 3) If "transformers_gradio.registry" is correct for your Space, keep it.
-#    Otherwise, you might need "src='spaces'" or a different source, depending on how your Space is set up.
-
-demo = gr.load(
-    name="wuhp/myr1",
-    src="transformers_gradio.registry"
 )
 
-# If you want GPU usage (like the original snippet):
-demo.fn = spaces.GPU()(demo.fn)
 
-# Remove API names (like the original snippet):
-for fn in demo.fns.values():
-    fn.api_name = False
 
 if __name__ == "__main__":
     demo.launch()
 
+import os
+import torch
+from torch.utils.data import Dataset
+from transformers import (
+    AutoConfig,
+    AutoTokenizer,
+    AutoModelForCausalLM,
+    Trainer,
+    TrainingArguments,
+    GenerationConfig,
+    pipeline
 )
+import gradio as gr
+
+
+# ---------------------------
+# A) Dummy training dataset
+# ---------------------------
+class MyTextDataset(Dataset):
+    """
+    Very simple dataset example. In reality you would:
+      - use real text data,
+      - probably use the HF 'datasets' library (see the sketch below),
+      - tokenize in chunks, etc.
+    """
+    def __init__(self, tokenizer, texts, block_size=128):
+        self.examples = []
+        for txt in texts:
+            # Tokenize each text individually
+            tokens = tokenizer(txt, truncation=True, max_length=block_size)
+            self.examples.append(tokens["input_ids"])
+
+    def __len__(self):
+        return len(self.examples)
+
+    def __getitem__(self, idx):
+        return torch.tensor(self.examples[idx], dtype=torch.long)
+
+
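+# For real work, the HF 'datasets' library can replace the class above.
+# A minimal sketch, assuming the 'datasets' package is installed (it is not
+# imported or used elsewhere in this file):
+#
+#   from datasets import Dataset as HFDataset
+#   ds = HFDataset.from_dict({"text": texts}).map(
+#       lambda ex: tokenizer(ex["text"], truncation=True, max_length=128))
+
+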
+# ---------------------------
+# B) Training routine
+# ---------------------------
+def train_model(
+    model_name_or_path="wuhp/myr1",
+    subfolder="myr1",
+    output_dir="finetuned_myr1",
+    epochs=1
+):
+    """
+    Demonstrates how to load your custom model from HF and run a quick
+    'Trainer' pass to finetune it on a few mock texts.
+
+    - model_name_or_path: Hugging Face repo ID (or local folder).
+    - subfolder: if your model config/weights live in a subfolder
+      within that repo, specify it here.
+    - output_dir: where to save the final trained model.
+    - epochs: how many epochs for this mock training example.
+    """
+
+    # 1) Load config (trust_remote_code=True so custom .py files from your repo can be imported)
+    config = AutoConfig.from_pretrained(
+        model_name_or_path,
+        subfolder=subfolder,
+        trust_remote_code=True
+    )
+
+    # 2) Load tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_name_or_path,
+        subfolder=subfolder,
+        trust_remote_code=True
+    )
+    # The data collator below pads batches, which requires a pad token;
+    # fall back to EOS in case the tokenizer ships without one.
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+
+    # 3) Load model
+    #    AutoModelForCausalLM detects your custom architecture from modeling_deepseek.py
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name_or_path,
+        subfolder=subfolder,
+        config=config,
+        torch_dtype=torch.float16,  # or "auto", or torch.float32
+        device_map="auto",          # needs enough GPU memory; use "cpu" otherwise
+        trust_remote_code=True
+    )
+
+    # 4) Create a tiny training dataset
+    train_texts = [
+        "Hello from DeepSeek!",
+        "The sky is blue.",
+        "Large language models can do amazing things."
+    ]
+    eval_texts = [
+        "Testing is essential for robust code.",
+        "Generative AI is fun."
+    ]
+    train_dataset = MyTextDataset(tokenizer, train_texts)
+    eval_dataset = MyTextDataset(tokenizer, eval_texts)
+
+    # 5) Trainer hyperparameters
+    training_args = TrainingArguments(
+        output_dir=output_dir,
+        overwrite_output_dir=True,
+        num_train_epochs=epochs,
+        per_device_train_batch_size=1,
+        per_device_eval_batch_size=1,
+        evaluation_strategy="epoch",
+        save_strategy="epoch",
+        logging_steps=1,
+        gradient_accumulation_steps=1,
+        # fp16 only makes sense on CUDA; this resolves to False on CPU
+        fp16=torch.cuda.is_available(),
+    )
+
+    # 6) Data collator for causal LM (mlm=False disables the masked-LM objective)
+    from transformers import DataCollatorForLanguageModeling
+    data_collator = DataCollatorForLanguageModeling(
+        tokenizer=tokenizer, mlm=False
+    )
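+    # (With mlm=False the collator pads each batch to its longest sequence and
+    # copies input_ids into labels, setting pad positions to -100 so they are
+    # ignored by the loss.)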
 
+    # 7) Build trainer
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        data_collator=data_collator,
+        train_dataset=train_dataset,
+        eval_dataset=eval_dataset
+    )
 
+    # 8) Train
+    trainer.train()
 
+    # 9) Save model & tokenizer
+    trainer.save_model(output_dir)
+    tokenizer.save_pretrained(output_dir)
+
+    return trainer
+
+
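+# Standalone usage sketch (an illustration, not part of the app flow; assumes
+# enough GPU memory for the checkpoint, otherwise set device_map="cpu" above):
+#
+#   trainer = train_model(epochs=1)
+#   print(trainer.evaluate())
+
+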
+# ---------------------------
+# C) Gradio app function
+# ---------------------------
+def create_gradio_demo(
+    model_name_or_path="finetuned_myr1",
+    generation_config_path=None
+):
+    """
+    Loads a (fine-tuned) model from a local folder or the HF Hub, sets up
+    a text-generation pipeline, and returns a Gradio interface.
+    """
+
+    # 1) Load config
+    config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
+
+    # 2) Load model & tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name_or_path,
+        config=config,
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        device_map="auto",
+        trust_remote_code=True
+    )
+
+    # 3) (Optional) load a generation config if present,
+    #    e.g. custom top_k, top_p, temperature, etc.
+    #    If your repo keeps "generation_config.json" in subfolder="myr1",
+    #    you could also do:
+    #      GenerationConfig.from_pretrained("wuhp/myr1", subfolder="myr1", ...)
+    #    or point at a local copy once downloaded.
+    if generation_config_path:
+        # GenerationConfig has no from_json_file helper, so load the file
+        # via from_pretrained with an explicit config_file_name.
+        gen_config = GenerationConfig.from_pretrained(
+            os.path.dirname(generation_config_path) or ".",
+            config_file_name=os.path.basename(generation_config_path)
+        )
+    else:
+        # Fall back to defaults derived from the model config
+        gen_config = GenerationConfig.from_model_config(config)
+
+    # 4) Build a text-generation pipeline
+    text_pipeline = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        generation_config=gen_config,
+    )
+
+    # 5) Define the Gradio predict function
+    def predict(prompt, max_new_tokens=64, temperature=0.7, top_p=0.95):
+        """
+        Generates text from the model given a user prompt.
+        """
+        outputs = text_pipeline(
+            prompt,
+            max_new_tokens=int(max_new_tokens),
+            do_sample=True,  # without sampling, the temperature/top-p sliders would be ignored
+            temperature=float(temperature),
+            top_p=float(top_p)
+        )
+        # The pipeline returns a list of dicts like [{'generated_text': '...'}]
+        return outputs[0]["generated_text"]
+
+    # 6) Create the Gradio interface
+    with gr.Blocks() as demo:
+        gr.Markdown("## DeepSeek LLM Demo")
+        prompt = gr.Textbox(label="Enter your prompt:")
+        max_new_tokens = gr.Slider(1, 512, step=1, value=64, label="Max New Tokens")
+        temperature = gr.Slider(0.0, 1.5, step=0.1, value=0.7, label="Temperature")
+        top_p = gr.Slider(0.0, 1.0, step=0.05, value=0.95, label="Top-p")
+        output = gr.Textbox(label="Generated Text")
+
+        generate_btn = gr.Button("Generate")
+        generate_btn.click(
+            fn=predict,
+            inputs=[prompt, max_new_tokens, temperature, top_p],
+            outputs=output
+        )
+    return demo
+
+
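+# Usage sketch, if 'finetuned_myr1' already exists from an earlier run (the
+# __main__ block below does the same thing after first training):
+#
+#   demo = create_gradio_demo("finetuned_myr1")
+#   demo.launch()
+
+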
+# ---------------------------
+# D) Main: train + launch
+# ---------------------------
 if __name__ == "__main__":
+    # 1) TRAIN (mock demonstration).
+    #    If you just want to *load* your existing model, skip this step.
+    print("Starting mock training on wuhp/myr1 (subfolder myr1)...")
+    trainer = train_model(
+        model_name_or_path="wuhp/myr1",
+        subfolder="myr1",
+        output_dir="finetuned_myr1",
+        epochs=1
+    )
+    print("Training complete.")
+
+    # 2) Build the Gradio app from the newly saved model in 'finetuned_myr1'.
+    #    (To serve the original un-finetuned weights instead, you would need to
+    #    give create_gradio_demo a subfolder argument like train_model's, since
+    #    "wuhp/myr1" keeps its files in subfolder "myr1".)
+    demo = create_gradio_demo(
+        model_name_or_path="finetuned_myr1",
+        generation_config_path=None  # or "finetuned_myr1/generation_config.json"
+    )
+
+    # 3) Launch
+    print("Launching Gradio demo on http://127.0.0.1:7860 ...")
     demo.launch()
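
To try the updated app locally (a sketch; this commit pins no package versions, and device_map="auto" additionally requires 'accelerate'):

  pip install torch transformers gradio accelerate
  python app.py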