myr1-2

Sleeping

App Files Files Community

wuhp committed 8 days ago

Commit

c8df7a5

verified ·

1 Parent(s): 4df6952

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -30

app.py CHANGED Viewed

@@ -20,10 +20,6 @@ from peft import LoraConfig, TaskType, get_peft_model, prepare_model_for_kbit_tr
 from sentence_transformers import SentenceTransformer
-# Import your custom configuration overrides.
-# For example, your configuration_deepseek.py might export a dictionary called CONFIG_OVERRIDES.
-import configuration_deepseek
 # Global variables for pipelines and settings.
 TEXT_PIPELINE = None
 COMPARISON_PIPELINE = None
@@ -32,13 +28,14 @@ NUM_EXAMPLES = 1000
 @spaces.GPU(duration=300)
 def finetune_small_subset():
     """
-    1) Loads your custom model ("wuhp/myr1") in 4-bit quantization (QLoRA style),
-    2) Adds LoRA adapters (trainable),
-    3) Fine-tunes on a small subset of the ServiceNow-AI/R1-Distill-SFT dataset,
-    4) Saves the LoRA adapter to "finetuned_myr1",
-    5) Reloads the LoRA adapter for inference.
     """
-    # Load the new dataset.
     ds = load_dataset("ServiceNow-AI/R1-Distill-SFT", split="train")
     ds = ds.select(range(min(NUM_EXAMPLES, len(ds))))
@@ -49,15 +46,13 @@ def finetune_small_subset():
         bnb_4bit_quant_type="nf4",
     )
-    # Load the base configuration from your model repository.
     base_config = AutoConfig.from_pretrained(
         "wuhp/myr1",
         subfolder="myr1",
         trust_remote_code=True,
     )
-    # Apply your custom overrides (from configuration_deepseek.py).
-    for key, value in configuration_deepseek.CONFIG_OVERRIDES.items():
-        setattr(base_config, key, value)
     tokenizer = AutoTokenizer.from_pretrained(
         "wuhp/myr1",
@@ -65,8 +60,6 @@ def finetune_small_subset():
         trust_remote_code=True
     )
-    # Load the model. With trust_remote_code=True, your custom model class (e.g. DeepseekV3ForCausalLM)
-    # will be loaded from the repository.
     base_model = AutoModelForCausalLM.from_pretrained(
         "wuhp/myr1",
         subfolder="myr1",
@@ -88,7 +81,6 @@ def finetune_small_subset():
     )
     lora_model = get_peft_model(base_model, lora_config)
-    # For this dataset, assume "problem" is the prompt and "solution" is the target.
     def tokenize_fn(ex):
         text = (
             f"Problem: {ex['problem']}\n\n"
@@ -107,9 +99,9 @@ def finetune_small_subset():
         per_device_train_batch_size=1,
         gradient_accumulation_steps=2,
         logging_steps=5,
-        save_steps=999999,   # High save interval
         save_total_limit=1,
-        fp16=False,          # Set to True if supported by your hardware
     )
     trainer = Trainer(
@@ -120,11 +112,9 @@ def finetune_small_subset():
     )
     trainer.train()
-    # Save the LoRA adapter and tokenizer.
     trainer.model.save_pretrained("finetuned_myr1")
     tokenizer.save_pretrained("finetuned_myr1")
-    # Reload the base model and attach the LoRA adapter for inference.
     base_model_2 = AutoModelForCausalLM.from_pretrained(
         "wuhp/myr1",
         subfolder="myr1",
@@ -147,8 +137,7 @@ def finetune_small_subset():
 def ensure_pipeline():
     """
-    If we haven't fine-tuned yet (i.e. TEXT_PIPELINE is None),
-    load the base model (without LoRA) in 4-bit mode.
     """
     global TEXT_PIPELINE
     if TEXT_PIPELINE is None:
@@ -159,8 +148,6 @@ def ensure_pipeline():
             bnb_4bit_quant_type="nf4",
         )
         base_config = AutoConfig.from_pretrained("wuhp/myr1", subfolder="myr1", trust_remote_code=True)
-        for key, value in configuration_deepseek.CONFIG_OVERRIDES.items():
-            setattr(base_config, key, value)
         tokenizer = AutoTokenizer.from_pretrained("wuhp/myr1", subfolder="myr1", trust_remote_code=True)
         base_model = AutoModelForCausalLM.from_pretrained(
             "wuhp/myr1",
@@ -175,7 +162,7 @@ def ensure_pipeline():
 def ensure_comparison_pipeline():
     """
-    Load a reference DeepSeek model pipeline if not already loaded.
     """
     global COMPARISON_PIPELINE
     if COMPARISON_PIPELINE is None:
@@ -233,8 +220,7 @@ def compare_models(prompt, temperature, top_p, min_new_tokens, max_new_tokens):
 class ConversationRetriever:
     """
-    A simple in-memory FAISS-based retriever.
-    Each text chunk is embedded using SentenceTransformer.
     """
     def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2", embed_dim=384):
         self.embed_model = SentenceTransformer(model_name)
@@ -270,7 +256,7 @@ retriever = ConversationRetriever()
 def build_rag_prompt(user_query, retrieved_chunks):
     """
-    Build a prompt for retrieval-augmented generation.
     """
     context_str = ""
     for i, (chunk, dist) in enumerate(retrieved_chunks):
@@ -285,7 +271,7 @@ def build_rag_prompt(user_query, retrieved_chunks):
 @spaces.GPU(duration=120)
 def chat_rag(user_input, history, temperature, top_p, min_new_tokens, max_new_tokens):
     """
-    Chat function with retrieval augmentation.
     """
     pipe = ensure_pipeline()
     retriever.add_text(f"User: {user_input}")

 from sentence_transformers import SentenceTransformer
 # Global variables for pipelines and settings.
 TEXT_PIPELINE = None
 COMPARISON_PIPELINE = None
 @spaces.GPU(duration=300)
 def finetune_small_subset():
     """
+    Fine-tunes the custom DeepSeekV3 model on a small subset of the ServiceNow-AI/R1-Distill-SFT dataset.
+    Steps:
+      1) Loads the model from "wuhp/myr1" (using files from the "myr1" subfolder via trust_remote_code).
+      2) Applies 4-bit quantization and prepares for QLoRA training.
+      3) Fine-tunes on the dataset (mapping "problem" to prompt and "solution" to target).
+      4) Saves the LoRA adapter to "finetuned_myr1".
+      5) Reloads the adapter for inference.
     """
     ds = load_dataset("ServiceNow-AI/R1-Distill-SFT", split="train")
     ds = ds.select(range(min(NUM_EXAMPLES, len(ds))))
         bnb_4bit_quant_type="nf4",
     )
+    # Load the custom model configuration from the repository.
     base_config = AutoConfig.from_pretrained(
         "wuhp/myr1",
         subfolder="myr1",
         trust_remote_code=True,
     )
+    # (Optionally apply local overrides here if needed.)
     tokenizer = AutoTokenizer.from_pretrained(
         "wuhp/myr1",
         trust_remote_code=True
     )
     base_model = AutoModelForCausalLM.from_pretrained(
         "wuhp/myr1",
         subfolder="myr1",
     )
     lora_model = get_peft_model(base_model, lora_config)
     def tokenize_fn(ex):
         text = (
             f"Problem: {ex['problem']}\n\n"
         per_device_train_batch_size=1,
         gradient_accumulation_steps=2,
         logging_steps=5,
+        save_steps=999999,
         save_total_limit=1,
+        fp16=False,
     )
     trainer = Trainer(
     )
     trainer.train()
     trainer.model.save_pretrained("finetuned_myr1")
     tokenizer.save_pretrained("finetuned_myr1")
     base_model_2 = AutoModelForCausalLM.from_pretrained(
         "wuhp/myr1",
         subfolder="myr1",
 def ensure_pipeline():
     """
+    Loads the base model (without LoRA) if no fine-tuned model is available.
     """
     global TEXT_PIPELINE
     if TEXT_PIPELINE is None:
             bnb_4bit_quant_type="nf4",
         )
         base_config = AutoConfig.from_pretrained("wuhp/myr1", subfolder="myr1", trust_remote_code=True)
         tokenizer = AutoTokenizer.from_pretrained("wuhp/myr1", subfolder="myr1", trust_remote_code=True)
         base_model = AutoModelForCausalLM.from_pretrained(
             "wuhp/myr1",
 def ensure_comparison_pipeline():
     """
+    Loads a reference DeepSeek model pipeline if not already loaded.
     """
     global COMPARISON_PIPELINE
     if COMPARISON_PIPELINE is None:
 class ConversationRetriever:
     """
+    A FAISS-based retriever using SentenceTransformer for embedding.
     """
     def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2", embed_dim=384):
         self.embed_model = SentenceTransformer(model_name)
 def build_rag_prompt(user_query, retrieved_chunks):
     """
+    Builds a prompt for retrieval-augmented generation.
     """
     context_str = ""
     for i, (chunk, dist) in enumerate(retrieved_chunks):
 @spaces.GPU(duration=120)
 def chat_rag(user_input, history, temperature, top_p, min_new_tokens, max_new_tokens):
     """
+    Chat with retrieval augmentation.
     """
     pipe = ensure_pipeline()
     retriever.add_text(f"User: {user_input}")