Update app.py
app.py CHANGED
@@ -237,7 +237,7 @@ def compare_models(
     Args:
         prompt (str): The input prompt for text generation.
         temperature (float): Sampling temperature.
-        top_p (float):
+        top_p (float): Top-p sampling probability.
         min_new_tokens (int): Minimum number of new tokens to generate.
         max_new_tokens (int): Maximum number of new tokens to generate.
 
@@ -385,7 +385,7 @@ def chat_rag(
     Args:
         user_input (str): The user's chat input.
         history (list[list[str]]): The chat history.
         temperature (float): Sampling temperature.
-        top_p (float):
+        top_p (float): Top-p sampling probability.
         min_new_tokens (int): Minimum number of new tokens to generate.
         max_new_tokens (int): Maximum number of new tokens to generate.
 
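Both hunks above fill in the previously empty `top_p` docstring entry. For context, here is a minimal sketch of how these four sampling arguments typically map onto Hugging Face `generate` kwargs. The model id is the official model named in the comparison section of this diff; the loading code is an illustrative assumption, not app.py's actual setup, and the default values mirror the sliders wired below.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative only: the diff compares against this official model,
# but app.py's real model-loading code is not shown in this commit.
model_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("Summarize this ServiceNow incident:", return_tensors="pt")
output_ids = model.generate(
    **inputs,
    do_sample=True,       # temperature/top_p only take effect when sampling
    temperature=0.7,      # sampling temperature
    top_p=0.9,            # top-p (nucleus) sampling probability
    min_new_tokens=50,    # minimum number of new tokens to generate
    max_new_tokens=200,   # maximum number of new tokens to generate
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```

With `top_p=0.9`, each decoding step samples only from the smallest set of tokens whose cumulative probability reaches 0.9, keeping low-probability tail tokens out of the output.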
@@ -424,60 +424,70 @@ def chat_rag(
     return history, history
 
 
-# Build the Gradio interface
-with gr.Blocks(
-gr.Markdown("
+# Build the Gradio interface.
+with gr.Blocks() as demo:
+    gr.Markdown("# QLoRA Fine-tuning & RAG-based Chat Demo using Custom R1 Model")
+    gr.Markdown("---")
+
+    gr.Markdown("## ⚙️ Fine-tuning (Optional)")
+    gr.Markdown("This section allows you to fine-tune the custom R1 model on a small subset of the ServiceNow dataset. This step is optional but can potentially improve the model's performance on ServiceNow-related tasks. **Note:** This process may take up to 5 minutes.")
+    finetune_btn = gr.Button("🚀 Start Fine-tuning (QLoRA)")
+    status_box = gr.Textbox(label="Fine-tuning Status", interactive=False)
+    finetune_btn.click(fn=finetune_small_subset, outputs=status_box)
+    gr.Markdown("---")
+
+    gr.Markdown("## ✍️ Direct Generation (No Retrieval)")
+    gr.Markdown("Enter a prompt below to generate text directly using the custom R1 model. This is standard text generation without retrieval augmentation.")
+    prompt_in = gr.Textbox(lines=3, label="Input Prompt", placeholder="Enter your prompt here...")
+    temperature = gr.Slider(0.0, 1.5, step=0.1, value=0.7, label="Temperature (Creativity)")
+    top_p = gr.Slider(0.0, 1.0, step=0.05, value=0.9, label="Top-p (Sampling Nucleus)")
+    min_tokens = gr.Slider(1, 2500, value=50, step=10, label="Min New Tokens")
+    max_tokens = gr.Slider(1, 2500, value=200, step=50, label="Max New Tokens")
+    output_box = gr.Textbox(label="Custom R1 Output", lines=8, interactive=False)
+    gen_btn = gr.Button("✨ Generate Text")
+    gen_btn.click(
+        fn=predict,
+        inputs=[prompt_in, temperature, top_p, min_tokens, max_tokens],
+        outputs=output_box
+    )
+    gr.Markdown("---")
+
+    gr.Markdown("## 🆚 Compare Custom R1 vs Official R1")
+    gr.Markdown("Enter a prompt to compare the text generation of your fine-tuned custom R1 model with the official DeepSeek-R1-Distill-Llama-8B model.")
+    compare_prompt_in = gr.Textbox(lines=3, label="Comparison Prompt", placeholder="Enter prompt for comparison...")
+    compare_btn = gr.Button("⚖️ Compare Models")
+    out_custom = gr.Textbox(label="Custom R1 Output", lines=6, interactive=False)
+    out_official = gr.Textbox(label="Official R1 Output", lines=6, interactive=False)
+    compare_btn.click(
+        fn=compare_models,
+        inputs=[compare_prompt_in, temperature, top_p, min_tokens, max_tokens],
+        outputs=[out_custom, out_official]
+    )
+    gr.Markdown("---")
+
+    gr.Markdown("## 💬 Chat with Retrieval-Augmented Memory (RAG)")
+    gr.Markdown("Chat with the custom R1 model, enhanced with a retrieval-augmented memory. The model will retrieve relevant information based on your queries to provide more informed responses.")
+    with gr.Row():
+        with gr.Column():
+            chatbot = gr.Chatbot(label="RAG Chatbot")
+            chat_state = gr.State([])
+            user_input = gr.Textbox(
+                show_label=False,
+                placeholder="Ask a question to the RAG Chatbot...",
+                lines=2
+            )
+            send_btn = gr.Button("➡️ Send")
+            user_input.submit(
+                fn=chat_rag,
+                inputs=[user_input, chat_state, temperature, top_p, min_tokens, max_tokens],
+                outputs=[chat_state, chatbot]
+            )
+            send_btn.click(
+                fn=chat_rag,
+                inputs=[user_input, chat_state, temperature, top_p, min_tokens, max_tokens],
+                outputs=[chat_state, chatbot]
+            )
+    gr.Markdown("---")
+
 
 demo.launch()
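The callbacks wired above (`finetune_small_subset`, `predict`, `compare_models`, `chat_rag`) are defined earlier in app.py and do not appear in this diff. As a reading aid, here are hypothetical stubs whose signatures follow from the `inputs=`/`outputs=` lists and the docstrings patched above; the bodies are placeholders, not the app's real implementation.

```python
# Hypothetical stubs inferred from the Gradio wiring; bodies are placeholders.

def predict(prompt: str, temperature: float, top_p: float,
            min_new_tokens: int, max_new_tokens: int) -> str:
    """One string out -> fills output_box."""
    ...

def compare_models(prompt: str, temperature: float, top_p: float,
                   min_new_tokens: int, max_new_tokens: int) -> tuple[str, str]:
    """Two strings out -> (out_custom, out_official)."""
    ...

def chat_rag(user_input: str, history: list[list[str]], temperature: float,
             top_p: float, min_new_tokens: int,
             max_new_tokens: int) -> tuple[list[list[str]], list[list[str]]]:
    """Returns the updated history twice -> (chat_state, chatbot),
    matching the `return history, history` context line in the diff."""
    ...
```

Note that `user_input.submit` and `send_btn.click` call `chat_rag` with identical inputs and outputs, so pressing Enter and clicking Send behave the same way.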