Spaces:

ivwhy
/

iris

Sleeping

App Files Files Community

Jimin Park committed on Dec 8, 2024

Commit

2ddfac0

1 Parent(s): 130f61e

updated app.py

Browse files

Files changed (2) hide show

README.md +6 -7
app.py +84 -1

README.md CHANGED Viewed

@@ -1,14 +1,13 @@
 ---
-title: Iris
-emoji: 💬
-colorFrom: yellow
-colorTo: purple
 sdk: gradio
-sdk_version: 5.0.1
 app_file: app.py
 pinned: false
-license: apache-2.0
-short_description: id2223 lab 2
 ---
 An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

 ---
+title: Unsloth Fine-Tuned Chatbot
+emoji: 🤖
+colorFrom: blue
+colorTo: green
 sdk: gradio
+sdk_version: 4.19.2
+python_version: 3.8
 app_file: app.py
 pinned: false
 ---
 An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

app.py CHANGED Viewed

@@ -3,6 +3,87 @@ import transformers
 import gradio as gr
 from unsloth import FastLanguageModel
 # Load the fine-tuned Unsloth model
 max_seq_length = 2048  # Adjust based on your training
 dtype = None  # None for auto detection
@@ -76,4 +157,6 @@ demo = gr.ChatInterface(
 )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 from unsloth import FastLanguageModel
+# Settings used by load_model() when loading the fine-tuned Unsloth model.
+# max_seq_length is forwarded to FastLanguageModel.from_pretrained; it
+# should match the value used for fine-tuning.
+max_seq_length = 2048  # Adjust based on your training
+# dtype is forwarded unchanged; None asks the loader to auto-detect it.
+dtype = None  # Auto-detect is fine for CPU
+def load_model():
+    model, tokenizer = FastLanguageModel.from_pretrained(
+        model_name="ivwhy/lora_model",  # Your fine-tuned model path
+        max_seq_length=max_seq_length,
+        dtype=dtype,
+        load_in_4bit=True,  # Keep 4-bit loading enabled
+    )
+    # Optional: Add special tokens for chat if needed
+    tokenizer.pad_token = tokenizer.eos_token
+    # Create the pipeline for CPU
+    pipeline = transformers.pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        device=-1  # Force CPU usage
+    )
+    return pipeline, tokenizer
+# Load the model once at import time; chat_function() reads both
+# generation_pipeline and tokenizer as module-level globals on every request.
+generation_pipeline, tokenizer = load_model()
+def chat_function(message, history, system_prompt, max_new_tokens, temperature):
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": message}
+    ]
+    # Apply chat template
+    prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
+    )
+    # Define terminators
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
+    # Generate response
+    outputs = generation_pipeline(
+        prompt,
+        max_new_tokens=max_new_tokens,
+        eos_token_id=terminators,
+        do_sample=True,
+        temperature=temperature,
+        top_p=0.9,
+    )
+    # Extract and return just the generated text
+    return outputs[0]["generated_text"][len(prompt):]
+# Create Gradio interface
+demo = gr.ChatInterface(
+    chat_function,
+    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
+    chatbot=gr.Chatbot(height=400),
+    additional_inputs=[
+        gr.Textbox("You are helpful AI", label="System Prompt"),
+        gr.Slider(minimum=1, maximum=4000, value=500, label="Max New Tokens"),
+        gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
+    ]
+)
+if __name__ == "__main__":
+    demo.launch()
+'''================================== OLD VER ==============================
+import torch
+import transformers
+import gradio as gr
+from unsloth import FastLanguageModel
 # Load the fine-tuned Unsloth model
 max_seq_length = 2048  # Adjust based on your training
 dtype = None  # None for auto detection
 )
 if __name__ == "__main__":
+    demo.launch()
+'''