Jimin Park committed on
Commit 2ddfac0 · 1 Parent(s): 130f61e

updated app.py

Files changed (2):
  1. README.md +6 -7
  2. app.py +84 -1
README.md CHANGED
@@ -1,14 +1,13 @@
 ---
-title: Iris
-emoji: 💬
-colorFrom: yellow
-colorTo: purple
+title: Unsloth Fine-Tuned Chatbot
+emoji: 🤖
+colorFrom: blue
+colorTo: green
 sdk: gradio
-sdk_version: 5.0.1
+sdk_version: 4.19.2
+python_version: 3.8
 app_file: app.py
 pinned: false
-license: apache-2.0
-short_description: id2223 lab 2
 ---
 
 An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
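The new frontmatter pins the Space to Gradio 4.19.2 on Python 3.8 and drops the license and short_description fields. On a Gradio Space, the extra Python packages that app.py imports would normally be declared in a requirements.txt, which is not part of this commit. A minimal sketch, assuming only the imports visible in app.py below (gradio itself is supplied by sdk_version, and any version pins here would be assumptions):

# requirements.txt — hypothetical, not included in this commit
torch
transformers
unsloth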
app.py CHANGED
@@ -3,6 +3,87 @@ import transformers
 import gradio as gr
 from unsloth import FastLanguageModel
 
+# Load the fine-tuned Unsloth model
+max_seq_length = 2048 # Adjust based on your training
+dtype = None # Auto-detect is fine for CPU
+
+def load_model():
+    model, tokenizer = FastLanguageModel.from_pretrained(
+        model_name="ivwhy/lora_model", # Your fine-tuned model path
+        max_seq_length=max_seq_length,
+        dtype=dtype,
+        load_in_4bit=True, # Keep 4-bit loading enabled
+    )
+
+    # Optional: Add special tokens for chat if needed
+    tokenizer.pad_token = tokenizer.eos_token
+
+    # Create the pipeline for CPU
+    pipeline = transformers.pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        device=-1 # Force CPU usage
+    )
+
+    return pipeline, tokenizer
+
+# Load model globally
+generation_pipeline, tokenizer = load_model()
+
+def chat_function(message, history, system_prompt, max_new_tokens, temperature):
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": message}
+    ]
+
+    # Apply chat template
+    prompt = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
+    )
+
+    # Define terminators
+    terminators = [
+        tokenizer.eos_token_id,
+        tokenizer.convert_tokens_to_ids("<|eot_id|>")
+    ]
+
+    # Generate response
+    outputs = generation_pipeline(
+        prompt,
+        max_new_tokens=max_new_tokens,
+        eos_token_id=terminators,
+        do_sample=True,
+        temperature=temperature,
+        top_p=0.9,
+    )
+
+    # Extract and return just the generated text
+    return outputs[0]["generated_text"][len(prompt):]
+
+# Create Gradio interface
+demo = gr.ChatInterface(
+    chat_function,
+    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
+    chatbot=gr.Chatbot(height=400),
+    additional_inputs=[
+        gr.Textbox("You are helpful AI", label="System Prompt"),
+        gr.Slider(minimum=1, maximum=4000, value=500, label="Max New Tokens"),
+        gr.Slider(minimum=0, maximum=1, value=0.7, label="Temperature")
+    ]
+)
+
+if __name__ == "__main__":
+    demo.launch()
+
+'''================================== OLD VER ==============================
+import torch
+import transformers
+import gradio as gr
+from unsloth import FastLanguageModel
+
 # Load the fine-tuned Unsloth model
 max_seq_length = 2048 # Adjust based on your training
 dtype = None # None for auto detection
@@ -76,4 +157,6 @@ demo = gr.ChatInterface(
 )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
+
+'''
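Because chat_function is an ordinary Python function, the generation path can be smoke-tested without launching the Gradio UI. A minimal sketch, assuming ivwhy/lora_model downloads successfully when app is imported (the message strings and token budget are illustrative; note that chat_function accepts history but does not use it, and that load_in_4bit=True normally requires a CUDA-capable GPU via bitsandbytes, so a CPU-only Space may need load_in_4bit=False):

# smoke_test.py — hypothetical check, not part of this commit
from app import chat_function  # importing app runs load_model() globally

reply = chat_function(
    message="Hello!",
    history=[],  # gr.ChatInterface passes the running history here
    system_prompt="You are helpful AI",
    max_new_tokens=64,  # small budget keeps a CPU run quick
    temperature=0.7,
)
print(reply)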