sagar007 committed on
Commit
4ea5b3f
·
verified ·
1 Parent(s): a66049a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -18
app.py CHANGED
@@ -2,6 +2,7 @@ import torch
2
  import gradio as gr
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  from peft import PeftModel, PeftConfig
 
5
 
6
  # Load model and tokenizer
7
  MODEL_PATH = "sagar007/phi2_finetune"
@@ -9,29 +10,18 @@ MODEL_PATH = "sagar007/phi2_finetune"
9
  tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
10
  tokenizer.pad_token = tokenizer.eos_token
11
 
12
- # Load the base model
13
  base_model = AutoModelForCausalLM.from_pretrained(
14
  "microsoft/phi-2",
15
- torch_dtype=torch.float32,
16
- device_map="cpu",
17
- trust_remote_code=True,
18
- low_cpu_mem_usage=True
19
  )
20
 
21
- # Apply PEFT
22
  peft_config = PeftConfig.from_pretrained(MODEL_PATH)
23
- model = PeftModel.from_pretrained(base_model, MODEL_PATH, device_map="cpu")
24
-
25
- # Merge the PEFT model with the base model
26
- model = model.merge_and_unload()
27
-
28
- # Quantize the model
29
- model = torch.quantization.quantize_dynamic(
30
- model, {torch.nn.Linear}, dtype=torch.qint8
31
- )
32
-
33
  model.eval()
34
 
 
35
  def generate_response(instruction, max_length=512):
36
  prompt = f"Instruction: {instruction}\nResponse:"
37
  inputs = tokenizer(prompt, return_tensors="pt")
@@ -55,8 +45,8 @@ def chatbot(message, history):
55
 
56
  demo = gr.ChatInterface(
57
  chatbot,
58
- title="Fine-tuned Phi-2 Chatbot (CPU Optimized)",
59
- description="This is a chatbot using a quantized, fine-tuned version of the Phi-2 model, optimized for CPU inference.",
60
  theme="default",
61
  examples=[
62
  "Explain the concept of machine learning.",
 
2
  import gradio as gr
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  from peft import PeftModel, PeftConfig
5
+ import spaces
6
 
7
  # Load model and tokenizer
8
  MODEL_PATH = "sagar007/phi2_finetune"
 
10
  tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
11
  tokenizer.pad_token = tokenizer.eos_token
12
 
 
13
  base_model = AutoModelForCausalLM.from_pretrained(
14
  "microsoft/phi-2",
15
+ torch_dtype=torch.float32, # Use float32 for CPU
16
+ device_map="auto",
17
+ trust_remote_code=True
 
18
  )
19
 
 
20
  peft_config = PeftConfig.from_pretrained(MODEL_PATH)
21
+ model = PeftModel.from_pretrained(base_model, MODEL_PATH)
 
 
 
 
 
 
 
 
 
22
  model.eval()
23
 
24
+ @spaces.GPU(duration=60)
25
  def generate_response(instruction, max_length=512):
26
  prompt = f"Instruction: {instruction}\nResponse:"
27
  inputs = tokenizer(prompt, return_tensors="pt")
 
45
 
46
  demo = gr.ChatInterface(
47
  chatbot,
48
+ title="Fine-tuned Phi-2 Chatbot",
49
+ description="This is a chatbot using a fine-tuned version of the Phi-2 model.",
50
  theme="default",
51
  examples=[
52
  "Explain the concept of machine learning.",