from unsloth import FastLanguageModel  # import unsloth first so its patches apply before transformers loads
import torch
import re
import nltk
import gradio as gr

nltk.download('punkt')  # NLTK sentence-tokenizer data; fetched at startup (not used by the code below)
qa_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. | |
### Instruction: | |
{} | |
### Input: | |
{} | |
### Response: | |
{}""" | |
max_seq_length = 2048  # Unsloth supports RoPE scaling internally, so any length works
dtype = None           # None = auto-detect; float16 for Tesla T4/V100, bfloat16 for Ampere+
load_in_4bit = True    # 4-bit quantization to reduce memory usage; set False for full precision
def preprocess_text(text):
    # Convert to lowercase
    text = text.lower()
    # Remove punctuation
    text = re.sub(r'[^\w\s]', '', text)
    # Collapse extra whitespace
    text = ' '.join(text.split())
    return text
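# Hypothetical usage (the app never calls preprocess_text itself; it is available for input cleanup):
#   preprocess_text("What is Q-learning?")  ->  "what is qlearning"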
def generate_answer(model_name, question, load_in_4bit=True):  # load_in_4bit exposed as a parameter with a default
    # Note: the model is reloaded on every call; see the cached-loading sketch after this function.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,  # the model used for fine-tuning
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
        # load_in_8bit_fp32_cpu_offload=True,  # uncomment to enable CPU offloading
        device_map={"": 0},  # place the model on GPU 0
    )
    FastLanguageModel.for_inference(model)  # enable Unsloth's faster native inference path
    inputs = tokenizer(
        [
            qa_prompt.format(
                "Please provide the answer for the question",  # instruction
                question,                                      # input
                "",                                            # response left blank for generation
            )
        ],
        return_tensors="pt",
    ).to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
    predicted_answer = tokenizer.batch_decode(outputs)
    # Keep only the text after "### Response:" and drop the end-of-sequence token.
    response = predicted_answer[0].split("### Response:")[-1].strip()
    response = response.replace("</s>", "")
    return response
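# Optional sketch (not part of the original app): since generate_answer reloads the model on
# every request, a module-level cache can reuse already-loaded models. The _model_cache dict
# and load_model_cached helper below are hypothetical names; the loading call mirrors the one
# used in generate_answer above.
_model_cache = {}

def load_model_cached(model_name):
    # Load and prepare the model once per model_name, then reuse it on later requests.
    if model_name not in _model_cache:
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=model_name,
            max_seq_length=max_seq_length,
            dtype=dtype,
            load_in_4bit=load_in_4bit,
            device_map={"": 0},
        )
        FastLanguageModel.for_inference(model)
        _model_cache[model_name] = (model, tokenizer)
    return _model_cache[model_name]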
iface = gr.Interface(
    fn=generate_answer,
    inputs=[
        gr.Dropdown(
            choices=["GSridhar1982/AIML_QA_Mistral7B_FineTuned_Unsloth"],
            label="Select Model",
        ),
        gr.Textbox(lines=5, label="Question"),
    ],
    outputs=gr.Textbox(label="Answer"),
)
iface.launch(debug=True)
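# debug=True streams full tracebacks to the Space logs, which helps diagnose runtime errors.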