Spaces:
Runtime error
Runtime error
File size: 1,694 Bytes
ea3fea4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import sqlparse
import torch
# Hugging Face Hub checkpoint shared by the tokenizer and the model.
model_name = "defog/sqlcoder-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Loading options, collected in one place so each choice can be annotated.
_model_load_options = dict(
    trust_remote_code=True,
    # Half precision (float16) instead of the default dtype.
    torch_dtype=torch.float16,
    # Let the loader decide where each layer is placed.
    device_map="auto",
    use_cache=True,
    # Directory used when weights have to be offloaded to disk.
    offload_folder="text_to_sql_defog_7b/offfolder",
    offload_state_dict=True,
)
model = AutoModelForCausalLM.from_pretrained(model_name, **_model_load_options)
def generate_response(prompt):
    """Run the model on *prompt* and return its output formatted as SQL.

    Args:
        prompt: Text entered by the user in the Gradio textbox.

    Returns:
        The first decoded sequence produced by ``model.generate`` (special
        tokens skipped), re-indented with ``sqlparse.format``. An empty
        string is returned when *prompt* is empty or whitespace only.
    """
    # Nothing to generate from; avoid a pointless (and slow) model call.
    if not prompt or not prompt.strip():
        return ""

    # The model is loaded with device_map="auto" and an offload folder, so its
    # layers may be spread over several devices or offloaded to disk.  Such a
    # model must not be moved with ``model.to(...)``; only the inputs are
    # placed on a device.
    target_device = model.device
    if target_device.type == "meta":
        # NOTE(review): presumably this means the first weights are offloaded;
        # fall back to the device selection the original code used.
        target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    inputs = tokenizer(prompt, return_tensors="pt").to(target_device)

    # Greedy decoding: no sampling, a single beam, a single returned sequence.
    generated_ids = model.generate(
        **inputs,
        num_return_sequences=1,
        max_new_tokens=400,
        do_sample=False,
        num_beams=1,
    )
    outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    return sqlparse.format(outputs[0], reindent=True)
# Gradio UI: a multi-line prompt box wired to generate_response, with the
# result shown in a second textbox.
iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=7, label="Input Prompt", placeholder="Enter your prompt here..."),
    outputs=gr.Textbox(label="Generated SQL"),
    title="SQL Query Generator",
    description="Generates SQL queries based on the provided natural language prompt. Powered by the 'defog/sqlcoder-7b' model.",
)

# Start the web server; share=True additionally requests a public share link.
iface.launch(share=True)