pepe_2 / sql_generator.py
nileshhanotia's picture
Create sql_generator.py
9a4c76d verified
import spacy
# Load the spaCy English model
nlp = spacy.load("en_core_web_sm")
# Define the mapping of keywords to SQL column names based on the new schema
column_mapping = {
"handle": "Handle",
"title": "Title",
"body": "Body (HTML)",
"vendor": "Vendor",
"type": "Type",
"tags": "Tags",
"published": "Published",
"option1_name": "Option1 Name",
"option1_value": "Option1 Value",
"option2_name": "Option2 Name",
"option2_value": "Option2 Value",
"option3_name": "Option3 Name",
"option3_value": "Option3 Value",
"variant_sku": "Variant SKU",
"variant_price": "Variant Price",
"variant_inventory_qty": "Variant Inventory Qty",
# Add more mappings as needed
}
# Function to generate SQL query based on natural language input
def generate_sql_query(natural_language_input):
doc = nlp(natural_language_input)
# Initialize SQL query parts
sql_query = "SELECT * FROM products WHERE "
conditions = []
# Extract keywords and values for building the query
for token in doc:
if token.lemma_ in column_mapping:
column_name = column_mapping[token.lemma_]
# Look for the word 'is' and the value after it
if token.nbor(1).text.lower() == "is":
value_token = token.nbor(2) # Get the token after 'is'
# Check if the value token is valid (e.g., if it's a string)
if value_token and value_token.text not in ["and", "or"]:
# Format the value correctly for SQL
conditions.append(f"{column_name} = '{value_token.text}'")
# Join conditions with AND
if conditions:
sql_query += " AND ".join(conditions)
else:
sql_query = sql_query[:-7] # Remove the last " WHERE " if no conditions were added
return sql_query
# Example natural language input
natural_language_input = "Find products where title is laptop and vendor is apple and published is active"
# Generate SQL query
sql_query = generate_sql_query(natural_language_input)
print(sql_query)