Spaces:
Sleeping
Sleeping
sql as streamlit
Browse files
Makefile
CHANGED
@@ -16,3 +16,8 @@ chat:
|
|
16 |
rag:
|
17 |
@echo "\n π preview at: \033[1m https://${BASE}${JUPYTERHUB_SERVICE_PREFIX}proxy/8501/ \033[0m \n"
|
18 |
streamlit run rag.py --server.port 8501 1> /dev/null 2>&1
|
|
|
|
|
|
|
|
|
|
|
|
16 |
rag:
|
17 |
@echo "\n π preview at: \033[1m https://${BASE}${JUPYTERHUB_SERVICE_PREFIX}proxy/8501/ \033[0m \n"
|
18 |
streamlit run rag.py --server.port 8501 1> /dev/null 2>&1
|
19 |
+
|
20 |
+
.PHONY: sql
|
21 |
+
sql:
|
22 |
+
@echo "\n π preview at: \033[1m https://${BASE}${JUPYTERHUB_SERVICE_PREFIX}proxy/8501/ \033[0m \n"
|
23 |
+
streamlit run sql.py --server.port 8501 1> /dev/null 2>&1
|
sql.py
CHANGED
@@ -1,27 +1,27 @@
|
|
1 |
import streamlit as st
|
2 |
|
3 |
-
|
4 |
-
from langchain_openai import ChatOpenAI
|
5 |
-
from langchain.chains import create_sql_query_chain
|
6 |
-
|
7 |
-
# +
|
8 |
-
# Set up Langchain SQL access
|
9 |
-
parquet = "https://espm-157-f24.github.io/spatial-carl-amanda-tyler/new_haven_stats.parquet"
|
10 |
|
11 |
-
|
12 |
-
db.run(f"create or replace view mydata as select * from read_parquet('{parquet}');")
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
base_url = "https://llm.nrp-nautilus.io")
|
18 |
|
19 |
-
|
|
|
|
|
|
|
|
|
20 |
|
21 |
-
#
|
|
|
|
|
22 |
|
|
|
|
|
23 |
|
24 |
-
|
25 |
template = '''
|
26 |
You are a {dialect} expert. Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer to the input question.
|
27 |
Always return all columns from a query (select *) unless otherwise instructed.
|
@@ -30,34 +30,35 @@ Pay attention to use only the column names you can see in the tables below.
|
|
30 |
Be careful to not query for columns that do not exist.
|
31 |
Also, pay attention to which column is in which table.
|
32 |
Pay attention to use today() function to get the current date, if the question involves "today".
|
33 |
-
|
34 |
Respond with only the SQL query to run. Do not repeat the question or explanation. Just the raw SQL query.
|
35 |
-
|
36 |
Only use the following tables:
|
37 |
{table_info}
|
38 |
-
|
39 |
-
Question: {input}
|
40 |
-
|
41 |
'''
|
|
|
|
|
42 |
prompt = PromptTemplate.from_template(template, partial_variables = {"dialect": "duckdb", "top_k": 10})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
chain = create_sql_query_chain(llm, db, prompt)
|
44 |
|
45 |
-
|
46 |
-
#print(db.dialect)
|
47 |
-
#print(db.get_usable_table_names())
|
48 |
-
#chain.get_prompts()[0].pretty_print()
|
49 |
-
# -
|
50 |
|
51 |
-
|
52 |
-
response
|
|
|
|
|
|
|
|
|
53 |
|
54 |
-
# +
|
55 |
-
# use the response in a query
|
56 |
|
57 |
-
import ibis
|
58 |
-
from ibis import _
|
59 |
-
con = ibis.duckdb.connect()
|
60 |
-
tbl = con.read_parquet(parquet, "mydata")
|
61 |
-
tbl.sql(response).execute()
|
62 |
|
63 |
|
|
|
1 |
import streamlit as st
|
2 |
|
3 |
+
st.title("SQL demo")
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
+
parquet = st.text_input("parquet file:", "https://espm-157-f24.github.io/spatial-carl-amanda-tyler/new_haven_stats.parquet")
|
|
|
6 |
|
7 |
+
# Create a single low-level DuckDB connection that ibis and langchain can both share; see: https://github.com/Mause/duckdb_engine
|
8 |
+
import sqlalchemy
|
9 |
+
eng = sqlalchemy.create_engine("duckdb:///:memory:")
|
|
|
10 |
|
11 |
+
# ibis can talk to this connection and create the VIEW ("mydata") from the parquet file
|
12 |
+
import ibis
|
13 |
+
from ibis import _
|
14 |
+
con = ibis.duckdb.from_connection(eng.raw_connection())
|
15 |
+
tbl = con.read_parquet(parquet, "mydata")
|
16 |
|
17 |
+
# langchain can also talk to this connection and see the table:
|
18 |
+
from langchain_community.utilities import SQLDatabase
|
19 |
+
db = SQLDatabase(eng, view_support=True)
|
20 |
|
21 |
+
#db.run(f"create or replace view mydata as select * from read_parquet('{parquet}');")
|
22 |
+
#print(db.get_usable_table_names())
|
23 |
|
24 |
+
# Build the template for the system prompt
|
25 |
template = '''
|
26 |
You are a {dialect} expert. Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer to the input question.
|
27 |
Always return all columns from a query (select *) unless otherwise instructed.
|
|
|
30 |
Be careful to not query for columns that do not exist.
|
31 |
Also, pay attention to which column is in which table.
|
32 |
Pay attention to use today() function to get the current date, if the question involves "today".
|
|
|
33 |
Respond with only the SQL query to run. Do not repeat the question or explanation. Just the raw SQL query.
|
|
|
34 |
Only use the following tables:
|
35 |
{table_info}
|
36 |
+
Question: {input}
|
|
|
|
|
37 |
'''
|
38 |
+
|
39 |
+
from langchain_core.prompts import PromptTemplate
|
40 |
prompt = PromptTemplate.from_template(template, partial_variables = {"dialect": "duckdb", "top_k": 10})
|
41 |
+
|
42 |
+
# Now we are ready to create our model and start querying!
|
43 |
+
from langchain_openai import ChatOpenAI
|
44 |
+
llm = ChatOpenAI(model="gorilla", # Try: llama3, gorilla, or groq-tools, or other models
|
45 |
+
temperature=0,
|
46 |
+
api_key=st.secrets["LITELLM_KEY"],
|
47 |
+
base_url = "https://llm.nrp-nautilus.io")
|
48 |
+
|
49 |
+
|
50 |
+
from langchain.chains import create_sql_query_chain
|
51 |
chain = create_sql_query_chain(llm, db, prompt)
|
52 |
|
53 |
+
prompt = st.chat_input("What is the mean ndvi by grade?")
|
|
|
|
|
|
|
|
|
54 |
|
55 |
+
if prompt:
|
56 |
+
response = chain.invoke({"question": prompt})
|
57 |
+
with st.chat_message("ai"):
|
58 |
+
st.write(response)
|
59 |
+
df = tbl.sql(response).head(10).execute()
|
60 |
+
df
|
61 |
|
|
|
|
|
62 |
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
|