cboettig committed
Commit df3be67 · 1 Parent(s): 73afc0c

sql as streamlit

Files changed (2):
  1. Makefile +5 -0
  2. sql.py +36 -35
Makefile CHANGED
@@ -16,3 +16,8 @@ chat:
 rag:
 	@echo "\n 🌎 preview at: \033[1m https://${BASE}${JUPYTERHUB_SERVICE_PREFIX}proxy/8501/ \033[0m \n"
 	streamlit run rag.py --server.port 8501 1> /dev/null 2>&1
+
+.PHONY: sql
+sql:
+	@echo "\n 🌎 preview at: \033[1m https://${BASE}${JUPYTERHUB_SERVICE_PREFIX}proxy/8501/ \033[0m \n"
+	streamlit run sql.py --server.port 8501 1> /dev/null 2>&1
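With this target in place, make sql serves the new app the same way make rag does: Streamlit listens on port 8501 with its output silenced, and the echoed URL reaches it through the JupyterHub proxy, assuming BASE and JUPYTERHUB_SERVICE_PREFIX are set in the environment.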
sql.py CHANGED
@@ -1,27 +1,27 @@
 import streamlit as st
 
-from langchain_community.utilities import SQLDatabase
-from langchain_openai import ChatOpenAI
-from langchain.chains import create_sql_query_chain
-
-# +
-# Set up Langchain SQL access
-parquet = "https://espm-157-f24.github.io/spatial-carl-amanda-tyler/new_haven_stats.parquet"
-
-db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)
-db.run(f"create or replace view mydata as select * from read_parquet('{parquet}');")
-
-llm = ChatOpenAI(model="llama3",
-                 temperature=0,
-                 api_key=st.secrets["LITELLM_KEY"],
-                 base_url = "https://llm.nrp-nautilus.io")
-
-db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)
-
-# -
-
-
-from langchain_core.prompts import PromptTemplate
+st.title("SQL demo")
+
+parquet = st.text_input("parquet file:", "https://espm-157-f24.github.io/spatial-carl-amanda-tyler/new_haven_stats.parquet")
+
+# create sharable low-level connection, see: https://github.com/Mause/duckdb_engine
+import sqlalchemy
+eng = sqlalchemy.create_engine("duckdb:///:memory:")
+
+# ibis can talk to this connection and create the VIEW
+import ibis
+from ibis import _
+con = ibis.duckdb.from_connection(eng.raw_connection())
+tbl = con.read_parquet(parquet, "mydata")
+
+# langchain can also talk to this connection and see the table:
+from langchain_community.utilities import SQLDatabase
+db = SQLDatabase(eng, view_support=True)
+
+#db.run(f"create or replace view mydata as select * from read_parquet('{parquet}');")
+#print(db.get_usable_table_names())
+
+# Build the template for system prompt
 template = '''
 You are a {dialect} expert. Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer to the input question.
 Always return all columns from a query (select *) unless otherwise instructed.
@@ -30,34 +30,35 @@ Pay attention to use only the column names you can see in the tables below.
 Be careful to not query for columns that do not exist.
 Also, pay attention to which column is in which table.
 Pay attention to use today() function to get the current date, if the question involves "today".
-
 Respond with only the SQL query to run. Do not repeat the question or explanation. Just the raw SQL query.
-
 Only use the following tables:
 {table_info}
-
-Question: {input}
-
+Question: {input}
 '''
+
+from langchain_core.prompts import PromptTemplate
 prompt = PromptTemplate.from_template(template, partial_variables = {"dialect": "duckdb", "top_k": 10})
+
+# Now we are ready to create our model and start querying!
+from langchain_openai import ChatOpenAI
+llm = ChatOpenAI(model="gorilla", # Try: llama3, gorilla, or groq-tools, or other models
+                 temperature=0,
+                 api_key=st.secrets["LITELLM_KEY"],
+                 base_url = "https://llm.nrp-nautilus.io")
+
+
+from langchain.chains import create_sql_query_chain
 chain = create_sql_query_chain(llm, db, prompt)
 
-# +
-#print(db.dialect)
-#print(db.get_usable_table_names())
-#chain.get_prompts()[0].pretty_print()
-# -
-
-response = chain.invoke({"question": "what is the mean ndvi by grade?"})
-response
-
-# +
-# use the response in a query
-
-import ibis
-from ibis import _
-con = ibis.duckdb.connect()
-tbl = con.read_parquet(parquet, "mydata")
-tbl.sql(response).execute()
+prompt = st.chat_input("What is the mean ndvi by grade?")
 
+if prompt:
+    response = chain.invoke({"question": prompt})
+    with st.chat_message("ai"):
+        st.write(response)
+    df = tbl.sql(response).head(10).execute()
+    df
 
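The substantive change in sql.py is that ibis and LangChain now share a single in-memory DuckDB connection: ibis registers the remote parquet file as a view named mydata, SQLDatabase(eng, view_support=True) exposes that same view to the LLM for schema-aware prompt construction, and the generated SQL is executed back through ibis. A minimal sketch of that pattern outside Streamlit, mirroring the calls in the new sql.py (the duckdb_engine dialect and the ibis DuckDB backend are assumed to be installed; the count(*) smoke-test query and the printed expectations are illustrations, not part of the commit):

import sqlalchemy
import ibis
from langchain_community.utilities import SQLDatabase

parquet = "https://espm-157-f24.github.io/spatial-carl-amanda-tyler/new_haven_stats.parquet"

# One in-memory DuckDB engine shared by both libraries.
eng = sqlalchemy.create_engine("duckdb:///:memory:")

# ibis attaches to the engine's low-level connection and registers the
# parquet file as a view called "mydata".
con = ibis.duckdb.from_connection(eng.raw_connection())
tbl = con.read_parquet(parquet, "mydata")

# LangChain wraps the same engine; with view_support=True the "mydata"
# view created above should appear in the table info sent to the LLM.
db = SQLDatabase(eng, view_support=True)
print(db.get_usable_table_names())  # expected to include "mydata"

# Any SQL the model later produces can be run back through ibis:
print(tbl.sql("SELECT count(*) AS n FROM mydata").execute())

Compared with the previous version, which created the view through a file-backed duckdb:///tmp.db database and then opened a second, independent connection with ibis.duckdb.connect(), the shared engine keeps one source of truth for the view and avoids writing a temporary database file.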