Spaces:

cboettig
/

streamlit-demo

Sleeping

App Files Files Community

cboettig committed on Nov 21, 2024

Commit

df3be67

1 Parent(s): 73afc0c

sql as streamlit

Browse files

Files changed (2) hide show

Makefile +5 -0
sql.py +36 -35

Makefile CHANGED Viewed

@@ -16,3 +16,8 @@ chat:
 rag:
 	@echo "\n 🌎  preview at: \033[1m https://${BASE}${JUPYTERHUB_SERVICE_PREFIX}proxy/8501/ \033[0m \n"
 	streamlit run rag.py --server.port 8501  1> /dev/null 2>&1

 rag:
 	@echo "\n 🌎  preview at: \033[1m https://${BASE}${JUPYTERHUB_SERVICE_PREFIX}proxy/8501/ \033[0m \n"
 	streamlit run rag.py --server.port 8501  1> /dev/null 2>&1
+.PHONY: sql
+sql:
+	@echo "\n 🌎  preview at: \033[1m https://${BASE}${JUPYTERHUB_SERVICE_PREFIX}proxy/8501/ \033[0m \n"
+	streamlit run sql.py --server.port 8501  1> /dev/null 2>&1

sql.py CHANGED Viewed

@@ -1,27 +1,27 @@
 import streamlit as st
-from langchain_community.utilities import SQLDatabase
-from langchain_openai import ChatOpenAI
-from langchain.chains import create_sql_query_chain
-# +
-# Set up Langchain SQL access
-parquet = "https://espm-157-f24.github.io/spatial-carl-amanda-tyler/new_haven_stats.parquet"
-db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)
-db.run(f"create or replace view mydata as select * from read_parquet('{parquet}');")
-llm = ChatOpenAI(model="llama3",
-                 temperature=0,
-                 api_key=st.secrets["LITELLM_KEY"],
-                 base_url = "https://llm.nrp-nautilus.io")
-db = SQLDatabase.from_uri("duckdb:///tmp.db", view_support=True)
-# -
-from langchain_core.prompts import PromptTemplate
 template = '''
 You are a {dialect} expert. Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer to the input question.
 Always return all columns from a query (select *) unless otherwise instructed.
@@ -30,34 +30,35 @@ Pay attention to use only the column names you can see in the tables below.
 Be careful to not query for columns that do not exist.
 Also, pay attention to which column is in which table.
 Pay attention to use today() function to get the current date, if the question involves "today".
 Respond with only the SQL query to run.  Do not repeat the question or explanation. Just the raw SQL query.
 Only use the following tables:
 {table_info}
-Question: {input}
 '''
 prompt = PromptTemplate.from_template(template, partial_variables = {"dialect": "duckdb", "top_k": 10})
 chain = create_sql_query_chain(llm, db, prompt)
-# +
-#print(db.dialect)
-#print(db.get_usable_table_names())
-#chain.get_prompts()[0].pretty_print()
-# -
-response = chain.invoke({"question": "what is the mean ndvi by grade?"})
-response
-# +
-# use the response in a query
-import ibis
-from ibis import _
-con = ibis.duckdb.connect()
-tbl = con.read_parquet(parquet, "mydata")
-tbl.sql(response).execute()

 import streamlit as st
+st.title("SQL demo")
+parquet = st.text_input("parquet file:", "https://espm-157-f24.github.io/spatial-carl-amanda-tyler/new_haven_stats.parquet")
+# create sharable low-level connection, see: https://github.com/Mause/duckdb_engine
+import sqlalchemy
+eng = sqlalchemy.create_engine("duckdb:///:memory:")
+# ibis can talk to this connection and create the VIEW
+import ibis
+from ibis import _
+con = ibis.duckdb.from_connection(eng.raw_connection())
+tbl = con.read_parquet(parquet, "mydata")
+# langchain can also talk to this connection and see the table:
+from langchain_community.utilities import SQLDatabase
+db = SQLDatabase(eng, view_support=True)
+#db.run(f"create or replace view mydata as select * from read_parquet('{parquet}');")
+#print(db.get_usable_table_names())
+# Build the template for system prompt
 template = '''
 You are a {dialect} expert. Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer to the input question.
 Always return all columns from a query (select *) unless otherwise instructed.
 Be careful to not query for columns that do not exist.
 Also, pay attention to which column is in which table.
 Pay attention to use today() function to get the current date, if the question involves "today".
 Respond with only the SQL query to run.  Do not repeat the question or explanation. Just the raw SQL query.
 Only use the following tables:
 {table_info}
+Question: {input}
 '''
+from langchain_core.prompts import PromptTemplate
 prompt = PromptTemplate.from_template(template, partial_variables = {"dialect": "duckdb", "top_k": 10})
+# Now we are ready to create our model and start querying!
+from langchain_openai import ChatOpenAI
+llm = ChatOpenAI(model="gorilla", # Try: llama3, gorilla, or groq-tools, or other models
+                 temperature=0,
+                 api_key=st.secrets["LITELLM_KEY"],
+                 base_url = "https://llm.nrp-nautilus.io")
+from langchain.chains import create_sql_query_chain
 chain = create_sql_query_chain(llm, db, prompt)
+prompt = st.chat_input("What is the mean ndvi by grade?")
+if prompt:
+    response = chain.invoke({"question": prompt})
+    with st.chat_message("ai"):
+        st.write(response)
+        df = tbl.sql(response).head(10).execute()
+        df