Spaces:

ramisn
/

AutomatiX-Depot

Running

App Files Files Community

ramisn commited on Feb 4

Commit

81149e0

verified ·

1 Parent(s): 823be3f

Updated

Browse files

Files changed (1) hide show

app.py +253 -253

app.py CHANGED Viewed

@@ -1,254 +1,254 @@
-import streamlit as st
-import os
-import pandas as pd
-import plotly.express as px
-import ast
-import google.generativeai as genai
-from io import StringIO
-from dotenv import load_dotenv
-# Load environment variables
-load_dotenv()
-# Configure Genai Key
-# genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
-genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
-# Function to load Google Gemini Model and provide queries as response
-def get_gemini_response(question, prompt):
-    model = genai.GenerativeModel('gemini-pro')
-    response = model.generate_content([prompt[0], question])
-    return response.text.strip()
-# Function to load data from CSV
-@st.cache_data
-def load_data():
-    # This is a sample CSV content. In practice, you'd read this from a file.
-    csv_content = """
-id,product_name,category,price,stock_quantity,supplier,last_restock_date
-1,Cotton T-Shirt,Clothing,19.99,100,FashionCo,2024-03-01
-2,Denim Jeans,Clothing,49.99,75,DenimWorld,2024-02-15
-3,Running Shoes,Footwear,79.99,50,SportyFeet,2024-03-10
-4,Leather Wallet,Accessories,29.99,30,LeatherCrafts,2024-01-20
-5,Smartphone Case,Electronics,14.99,200,TechProtect,2024-03-05
-6,Coffee Maker,Appliances,89.99,25,KitchenTech,2024-02-28
-7,Yoga Mat,Sports,24.99,40,YogaEssentials,2024-03-15
-8,Backpack,Bags,39.99,60,TravelGear,2024-02-10
-9,Sunglasses,Accessories,59.99,35,ShadesMaster,2024-03-20
-10,Bluetooth Speaker,Electronics,69.99,45,SoundWave,2024-01-30
-"""
-    df = pd.read_csv(StringIO(csv_content))
-    df['price'] = pd.to_numeric(df['price'], errors='coerce')
-    df['last_restock_date'] = pd.to_datetime(df['last_restock_date'], errors='coerce')
-    return df
-# # Function to execute pandas query
-# def execute_pandas_query(df, query):
-#     try:
-#         # This is a very simple and unsafe way to execute queries.
-#         # In a real application, you'd need to parse the SQL and translate it to pandas operations.
-#         result = eval(f"df.{query}")
-#         return result
-#     except Exception as e:
-#         st.error(f"An error occurred: {e}")
-#         return pd.DataFrame()
-# # Define Your Prompt
-# prompt = [
-#     """
-#     You are an expert in converting English questions to pandas DataFrame operations!
-#     The DataFrame 'df' has the following columns:
-#     id, product_name, category, price, stock_quantity, supplier, last_restock_date.
-#     Examples:
-#     - How many products do we have in total?
-#       The pandas operation will be: len()
-#     - What are all the products in the Electronics category?
-#       The pandas operation will be: query("category == 'Electronics'")
-#     The pandas operation should be a valid Python expression that can be applied to a DataFrame 'df'.
-#     """
-# ]
-# Function to execute pandas query
-# def execute_pandas_query(df, query):
-#     try:
-#         # Remove any 'df.' prefixes from the query
-#         query = query.replace('df.', '')
-#         # Execute the query
-#         if query.startswith('query'):
-#             # For filtering operations
-#             result = df.query(query.split('(', 1)[1].rsplit(')', 1)[0].strip('"\''))
-#         elif query.startswith('groupby'):
-#             # For groupby operations
-#             group_col, agg_func = query.split('.', 2)[1:]
-#             result = eval(f"df.groupby('{group_col}').{agg_func}")
-#         else:
-#             # For other operations
-#             result = eval(f"df.{query}")
-#         return result
-#     except Exception as e:
-#         st.error(f"An error occurred: {e}")
-#         return pd.DataFrame()
-# # Define Your Prompt
-# prompt = [
-#     """
-#     You are an expert in converting English questions to pandas DataFrame operations!
-#     The DataFrame 'df' has the following columns:
-#     id, product_name, category, price, stock_quantity, supplier, last_restock_date.
-#     Examples:
-#     - How many products do we have in total?
-#       The pandas operation will be: shape[0]
-#     - What are all the products in the Electronics category?
-#       The pandas operation will be: query("category == 'Electronics'")
-#     - What is the average price of products in each category?
-#       The pandas operation will be: groupby('category').mean()['price']
-#     The pandas operation should be a valid Python expression that can be applied to a DataFrame without the 'df.' prefix.
-#     """
-# ]
-# Function to safely evaluate a string as a Python expression
-def safe_eval(expr, df):
-    try:
-        # Parse the expression
-        parsed = ast.parse(expr, mode='eval')
-        # Define allowed names
-        allowed_names = {
-            'df': df,
-            'query': df.query,
-            'groupby': df.groupby,
-            'mean': pd.DataFrame.mean,
-            'sum': pd.DataFrame.sum,
-            'count': pd.DataFrame.count,
-            'max': pd.DataFrame.max,
-            'min': pd.DataFrame.min
-        }
-        # Evaluate the expression
-        return eval(compile(parsed, '<string>', 'eval'), allowed_names)
-    except Exception as e:
-        st.error(f"Error in query execution: {e}")
-        return pd.DataFrame()
-# Function to execute pandas query
-def execute_pandas_query(df, query):
-    try:
-        # Remove any 'df.' prefixes from the query
-        query = query.replace('df.', '')
-        # Execute the query
-        result = safe_eval(query, df)
-        # Convert result to DataFrame if it's not already
-        if not isinstance(result, pd.DataFrame):
-            if isinstance(result, pd.Series):
-                result = result.to_frame()
-            else:
-                result = pd.DataFrame({'Result': [result]})
-        return result
-    except Exception as e:
-        st.error(f"An error occurred: {e}")
-        return pd.DataFrame()
-# Define Your Prompt
-prompt = [
-    """
-    You are an expert in converting English questions to pandas DataFrame operations!
-    The DataFrame 'df' has the following columns:
-    id, product_name, category, price, stock_quantity, supplier, last_restock_date.
-    Examples:
-    - How many products do we have in total?
-      The pandas operation will be: len(df)
-    - What are all the products in the Electronics category?
-      The pandas operation will be: df.query("category == 'Electronics'")
-    - What is the average price of products in each category?
-      The pandas operation will be: df.groupby('category')['price'].mean()
-    The pandas operation should be a valid Python expression that can be applied to a DataFrame named 'df'.
-    Always include 'df.' at the beginning of your operations unless you're using a function like len().
-    """
-]
-# Streamlit App
-st.set_page_config(page_title="AutomatiX - Department Store Analytics", layout="wide")
-# Load data
-df = load_data()
-# Sidebar for user input
-st.sidebar.title("AutomatiX - Department Store Chat Interface")
-question = st.sidebar.text_area("Enter your question:", key="input")
-submit = st.sidebar.button("Ask Me")
-# Main content area
-st.title("AutomatiX - Department Store Dashboard")
-if submit:
-    with st.spinner("Generating and Fetching the data..."):
-        pandas_query = get_gemini_response(question, prompt)
-        # st.code(pandas_query, language="python")
-        result_df = execute_pandas_query(df, pandas_query)
-        if not result_df.empty:
-            st.success("Query executed successfully!")
-            # Display data in a table
-            st.subheader("Data Table")
-            st.dataframe(result_df)
-            # # Create visualizations based on the data
-            st.subheader("Data Visualizations")
-            col1, col2 = st.columns(2)
-            with col1:
-                if 'price' in result_df.columns and result_df['price'].notna().any():
-                    fig = px.histogram(result_df, x='price', title='Price Distribution')
-                    st.plotly_chart(fig, use_container_width=True)
-                if 'category' in result_df.columns:
-                    category_counts = result_df['category'].value_counts()
-                    fig = px.pie(values=category_counts.values, names=category_counts.index, title='Products by Category')
-                    st.plotly_chart(fig, use_container_width=True)
-            with col2:
-                if 'last_restock_date' in result_df.columns:
-                    result_df['restock_month'] = result_df['last_restock_date'].dt.to_period('M')
-                    restock_counts = result_df['restock_month'].value_counts().sort_index()
-                    fig = px.line(x=restock_counts.index.astype(str), y=restock_counts.values, title='Restocking Trend')
-                    st.plotly_chart(fig, use_container_width=True)
-                if 'product_name' in result_df.columns and 'price' in result_df.columns and result_df['price'].notna().any():
-                    top_prices = result_df.sort_values('price', ascending=False).head(10)
-                    fig = px.bar(top_prices, x='product_name', y='price', title='Top 10 Most Expensive Products')
-                    st.plotly_chart(fig, use_container_width=True)
-        else:
-            st.warning("No data returned from the query.")
-else:
-    st.info("Enter a question and click 'Ask Me' to get started!")
-# Footer
-st.sidebar.markdown("---")
-st.sidebar.subheader("Example Queries")
-st.sidebar.info("""
-Try these example queries to explore the dashboard:
-1. What are the top 5 most expensive products in the Electronics category?
-2. What is the average price and total stock for each category?
-3. List the all the products?
-4. What is the distribution of prices for products supplied by each supplier?
-5. Which products have a stock quantity less than 30?
-Feel free to modify these queries or ask your own questions!
-""")
 st.sidebar.warning("© AutomatiX - Powered by Streamlit and Google Gemini")

+import streamlit as st
+import os
+import pandas as pd
+import plotly.express as px
+import ast
+import google.generativeai as genai
+from io import StringIO
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+# Configure Genai Key
+# genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
+genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
+# Function to load Google Gemini Model and provide queries as response
+def get_gemini_response(question, prompt):
+    model = genai.GenerativeModel('gemini-pro')
+    response = model.generate_content([prompt[0], question])
+    return response.text.strip()
+# Function to load data from CSV
+@st.cache_data
+def load_data():
+    # This is a sample CSV content. In practice, you'd read this from a file.
+    csv_content = """
+id,product_name,category,price,stock_quantity,supplier,last_restock_date
+1,Cotton T-Shirt,Clothing,19.99,100,FashionCo,2024-03-01
+2,Denim Jeans,Clothing,49.99,75,DenimWorld,2024-02-15
+3,Running Shoes,Footwear,79.99,50,SportyFeet,2024-03-10
+4,Leather Wallet,Accessories,29.99,30,LeatherCrafts,2024-01-20
+5,Smartphone Case,Electronics,14.99,200,TechProtect,2024-03-05
+6,Coffee Maker,Appliances,89.99,25,KitchenTech,2024-02-28
+7,Yoga Mat,Sports,24.99,40,YogaEssentials,2024-03-15
+8,Backpack,Bags,39.99,60,TravelGear,2024-02-10
+9,Sunglasses,Accessories,59.99,35,ShadesMaster,2024-03-20
+10,Bluetooth Speaker,Electronics,69.99,45,SoundWave,2024-01-30
+"""
+    df = pd.read_csv(StringIO(csv_content))
+    df['price'] = pd.to_numeric(df['price'], errors='coerce')
+    df['last_restock_date'] = pd.to_datetime(df['last_restock_date'], errors='coerce')
+    return df
+# # Function to execute pandas query
+# def execute_pandas_query(df, query):
+#     try:
+#         # This is a very simple and unsafe way to execute queries.
+#         # In a real application, you'd need to parse the SQL and translate it to pandas operations.
+#         result = eval(f"df.{query}")
+#         return result
+#     except Exception as e:
+#         st.error(f"An error occurred: {e}")
+#         return pd.DataFrame()
+# # Define Your Prompt
+# prompt = [
+#     """
+#     You are an expert in converting English questions to pandas DataFrame operations!
+#     The DataFrame 'df' has the following columns:
+#     id, product_name, category, price, stock_quantity, supplier, last_restock_date.
+#     Examples:
+#     - How many products do we have in total?
+#       The pandas operation will be: len()
+#     - What are all the products in the Electronics category?
+#       The pandas operation will be: query("category == 'Electronics'")
+#     The pandas operation should be a valid Python expression that can be applied to a DataFrame 'df'.
+#     """
+# ]
+# Function to execute pandas query
+# def execute_pandas_query(df, query):
+#     try:
+#         # Remove any 'df.' prefixes from the query
+#         query = query.replace('df.', '')
+#         # Execute the query
+#         if query.startswith('query'):
+#             # For filtering operations
+#             result = df.query(query.split('(', 1)[1].rsplit(')', 1)[0].strip('"\''))
+#         elif query.startswith('groupby'):
+#             # For groupby operations
+#             group_col, agg_func = query.split('.', 2)[1:]
+#             result = eval(f"df.groupby('{group_col}').{agg_func}")
+#         else:
+#             # For other operations
+#             result = eval(f"df.{query}")
+#         return result
+#     except Exception as e:
+#         st.error(f"An error occurred: {e}")
+#         return pd.DataFrame()
+# # Define Your Prompt
+# prompt = [
+#     """
+#     You are an expert in converting English questions to pandas DataFrame operations!
+#     The DataFrame 'df' has the following columns:
+#     id, product_name, category, price, stock_quantity, supplier, last_restock_date.
+#     Examples:
+#     - How many products do we have in total?
+#       The pandas operation will be: shape[0]
+#     - What are all the products in the Electronics category?
+#       The pandas operation will be: query("category == 'Electronics'")
+#     - What is the average price of products in each category?
+#       The pandas operation will be: groupby('category').mean()['price']
+#     The pandas operation should be a valid Python expression that can be applied to a DataFrame without the 'df.' prefix.
+#     """
+# ]
+# Function to safely evaluate a string as a Python expression
+def safe_eval(expr, df):
+    try:
+        # Parse the expression
+        parsed = ast.parse(expr, mode='eval')
+        # Define allowed names
+        allowed_names = {
+            'df': df,
+            'query': df.query,
+            'groupby': df.groupby,
+            'mean': pd.DataFrame.mean,
+            'sum': pd.DataFrame.sum,
+            'count': pd.DataFrame.count,
+            'max': pd.DataFrame.max,
+            'min': pd.DataFrame.min
+        }
+        # Evaluate the expression
+        return eval(compile(parsed, '<string>', 'eval'), allowed_names)
+    except Exception as e:
+        st.error(f"Error in query execution: {e}")
+        return pd.DataFrame()
+# Function to execute pandas query
+def execute_pandas_query(df, query):
+    try:
+        # Remove any 'df.' prefixes from the query
+        query = query.replace('df.', '')
+        # Execute the query
+        result = safe_eval(query, df)
+        # Convert result to DataFrame if it's not already
+        if not isinstance(result, pd.DataFrame):
+            if isinstance(result, pd.Series):
+                result = result.to_frame()
+            else:
+                result = pd.DataFrame({'Result': [result]})
+        return result
+    except Exception as e:
+        st.error(f"An error occurred: {e}")
+        return pd.DataFrame()
+# Define Your Prompt
+prompt = [
+    """
+    You are an expert in converting English questions to pandas DataFrame operations!
+    The DataFrame 'df' has the following columns:
+    id, product_name, category, price, stock_quantity, supplier, last_restock_date.
+    Examples:
+    - How many products do we have in total?
+      The pandas operation will be: len(df)
+    - What are all the products in the Electronics category?
+      The pandas operation will be: df.query("category == 'Electronics'")
+    - What is the average price of products in each category?
+      The pandas operation will be: df.groupby('category')['price'].mean()
+    The pandas operation should be a valid Python expression that can be applied to a DataFrame named 'df'.
+    Always include 'df.' at the beginning of your operations unless you're using a function like len().
+    """
+]
+# Streamlit App
+st.set_page_config(page_title="AutomatiX - Department Store Analytics", layout="wide")
+# Load data
+df = load_data()
+# Sidebar for user input
+st.sidebar.title("Swetha-Manisha-Kavya- PAVINAYA- AutomatiX - Department Store Chat Interface")
+question = st.sidebar.text_area("Enter your question:", key="input")
+submit = st.sidebar.button("Ask Me")
+# Main content area
+st.title("AutomatiX - Department Store Dashboard")
+if submit:
+    with st.spinner("Generating and Fetching the data..."):
+        pandas_query = get_gemini_response(question, prompt)
+        # st.code(pandas_query, language="python")
+        result_df = execute_pandas_query(df, pandas_query)
+        if not result_df.empty:
+            st.success("Query executed successfully!")
+            # Display data in a table
+            st.subheader("Data Table")
+            st.dataframe(result_df)
+            # # Create visualizations based on the data
+            st.subheader("Data Visualizations")
+            col1, col2 = st.columns(2)
+            with col1:
+                if 'price' in result_df.columns and result_df['price'].notna().any():
+                    fig = px.histogram(result_df, x='price', title='Price Distribution')
+                    st.plotly_chart(fig, use_container_width=True)
+                if 'category' in result_df.columns:
+                    category_counts = result_df['category'].value_counts()
+                    fig = px.pie(values=category_counts.values, names=category_counts.index, title='Products by Category')
+                    st.plotly_chart(fig, use_container_width=True)
+            with col2:
+                if 'last_restock_date' in result_df.columns:
+                    result_df['restock_month'] = result_df['last_restock_date'].dt.to_period('M')
+                    restock_counts = result_df['restock_month'].value_counts().sort_index()
+                    fig = px.line(x=restock_counts.index.astype(str), y=restock_counts.values, title='Restocking Trend')
+                    st.plotly_chart(fig, use_container_width=True)
+                if 'product_name' in result_df.columns and 'price' in result_df.columns and result_df['price'].notna().any():
+                    top_prices = result_df.sort_values('price', ascending=False).head(10)
+                    fig = px.bar(top_prices, x='product_name', y='price', title='Top 10 Most Expensive Products')
+                    st.plotly_chart(fig, use_container_width=True)
+        else:
+            st.warning("No data returned from the query.")
+else:
+    st.info("Enter a question and click 'Ask Me' to get started!")
+# Footer
+st.sidebar.markdown("---")
+st.sidebar.subheader("Example Queries")
+st.sidebar.info("""
+Try these example queries to explore the dashboard:
+1. What are the top 5 most expensive products in the Electronics category?
+2. What is the average price and total stock for each category?
+3. List the all the products?
+4. What is the distribution of prices for products supplied by each supplier?
+5. Which products have a stock quantity less than 30?
+Feel free to modify these queries or ask your own questions!
+""")
 st.sidebar.warning("© AutomatiX - Powered by Streamlit and Google Gemini")