ramisn committed on
Commit
81149e0
·
verified ·
1 Parent(s): 823be3f
Files changed (1) hide show
  1. app.py +253 -253
app.py CHANGED
@@ -1,254 +1,254 @@
1
- import streamlit as st
2
- import os
3
- import pandas as pd
4
- import plotly.express as px
5
- import ast
6
- import google.generativeai as genai
7
- from io import StringIO
8
- from dotenv import load_dotenv
9
-
10
- # Load environment variables
11
- load_dotenv()
12
-
13
- # Configure Genai Key
14
- # genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
15
- genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
16
-
17
- # Function to load Google Gemini Model and provide queries as response
18
- def get_gemini_response(question, prompt):
19
- model = genai.GenerativeModel('gemini-pro')
20
- response = model.generate_content([prompt[0], question])
21
- return response.text.strip()
22
-
23
- # Function to load data from CSV
24
- @st.cache_data
25
- def load_data():
26
- # This is a sample CSV content. In practice, you'd read this from a file.
27
- csv_content = """
28
- id,product_name,category,price,stock_quantity,supplier,last_restock_date
29
- 1,Cotton T-Shirt,Clothing,19.99,100,FashionCo,2024-03-01
30
- 2,Denim Jeans,Clothing,49.99,75,DenimWorld,2024-02-15
31
- 3,Running Shoes,Footwear,79.99,50,SportyFeet,2024-03-10
32
- 4,Leather Wallet,Accessories,29.99,30,LeatherCrafts,2024-01-20
33
- 5,Smartphone Case,Electronics,14.99,200,TechProtect,2024-03-05
34
- 6,Coffee Maker,Appliances,89.99,25,KitchenTech,2024-02-28
35
- 7,Yoga Mat,Sports,24.99,40,YogaEssentials,2024-03-15
36
- 8,Backpack,Bags,39.99,60,TravelGear,2024-02-10
37
- 9,Sunglasses,Accessories,59.99,35,ShadesMaster,2024-03-20
38
- 10,Bluetooth Speaker,Electronics,69.99,45,SoundWave,2024-01-30
39
- """
40
- df = pd.read_csv(StringIO(csv_content))
41
- df['price'] = pd.to_numeric(df['price'], errors='coerce')
42
- df['last_restock_date'] = pd.to_datetime(df['last_restock_date'], errors='coerce')
43
- return df
44
-
45
- # # Function to execute pandas query
46
- # def execute_pandas_query(df, query):
47
- # try:
48
- # # This is a very simple and unsafe way to execute queries.
49
- # # In a real application, you'd need to parse the SQL and translate it to pandas operations.
50
- # result = eval(f"df.{query}")
51
- # return result
52
- # except Exception as e:
53
- # st.error(f"An error occurred: {e}")
54
- # return pd.DataFrame()
55
-
56
- # # Define Your Prompt
57
- # prompt = [
58
- # """
59
- # You are an expert in converting English questions to pandas DataFrame operations!
60
- # The DataFrame 'df' has the following columns:
61
- # id, product_name, category, price, stock_quantity, supplier, last_restock_date.
62
-
63
- # Examples:
64
- # - How many products do we have in total?
65
- # The pandas operation will be: len()
66
- # - What are all the products in the Electronics category?
67
- # The pandas operation will be: query("category == 'Electronics'")
68
-
69
- # The pandas operation should be a valid Python expression that can be applied to a DataFrame 'df'.
70
- # """
71
- # ]
72
-
73
- # Function to execute pandas query
74
- # def execute_pandas_query(df, query):
75
- # try:
76
- # # Remove any 'df.' prefixes from the query
77
- # query = query.replace('df.', '')
78
-
79
- # # Execute the query
80
- # if query.startswith('query'):
81
- # # For filtering operations
82
- # result = df.query(query.split('(', 1)[1].rsplit(')', 1)[0].strip('"\''))
83
- # elif query.startswith('groupby'):
84
- # # For groupby operations
85
- # group_col, agg_func = query.split('.', 2)[1:]
86
- # result = eval(f"df.groupby('{group_col}').{agg_func}")
87
- # else:
88
- # # For other operations
89
- # result = eval(f"df.{query}")
90
-
91
- # return result
92
- # except Exception as e:
93
- # st.error(f"An error occurred: {e}")
94
- # return pd.DataFrame()
95
-
96
- # # Define Your Prompt
97
- # prompt = [
98
- # """
99
- # You are an expert in converting English questions to pandas DataFrame operations!
100
- # The DataFrame 'df' has the following columns:
101
- # id, product_name, category, price, stock_quantity, supplier, last_restock_date.
102
-
103
- # Examples:
104
- # - How many products do we have in total?
105
- # The pandas operation will be: shape[0]
106
- # - What are all the products in the Electronics category?
107
- # The pandas operation will be: query("category == 'Electronics'")
108
- # - What is the average price of products in each category?
109
- # The pandas operation will be: groupby('category').mean()['price']
110
-
111
- # The pandas operation should be a valid Python expression that can be applied to a DataFrame without the 'df.' prefix.
112
- # """
113
- # ]
114
-
115
- # Function to safely evaluate a string as a Python expression
116
- def safe_eval(expr, df):
117
- try:
118
- # Parse the expression
119
- parsed = ast.parse(expr, mode='eval')
120
-
121
- # Define allowed names
122
- allowed_names = {
123
- 'df': df,
124
- 'query': df.query,
125
- 'groupby': df.groupby,
126
- 'mean': pd.DataFrame.mean,
127
- 'sum': pd.DataFrame.sum,
128
- 'count': pd.DataFrame.count,
129
- 'max': pd.DataFrame.max,
130
- 'min': pd.DataFrame.min
131
- }
132
-
133
- # Evaluate the expression
134
- return eval(compile(parsed, '<string>', 'eval'), allowed_names)
135
- except Exception as e:
136
- st.error(f"Error in query execution: {e}")
137
- return pd.DataFrame()
138
-
139
- # Function to execute pandas query
140
- def execute_pandas_query(df, query):
141
- try:
142
- # Remove any 'df.' prefixes from the query
143
- query = query.replace('df.', '')
144
-
145
- # Execute the query
146
- result = safe_eval(query, df)
147
-
148
- # Convert result to DataFrame if it's not already
149
- if not isinstance(result, pd.DataFrame):
150
- if isinstance(result, pd.Series):
151
- result = result.to_frame()
152
- else:
153
- result = pd.DataFrame({'Result': [result]})
154
-
155
- return result
156
- except Exception as e:
157
- st.error(f"An error occurred: {e}")
158
- return pd.DataFrame()
159
-
160
- # Define Your Prompt
161
- prompt = [
162
- """
163
- You are an expert in converting English questions to pandas DataFrame operations!
164
- The DataFrame 'df' has the following columns:
165
- id, product_name, category, price, stock_quantity, supplier, last_restock_date.
166
-
167
- Examples:
168
- - How many products do we have in total?
169
- The pandas operation will be: len(df)
170
- - What are all the products in the Electronics category?
171
- The pandas operation will be: df.query("category == 'Electronics'")
172
- - What is the average price of products in each category?
173
- The pandas operation will be: df.groupby('category')['price'].mean()
174
-
175
- The pandas operation should be a valid Python expression that can be applied to a DataFrame named 'df'.
176
- Always include 'df.' at the beginning of your operations unless you're using a function like len().
177
- """
178
- ]
179
-
180
- # Streamlit App
181
- st.set_page_config(page_title="AutomatiX - Department Store Analytics", layout="wide")
182
-
183
- # Load data
184
- df = load_data()
185
-
186
- # Sidebar for user input
187
- st.sidebar.title("AutomatiX - Department Store Chat Interface")
188
- question = st.sidebar.text_area("Enter your question:", key="input")
189
- submit = st.sidebar.button("Ask Me")
190
-
191
- # Main content area
192
- st.title("AutomatiX - Department Store Dashboard")
193
-
194
- if submit:
195
- with st.spinner("Generating and Fetching the data..."):
196
- pandas_query = get_gemini_response(question, prompt)
197
- # st.code(pandas_query, language="python")
198
-
199
- result_df = execute_pandas_query(df, pandas_query)
200
-
201
- if not result_df.empty:
202
- st.success("Query executed successfully!")
203
-
204
- # Display data in a table
205
- st.subheader("Data Table")
206
- st.dataframe(result_df)
207
-
208
- # # Create visualizations based on the data
209
- st.subheader("Data Visualizations")
210
-
211
- col1, col2 = st.columns(2)
212
-
213
- with col1:
214
- if 'price' in result_df.columns and result_df['price'].notna().any():
215
- fig = px.histogram(result_df, x='price', title='Price Distribution')
216
- st.plotly_chart(fig, use_container_width=True)
217
-
218
- if 'category' in result_df.columns:
219
- category_counts = result_df['category'].value_counts()
220
- fig = px.pie(values=category_counts.values, names=category_counts.index, title='Products by Category')
221
- st.plotly_chart(fig, use_container_width=True)
222
-
223
- with col2:
224
- if 'last_restock_date' in result_df.columns:
225
- result_df['restock_month'] = result_df['last_restock_date'].dt.to_period('M')
226
- restock_counts = result_df['restock_month'].value_counts().sort_index()
227
- fig = px.line(x=restock_counts.index.astype(str), y=restock_counts.values, title='Restocking Trend')
228
- st.plotly_chart(fig, use_container_width=True)
229
-
230
- if 'product_name' in result_df.columns and 'price' in result_df.columns and result_df['price'].notna().any():
231
- top_prices = result_df.sort_values('price', ascending=False).head(10)
232
- fig = px.bar(top_prices, x='product_name', y='price', title='Top 10 Most Expensive Products')
233
- st.plotly_chart(fig, use_container_width=True)
234
- else:
235
- st.warning("No data returned from the query.")
236
-
237
- else:
238
- st.info("Enter a question and click 'Ask Me' to get started!")
239
-
240
- # Footer
241
- st.sidebar.markdown("---")
242
- st.sidebar.subheader("Example Queries")
243
- st.sidebar.info("""
244
- Try these example queries to explore the dashboard:
245
-
246
- 1. What are the top 5 most expensive products in the Electronics category?
247
- 2. What is the average price and total stock for each category?
248
- 3. List the all the products?
249
- 4. What is the distribution of prices for products supplied by each supplier?
250
- 5. Which products have a stock quantity less than 30?
251
-
252
- Feel free to modify these queries or ask your own questions!
253
- """)
254
  st.sidebar.warning("© AutomatiX - Powered by Streamlit and Google Gemini")
 
1
+ import streamlit as st
2
+ import os
3
+ import pandas as pd
4
+ import plotly.express as px
5
+ import ast
6
+ import google.generativeai as genai
7
+ from io import StringIO
8
+ from dotenv import load_dotenv
9
+
10
+ # Load environment variables
11
+ load_dotenv()
12
+
13
+ # Configure Genai Key
14
+ # genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
15
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
16
+
17
+ # Function to load Google Gemini Model and provide queries as response
18
def get_gemini_response(question, prompt):
    """Send the instruction prompt and the user's question to Gemini.

    Args:
        question: The user's question text.
        prompt: Indexable collection whose first element is the instruction
            text placed ahead of the question.

    Returns:
        The text of the model's reply with leading and trailing whitespace
        removed.
    """
    gemini = genai.GenerativeModel('gemini-pro')
    # Only the first prompt entry is used; the question follows it.
    reply = gemini.generate_content([prompt[0], question])
    return reply.text.strip()
22
+
23
+ # Function to load data from CSV
24
@st.cache_data
def load_data():
    """Build the sample product inventory as a DataFrame.

    The data is an embedded CSV sample (ten products). After parsing,
    ``price`` is converted with ``pd.to_numeric`` and ``last_restock_date``
    with ``pd.to_datetime``; both use ``errors='coerce'`` so unparseable
    values become missing values instead of raising.

    Returns:
        A DataFrame with the columns id, product_name, category, price,
        stock_quantity, supplier and last_restock_date.
    """
    # This is a sample CSV content. In practice, you'd read this from a file.
    sample_csv = """
id,product_name,category,price,stock_quantity,supplier,last_restock_date
1,Cotton T-Shirt,Clothing,19.99,100,FashionCo,2024-03-01
2,Denim Jeans,Clothing,49.99,75,DenimWorld,2024-02-15
3,Running Shoes,Footwear,79.99,50,SportyFeet,2024-03-10
4,Leather Wallet,Accessories,29.99,30,LeatherCrafts,2024-01-20
5,Smartphone Case,Electronics,14.99,200,TechProtect,2024-03-05
6,Coffee Maker,Appliances,89.99,25,KitchenTech,2024-02-28
7,Yoga Mat,Sports,24.99,40,YogaEssentials,2024-03-15
8,Backpack,Bags,39.99,60,TravelGear,2024-02-10
9,Sunglasses,Accessories,59.99,35,ShadesMaster,2024-03-20
10,Bluetooth Speaker,Electronics,69.99,45,SoundWave,2024-01-30
"""
    raw = pd.read_csv(StringIO(sample_csv))
    # Replace the two typed columns in one step; all other columns pass through.
    return raw.assign(
        price=pd.to_numeric(raw['price'], errors='coerce'),
        last_restock_date=pd.to_datetime(raw['last_restock_date'], errors='coerce'),
    )
44
+
45
+ # # Function to execute pandas query
46
+ # def execute_pandas_query(df, query):
47
+ # try:
48
+ # # This is a very simple and unsafe way to execute queries.
49
+ # # In a real application, you'd need to parse the SQL and translate it to pandas operations.
50
+ # result = eval(f"df.{query}")
51
+ # return result
52
+ # except Exception as e:
53
+ # st.error(f"An error occurred: {e}")
54
+ # return pd.DataFrame()
55
+
56
+ # # Define Your Prompt
57
+ # prompt = [
58
+ # """
59
+ # You are an expert in converting English questions to pandas DataFrame operations!
60
+ # The DataFrame 'df' has the following columns:
61
+ # id, product_name, category, price, stock_quantity, supplier, last_restock_date.
62
+
63
+ # Examples:
64
+ # - How many products do we have in total?
65
+ # The pandas operation will be: len()
66
+ # - What are all the products in the Electronics category?
67
+ # The pandas operation will be: query("category == 'Electronics'")
68
+
69
+ # The pandas operation should be a valid Python expression that can be applied to a DataFrame 'df'.
70
+ # """
71
+ # ]
72
+
73
+ # Function to execute pandas query
74
+ # def execute_pandas_query(df, query):
75
+ # try:
76
+ # # Remove any 'df.' prefixes from the query
77
+ # query = query.replace('df.', '')
78
+
79
+ # # Execute the query
80
+ # if query.startswith('query'):
81
+ # # For filtering operations
82
+ # result = df.query(query.split('(', 1)[1].rsplit(')', 1)[0].strip('"\''))
83
+ # elif query.startswith('groupby'):
84
+ # # For groupby operations
85
+ # group_col, agg_func = query.split('.', 2)[1:]
86
+ # result = eval(f"df.groupby('{group_col}').{agg_func}")
87
+ # else:
88
+ # # For other operations
89
+ # result = eval(f"df.{query}")
90
+
91
+ # return result
92
+ # except Exception as e:
93
+ # st.error(f"An error occurred: {e}")
94
+ # return pd.DataFrame()
95
+
96
+ # # Define Your Prompt
97
+ # prompt = [
98
+ # """
99
+ # You are an expert in converting English questions to pandas DataFrame operations!
100
+ # The DataFrame 'df' has the following columns:
101
+ # id, product_name, category, price, stock_quantity, supplier, last_restock_date.
102
+
103
+ # Examples:
104
+ # - How many products do we have in total?
105
+ # The pandas operation will be: shape[0]
106
+ # - What are all the products in the Electronics category?
107
+ # The pandas operation will be: query("category == 'Electronics'")
108
+ # - What is the average price of products in each category?
109
+ # The pandas operation will be: groupby('category').mean()['price']
110
+
111
+ # The pandas operation should be a valid Python expression that can be applied to a DataFrame without the 'df.' prefix.
112
+ # """
113
+ # ]
114
+
115
+ # Function to safely evaluate a string as a Python expression
116
# Builtins exposed to model-generated expressions. Anything not listed here
# (notably __import__, open, eval, exec, getattr) is unavailable to them.
_SAFE_BUILTINS = {
    'abs': abs, 'all': all, 'any': any, 'bool': bool, 'dict': dict,
    'float': float, 'int': int, 'len': len, 'list': list, 'range': range,
    'round': round, 'set': set, 'sorted': sorted, 'str': str, 'tuple': tuple,
}


def _reject_dunder_access(tree):
    """Raise ValueError if *tree* uses any double-underscore name or attribute."""
    for node in ast.walk(tree):
        if isinstance(node, ast.Attribute):
            ident = node.attr
        elif isinstance(node, ast.Name):
            ident = node.id
        else:
            continue
        if ident.startswith('__'):
            raise ValueError(f"Use of '{ident}' is not allowed in queries")


def _extract_expression(text):
    """Return *text* without the Markdown fences/backticks LLM replies often add."""
    cleaned = text.strip()
    if cleaned.startswith('```'):
        body_lines = cleaned.splitlines()
        if len(body_lines) > 1:
            # First line is the opening fence, optionally with a language tag.
            body_lines = body_lines[1:]
            if body_lines[-1].strip() == '```':
                body_lines = body_lines[:-1]
            cleaned = '\n'.join(body_lines)
    return cleaned.strip().strip('`').strip()


# Function to evaluate a model-generated string as a restricted Python expression
def safe_eval(expr, df):
    """Evaluate a single Python expression against ``df`` with a reduced namespace.

    SECURITY NOTE: ``expr`` comes from an LLM driven by user input and is run
    with ``eval``. The namespace is limited to ``df``, a few legacy helper
    names and a small builtins whitelist, and double-underscore names and
    attributes are rejected, but this is hardening, not a sandbox (for
    example, pandas methods that read or write files remain reachable
    through ``df``). Do not expose this to untrusted users without a real
    isolation layer.

    Args:
        expr: Source text of one Python expression.
        df: DataFrame made available to the expression as ``df``.

    Returns:
        Whatever the expression evaluates to. If parsing, validation or
        evaluation fails, the error is shown with ``st.error`` and an empty
        DataFrame is returned.
    """
    try:
        # Parse first so the tree can be validated before anything executes.
        parsed = ast.parse(expr, mode='eval')
        _reject_dunder_access(parsed)

        allowed_names = {
            # Without an explicit entry, eval() injects the full builtins module.
            '__builtins__': _SAFE_BUILTINS,
            'df': df,
            # Bare helper names kept so replies written without the 'df.'
            # receiver (e.g. query("...")) continue to work.
            'query': df.query,
            'groupby': df.groupby,
            'mean': pd.DataFrame.mean,
            'sum': pd.DataFrame.sum,
            'count': pd.DataFrame.count,
            'max': pd.DataFrame.max,
            'min': pd.DataFrame.min,
        }

        return eval(compile(parsed, '<string>', 'eval'), allowed_names)
    except Exception as e:
        # Boundary handler: generated code can fail in arbitrary ways, and the
        # UI should show the message rather than crash.
        st.error(f"Error in query execution: {e}")
        return pd.DataFrame()


# Function to execute pandas query
def execute_pandas_query(df, query):
    """Run a model-generated pandas expression and return the result as a DataFrame.

    The expression is evaluated as written: the ``df.`` receiver is kept, so
    any DataFrame method (``df.sort_values``, ``df.head``, ...) works, not
    only the few names that exist as bare helpers. Markdown code fences
    around the reply are removed before evaluation.

    Args:
        df: DataFrame the expression operates on.
        query: Expression text as returned by the model.

    Returns:
        A DataFrame: the result itself if it already is one, a Series
        converted with ``to_frame()``, or any other value wrapped in a
        one-row frame under the column ``'Result'``. An empty DataFrame is
        returned when evaluation fails.
    """
    try:
        expression = _extract_expression(query)
        result = safe_eval(expression, df)

        if isinstance(result, pd.DataFrame):
            return result
        if isinstance(result, pd.Series):
            return result.to_frame()
        return pd.DataFrame({'Result': [result]})
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return pd.DataFrame()
159
+
160
+ # Define Your Prompt
161
# Instruction text sent to Gemini ahead of the user's question. Kept as a
# one-element list because get_gemini_response() reads prompt[0].
# The reply is evaluated as a single Python expression, so the prompt must
# also pin down the output format (one bare expression, nothing else).
prompt = [
    """
    You are an expert in converting English questions to pandas DataFrame operations!
    The DataFrame 'df' has the following columns:
    id, product_name, category, price, stock_quantity, supplier, last_restock_date.

    Examples:
    - How many products do we have in total?
    The pandas operation will be: len(df)
    - What are all the products in the Electronics category?
    The pandas operation will be: df.query("category == 'Electronics'")
    - What is the average price of products in each category?
    The pandas operation will be: df.groupby('category')['price'].mean()

    The pandas operation should be a valid Python expression that can be applied to a DataFrame named 'df'.
    Always include 'df.' at the beginning of your operations unless you're using a function like len().
    Respond with exactly one Python expression and nothing else: no explanation, no Markdown code fences, no assignments and no import statements.
    """
]
179
+
180
+ # Streamlit App
181
# Page setup, issued before any other Streamlit UI call in this script.
st.set_page_config(page_title="AutomatiX - Department Store Analytics", layout="wide")

# Load data
df = load_data()

# Sidebar for user input
st.sidebar.title("Swetha-Manisha-Kavya- PAVINAYA- AutomatiX - Department Store Chat Interface")
question = st.sidebar.text_area("Enter your question:", key="input")
submit = st.sidebar.button("Ask Me")

# Main content area
st.title("AutomatiX - Department Store Dashboard")

if submit and not question.strip():
    # Nothing to translate: skip the Gemini round-trip for a blank question.
    st.warning("Please enter a question before clicking 'Ask Me'.")
elif submit:
    with st.spinner("Generating and Fetching the data..."):
        pandas_query = get_gemini_response(question, prompt)
        # st.code(pandas_query, language="python")

        result_df = execute_pandas_query(df, pandas_query)

        if not result_df.empty:
            st.success("Query executed successfully!")

            # Display data in a table
            st.subheader("Data Table")
            st.dataframe(result_df)

            # Create visualizations based on the data. Each chart is drawn only
            # when the columns it needs are present in the query result.
            st.subheader("Data Visualizations")

            col1, col2 = st.columns(2)

            with col1:
                if 'price' in result_df.columns and result_df['price'].notna().any():
                    fig = px.histogram(result_df, x='price', title='Price Distribution')
                    st.plotly_chart(fig, use_container_width=True)

                if 'category' in result_df.columns:
                    category_counts = result_df['category'].value_counts()
                    fig = px.pie(values=category_counts.values, names=category_counts.index, title='Products by Category')
                    st.plotly_chart(fig, use_container_width=True)

            with col2:
                # The .dt accessor only exists on datetime-like columns; a query
                # may return this column with another dtype, so check first.
                if ('last_restock_date' in result_df.columns
                        and pd.api.types.is_datetime64_any_dtype(result_df['last_restock_date'])):
                    # Keep the month series local instead of adding a helper
                    # column to the query result.
                    restock_month = result_df['last_restock_date'].dt.to_period('M')
                    restock_counts = restock_month.value_counts().sort_index()
                    fig = px.line(x=restock_counts.index.astype(str), y=restock_counts.values, title='Restocking Trend')
                    st.plotly_chart(fig, use_container_width=True)

                if 'product_name' in result_df.columns and 'price' in result_df.columns and result_df['price'].notna().any():
                    top_prices = result_df.sort_values('price', ascending=False).head(10)
                    fig = px.bar(top_prices, x='product_name', y='price', title='Top 10 Most Expensive Products')
                    st.plotly_chart(fig, use_container_width=True)
        else:
            st.warning("No data returned from the query.")

else:
    st.info("Enter a question and click 'Ask Me' to get started!")

# Footer
st.sidebar.markdown("---")
st.sidebar.subheader("Example Queries")
st.sidebar.info("""
Try these example queries to explore the dashboard:

1. What are the top 5 most expensive products in the Electronics category?
2. What is the average price and total stock for each category?
3. List the all the products?
4. What is the distribution of prices for products supplied by each supplier?
5. Which products have a stock quantity less than 30?

Feel free to modify these queries or ask your own questions!
""")
st.sidebar.warning("© AutomatiX - Powered by Streamlit and Google Gemini")