import streamlit as st import os import pandas as pd import plotly.express as px import google.generativeai as genai from io import StringIO # Configure Genai Key genai.configure(api_key=os.environ.get("GOOGLE_API_KEY")) # Function to load Google Gemini Model and provide queries as response def get_gemini_response(question, prompt): model = genai.GenerativeModel('gemini-pro') response = model.generate_content([prompt[0], question]) return response.text.strip() # Function to load data from CSV @st.cache_data def load_data(): # This is a sample CSV content. In practice, you'd read this from a file. csv_content = """ id,product_name,category,price,stock_quantity,supplier,last_restock_date 1,Cotton T-Shirt,Clothing,19.99,100,FashionCo,2024-03-01 2,Denim Jeans,Clothing,49.99,75,DenimWorld,2024-02-15 3,Running Shoes,Footwear,79.99,50,SportyFeet,2024-03-10 4,Leather Wallet,Accessories,29.99,30,LeatherCrafts,2024-01-20 5,Smartphone Case,Electronics,14.99,200,TechProtect,2024-03-05 6,Coffee Maker,Appliances,89.99,25,KitchenTech,2024-02-28 7,Yoga Mat,Sports,24.99,40,YogaEssentials,2024-03-15 8,Backpack,Bags,39.99,60,TravelGear,2024-02-10 9,Sunglasses,Accessories,59.99,35,ShadesMaster,2024-03-20 10,Bluetooth Speaker,Electronics,69.99,45,SoundWave,2024-01-30 """ df = pd.read_csv(StringIO(csv_content)) df['price'] = pd.to_numeric(df['price'], errors='coerce') df['last_restock_date'] = pd.to_datetime(df['last_restock_date'], errors='coerce') return df # Function to execute pandas query def execute_pandas_query(df, query): try: # This is a very simple and unsafe way to execute queries. # In a real application, you'd need to parse the SQL and translate it to pandas operations. result = eval(f"df.{query}") return result except Exception as e: st.error(f"An error occurred: {e}") return pd.DataFrame() # Define Your Prompt prompt = [ """ You are an expert in converting English questions to pandas DataFrame operations! The DataFrame 'df' has the following columns: id, product_name, category, price, stock_quantity, supplier, last_restock_date. Examples: - How many products do we have in total? The pandas operation will be: len() - What are all the products in the Electronics category? The pandas operation will be: query("category == 'Electronics'") The pandas operation should be a valid Python expression that can be applied to a DataFrame 'df'. """ ] # Streamlit App st.set_page_config(page_title="Department Store Analytics", layout="wide") # Load data df = load_data() # Sidebar for user input st.sidebar.title("Department Store Query Interface") question = st.sidebar.text_area("Enter your question:", key="input") submit = st.sidebar.button("Ask Me") # Main content area st.title("Department Store Dashboard") if submit: with st.spinner("Generating query and fetching data..."): pandas_query = get_gemini_response(question, prompt) st.code(pandas_query, language="python") result_df = execute_pandas_query(df, pandas_query) if not result_df.empty: st.success("Query executed successfully!") # Display data in a table st.subheader("Data Table") st.dataframe(result_df) # Create visualizations based on the data st.subheader("Data Visualizations") col1, col2 = st.columns(2) with col1: if 'price' in result_df.columns and result_df['price'].notna().any(): fig = px.histogram(result_df, x='price', title='Price Distribution') st.plotly_chart(fig, use_container_width=True) if 'category' in result_df.columns: category_counts = result_df['category'].value_counts() fig = px.pie(values=category_counts.values, names=category_counts.index, title='Products by Category') st.plotly_chart(fig, use_container_width=True) with col2: if 'last_restock_date' in result_df.columns: result_df['restock_month'] = result_df['last_restock_date'].dt.to_period('M') restock_counts = result_df['restock_month'].value_counts().sort_index() fig = px.line(x=restock_counts.index.astype(str), y=restock_counts.values, title='Restocking Trend') st.plotly_chart(fig, use_container_width=True) if 'product_name' in result_df.columns and 'price' in result_df.columns and result_df['price'].notna().any(): top_prices = result_df.sort_values('price', ascending=False).head(10) fig = px.bar(top_prices, x='product_name', y='price', title='Top 10 Most Expensive Products') st.plotly_chart(fig, use_container_width=True) else: st.warning("No data returned from the query.") else: st.info("Enter a question and click 'Ask Me' to get started!") # Footer st.sidebar.markdown("---") st.sidebar.warning("AutomatiX - Department Store Analytics - Powered by Streamlit and Google Gemini")