Spaces:

girishwangikar
/

SmolAgents_DA

Running

App Files Community

girishwangikar committed on Jan 7

Commit

0e7b5e0

verified ·

1 Parent(s): 1b4aded

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -83

app.py CHANGED Viewed

@@ -12,7 +12,6 @@ import tempfile
 import base64
 import io
-# Custom Groq Model Class remains unchanged
 class GroqModel:
     def __init__(self, model_name="llama2-70b-4096"):
         self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
@@ -29,15 +28,10 @@ class GroqModel:
         )
         return response.choices[0].message.content
 @tool
 def analyze_basic_stats(data: pd.DataFrame) -> str:
-    """Calculate basic statistics for numerical columns.
-    Args:
-        data: Input DataFrame
-    Returns:
-        String containing formatted basic statistics
-    """
     stats = {}
     numeric_cols = data.select_dtypes(include=[np.number]).columns
@@ -54,13 +48,7 @@ def analyze_basic_stats(data: pd.DataFrame) -> str:
 @tool
 def generate_correlation_matrix(data: pd.DataFrame) -> str:
-    """Generate correlation matrix visualization for numerical columns.
-    Args:
-        data: Input DataFrame
-    Returns:
-        Base64 encoded string of correlation matrix plot
-    """
     numeric_data = data.select_dtypes(include=[np.number])
     plt.figure(figsize=(10, 8))
@@ -74,13 +62,7 @@ def generate_correlation_matrix(data: pd.DataFrame) -> str:
 @tool
 def analyze_categorical_columns(data: pd.DataFrame) -> str:
-    """Analyze categorical columns in the dataset.
-    Args:
-        data: Input DataFrame
-    Returns:
-        String containing formatted categorical analysis
-    """
     categorical_cols = data.select_dtypes(include=['object', 'category']).columns
     analysis = {}
@@ -95,13 +77,7 @@ def analyze_categorical_columns(data: pd.DataFrame) -> str:
 @tool
 def suggest_features(data: pd.DataFrame) -> str:
-    """Suggest potential feature engineering steps.
-    Args:
-        data: Input DataFrame
-    Returns:
-        String containing feature engineering suggestions
-    """
     suggestions = []
     numeric_cols = data.select_dtypes(include=[np.number]).columns
     categorical_cols = data.select_dtypes(include=['object', 'category']).columns
@@ -118,70 +94,110 @@ def suggest_features(data: pd.DataFrame) -> str:
     return '\n'.join(suggestions)
-# Streamlit App
 def main():
     st.title("Data Analysis Assistant")
     st.write("Upload your dataset and get automated analysis with natural language interaction.")
     uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
-    if uploaded_file is not None:
-        data = pd.read_csv(uploaded_file)
-        st.session_state['data'] = data
-        # Initialize agent
-        agent = CodeAgent(
-            tools=[analyze_basic_stats, generate_correlation_matrix,
-                   analyze_categorical_columns, suggest_features],
-            model=GroqModel(),
-            additional_authorized_imports=["pandas", "numpy", "matplotlib", "seaborn"]
-        )
-        # Analysis options
-        analysis_type = st.selectbox(
-            "Choose analysis type",
-            ["Basic Statistics", "Correlation Analysis", "Categorical Analysis",
-             "Feature Engineering", "Custom Question"]
-        )
-        if analysis_type == "Basic Statistics":
-            result = agent.run(
-                f"Analyze and explain the basic statistics of this dataset. "
-                f"Dataset info: {data.info()}\n"
-                f"Use the analyze_basic_stats tool and provide natural language explanations."
             )
-            st.write(result)
-        elif analysis_type == "Correlation Analysis":
-            correlation_plot = agent.run(
-                "Generate and explain correlations between numerical variables. "
-                "Use the generate_correlation_matrix tool."
-            )
-            if correlation_plot:
-                st.image(f"data:image/png;base64,{correlation_plot}")
-        elif analysis_type == "Categorical Analysis":
-            result = agent.run(
-                "Analyze categorical variables in the dataset. "
-                "Use the analyze_categorical_columns tool and explain the findings."
-            )
-            st.write(result)
-        elif analysis_type == "Feature Engineering":
-            result = agent.run(
-                "Suggest potential feature engineering steps for this dataset. "
-                "Use the suggest_features tool and explain your suggestions."
-            )
-            st.write(result)
-        elif analysis_type == "Custom Question":
-            question = st.text_input("What would you like to know about your data?")
-            if question:
-                result = agent.run(
-                    f"Answer this question about the dataset: {question}\n"
-                    f"Use appropriate tools to analyze and explain."
-                )
-                st.write(result)
 if __name__ == "__main__":
     main()

 import base64
 import io
 class GroqModel:
     def __init__(self, model_name="llama2-70b-4096"):
         self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
         )
         return response.choices[0].message.content
+# Tool functions: logic unchanged; docstrings condensed to one-line summaries
 @tool
 def analyze_basic_stats(data: pd.DataFrame) -> str:
+    """Calculate basic statistics for numerical columns."""
     stats = {}
     numeric_cols = data.select_dtypes(include=[np.number]).columns
 @tool
 def generate_correlation_matrix(data: pd.DataFrame) -> str:
+    """Generate correlation matrix visualization for numerical columns."""
     numeric_data = data.select_dtypes(include=[np.number])
     plt.figure(figsize=(10, 8))
 @tool
 def analyze_categorical_columns(data: pd.DataFrame) -> str:
+    """Analyze categorical columns in the dataset."""
     categorical_cols = data.select_dtypes(include=['object', 'category']).columns
     analysis = {}
 @tool
 def suggest_features(data: pd.DataFrame) -> str:
+    """Suggest potential feature engineering steps."""
     suggestions = []
     numeric_cols = data.select_dtypes(include=[np.number]).columns
     categorical_cols = data.select_dtypes(include=['object', 'category']).columns
     return '\n'.join(suggestions)
+def initialize_session_state():
+    """Initialize session state variables"""
+    if 'data' not in st.session_state:
+        st.session_state['data'] = None
+    if 'agent' not in st.session_state:
+        st.session_state['agent'] = None
+    if 'file_uploaded' not in st.session_state:
+        st.session_state['file_uploaded'] = False
+    if 'processing' not in st.session_state:
+        st.session_state['processing'] = False
 def main():
     st.title("Data Analysis Assistant")
     st.write("Upload your dataset and get automated analysis with natural language interaction.")
+    # Initialize session state
+    initialize_session_state()
+    # File uploader with error handling
     uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+    try:
+        if uploaded_file is not None and not st.session_state['file_uploaded']:
+            # Show loading spinner while processing the file
+            with st.spinner('Loading and processing your data...'):
+                try:
+                    data = pd.read_csv(uploaded_file)
+                    st.session_state['data'] = data
+                    st.session_state['file_uploaded'] = True
+                    # Initialize agent
+                    st.session_state['agent'] = CodeAgent(
+                        tools=[analyze_basic_stats, generate_correlation_matrix,
+                               analyze_categorical_columns, suggest_features],
+                        model=GroqModel(),
+                        additional_authorized_imports=["pandas", "numpy", "matplotlib", "seaborn"]
+                    )
+                    # Show success message
+                    st.success(f'Successfully loaded dataset with {data.shape[0]} rows and {data.shape[1]} columns')
+                    # Display data preview
+                    st.subheader("Data Preview")
+                    st.dataframe(data.head())
+                except Exception as e:
+                    st.error(f"Error loading file: {str(e)}")
+                    st.session_state['file_uploaded'] = False
+                    return
+        # Only show analysis options if data is loaded
+        if st.session_state['file_uploaded'] and st.session_state['data'] is not None:
+            # Analysis options
+            analysis_type = st.selectbox(
+                "Choose analysis type",
+                ["Basic Statistics", "Correlation Analysis", "Categorical Analysis",
+                 "Feature Engineering", "Custom Question"]
             )
+            # Process analysis with loading indicators
+            if analysis_type:
+                with st.spinner(f'Performing {analysis_type.lower()}...'):
+                    if analysis_type == "Basic Statistics":
+                        result = st.session_state['agent'].run(
+                            f"Analyze and explain the basic statistics of this dataset. "
+                            f"Dataset info: {st.session_state['data'].info()}\n"
+                            f"Use the analyze_basic_stats tool and provide natural language explanations."
+                        )
+                        st.write(result)
+                    elif analysis_type == "Correlation Analysis":
+                        correlation_plot = st.session_state['agent'].run(
+                            "Generate and explain correlations between numerical variables. "
+                            "Use the generate_correlation_matrix tool."
+                        )
+                        if correlation_plot:
+                            st.image(f"data:image/png;base64,{correlation_plot}")
+                    elif analysis_type == "Categorical Analysis":
+                        result = st.session_state['agent'].run(
+                            "Analyze categorical variables in the dataset. "
+                            "Use the analyze_categorical_columns tool and explain the findings."
+                        )
+                        st.write(result)
+                    elif analysis_type == "Feature Engineering":
+                        result = st.session_state['agent'].run(
+                            "Suggest potential feature engineering steps for this dataset. "
+                            "Use the suggest_features tool and explain your suggestions."
+                        )
+                        st.write(result)
+                    elif analysis_type == "Custom Question":
+                        question = st.text_input("What would you like to know about your data?")
+                        if question:
+                            result = st.session_state['agent'].run(
+                                f"Answer this question about the dataset: {question}\n"
+                                f"Use appropriate tools to analyze and explain."
+                            )
+                            st.write(result)
+    except Exception as e:
+        st.error(f"An error occurred: {str(e)}")
+        st.session_state['file_uploaded'] = False
 if __name__ == "__main__":
     main()