girishwangikar committed on
Commit
1b4aded
·
verified ·
1 Parent(s): 0c7ce43

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -12
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # app.py
2
  import streamlit as st
3
  import pandas as pd
4
  import numpy as np
@@ -13,7 +12,7 @@ import tempfile
13
  import base64
14
  import io
15
 
16
- # Custom Groq Model Class for smolagents integration
17
  class GroqModel:
18
  def __init__(self, model_name="llama2-70b-4096"):
19
  self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
@@ -30,15 +29,14 @@ class GroqModel:
30
  )
31
  return response.choices[0].message.content
32
 
33
- # Analysis Tools
34
  @tool
35
- def analyze_basic_stats(data: pd.DataFrame) -> Dict:
36
  """Calculate basic statistics for numerical columns.
37
 
38
  Args:
39
  data: Input DataFrame
40
  Returns:
41
- Dictionary containing basic statistics
42
  """
43
  stats = {}
44
  numeric_cols = data.select_dtypes(include=[np.number]).columns
@@ -52,7 +50,7 @@ def analyze_basic_stats(data: pd.DataFrame) -> Dict:
52
  'missing': data[col].isnull().sum()
53
  }
54
 
55
- return stats
56
 
57
  @tool
58
  def generate_correlation_matrix(data: pd.DataFrame) -> str:
@@ -75,13 +73,13 @@ def generate_correlation_matrix(data: pd.DataFrame) -> str:
75
  return base64.b64encode(buf.getvalue()).decode()
76
 
77
  @tool
78
- def analyze_categorical_columns(data: pd.DataFrame) -> Dict:
79
  """Analyze categorical columns in the dataset.
80
 
81
  Args:
82
  data: Input DataFrame
83
  Returns:
84
- Dictionary containing categorical analysis
85
  """
86
  categorical_cols = data.select_dtypes(include=['object', 'category']).columns
87
  analysis = {}
@@ -93,16 +91,16 @@ def analyze_categorical_columns(data: pd.DataFrame) -> Dict:
93
  'missing': data[col].isnull().sum()
94
  }
95
 
96
- return analysis
97
 
98
  @tool
99
- def suggest_features(data: pd.DataFrame) -> List[str]:
100
  """Suggest potential feature engineering steps.
101
 
102
  Args:
103
  data: Input DataFrame
104
  Returns:
105
- List of feature engineering suggestions
106
  """
107
  suggestions = []
108
  numeric_cols = data.select_dtypes(include=[np.number]).columns
@@ -118,7 +116,7 @@ def suggest_features(data: pd.DataFrame) -> List[str]:
118
  if data[col].skew() > 1 or data[col].skew() < -1:
119
  suggestions.append(f"Consider log transformation for {col} due to skewness")
120
 
121
- return suggestions
122
 
123
  # Streamlit App
124
  def main():
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
 
12
  import base64
13
  import io
14
 
15
+ # Custom Groq Model Class remains unchanged
16
  class GroqModel:
17
  def __init__(self, model_name="llama2-70b-4096"):
18
  self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
29
  )
30
  return response.choices[0].message.content
31
 
 
32
  @tool
33
+ def analyze_basic_stats(data: pd.DataFrame) -> str:
34
  """Calculate basic statistics for numerical columns.
35
 
36
  Args:
37
  data: Input DataFrame
38
  Returns:
39
+ String containing formatted basic statistics
40
  """
41
  stats = {}
42
  numeric_cols = data.select_dtypes(include=[np.number]).columns
 
50
  'missing': data[col].isnull().sum()
51
  }
52
 
53
+ return str(stats)
54
 
55
  @tool
56
  def generate_correlation_matrix(data: pd.DataFrame) -> str:
 
73
  return base64.b64encode(buf.getvalue()).decode()
74
 
75
  @tool
76
+ def analyze_categorical_columns(data: pd.DataFrame) -> str:
77
  """Analyze categorical columns in the dataset.
78
 
79
  Args:
80
  data: Input DataFrame
81
  Returns:
82
+ String containing formatted categorical analysis
83
  """
84
  categorical_cols = data.select_dtypes(include=['object', 'category']).columns
85
  analysis = {}
 
91
  'missing': data[col].isnull().sum()
92
  }
93
 
94
+ return str(analysis)
95
 
96
  @tool
97
+ def suggest_features(data: pd.DataFrame) -> str:
98
  """Suggest potential feature engineering steps.
99
 
100
  Args:
101
  data: Input DataFrame
102
  Returns:
103
+ String containing feature engineering suggestions
104
  """
105
  suggestions = []
106
  numeric_cols = data.select_dtypes(include=[np.number]).columns
 
116
  if data[col].skew() > 1 or data[col].skew() < -1:
117
  suggestions.append(f"Consider log transformation for {col} due to skewness")
118
 
119
+ return '\n'.join(suggestions)
120
 
121
  # Streamlit App
122
  def main():