Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
# app.py
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
import numpy as np
|
@@ -13,7 +12,7 @@ import tempfile
|
|
13 |
import base64
|
14 |
import io
|
15 |
|
16 |
-
# Custom Groq Model Class
|
17 |
class GroqModel:
|
18 |
def __init__(self, model_name="llama2-70b-4096"):
|
19 |
self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
@@ -30,15 +29,14 @@ class GroqModel:
|
|
30 |
)
|
31 |
return response.choices[0].message.content
|
32 |
|
33 |
-
# Analysis Tools
|
34 |
@tool
|
35 |
-
def analyze_basic_stats(data: pd.DataFrame) -> Dict:
|
36 |
"""Calculate basic statistics for numerical columns.
|
37 |
|
38 |
Args:
|
39 |
data: Input DataFrame
|
40 |
Returns:
|
41 |
-
|
42 |
"""
|
43 |
stats = {}
|
44 |
numeric_cols = data.select_dtypes(include=[np.number]).columns
|
@@ -52,7 +50,7 @@ def analyze_basic_stats(data: pd.DataFrame) -> Dict:
|
|
52 |
'missing': data[col].isnull().sum()
|
53 |
}
|
54 |
|
55 |
-
return stats
|
56 |
|
57 |
@tool
|
58 |
def generate_correlation_matrix(data: pd.DataFrame) -> str:
|
@@ -75,13 +73,13 @@ def generate_correlation_matrix(data: pd.DataFrame) -> str:
|
|
75 |
return base64.b64encode(buf.getvalue()).decode()
|
76 |
|
77 |
@tool
|
78 |
-
def analyze_categorical_columns(data: pd.DataFrame) -> Dict:
|
79 |
"""Analyze categorical columns in the dataset.
|
80 |
|
81 |
Args:
|
82 |
data: Input DataFrame
|
83 |
Returns:
|
84 |
-
|
85 |
"""
|
86 |
categorical_cols = data.select_dtypes(include=['object', 'category']).columns
|
87 |
analysis = {}
|
@@ -93,16 +91,16 @@ def analyze_categorical_columns(data: pd.DataFrame) -> Dict:
|
|
93 |
'missing': data[col].isnull().sum()
|
94 |
}
|
95 |
|
96 |
-
return analysis
|
97 |
|
98 |
@tool
|
99 |
-
def suggest_features(data: pd.DataFrame) -> List[str]:
|
100 |
"""Suggest potential feature engineering steps.
|
101 |
|
102 |
Args:
|
103 |
data: Input DataFrame
|
104 |
Returns:
|
105 |
-
|
106 |
"""
|
107 |
suggestions = []
|
108 |
numeric_cols = data.select_dtypes(include=[np.number]).columns
|
@@ -118,7 +116,7 @@ def suggest_features(data: pd.DataFrame) -> List[str]:
|
|
118 |
if data[col].skew() > 1 or data[col].skew() < -1:
|
119 |
suggestions.append(f"Consider log transformation for {col} due to skewness")
|
120 |
|
121 |
-
return suggestions
|
122 |
|
123 |
# Streamlit App
|
124 |
def main():
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
|
|
12 |
import base64
|
13 |
import io
|
14 |
|
15 |
+
# Custom Groq Model Class remains unchanged
|
16 |
class GroqModel:
|
17 |
def __init__(self, model_name="llama2-70b-4096"):
|
18 |
self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
|
|
29 |
)
|
30 |
return response.choices[0].message.content
|
31 |
|
|
|
32 |
@tool
|
33 |
+
def analyze_basic_stats(data: pd.DataFrame) -> str:
|
34 |
"""Calculate basic statistics for numerical columns.
|
35 |
|
36 |
Args:
|
37 |
data: Input DataFrame
|
38 |
Returns:
|
39 |
+
String containing formatted basic statistics
|
40 |
"""
|
41 |
stats = {}
|
42 |
numeric_cols = data.select_dtypes(include=[np.number]).columns
|
|
|
50 |
'missing': data[col].isnull().sum()
|
51 |
}
|
52 |
|
53 |
+
return str(stats)
|
54 |
|
55 |
@tool
|
56 |
def generate_correlation_matrix(data: pd.DataFrame) -> str:
|
|
|
73 |
return base64.b64encode(buf.getvalue()).decode()
|
74 |
|
75 |
@tool
|
76 |
+
def analyze_categorical_columns(data: pd.DataFrame) -> str:
|
77 |
"""Analyze categorical columns in the dataset.
|
78 |
|
79 |
Args:
|
80 |
data: Input DataFrame
|
81 |
Returns:
|
82 |
+
String containing formatted categorical analysis
|
83 |
"""
|
84 |
categorical_cols = data.select_dtypes(include=['object', 'category']).columns
|
85 |
analysis = {}
|
|
|
91 |
'missing': data[col].isnull().sum()
|
92 |
}
|
93 |
|
94 |
+
return str(analysis)
|
95 |
|
96 |
@tool
|
97 |
+
def suggest_features(data: pd.DataFrame) -> str:
|
98 |
"""Suggest potential feature engineering steps.
|
99 |
|
100 |
Args:
|
101 |
data: Input DataFrame
|
102 |
Returns:
|
103 |
+
String containing feature engineering suggestions
|
104 |
"""
|
105 |
suggestions = []
|
106 |
numeric_cols = data.select_dtypes(include=[np.number]).columns
|
|
|
116 |
if data[col].skew() > 1 or data[col].skew() < -1:
|
117 |
suggestions.append(f"Consider log transformation for {col} due to skewness")
|
118 |
|
119 |
+
return '\n'.join(suggestions)
|
120 |
|
121 |
# Streamlit App
|
122 |
def main():
|