pwc-india committed on
Commit
0940575
1 Parent(s): 4ded81c

Update pages/2DATA PROFILER.py

Browse files
Files changed (1) hide show
  1. pages/2DATA PROFILER.py +190 -188
pages/2DATA PROFILER.py CHANGED
@@ -55,203 +55,205 @@ st.markdown("""
55
  </style>
56
  """, unsafe_allow_html=True)
57
 
58
- ######
59
- def main():
60
- # st.title('PAGE TITLE') # Change this for each page
61
- sidebar()
62
- ########
63
-
64
- def load_dataframe_to_sqlserver(df, table_name, connection_string):
65
- # Establish a connection to the database
66
- conn = pyodbc.connect(connection_string)
67
- cursor = conn.cursor()
68
-
69
- # Drop table if it exists
70
- drop_table_sql = f"IF OBJECT_ID('{table_name}', 'U') IS NOT NULL DROP TABLE {table_name}"
71
-
72
- try:
73
- cursor.execute(drop_table_sql)
74
- conn.commit()
75
- except Exception as e:
76
- st.error(f"Error dropping table. Please try with a different name.")
77
-
78
- # Create table SQL statement based on DataFrame columns and types
79
- create_table_sql = f"CREATE TABLE {table_name} ("
80
- for column in df.columns:
81
- dtype = str(df[column].dtype)
82
- sql_dtype = 'NVARCHAR(MAX)'
83
- create_table_sql += f"{column} {sql_dtype}, "
84
- create_table_sql = create_table_sql.rstrip(', ') + ')'
85
-
86
- try:
87
- # Execute table creation
88
- cursor.execute(create_table_sql)
89
- conn.commit()
90
- except Exception as e:
91
- st.error(f"Error Creating table. Please try with a different name.")
92
-
93
- # Insert DataFrame data into the table using bulk insert
94
- insert_sql = f"INSERT INTO {table_name} ({', '.join(df.columns)}) VALUES ({', '.join(['?' for _ in df.columns])})"
95
-
96
- try:
97
- # Using `fast_executemany` for bulk inserts
98
- cursor.fast_executemany = True
99
- cursor.executemany(insert_sql, df.values.tolist())
100
- conn.commit()
101
- st.success(f"Data Imported with table name: '{table_name}' successfully.")
102
- except Exception as e:
103
- st.error(f"Error Inserting Data. Please try with a different name.")
104
-
105
- cursor.close()
106
- conn.close()
107
-
108
-
109
- def clear_cache():
110
- keys = list(st.session_state.keys())
111
- for key in keys:
112
- st.session_state.pop(key)
113
-
114
- def set_bg_hack(main_bg):
115
- '''
116
- A function to unpack an image from root folder and set as bg.
117
-
118
- Returns
119
- -------
120
- The background.
121
- '''
122
- # set bg name
123
- main_bg_ext = "png"
124
-
125
- st.markdown(
126
- f"""
127
- <style>
128
- .stApp {{
129
- background: url(data:image/{main_bg_ext};base64,{base64.b64encode(open(main_bg, "rb").read()).decode()});
130
- background-size: cover
131
- }}
132
- </style>
133
- """,
134
- unsafe_allow_html=True
135
- )
136
- #set_bg_hack("bg2.png")
137
- header_style = """
138
- <style>
139
- .header {
140
- color: black; /* Soft dark gray text color for readability */
141
- width: 103%;
142
- font-size: 60px; /* Large font size */
143
- font-weight: bold; /* Bold text */
144
- line-height: 1.2; /* Improved readability */
145
- margin-bottom: 30px; /* Add some space below the header */
146
- padding: 20px; /* Add padding for better spacing */
147
- background-image:
148
- linear-gradient(to right, rgba(255, 140, 0, 0.3) 25%, transparent 75%), /* Darker orange with higher opacity */
149
- linear-gradient(to bottom, rgba(255, 140, 0, 0.3) 15%, transparent 75%),
150
- linear-gradient(to left, rgba(255, 140, 0, 0.3) 25%, transparent 55%),
151
- linear-gradient(to top, rgba(255, 140, 0, 0.3) 25%, transparent 95%);
152
- background-blend-mode: overlay;
153
- background-size: 250px 350px;
154
- border-radius: 10px; /* Add border radius for rounded corners */
155
- box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); /* Add shadow for depth */
156
- }
157
- </style>
158
- """
159
 
 
 
160
 
 
 
 
 
 
161
 
 
 
 
 
 
 
 
162
 
 
 
 
 
 
 
 
 
 
163
 
164
- content_style = """
165
- <style>
166
- .content {
167
- font-size: 40px; /* Larger font size for content */
168
- line-height: 1.6; /* Improved readability */
169
- width: 103%;
170
- padding: 10px; /* Add padding for better spacing */
171
- margin-bottom: 20px;
172
- background-color: sky-blue; /* Background color for the header */
173
- border-radius: 10px; /* Add border radius for rounded corners */
174
- box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); /* Add shadow for depth */
175
- }
176
- </style>
177
- """
178
 
179
- small_style = """
180
- <style>
181
- .small {
182
- color: black;
183
- font-size: 30px; /* Larger font size for content */
184
- line-height: 1.6; /* Improved readability */
185
- width: 100%;
186
- padding: 10px; /* Add padding for better spacing */
187
- margin-bottom: 10px;
188
- background-color: white; /* Background color for the header */
189
- border-radius: 10px; /* Add border radius for rounded corners */
190
- }
191
- </style>
192
- """
193
 
194
- def update_column_dtype(df, column_name, dtype):
195
- error_entries = pd.DataFrame()
196
- flag = None
197
- if dtype == 'System Detected':
198
- pass
199
- elif dtype == 'int64':
200
- try:
201
- df[column_name] = df[column_name].astype('int64')
202
- except ValueError:
203
- error_entries = df[~df[column_name].apply(lambda x: str(x).isdigit())]
204
- st.error('Unable to convert some entries to integer. Please Clean the column.')
205
- elif dtype == 'float64/numeric':
206
- try:
207
- df[column_name] = df[column_name].astype('float64')
208
- except ValueError:
209
- error_entries = df[pd.to_numeric(df[column_name], errors='coerce').isna()]
210
- st.error('Unable to convert some entries to float. Please Clean the column.')
211
- elif dtype == 'id':
212
- try:
213
- df[column_name] = df[column_name].astype('int64')
214
- except ValueError:
215
- error_entries = df[~df[column_name].apply(lambda x: str(x).isdigit())]
216
- st.error('Unable to convert some entries to id. Please Clean the column.')
217
- elif dtype == 'categorical/string':
218
- df[column_name] = df[column_name].astype('category')
219
- elif dtype == 'datetime':
220
- try:
221
- df[column_name] = pd.to_datetime(df[column_name], errors='raise', infer_datetime_format=True)
222
- except ValueError:
223
- error_entries = df[pd.to_datetime(df[column_name], errors='coerce', infer_datetime_format=True).isna()]
224
- custom_format = st.text_input("Please provide the datetime format (e.g., %Y-%m-%d):")
225
- if custom_format:
226
- try:
227
- df[column_name] = pd.to_datetime(df[column_name], errors='raise', format=custom_format)
228
- except ValueError:
229
- error_entries = df[pd.to_datetime(df[column_name], errors='coerce', format=custom_format).isna()]
230
- st.error('Unable to parse datetime with the provided format. Please Clean the column.')
231
- elif dtype == 'email':
232
- df[column_name] = df[column_name].astype('category')
233
- flag= 'email'
234
- elif dtype == 'phone_number':
235
- df[column_name] = df[column_name].astype('category')
236
- flag= 'phone_number'
237
 
238
- return df, error_entries, flag
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
240
- def convert_to_special_representation(value):
241
- value = str(value)
242
- special_chars = set("!@#$%^&*()_+-=[]{}|;:,.<>?`~")
243
- result = ''
244
- for char in value:
245
- if char.isdigit():
246
- result += 'N'
247
- elif char.isalpha():
248
- result += 'A'
249
- elif char in special_chars:
250
- result += char
251
- else:
252
- # Handle other characters as needed
253
- result += char
254
- return result
 
 
 
 
 
 
 
 
255
  with st.container(border=True):
256
  st.subheader('SELECT TABLE')
257
  metadata = SingleTableMetadata()
 
55
  </style>
56
  """, unsafe_allow_html=True)
57
 
58
+
59
+
60
def _quote_sql_identifier(name):
    """Return *name* as a bracket-delimited SQL Server identifier.

    Surrounding brackets that are already present are removed first, and any
    ``]`` inside the name is doubled so the value cannot close the delimiter
    early and smuggle extra SQL into the statement.
    """
    text = str(name).strip()
    if len(text) >= 2 and text.startswith("[") and text.endswith("]"):
        text = text[1:-1].replace("]]", "]")
    return "[" + text.replace("]", "]]") + "]"


def load_dataframe_to_sqlserver(df, table_name, connection_string):
    """Replace a SQL Server table with the contents of a DataFrame.

    The table is dropped if it exists, recreated with one ``NVARCHAR(MAX)``
    column per DataFrame column, and then filled with the DataFrame rows.
    Progress and failures are reported through ``st.success`` / ``st.error``;
    the function stops at the first failing step.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to upload. Missing values are sent as SQL ``NULL``.
    table_name : str
        Target table. A dot separates schema from table name
        (``dbo.my_table``); each part is bracket-quoted individually.
    connection_string : str
        ODBC connection string handed to ``pyodbc.connect``.

    Returns
    -------
    None

    Raises
    ------
    Whatever ``pyodbc.connect`` raises when the connection cannot be opened;
    errors from the individual SQL steps are logged and shown to the user
    instead of being propagated.
    """
    import logging

    log = logging.getLogger(__name__)

    # Identifiers cannot be bound as parameters, so they are quoted instead of
    # being spliced into the statements verbatim.
    quoted_table = ".".join(
        _quote_sql_identifier(part) for part in str(table_name).split(".")
    )
    quoted_columns = [_quote_sql_identifier(column) for column in df.columns]
    column_list = ", ".join(quoted_columns)
    column_defs = ", ".join(f"{column} NVARCHAR(MAX)" for column in quoted_columns)
    placeholders = ", ".join("?" for _ in quoted_columns)

    # The table name also appears inside a string literal for OBJECT_ID, so
    # single quotes have to be doubled there.
    object_id_literal = quoted_table.replace("'", "''")
    drop_table_sql = (
        f"IF OBJECT_ID(N'{object_id_literal}', 'U') IS NOT NULL "
        f"DROP TABLE {quoted_table}"
    )
    create_table_sql = f"CREATE TABLE {quoted_table} ({column_defs})"
    insert_sql = (
        f"INSERT INTO {quoted_table} ({column_list}) VALUES ({placeholders})"
    )

    # NaN/NaT are not usable as parameter values for a text column; send NULL.
    rows = df.astype(object).where(df.notna(), None).values.tolist()

    conn = pyodbc.connect(connection_string)
    try:
        cursor = conn.cursor()
        try:
            try:
                cursor.execute(drop_table_sql)
                conn.commit()
            except Exception:
                log.exception("Dropping table %s failed", quoted_table)
                st.error("Error dropping table. Please try with a different name.")
                return

            try:
                cursor.execute(create_table_sql)
                conn.commit()
            except Exception:
                log.exception("Creating table %s failed", quoted_table)
                st.error("Error Creating table. Please try with a different name.")
                # Do not fall through: inserting now could append to a stale
                # table that survived the failed DROP/CREATE.
                return

            try:
                # executemany rejects an empty parameter list, so an empty
                # DataFrame simply leaves the freshly created table empty.
                if rows:
                    cursor.fast_executemany = True
                    cursor.executemany(insert_sql, rows)
                conn.commit()
                st.success(f"Data Imported with table name: '{table_name}' successfully.")
            except Exception:
                log.exception("Inserting rows into %s failed", quoted_table)
                st.error("Error Inserting Data. Please try with a different name.")
        finally:
            cursor.close()
    finally:
        # Closing also discards any uncommitted work from a failed step.
        conn.close()
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
+
105
def clear_cache():
    """Remove every entry currently stored in ``st.session_state``."""
    # Snapshot the keys first so the mapping is not mutated while iterating.
    for entry_name in tuple(st.session_state.keys()):
        st.session_state.pop(entry_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
def set_bg_hack(main_bg):
    """Use an image file as the Streamlit app background.

    The file is read, base64-encoded and injected as an inline ``data:`` URL
    in a ``<style>`` rule for ``.stApp``.

    Parameters
    ----------
    main_bg : str or path-like
        Path of the image file. The data URL always declares the PNG type.

    Returns
    -------
    None
    """
    main_bg_ext = "png"

    # Read through a context manager so the file handle is closed promptly.
    with open(main_bg, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode()

    st.markdown(
        f"""
        <style>
        .stApp {{
            background: url(data:image/{main_bg_ext};base64,{encoded_image});
            background-size: cover
        }}
        </style>
        """,
        unsafe_allow_html=True
    )
132
+ #set_bg_hack("bg2.png")
133
+ header_style = """
134
+ <style>
135
+ .header {
136
+ color: black; /* Soft dark gray text color for readability */
137
+ width: 103%;
138
+ font-size: 60px; /* Large font size */
139
+ font-weight: bold; /* Bold text */
140
+ line-height: 1.2; /* Improved readability */
141
+ margin-bottom: 30px; /* Add some space below the header */
142
+ padding: 20px; /* Add padding for better spacing */
143
+ background-image:
144
+ linear-gradient(to right, rgba(255, 140, 0, 0.3) 25%, transparent 75%), /* Darker orange with higher opacity */
145
+ linear-gradient(to bottom, rgba(255, 140, 0, 0.3) 15%, transparent 75%),
146
+ linear-gradient(to left, rgba(255, 140, 0, 0.3) 25%, transparent 55%),
147
+ linear-gradient(to top, rgba(255, 140, 0, 0.3) 25%, transparent 95%);
148
+ background-blend-mode: overlay;
149
+ background-size: 250px 350px;
150
+ border-radius: 10px; /* Add border radius for rounded corners */
151
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); /* Add shadow for depth */
152
+ }
153
+ </style>
154
+ """
155
+
156
+
157
+
158
+
159
+
160
# CSS for elements carrying the "content" class.
# `skyblue` is the valid CSS colour keyword; the previous `sky-blue` is not a
# recognised keyword, so the background declaration was being ignored.
content_style = """
<style>
.content {
    font-size: 40px; /* Larger font size for content */
    line-height: 1.6; /* Improved readability */
    width: 103%;
    padding: 10px; /* Add padding for better spacing */
    margin-bottom: 20px;
    background-color: skyblue; /* Background color for the header */
    border-radius: 10px; /* Add border radius for rounded corners */
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); /* Add shadow for depth */
}
</style>
"""
174
+
175
+ small_style = """
176
+ <style>
177
+ .small {
178
+ color: black;
179
+ font-size: 30px; /* Larger font size for content */
180
+ line-height: 1.6; /* Improved readability */
181
+ width: 100%;
182
+ padding: 10px; /* Add padding for better spacing */
183
+ margin-bottom: 10px;
184
+ background-color: white; /* Background color for the header */
185
+ border-radius: 10px; /* Add border radius for rounded corners */
186
+ }
187
+ </style>
188
+ """
189
+
190
def _is_int_castable(value):
    """Return True when ``int(value)`` succeeds for a single cell value."""
    try:
        int(value)
    except (TypeError, ValueError, OverflowError):
        return False
    return True


def _unparseable_datetime_mask(column, **to_datetime_kwargs):
    """Return a boolean mask of non-null cells that ``pd.to_datetime`` cannot parse."""
    parsed = pd.to_datetime(column, errors='coerce', **to_datetime_kwargs)
    # Cells that were already null are missing data, not parse failures.
    return parsed.isna() & column.notna()


def update_column_dtype(df, column_name, dtype):
    """Cast one DataFrame column to the type chosen in the UI.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to update; the column is replaced in place on success.
    column_name : str
        Name of the column to convert.
    dtype : str
        One of ``'System Detected'``, ``'int64'``, ``'float64/numeric'``,
        ``'id'``, ``'categorical/string'``, ``'datetime'``, ``'email'`` or
        ``'phone_number'``. Any other value leaves the frame unchanged.

    Returns
    -------
    tuple
        ``(df, error_entries, flag)`` where ``error_entries`` holds the rows
        that blocked the conversion (empty when it succeeded) and ``flag`` is
        ``'email'`` / ``'phone_number'`` for those two choices, else ``None``.
    """
    error_entries = pd.DataFrame()
    flag = None
    # Depending on the cell types a failed cast surfaces as either exception.
    cast_errors = (ValueError, TypeError)

    if dtype == 'System Detected':
        pass
    elif dtype in ('int64', 'id'):
        # 'id' columns are stored as integers too; only the message differs.
        label = 'integer' if dtype == 'int64' else 'id'
        try:
            df[column_name] = df[column_name].astype('int64')
        except cast_errors:
            castable = df[column_name].apply(_is_int_castable).astype(bool)
            error_entries = df[~castable]
            st.error(f'Unable to convert some entries to {label}. Please Clean the column.')
    elif dtype == 'float64/numeric':
        try:
            df[column_name] = df[column_name].astype('float64')
        except cast_errors:
            error_entries = df[pd.to_numeric(df[column_name], errors='coerce').isna()]
            st.error('Unable to convert some entries to float. Please Clean the column.')
    elif dtype == 'categorical/string':
        df[column_name] = df[column_name].astype('category')
    elif dtype == 'datetime':
        column = df[column_name]
        try:
            df[column_name] = pd.to_datetime(column, errors='raise')
        except cast_errors:
            error_entries = df[_unparseable_datetime_mask(column)]
            custom_format = st.text_input("Please provide the datetime format (e.g., %Y-%m-%d):")
            if custom_format:
                try:
                    df[column_name] = pd.to_datetime(column, errors='raise', format=custom_format)
                    # The explicit format worked, so the rows collected from
                    # the failed automatic parse are no longer errors.
                    error_entries = pd.DataFrame()
                except cast_errors:
                    try:
                        bad_rows = _unparseable_datetime_mask(column, format=custom_format)
                    except cast_errors:
                        # A malformed format string may raise even with
                        # errors='coerce'; then no entry is parseable with it.
                        bad_rows = column.notna()
                    error_entries = df[bad_rows]
                    st.error('Unable to parse datetime with the provided format. Please Clean the column.')
    elif dtype in ('email', 'phone_number'):
        df[column_name] = df[column_name].astype('category')
        # Tell the caller which semantic check applies to this column.
        flag = dtype

    return df, error_entries, flag
235
+
236
def convert_to_special_representation(value):
    """Return the character-class pattern of *value*.

    The value is converted with ``str()``; every digit becomes ``'N'``, every
    alphabetic character becomes ``'A'``, and all other characters
    (punctuation, whitespace, ...) are kept unchanged.
    """
    def classify(char):
        # Digits are tested first, matching the original precedence.
        if char.isdigit():
            return 'N'
        if char.isalpha():
            return 'A'
        return char

    return ''.join(classify(char) for char in str(value))
251
+
252
+ ######
253
+ def main():
254
+ # st.title('PAGE TITLE') # Change this for each page
255
+ sidebar()
256
+ ########
257
  with st.container(border=True):
258
  st.subheader('SELECT TABLE')
259
  metadata = SingleTableMetadata()