Quazim0t0 committed on
Commit
6d4e0a3
·
verified ·
1 Parent(s): a5b666f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -304
app.py CHANGED
@@ -2,8 +2,8 @@ import os
2
  import gradio as gr
3
  from sqlalchemy import text
4
  from smolagents import tool, CodeAgent, HfApiModel
5
- import spaces
6
  import pandas as pd
 
7
  from database import (
8
  engine,
9
  create_dynamic_table,
@@ -12,12 +12,14 @@ from database import (
12
  get_table_schema
13
  )
14
 
 
 
 
 
 
15
  def get_data_table():
16
- """
17
- Fetches all data from the current table and returns it as a Pandas DataFrame.
18
- """
19
  try:
20
- # Get list of tables
21
  with engine.connect() as con:
22
  tables = con.execute(text(
23
  "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
@@ -26,355 +28,147 @@ def get_data_table():
26
  if not tables:
27
  return pd.DataFrame()
28
 
29
- # Use the first table found
30
  table_name = tables[0][0]
31
 
32
  with engine.connect() as con:
33
  result = con.execute(text(f"SELECT * FROM {table_name}"))
34
  rows = result.fetchall()
35
-
36
- if not rows:
37
- return pd.DataFrame()
38
-
39
  columns = result.keys()
40
- df = pd.DataFrame(rows, columns=columns)
41
- return df
42
 
43
  except Exception as e:
44
  return pd.DataFrame({"Error": [str(e)]})
45
 
46
- def get_table_info():
47
- """
48
- Gets the current table name and column information.
49
- Returns:
50
- tuple: (table_name, list of column names, column info)
51
- """
52
  try:
53
- # Get list of tables
54
- with engine.connect() as con:
55
- tables = con.execute(text(
56
- "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
57
- )).fetchall()
58
-
59
- if not tables:
60
- return None, [], {}
61
-
62
- # Use the first table found
63
- table_name = tables[0][0]
64
 
65
- # Get column information
66
- with engine.connect() as con:
67
- columns = con.execute(text(f"PRAGMA table_info({table_name})")).fetchall()
68
-
69
- # Extract column names and types
70
- column_names = [col[1] for col in columns]
71
- column_info = {
72
- col[1]: {
73
- 'type': col[2],
74
- 'is_primary': bool(col[5])
75
- }
76
- for col in columns
77
- }
78
 
79
- return table_name, column_names, column_info
 
 
 
80
 
81
- except Exception as e:
82
- print(f"Error getting table info: {str(e)}")
83
- return None, [], {}
84
-
85
- def process_sql_file(file_path):
86
- """
87
- Process an SQL file and execute its contents.
88
- """
89
- try:
90
- # Read the SQL file
91
- with open(file_path, 'r') as file:
92
- sql_content = file.read()
93
-
94
- # Replace AUTO_INCREMENT with AUTOINCREMENT for SQLite compatibility
95
- sql_content = sql_content.replace('AUTO_INCREMENT', 'AUTOINCREMENT')
96
-
97
- # Split into individual statements
98
- statements = [stmt.strip() for stmt in sql_content.split(';') if stmt.strip()]
99
 
100
- # Clear existing database
101
- clear_database()
 
 
102
 
103
- # Execute each statement
104
- with engine.begin() as conn:
105
- for statement in statements:
106
- if statement.strip():
107
- conn.execute(text(statement))
108
-
109
- return True, "SQL file successfully executed!"
110
 
111
- except Exception as e:
112
- return False, f"Error processing SQL file: {str(e)}"
113
-
114
- def process_csv_file(file_path):
115
- """
116
- Process a CSV file and load it into the database.
117
- """
118
- try:
119
- # Read the CSV file
120
- df = pd.read_csv(file_path)
121
-
122
- if len(df.columns) == 0:
123
- return False, "Error: File contains no columns"
124
-
125
- # Clear existing database and create new table
126
  clear_database()
127
  table = create_dynamic_table(df)
 
128
 
129
- # Convert DataFrame to list of dictionaries and insert
130
- records = df.to_dict('records')
131
- insert_rows_into_table(records, table)
132
-
133
- return True, "CSV file successfully loaded!"
134
-
135
- except Exception as e:
136
- return False, f"Error processing CSV file: {str(e)}"
137
-
138
- def process_uploaded_file(file):
139
- """
140
- Process the uploaded file (either SQL or CSV).
141
- """
142
- try:
143
- if file is None:
144
- return False, "Please upload a file."
145
-
146
- # Get file extension
147
- file_ext = os.path.splitext(file)[1].lower()
148
-
149
- if file_ext == '.sql':
150
- return process_sql_file(file)
151
- elif file_ext == '.csv':
152
- return process_csv_file(file)
153
- else:
154
- return False, "Error: Unsupported file type. Please upload either a .sql or .csv file."
155
-
156
- except Exception as e:
157
- return False, f"Error processing file: {str(e)}"
158
-
159
- @tool
160
- def sql_engine(query: str) -> str:
161
- """
162
- Executes an SQL query and returns formatted results.
163
-
164
- Args:
165
- query: The SQL query string to execute on the database. Must be a valid SELECT query.
166
-
167
- Returns:
168
- str: The formatted query results as a string.
169
- """
170
- try:
171
- with engine.connect() as con:
172
- rows = con.execute(text(query)).fetchall()
173
-
174
- if not rows:
175
- return "No results found."
176
-
177
- if len(rows) == 1 and len(rows[0]) == 1:
178
- return str(rows[0][0])
179
-
180
- return "\n".join([", ".join(map(str, row)) for row in rows])
181
 
182
  except Exception as e:
183
- return f"Error: {str(e)}"
184
-
185
- agent = CodeAgent(
186
- tools=[sql_engine],
187
- model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
188
- )
189
-
190
- def query_sql(user_query: str) -> str:
191
- """
192
- Converts natural language input to an SQL query using CodeAgent.
193
- """
194
- table_name, column_names, column_info = get_table_info()
195
-
196
- if not table_name:
197
- return "Error: No data table exists. Please upload a file first."
198
-
199
- schema_info = (
200
- f"The database has a table named '{table_name}' with the following columns:\n"
201
- + "\n".join([
202
- f"- {col} ({info['type']}){' primary key' if info['is_primary'] else ''}"
203
- for col, info in column_info.items()
204
- ])
205
- + "\n\nGenerate a valid SQL SELECT query using ONLY these column names.\n"
206
- "The table name is '" + table_name + "'.\n"
207
- "If column names contain spaces, they must be quoted.\n"
208
- "You can use aggregate functions like COUNT, AVG, SUM, etc.\n"
209
- "DO NOT explain your reasoning, and DO NOT return anything other than the SQL query itself."
210
- )
211
-
212
- # Get SQL from the agent
213
- generated_sql = agent.run(f"{schema_info} Convert this request into SQL: {user_query}")
214
-
215
- if not isinstance(generated_sql, str):
216
- return "Error: Invalid query generated"
217
 
218
- # Clean up the SQL
219
- if generated_sql.isnumeric(): # If the agent returned just a number
220
- return generated_sql
221
-
222
- # Extract just the SQL query if there's additional text
223
- sql_lines = [line for line in generated_sql.split('\n') if 'select' in line.lower()]
224
- if sql_lines:
225
- generated_sql = sql_lines[0]
226
-
227
- # Remove any trailing semicolons
228
- generated_sql = generated_sql.strip().rstrip(';')
229
-
230
- # Fix table names
231
- for wrong_name in ['table_name', 'customers', 'main']:
232
- if wrong_name in generated_sql:
233
- generated_sql = generated_sql.replace(wrong_name, table_name)
234
 
235
- # Add quotes around column names that need them
236
- for col in column_names:
237
- if ' ' in col: # If column name contains spaces
238
- if col in generated_sql and f'"{col}"' not in generated_sql and f'`{col}`' not in generated_sql:
239
- generated_sql = generated_sql.replace(col, f'"{col}"')
 
 
 
 
 
 
 
240
 
 
 
241
  try:
242
- # Execute the query
243
- result = sql_engine(generated_sql)
 
244
 
245
- # Try to format as number if possible
246
- try:
247
- float_result = float(result)
248
- return f"{float_result:,.0f}" # Format with commas, no decimals
249
- except ValueError:
250
- return result
251
-
252
- except Exception as e:
253
- if str(e).startswith("(sqlite3.OperationalError) near"):
254
- # If it's a SQL syntax error, return the raw result
255
- return generated_sql
256
- return f"Error executing query: {str(e)}"
257
-
258
- # Create the Gradio interface
259
- with gr.Blocks() as demo:
260
- with gr.Group() as upload_group:
261
- gr.Markdown("""
262
- # CSVAgent
263
-
264
- Upload your data file to begin.
265
 
266
- ### Supported File Types:
267
- - CSV (.csv): CSV file with headers that will be automatically converted to a table
268
 
269
- ### CSV Requirements:
270
- - Must include headers
271
- - First column will be used as the primary key
272
- - Column types will be automatically detected
273
- - Sample CSV Files: https://github.com/datablist/sample-csv-files
274
- ### Based on ZennyKenny's SqlAgent
275
-
276
- ### SQL to CSV File Conversion
277
- https://tableconvert.com/sql-to-csv
278
- - Will work on the handling of SQL files soon.
279
 
 
 
 
 
 
280
 
281
- ### Try it out! Upload a CSV file and then ask a question about the data!
282
- - There is issues with the UI displaying the answer correctly, some questions such as "How many Customers are located in Korea?"
283
- The right answer will appear in the logs, but throws an error on the "Results" section.
284
- """)
285
 
286
- file_input = gr.File(
287
- label="Upload Data File",
288
- file_types=[".csv", ".sql"],
289
- type="filepath"
290
- )
291
- status = gr.Textbox(label="Status", interactive=False)
292
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
  with gr.Group(visible=False) as query_group:
294
  with gr.Row():
295
  with gr.Column(scale=1):
296
- user_input = gr.Textbox(label="Ask a question about the data")
297
- query_output = gr.Textbox(label="Result")
298
-
299
  with gr.Column(scale=2):
300
- gr.Markdown("### Current Data")
301
- data_table = gr.Dataframe(
302
- value=None,
303
- label="Data Table",
304
- interactive=False
305
- )
306
-
307
- schema_display = gr.Markdown(value="Loading schema...")
308
- refresh_btn = gr.Button("Refresh Data")
309
-
310
- def handle_upload(file_obj):
311
- if file_obj is None:
312
- return (
313
- "Please upload a file.",
314
- None,
315
- "No schema available",
316
- gr.update(visible=True),
317
- gr.update(visible=False)
318
- )
319
-
320
- success, message = process_uploaded_file(file_obj)
321
- if success:
322
- df = get_data_table()
323
- _, _, column_info = get_table_info()
324
- schema = "\n".join([
325
- f"- {col} ({info['type']}){' primary key' if info['is_primary'] else ''}"
326
- for col, info in column_info.items()
327
- ])
328
- return (
329
- message,
330
- df,
331
- f"### Current Schema:\n```\n{schema}\n```",
332
- gr.update(visible=False),
333
- gr.update(visible=True)
334
- )
335
- return (
336
- message,
337
- None,
338
- "No schema available",
339
- gr.update(visible=True),
340
- gr.update(visible=False)
341
- )
342
-
343
- def refresh_data():
344
- df = get_data_table()
345
- _, _, column_info = get_table_info()
346
- schema = "\n".join([
347
- f"- {col} ({info['type']}){' primary key' if info['is_primary'] else ''}"
348
- for col, info in column_info.items()
349
- ])
350
- return df, f"### Current Schema:\n```\n{schema}\n```"
351
 
352
  # Event handlers
353
  file_input.upload(
354
  fn=handle_upload,
355
  inputs=file_input,
356
- outputs=[
357
- status,
358
- data_table,
359
- schema_display,
360
- upload_group,
361
- query_group
362
- ]
363
  )
364
 
365
- user_input.change(
366
- fn=query_sql,
367
  inputs=user_input,
368
  outputs=query_output
369
  )
370
 
371
  refresh_btn.click(
372
- fn=refresh_data,
373
  outputs=[data_table, schema_display]
374
  )
375
 
376
  if __name__ == "__main__":
377
- demo.launch(
378
- server_name="0.0.0.0",
379
- server_port=7860
380
- )
 
2
  import gradio as gr
3
  from sqlalchemy import text
4
  from smolagents import tool, CodeAgent, HfApiModel
 
5
  import pandas as pd
6
+ from io import StringIO
7
  from database import (
8
  engine,
9
  create_dynamic_table,
 
12
  get_table_schema
13
  )
14
 
15
+ # Initialize the AI agent
16
+ agent = CodeAgent(
17
+ model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
18
+ )
19
+
20
  def get_data_table():
21
+ """Fetch and return the current table data as DataFrame"""
 
 
22
  try:
 
23
  with engine.connect() as con:
24
  tables = con.execute(text(
25
  "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
 
28
  if not tables:
29
  return pd.DataFrame()
30
 
 
31
  table_name = tables[0][0]
32
 
33
  with engine.connect() as con:
34
  result = con.execute(text(f"SELECT * FROM {table_name}"))
35
  rows = result.fetchall()
 
 
 
 
36
  columns = result.keys()
37
+
38
+ return pd.DataFrame(rows, columns=columns) if rows else pd.DataFrame()
39
 
40
  except Exception as e:
41
  return pd.DataFrame({"Error": [str(e)]})
42
 
43
+ def process_txt_file(file_path):
44
+ """Analyze text file and convert to structured table"""
 
 
 
 
45
  try:
46
+ with open(file_path, 'r') as f:
47
+ content = f.read()
 
 
 
 
 
 
 
 
 
48
 
49
+ # First pass: Structure detection
50
+ structure_prompt = f"""
51
+ Analyze this text and convert it into a structured table format:
52
+ {content}
 
 
 
 
 
 
 
 
 
53
 
54
+ Return ONLY valid CSV format with appropriate headers.
55
+ Maintain original data types and relationships.
56
+ """
57
+ csv_output = agent.run(structure_prompt)
58
 
59
+ # Convert to DataFrame
60
+ df = pd.read_csv(StringIO(csv_output))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ # Second pass: Data validation
63
+ validation_prompt = f"""
64
+ Validate this structured data:
65
+ {df.head().to_csv()}
66
 
67
+ Fix any formatting issues and return corrected CSV.
68
+ """
69
+ corrected_csv = agent.run(validation_prompt)
70
+ df = pd.read_csv(StringIO(corrected_csv))
 
 
 
71
 
72
+ # Clear existing data and create new table
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  clear_database()
74
  table = create_dynamic_table(df)
75
+ insert_rows_into_table(df.to_dict('records'), table)
76
 
77
+ return True, "Text analyzed successfully!", df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  except Exception as e:
80
+ return False, f"Error: {str(e)}", pd.DataFrame()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
+ def handle_upload(file_obj):
83
+ """Handle file upload and processing"""
84
+ if file_obj is None:
85
+ return "Please upload a text file.", None, "No schema", gr.update(visible=True), gr.update(visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
+ success, message, df = process_txt_file(file_obj)
88
+ if success:
89
+ column_info = {col: {'type': str(df[col].dtype)} for col in df.columns}
90
+ schema = "\n".join([f"- {col} ({info['type']})" for col, info in column_info.items()])
91
+ return (
92
+ message,
93
+ df,
94
+ f"### Detected Schema:\n```\n{schema}\n```",
95
+ gr.update(visible=False),
96
+ gr.update(visible=True)
97
+ )
98
+ return message, None, "No schema", gr.update(visible=True), gr.update(visible=False)
99
 
100
+ def query_analysis(user_query: str) -> str:
101
+ """Handle natural language queries about the data"""
102
  try:
103
+ df = get_data_table()
104
+ if df.empty:
105
+ return "No data available. Upload a text file first."
106
 
107
+ analysis_prompt = f"""
108
+ Analyze this dataset:
109
+ {df.head().to_csv()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
+ Question: {user_query}
 
112
 
113
+ Provide a detailed answer considering:
114
+ - Data patterns and relationships
115
+ - Statistical measures where applicable
116
+ - Clear numerical formatting
117
+ - Natural language explanations
 
 
 
 
 
118
 
119
+ Structure your response with:
120
+ 1. Direct answer first
121
+ 2. Supporting analysis
122
+ 3. Data references
123
+ """
124
 
125
+ return agent.run(analysis_prompt)
 
 
 
126
 
127
+ except Exception as e:
128
+ return f"Analysis error: {str(e)}"
 
 
 
 
129
 
130
+ # Create Gradio interface
131
+ with gr.Blocks() as demo:
132
+ with gr.Group() as upload_group:
133
+ gr.Markdown("""
134
+ # Text Data Analyzer
135
+ Upload any text document containing structured information:
136
+ - Reports
137
+ - Log files
138
+ - Research data
139
+ - Meeting notes with tabular content
140
+ """)
141
+ file_input = gr.File(label="Upload Text File", file_types=[".txt"], type="filepath")
142
+ status = gr.Textbox(label="Processing Status", interactive=False)
143
+
144
  with gr.Group(visible=False) as query_group:
145
  with gr.Row():
146
  with gr.Column(scale=1):
147
+ user_input = gr.Textbox(label="Ask about the data")
148
+ query_output = gr.Markdown(label="Analysis Results")
 
149
  with gr.Column(scale=2):
150
+ gr.Markdown("### Extracted Data Preview")
151
+ data_table = gr.Dataframe(interactive=False)
152
+ schema_display = gr.Markdown()
153
+ refresh_btn = gr.Button("Refresh View")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
  # Event handlers
156
  file_input.upload(
157
  fn=handle_upload,
158
  inputs=file_input,
159
+ outputs=[status, data_table, schema_display, upload_group, query_group]
 
 
 
 
 
 
160
  )
161
 
162
+ user_input.submit(
163
+ fn=query_analysis,
164
  inputs=user_input,
165
  outputs=query_output
166
  )
167
 
168
  refresh_btn.click(
169
+ fn=lambda: (get_data_table(), "Schema refreshed"),
170
  outputs=[data_table, schema_display]
171
  )
172
 
173
  if __name__ == "__main__":
174
+ demo.launch(server_name="0.0.0.0", server_port=7860)