Quazim0t0 committed on
Commit
e1e2089
·
verified ·
1 Parent(s): 8550c12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -46
app.py CHANGED
@@ -12,9 +12,9 @@ from database import (
12
  get_table_schema
13
  )
14
 
15
- # Initialize the AI agent
16
  agent = CodeAgent(
17
- tools=[], # Required parameter even if empty
18
  model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
19
  )
20
 
@@ -30,7 +30,7 @@ def get_data_table():
30
  return pd.DataFrame()
31
 
32
  table_name = tables[0][0]
33
-
34
  with engine.connect() as con:
35
  result = con.execute(text(f"SELECT * FROM {table_name}"))
36
  rows = result.fetchall()
@@ -42,35 +42,67 @@ def get_data_table():
42
  return pd.DataFrame({"Error": [str(e)]})
43
 
44
  def process_txt_file(file_path):
45
- """Analyze text file and convert to structured table"""
46
  try:
47
- with open(file_path, 'r') as f:
48
  content = f.read()
49
 
50
- # First pass: Structure detection
51
  structure_prompt = f"""
52
- Analyze this text and convert it into a structured table format:
53
  {content}
54
 
55
- Return ONLY valid CSV format with appropriate headers.
56
- Maintain original data types and relationships.
 
 
 
 
 
 
57
  """
58
  csv_output = agent.run(structure_prompt)
59
 
60
- # Convert to DataFrame
61
- df = pd.read_csv(StringIO(csv_output))
62
-
63
- # Second pass: Data validation
64
- validation_prompt = f"""
65
- Validate this structured data:
 
 
 
 
 
 
 
 
 
 
 
 
66
  {df.head().to_csv()}
67
 
68
- Fix any formatting issues and return corrected CSV.
 
 
 
 
 
 
69
  """
70
- corrected_csv = agent.run(validation_prompt)
71
- df = pd.read_csv(StringIO(corrected_csv))
72
 
73
- # Clear existing data and create new table
 
 
 
 
 
 
 
 
 
74
  clear_database()
75
  table = create_dynamic_table(df)
76
  insert_rows_into_table(df.to_dict('records'), table)
@@ -78,7 +110,7 @@ def process_txt_file(file_path):
78
  return True, "Text analyzed successfully!", df
79
 
80
  except Exception as e:
81
- return False, f"Error: {str(e)}", pd.DataFrame()
82
 
83
  def handle_upload(file_obj):
84
  """Handle file upload and processing"""
@@ -87,11 +119,11 @@ def handle_upload(file_obj):
87
 
88
  success, message, df = process_txt_file(file_obj)
89
  if success:
90
- column_info = {col: {'type': str(df[col].dtype)} for col in df.columns}
91
- schema = "\n".join([f"- {col} ({info['type']})" for col, info in column_info.items()])
92
  return (
93
  message,
94
- df,
95
  f"### Detected Schema:\n```\n{schema}\n```",
96
  gr.update(visible=False),
97
  gr.update(visible=True)
@@ -99,47 +131,45 @@ def handle_upload(file_obj):
99
  return message, None, "No schema", gr.update(visible=True), gr.update(visible=False)
100
 
101
  def query_analysis(user_query: str) -> str:
102
- """Handle natural language queries about the data"""
103
  try:
104
  df = get_data_table()
105
  if df.empty:
106
- return "No data available. Upload a text file first."
107
 
108
  analysis_prompt = f"""
109
- Analyze this dataset:
110
  {df.head().to_csv()}
111
 
112
  Question: {user_query}
113
 
114
- Provide a detailed answer considering:
115
- - Data patterns and relationships
116
- - Statistical measures where applicable
117
- - Clear numerical formatting
118
- - Natural language explanations
119
-
120
- Structure your response with:
121
  1. Direct answer first
122
- 2. Supporting analysis
123
  3. Data references
 
 
 
124
  """
125
 
126
  return agent.run(analysis_prompt)
127
 
128
  except Exception as e:
129
- return f"Analysis error: {str(e)}"
130
 
131
- # Create Gradio interface
132
  with gr.Blocks() as demo:
133
  with gr.Group() as upload_group:
134
  gr.Markdown("""
135
  # Text Data Analyzer
136
- Upload any text document containing structured information:
137
- - Reports
138
- - Log files
139
- - Research data
140
- - Meeting notes with tabular content
141
  """)
142
- file_input = gr.File(label="Upload Text File", file_types=[".txt"], type="filepath")
 
 
 
 
143
  status = gr.Textbox(label="Processing Status", interactive=False)
144
 
145
  with gr.Group(visible=False) as query_group:
@@ -149,9 +179,13 @@ with gr.Blocks() as demo:
149
  query_output = gr.Markdown(label="Analysis Results")
150
  with gr.Column(scale=2):
151
  gr.Markdown("### Extracted Data Preview")
152
- data_table = gr.Dataframe(interactive=False)
 
 
 
 
153
  schema_display = gr.Markdown()
154
- refresh_btn = gr.Button("Refresh View")
155
 
156
  # Event handlers
157
  file_input.upload(
@@ -167,9 +201,13 @@ with gr.Blocks() as demo:
167
  )
168
 
169
  refresh_btn.click(
170
- fn=lambda: (get_data_table(), "Schema refreshed"),
171
  outputs=[data_table, schema_display]
172
  )
173
 
174
  if __name__ == "__main__":
175
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
12
  get_table_schema
13
  )
14
 
15
+ # Initialize the AI agent with empty tools list
16
  agent = CodeAgent(
17
+ tools=[],
18
  model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
19
  )
20
 
 
30
  return pd.DataFrame()
31
 
32
  table_name = tables[0][0]
33
+
34
  with engine.connect() as con:
35
  result = con.execute(text(f"SELECT * FROM {table_name}"))
36
  rows = result.fetchall()
 
42
  return pd.DataFrame({"Error": [str(e)]})
43
 
44
  def process_txt_file(file_path):
45
+ """Analyze text file and convert to structured table with enhanced error handling"""
46
  try:
47
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
48
  content = f.read()
49
 
50
+ # Structure detection with strict CSV formatting rules
51
  structure_prompt = f"""
52
+ Convert this text into valid CSV format:
53
  {content}
54
 
55
+ Requirements:
56
+ 1. First row must be headers
57
+ 2. Consistent columns per row
58
+ 3. Quote fields with commas
59
+ 4. Maintain original data types
60
+ 5. Handle missing values as 'N/A'
61
+
62
+ Return ONLY the CSV content.
63
  """
64
  csv_output = agent.run(structure_prompt)
65
 
66
+ # Flexible CSV parsing
67
+ try:
68
+ df = pd.read_csv(
69
+ StringIO(csv_output),
70
+ on_bad_lines='warn',
71
+ quotechar='"',
72
+ encoding_errors='ignore',
73
+ dtype=str
74
+ ).dropna(how='all')
75
+ except pd.errors.ParserError as pe:
76
+ return False, f"CSV Parsing Error: {str(pe)}", pd.DataFrame()
77
+
78
+ if df.empty or len(df.columns) < 1:
79
+ return False, "No structured data found", pd.DataFrame()
80
+
81
+ # Data validation and correction
82
+ correction_prompt = f"""
83
+ Clean and validate this data:
84
  {df.head().to_csv()}
85
 
86
+ Fix:
87
+ 1. Column consistency
88
+ 2. Data type mismatches
89
+ 3. Formatting errors
90
+ 4. Missing values
91
+
92
+ Return corrected CSV.
93
  """
94
+ corrected_csv = agent.run(correction_prompt)
 
95
 
96
+ try:
97
+ df = pd.read_csv(
98
+ StringIO(corrected_csv),
99
+ keep_default_na=False,
100
+ dtype=str
101
+ )
102
+ except Exception as e:
103
+ return False, f"Validation failed: {str(e)}", pd.DataFrame()
104
+
105
+ # Database operations
106
  clear_database()
107
  table = create_dynamic_table(df)
108
  insert_rows_into_table(df.to_dict('records'), table)
 
110
  return True, "Text analyzed successfully!", df
111
 
112
  except Exception as e:
113
+ return False, f"Processing error: {str(e)}", pd.DataFrame()
114
 
115
  def handle_upload(file_obj):
116
  """Handle file upload and processing"""
 
119
 
120
  success, message, df = process_txt_file(file_obj)
121
  if success:
122
+ column_info = {col: {'type': 'text'} for col in df.columns}
123
+ schema = "\n".join([f"- {col} ({'text'})" for col in df.columns])
124
  return (
125
  message,
126
+ df.head(10),
127
  f"### Detected Schema:\n```\n{schema}\n```",
128
  gr.update(visible=False),
129
  gr.update(visible=True)
 
131
  return message, None, "No schema", gr.update(visible=True), gr.update(visible=False)
132
 
133
def query_analysis(user_query: str) -> str:
    """Answer a natural-language question about the currently loaded dataset.

    Sends a CSV preview of the stored table plus the user's question to the
    LLM agent and returns its Markdown-formatted answer. Returns a plain
    message when no data has been uploaded yet, or an error string if the
    lookup/agent call fails.
    """
    try:
        data = get_data_table()
        if data.empty:
            return "Please upload and process a file first."

        # Only the first few rows are sent as context for the model.
        preview = data.head().to_csv()
        prompt = f"""
        Analyze this data:
        {preview}

        Question: {user_query}

        Response requirements:
        1. Direct answer first
        2. Numerical formatting (e.g., 1,000)
        3. Data references
        4. Error checking

        Return in Markdown format.
        """
        return agent.run(prompt)

    except Exception as e:
        return f"Query error: {str(e)}"
159
 
160
+ # Gradio interface setup
161
  with gr.Blocks() as demo:
162
  with gr.Group() as upload_group:
163
  gr.Markdown("""
164
  # Text Data Analyzer
165
+ Upload text documents containing structured information:
166
+ - Reports - Logs - Research data - Meeting notes
 
 
 
167
  """)
168
+ file_input = gr.File(
169
+ label="Upload Text File (.txt)",
170
+ file_types=[".txt"],
171
+ type="filepath"
172
+ )
173
  status = gr.Textbox(label="Processing Status", interactive=False)
174
 
175
  with gr.Group(visible=False) as query_group:
 
179
  query_output = gr.Markdown(label="Analysis Results")
180
  with gr.Column(scale=2):
181
  gr.Markdown("### Extracted Data Preview")
182
+ data_table = gr.Dataframe(
183
+ max_rows=10,
184
+ wrap=True,
185
+ interactive=False
186
+ )
187
  schema_display = gr.Markdown()
188
+ refresh_btn = gr.Button("Refresh Data View")
189
 
190
  # Event handlers
191
  file_input.upload(
 
201
  )
202
 
203
  refresh_btn.click(
204
+ fn=lambda: (get_data_table().head(10), "Schema refreshed"),
205
  outputs=[data_table, schema_display]
206
  )
207
 
208
  if __name__ == "__main__":
209
+ demo.launch(
210
+ server_name="0.0.0.0",
211
+ server_port=7860,
212
+ show_error=True
213
+ )