Quazim0t0 committed on
Commit
04b6f60
·
verified ·
1 Parent(s): e1e2089

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -51
app.py CHANGED
@@ -12,7 +12,7 @@ from database import (
12
  get_table_schema
13
  )
14
 
15
- # Initialize the AI agent with empty tools list
16
  agent = CodeAgent(
17
  tools=[],
18
  model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
@@ -42,12 +42,12 @@ def get_data_table():
42
  return pd.DataFrame({"Error": [str(e)]})
43
 
44
  def process_txt_file(file_path):
45
- """Analyze text file and convert to structured table with enhanced error handling"""
46
  try:
47
  with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
48
  content = f.read()
49
 
50
- # Structure detection with strict CSV formatting rules
51
  structure_prompt = f"""
52
  Convert this text into valid CSV format:
53
  {content}
@@ -55,59 +55,33 @@ def process_txt_file(file_path):
55
  Requirements:
56
  1. First row must be headers
57
  2. Consistent columns per row
58
- 3. Quote fields with commas
59
- 4. Maintain original data types
60
- 5. Handle missing values as 'N/A'
61
 
62
  Return ONLY the CSV content.
63
  """
64
  csv_output = agent.run(structure_prompt)
65
 
66
- # Flexible CSV parsing
67
  try:
68
  df = pd.read_csv(
69
  StringIO(csv_output),
70
  on_bad_lines='warn',
71
- quotechar='"',
72
- encoding_errors='ignore',
73
- dtype=str
74
  ).dropna(how='all')
75
  except pd.errors.ParserError as pe:
76
  return False, f"CSV Parsing Error: {str(pe)}", pd.DataFrame()
77
 
78
- if df.empty or len(df.columns) < 1:
79
  return False, "No structured data found", pd.DataFrame()
80
 
81
- # Data validation and correction
82
- correction_prompt = f"""
83
- Clean and validate this data:
84
- {df.head().to_csv()}
85
-
86
- Fix:
87
- 1. Column consistency
88
- 2. Data type mismatches
89
- 3. Formatting errors
90
- 4. Missing values
91
-
92
- Return corrected CSV.
93
- """
94
- corrected_csv = agent.run(correction_prompt)
95
-
96
- try:
97
- df = pd.read_csv(
98
- StringIO(corrected_csv),
99
- keep_default_na=False,
100
- dtype=str
101
- )
102
- except Exception as e:
103
- return False, f"Validation failed: {str(e)}", pd.DataFrame()
104
-
105
  # Database operations
106
  clear_database()
107
  table = create_dynamic_table(df)
108
  insert_rows_into_table(df.to_dict('records'), table)
109
 
110
- return True, "Text analyzed successfully!", df
111
 
112
  except Exception as e:
113
  return False, f"Processing error: {str(e)}", pd.DataFrame()
@@ -120,10 +94,10 @@ def handle_upload(file_obj):
120
  success, message, df = process_txt_file(file_obj)
121
  if success:
122
  column_info = {col: {'type': 'text'} for col in df.columns}
123
- schema = "\n".join([f"- {col} ({'text'})" for col in df.columns])
124
  return (
125
  message,
126
- df.head(10),
127
  f"### Detected Schema:\n```\n{schema}\n```",
128
  gr.update(visible=False),
129
  gr.update(visible=True)
@@ -131,7 +105,7 @@ def handle_upload(file_obj):
131
  return message, None, "No schema", gr.update(visible=True), gr.update(visible=False)
132
 
133
  def query_analysis(user_query: str) -> str:
134
- """Handle natural language queries with data context"""
135
  try:
136
  df = get_data_table()
137
  if df.empty:
@@ -143,13 +117,12 @@ def query_analysis(user_query: str) -> str:
143
 
144
  Question: {user_query}
145
 
146
- Response requirements:
147
- 1. Direct answer first
148
- 2. Numerical formatting (e.g., 1,000)
149
  3. Data references
150
- 4. Error checking
151
 
152
- Return in Markdown format.
153
  """
154
 
155
  return agent.run(analysis_prompt)
@@ -162,11 +135,10 @@ with gr.Blocks() as demo:
162
  with gr.Group() as upload_group:
163
  gr.Markdown("""
164
  # Text Data Analyzer
165
- Upload text documents containing structured information:
166
- - Reports - Logs - Research data - Meeting notes
167
  """)
168
  file_input = gr.File(
169
- label="Upload Text File (.txt)",
170
  file_types=[".txt"],
171
  type="filepath"
172
  )
@@ -180,12 +152,12 @@ with gr.Blocks() as demo:
180
  with gr.Column(scale=2):
181
  gr.Markdown("### Extracted Data Preview")
182
  data_table = gr.Dataframe(
183
- max_rows=10,
184
- wrap=True,
185
- interactive=False
186
  )
187
  schema_display = gr.Markdown()
188
- refresh_btn = gr.Button("Refresh Data View")
189
 
190
  # Event handlers
191
  file_input.upload(
 
12
  get_table_schema
13
  )
14
 
15
+ # Initialize the AI agent with required parameters
16
  agent = CodeAgent(
17
  tools=[],
18
  model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
 
42
  return pd.DataFrame({"Error": [str(e)]})
43
 
44
  def process_txt_file(file_path):
45
+ """Analyze text file and convert to structured table"""
46
  try:
47
  with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
48
  content = f.read()
49
 
50
+ # Structure detection with enhanced prompts
51
  structure_prompt = f"""
52
  Convert this text into valid CSV format:
53
  {content}
 
55
  Requirements:
56
  1. First row must be headers
57
  2. Consistent columns per row
58
+ 3. Quote fields containing commas
59
+ 4. Maintain original data relationships
 
60
 
61
  Return ONLY the CSV content.
62
  """
63
  csv_output = agent.run(structure_prompt)
64
 
65
+ # Robust CSV parsing
66
  try:
67
  df = pd.read_csv(
68
  StringIO(csv_output),
69
  on_bad_lines='warn',
70
+ dtype=str,
71
+ encoding_errors='ignore'
 
72
  ).dropna(how='all')
73
  except pd.errors.ParserError as pe:
74
  return False, f"CSV Parsing Error: {str(pe)}", pd.DataFrame()
75
 
76
+ if df.empty or len(df.columns) == 0:
77
  return False, "No structured data found", pd.DataFrame()
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  # Database operations
80
  clear_database()
81
  table = create_dynamic_table(df)
82
  insert_rows_into_table(df.to_dict('records'), table)
83
 
84
+ return True, "Text analyzed successfully!", df.head(10)
85
 
86
  except Exception as e:
87
  return False, f"Processing error: {str(e)}", pd.DataFrame()
 
94
  success, message, df = process_txt_file(file_obj)
95
  if success:
96
  column_info = {col: {'type': 'text'} for col in df.columns}
97
+ schema = "\n".join([f"- {col} (text)" for col in df.columns])
98
  return (
99
  message,
100
+ df,
101
  f"### Detected Schema:\n```\n{schema}\n```",
102
  gr.update(visible=False),
103
  gr.update(visible=True)
 
105
  return message, None, "No schema", gr.update(visible=True), gr.update(visible=False)
106
 
107
  def query_analysis(user_query: str) -> str:
108
+ """Handle natural language queries about the data"""
109
  try:
110
  df = get_data_table()
111
  if df.empty:
 
117
 
118
  Question: {user_query}
119
 
120
+ Provide:
121
+ 1. Direct answer
122
+ 2. Numerical formatting
123
  3. Data references
 
124
 
125
+ Use Markdown formatting.
126
  """
127
 
128
  return agent.run(analysis_prompt)
 
135
  with gr.Group() as upload_group:
136
  gr.Markdown("""
137
  # Text Data Analyzer
138
+ Upload unstructured text files to analyze and query their data
 
139
  """)
140
  file_input = gr.File(
141
+ label="Upload Text File",
142
  file_types=[".txt"],
143
  type="filepath"
144
  )
 
152
  with gr.Column(scale=2):
153
  gr.Markdown("### Extracted Data Preview")
154
  data_table = gr.Dataframe(
155
+ label="Structured Data",
156
+ interactive=False,
157
+ height=400
158
  )
159
  schema_display = gr.Markdown()
160
+ refresh_btn = gr.Button("Refresh View")
161
 
162
  # Event handlers
163
  file_input.upload(