Update app.py
Browse files
app.py
CHANGED
@@ -12,9 +12,9 @@ from database import (
|
|
12 |
get_table_schema
|
13 |
)
|
14 |
|
15 |
-
# Initialize the AI agent
|
16 |
agent = CodeAgent(
|
17 |
-
tools=[],
|
18 |
model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
|
19 |
)
|
20 |
|
@@ -30,7 +30,7 @@ def get_data_table():
|
|
30 |
return pd.DataFrame()
|
31 |
|
32 |
table_name = tables[0][0]
|
33 |
-
|
34 |
with engine.connect() as con:
|
35 |
result = con.execute(text(f"SELECT * FROM {table_name}"))
|
36 |
rows = result.fetchall()
|
@@ -42,35 +42,67 @@ def get_data_table():
|
|
42 |
return pd.DataFrame({"Error": [str(e)]})
|
43 |
|
44 |
def process_txt_file(file_path):
|
45 |
-
"""Analyze text file and convert to structured table"""
|
46 |
try:
|
47 |
-
with open(file_path, 'r') as f:
|
48 |
content = f.read()
|
49 |
|
50 |
-
#
|
51 |
structure_prompt = f"""
|
52 |
-
|
53 |
{content}
|
54 |
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
"""
|
58 |
csv_output = agent.run(structure_prompt)
|
59 |
|
60 |
-
#
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
{df.head().to_csv()}
|
67 |
|
68 |
-
Fix
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
"""
|
70 |
-
corrected_csv = agent.run(
|
71 |
-
df = pd.read_csv(StringIO(corrected_csv))
|
72 |
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
clear_database()
|
75 |
table = create_dynamic_table(df)
|
76 |
insert_rows_into_table(df.to_dict('records'), table)
|
@@ -78,7 +110,7 @@ def process_txt_file(file_path):
|
|
78 |
return True, "Text analyzed successfully!", df
|
79 |
|
80 |
except Exception as e:
|
81 |
-
return False, f"
|
82 |
|
83 |
def handle_upload(file_obj):
|
84 |
"""Handle file upload and processing"""
|
@@ -87,11 +119,11 @@ def handle_upload(file_obj):
|
|
87 |
|
88 |
success, message, df = process_txt_file(file_obj)
|
89 |
if success:
|
90 |
-
column_info = {col: {'type':
|
91 |
-
schema = "\n".join([f"- {col} ({
|
92 |
return (
|
93 |
message,
|
94 |
-
df,
|
95 |
f"### Detected Schema:\n```\n{schema}\n```",
|
96 |
gr.update(visible=False),
|
97 |
gr.update(visible=True)
|
@@ -99,47 +131,45 @@ def handle_upload(file_obj):
|
|
99 |
return message, None, "No schema", gr.update(visible=True), gr.update(visible=False)
|
100 |
|
101 |
def query_analysis(user_query: str) -> str:
|
102 |
-
"""Handle natural language queries
|
103 |
try:
|
104 |
df = get_data_table()
|
105 |
if df.empty:
|
106 |
-
return "
|
107 |
|
108 |
analysis_prompt = f"""
|
109 |
-
Analyze this
|
110 |
{df.head().to_csv()}
|
111 |
|
112 |
Question: {user_query}
|
113 |
|
114 |
-
|
115 |
-
- Data patterns and relationships
|
116 |
-
- Statistical measures where applicable
|
117 |
-
- Clear numerical formatting
|
118 |
-
- Natural language explanations
|
119 |
-
|
120 |
-
Structure your response with:
|
121 |
1. Direct answer first
|
122 |
-
2.
|
123 |
3. Data references
|
|
|
|
|
|
|
124 |
"""
|
125 |
|
126 |
return agent.run(analysis_prompt)
|
127 |
|
128 |
except Exception as e:
|
129 |
-
return f"
|
130 |
|
131 |
-
#
|
132 |
with gr.Blocks() as demo:
|
133 |
with gr.Group() as upload_group:
|
134 |
gr.Markdown("""
|
135 |
# Text Data Analyzer
|
136 |
-
Upload
|
137 |
-
- Reports
|
138 |
-
- Log files
|
139 |
-
- Research data
|
140 |
-
- Meeting notes with tabular content
|
141 |
""")
|
142 |
-
file_input = gr.File(
|
|
|
|
|
|
|
|
|
143 |
status = gr.Textbox(label="Processing Status", interactive=False)
|
144 |
|
145 |
with gr.Group(visible=False) as query_group:
|
@@ -149,9 +179,13 @@ with gr.Blocks() as demo:
|
|
149 |
query_output = gr.Markdown(label="Analysis Results")
|
150 |
with gr.Column(scale=2):
|
151 |
gr.Markdown("### Extracted Data Preview")
|
152 |
-
data_table = gr.Dataframe(
|
|
|
|
|
|
|
|
|
153 |
schema_display = gr.Markdown()
|
154 |
-
refresh_btn = gr.Button("Refresh View")
|
155 |
|
156 |
# Event handlers
|
157 |
file_input.upload(
|
@@ -167,9 +201,13 @@ with gr.Blocks() as demo:
|
|
167 |
)
|
168 |
|
169 |
refresh_btn.click(
|
170 |
-
fn=lambda: (get_data_table(), "Schema refreshed"),
|
171 |
outputs=[data_table, schema_display]
|
172 |
)
|
173 |
|
174 |
if __name__ == "__main__":
|
175 |
-
demo.launch(
|
|
|
|
|
|
|
|
|
|
12 |
get_table_schema
|
13 |
)
|
14 |
|
15 |
+
# Shared agent instance: every agent.run(...) call in this module goes through it.
# It is created without any extra tools and is backed by the Qwen2.5 Coder
# 32B Instruct model.
agent = CodeAgent(
    model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
    tools=[],
)
|
20 |
|
|
|
30 |
return pd.DataFrame()
|
31 |
|
32 |
table_name = tables[0][0]
|
33 |
+
|
34 |
with engine.connect() as con:
|
35 |
result = con.execute(text(f"SELECT * FROM {table_name}"))
|
36 |
rows = result.fetchall()
|
|
|
42 |
return pd.DataFrame({"Error": [str(e)]})
|
43 |
|
44 |
def process_txt_file(file_path):
|
45 |
+
"""Analyze text file and convert to structured table with enhanced error handling"""
|
46 |
try:
|
47 |
+
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
48 |
content = f.read()
|
49 |
|
50 |
+
# Structure detection with strict CSV formatting rules
|
51 |
structure_prompt = f"""
|
52 |
+
Convert this text into valid CSV format:
|
53 |
{content}
|
54 |
|
55 |
+
Requirements:
|
56 |
+
1. First row must be headers
|
57 |
+
2. Consistent columns per row
|
58 |
+
3. Quote fields with commas
|
59 |
+
4. Maintain original data types
|
60 |
+
5. Handle missing values as 'N/A'
|
61 |
+
|
62 |
+
Return ONLY the CSV content.
|
63 |
"""
|
64 |
csv_output = agent.run(structure_prompt)
|
65 |
|
66 |
+
# Flexible CSV parsing
|
67 |
+
try:
|
68 |
+
df = pd.read_csv(
|
69 |
+
StringIO(csv_output),
|
70 |
+
on_bad_lines='warn',
|
71 |
+
quotechar='"',
|
72 |
+
encoding_errors='ignore',
|
73 |
+
dtype=str
|
74 |
+
).dropna(how='all')
|
75 |
+
except pd.errors.ParserError as pe:
|
76 |
+
return False, f"CSV Parsing Error: {str(pe)}", pd.DataFrame()
|
77 |
+
|
78 |
+
if df.empty or len(df.columns) < 1:
|
79 |
+
return False, "No structured data found", pd.DataFrame()
|
80 |
+
|
81 |
+
# Data validation and correction
|
82 |
+
correction_prompt = f"""
|
83 |
+
Clean and validate this data:
|
84 |
{df.head().to_csv()}
|
85 |
|
86 |
+
Fix:
|
87 |
+
1. Column consistency
|
88 |
+
2. Data type mismatches
|
89 |
+
3. Formatting errors
|
90 |
+
4. Missing values
|
91 |
+
|
92 |
+
Return corrected CSV.
|
93 |
"""
|
94 |
+
corrected_csv = agent.run(correction_prompt)
|
|
|
95 |
|
96 |
+
try:
|
97 |
+
df = pd.read_csv(
|
98 |
+
StringIO(corrected_csv),
|
99 |
+
keep_default_na=False,
|
100 |
+
dtype=str
|
101 |
+
)
|
102 |
+
except Exception as e:
|
103 |
+
return False, f"Validation failed: {str(e)}", pd.DataFrame()
|
104 |
+
|
105 |
+
# Database operations
|
106 |
clear_database()
|
107 |
table = create_dynamic_table(df)
|
108 |
insert_rows_into_table(df.to_dict('records'), table)
|
|
|
110 |
return True, "Text analyzed successfully!", df
|
111 |
|
112 |
except Exception as e:
|
113 |
+
return False, f"Processing error: {str(e)}", pd.DataFrame()
|
114 |
|
115 |
def handle_upload(file_obj):
|
116 |
"""Handle file upload and processing"""
|
|
|
119 |
|
120 |
success, message, df = process_txt_file(file_obj)
|
121 |
if success:
|
122 |
+
column_info = {col: {'type': 'text'} for col in df.columns}
|
123 |
+
schema = "\n".join([f"- {col} ({'text'})" for col in df.columns])
|
124 |
return (
|
125 |
message,
|
126 |
+
df.head(10),
|
127 |
f"### Detected Schema:\n```\n{schema}\n```",
|
128 |
gr.update(visible=False),
|
129 |
gr.update(visible=True)
|
|
|
131 |
return message, None, "No schema", gr.update(visible=True), gr.update(visible=False)
|
132 |
|
133 |
def query_analysis(user_query: str) -> str:
    """Answer a natural-language question about the currently stored table.

    Loads the table through ``get_data_table()``, embeds a CSV rendering of
    its first rows (``df.head()``) together with the question in a prompt,
    and returns the agent's reply.

    Args:
        user_query: The question entered by the user. ``None``, empty, or
            whitespace-only input is rejected before any database or model
            call is made.

    Returns:
        A string suitable for display: the agent's answer, a hint when the
        question is blank or no data has been loaded, or a
        ``"Query error: ..."`` message if anything raised along the way.
    """
    # Guard first: a blank question would otherwise trigger a table read and
    # a model call for a prompt that cannot produce a useful answer.
    if not user_query or not user_query.strip():
        return "Please enter a question."

    question = user_query.strip()

    try:
        df = get_data_table()
        if df.empty:
            return "Please upload and process a file first."

        # NOTE(review): only df.head() is sent as context, so the agent sees
        # just the first rows of the table, not the full data set.
        analysis_prompt = f"""
        Analyze this data:
        {df.head().to_csv()}

        Question: {question}

        Response requirements:
        1. Direct answer first
        2. Numerical formatting (e.g., 1,000)
        3. Data references
        4. Error checking

        Return in Markdown format.
        """

        # The agent's final answer is not guaranteed to be a str (presumably
        # it can be a number or another object - confirm against the agent
        # library); coerce so the declared ``-> str`` contract always holds.
        return str(agent.run(analysis_prompt))

    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Query error: {str(e)}"
|
159 |
|
160 |
+
# Gradio interface setup
|
161 |
with gr.Blocks() as demo:
|
162 |
with gr.Group() as upload_group:
|
163 |
gr.Markdown("""
|
164 |
# Text Data Analyzer
|
165 |
+
Upload text documents containing structured information:
|
166 |
+
- Reports - Logs - Research data - Meeting notes
|
|
|
|
|
|
|
167 |
""")
|
168 |
+
file_input = gr.File(
|
169 |
+
label="Upload Text File (.txt)",
|
170 |
+
file_types=[".txt"],
|
171 |
+
type="filepath"
|
172 |
+
)
|
173 |
status = gr.Textbox(label="Processing Status", interactive=False)
|
174 |
|
175 |
with gr.Group(visible=False) as query_group:
|
|
|
179 |
query_output = gr.Markdown(label="Analysis Results")
|
180 |
with gr.Column(scale=2):
|
181 |
gr.Markdown("### Extracted Data Preview")
|
182 |
+
data_table = gr.Dataframe(
|
183 |
+
max_rows=10,
|
184 |
+
wrap=True,
|
185 |
+
interactive=False
|
186 |
+
)
|
187 |
schema_display = gr.Markdown()
|
188 |
+
refresh_btn = gr.Button("Refresh Data View")
|
189 |
|
190 |
# Event handlers
|
191 |
file_input.upload(
|
|
|
201 |
)
|
202 |
|
203 |
refresh_btn.click(
|
204 |
+
fn=lambda: (get_data_table().head(10), "Schema refreshed"),
|
205 |
outputs=[data_table, schema_display]
|
206 |
)
|
207 |
|
208 |
if __name__ == "__main__":
    # Script entry point: start the Gradio app bound to 0.0.0.0 on port 7860
    # with show_error enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)
|