Update app.py
Browse files
app.py
CHANGED
@@ -12,7 +12,7 @@ from database import (
|
|
12 |
get_table_schema
|
13 |
)
|
14 |
|
15 |
-
# Initialize the AI agent with
|
16 |
agent = CodeAgent(
|
17 |
tools=[],
|
18 |
model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
|
@@ -42,12 +42,12 @@ def get_data_table():
|
|
42 |
return pd.DataFrame({"Error": [str(e)]})
|
43 |
|
44 |
def process_txt_file(file_path):
|
45 |
-
"""Analyze text file and convert to structured table
|
46 |
try:
|
47 |
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
48 |
content = f.read()
|
49 |
|
50 |
-
# Structure detection with
|
51 |
structure_prompt = f"""
|
52 |
Convert this text into valid CSV format:
|
53 |
{content}
|
@@ -55,59 +55,33 @@ def process_txt_file(file_path):
|
|
55 |
Requirements:
|
56 |
1. First row must be headers
|
57 |
2. Consistent columns per row
|
58 |
-
3. Quote fields
|
59 |
-
4. Maintain original data
|
60 |
-
5. Handle missing values as 'N/A'
|
61 |
|
62 |
Return ONLY the CSV content.
|
63 |
"""
|
64 |
csv_output = agent.run(structure_prompt)
|
65 |
|
66 |
-
#
|
67 |
try:
|
68 |
df = pd.read_csv(
|
69 |
StringIO(csv_output),
|
70 |
on_bad_lines='warn',
|
71 |
-
|
72 |
-
encoding_errors='ignore'
|
73 |
-
dtype=str
|
74 |
).dropna(how='all')
|
75 |
except pd.errors.ParserError as pe:
|
76 |
return False, f"CSV Parsing Error: {str(pe)}", pd.DataFrame()
|
77 |
|
78 |
-
if df.empty or len(df.columns)
|
79 |
return False, "No structured data found", pd.DataFrame()
|
80 |
|
81 |
-
# Data validation and correction
|
82 |
-
correction_prompt = f"""
|
83 |
-
Clean and validate this data:
|
84 |
-
{df.head().to_csv()}
|
85 |
-
|
86 |
-
Fix:
|
87 |
-
1. Column consistency
|
88 |
-
2. Data type mismatches
|
89 |
-
3. Formatting errors
|
90 |
-
4. Missing values
|
91 |
-
|
92 |
-
Return corrected CSV.
|
93 |
-
"""
|
94 |
-
corrected_csv = agent.run(correction_prompt)
|
95 |
-
|
96 |
-
try:
|
97 |
-
df = pd.read_csv(
|
98 |
-
StringIO(corrected_csv),
|
99 |
-
keep_default_na=False,
|
100 |
-
dtype=str
|
101 |
-
)
|
102 |
-
except Exception as e:
|
103 |
-
return False, f"Validation failed: {str(e)}", pd.DataFrame()
|
104 |
-
|
105 |
# Database operations
|
106 |
clear_database()
|
107 |
table = create_dynamic_table(df)
|
108 |
insert_rows_into_table(df.to_dict('records'), table)
|
109 |
|
110 |
-
return True, "Text analyzed successfully!", df
|
111 |
|
112 |
except Exception as e:
|
113 |
return False, f"Processing error: {str(e)}", pd.DataFrame()
|
@@ -120,10 +94,10 @@ def handle_upload(file_obj):
|
|
120 |
success, message, df = process_txt_file(file_obj)
|
121 |
if success:
|
122 |
column_info = {col: {'type': 'text'} for col in df.columns}
|
123 |
-
schema = "\n".join([f"- {col} (
|
124 |
return (
|
125 |
message,
|
126 |
-
df
|
127 |
f"### Detected Schema:\n```\n{schema}\n```",
|
128 |
gr.update(visible=False),
|
129 |
gr.update(visible=True)
|
@@ -131,7 +105,7 @@ def handle_upload(file_obj):
|
|
131 |
return message, None, "No schema", gr.update(visible=True), gr.update(visible=False)
|
132 |
|
133 |
def query_analysis(user_query: str) -> str:
|
134 |
-
"""Handle natural language queries
|
135 |
try:
|
136 |
df = get_data_table()
|
137 |
if df.empty:
|
@@ -143,13 +117,12 @@ def query_analysis(user_query: str) -> str:
|
|
143 |
|
144 |
Question: {user_query}
|
145 |
|
146 |
-
|
147 |
-
1. Direct answer
|
148 |
-
2. Numerical formatting
|
149 |
3. Data references
|
150 |
-
4. Error checking
|
151 |
|
152 |
-
|
153 |
"""
|
154 |
|
155 |
return agent.run(analysis_prompt)
|
@@ -162,11 +135,10 @@ with gr.Blocks() as demo:
|
|
162 |
with gr.Group() as upload_group:
|
163 |
gr.Markdown("""
|
164 |
# Text Data Analyzer
|
165 |
-
Upload text
|
166 |
-
- Reports - Logs - Research data - Meeting notes
|
167 |
""")
|
168 |
file_input = gr.File(
|
169 |
-
label="Upload Text File
|
170 |
file_types=[".txt"],
|
171 |
type="filepath"
|
172 |
)
|
@@ -180,12 +152,12 @@ with gr.Blocks() as demo:
|
|
180 |
with gr.Column(scale=2):
|
181 |
gr.Markdown("### Extracted Data Preview")
|
182 |
data_table = gr.Dataframe(
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
)
|
187 |
schema_display = gr.Markdown()
|
188 |
-
refresh_btn = gr.Button("Refresh
|
189 |
|
190 |
# Event handlers
|
191 |
file_input.upload(
|
|
|
12 |
get_table_schema
|
13 |
)
|
14 |
|
15 |
+
# Initialize the AI agent with required parameters
|
16 |
agent = CodeAgent(
|
17 |
tools=[],
|
18 |
model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
|
|
|
42 |
return pd.DataFrame({"Error": [str(e)]})
|
43 |
|
44 |
def process_txt_file(file_path):
|
45 |
+
"""Analyze text file and convert to structured table"""
|
46 |
try:
|
47 |
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
48 |
content = f.read()
|
49 |
|
50 |
+
# Structure detection with enhanced prompts
|
51 |
structure_prompt = f"""
|
52 |
Convert this text into valid CSV format:
|
53 |
{content}
|
|
|
55 |
Requirements:
|
56 |
1. First row must be headers
|
57 |
2. Consistent columns per row
|
58 |
+
3. Quote fields containing commas
|
59 |
+
4. Maintain original data relationships
|
|
|
60 |
|
61 |
Return ONLY the CSV content.
|
62 |
"""
|
63 |
csv_output = agent.run(structure_prompt)
|
64 |
|
65 |
+
# Robust CSV parsing
|
66 |
try:
|
67 |
df = pd.read_csv(
|
68 |
StringIO(csv_output),
|
69 |
on_bad_lines='warn',
|
70 |
+
dtype=str,
|
71 |
+
encoding_errors='ignore'
|
|
|
72 |
).dropna(how='all')
|
73 |
except pd.errors.ParserError as pe:
|
74 |
return False, f"CSV Parsing Error: {str(pe)}", pd.DataFrame()
|
75 |
|
76 |
+
if df.empty or len(df.columns) == 0:
|
77 |
return False, "No structured data found", pd.DataFrame()
|
78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
# Database operations
|
80 |
clear_database()
|
81 |
table = create_dynamic_table(df)
|
82 |
insert_rows_into_table(df.to_dict('records'), table)
|
83 |
|
84 |
+
return True, "Text analyzed successfully!", df.head(10)
|
85 |
|
86 |
except Exception as e:
|
87 |
return False, f"Processing error: {str(e)}", pd.DataFrame()
|
|
|
94 |
success, message, df = process_txt_file(file_obj)
|
95 |
if success:
|
96 |
column_info = {col: {'type': 'text'} for col in df.columns}
|
97 |
+
schema = "\n".join([f"- {col} (text)" for col in df.columns])
|
98 |
return (
|
99 |
message,
|
100 |
+
df,
|
101 |
f"### Detected Schema:\n```\n{schema}\n```",
|
102 |
gr.update(visible=False),
|
103 |
gr.update(visible=True)
|
|
|
105 |
return message, None, "No schema", gr.update(visible=True), gr.update(visible=False)
|
106 |
|
107 |
def query_analysis(user_query: str) -> str:
|
108 |
+
"""Handle natural language queries about the data"""
|
109 |
try:
|
110 |
df = get_data_table()
|
111 |
if df.empty:
|
|
|
117 |
|
118 |
Question: {user_query}
|
119 |
|
120 |
+
Provide:
|
121 |
+
1. Direct answer
|
122 |
+
2. Numerical formatting
|
123 |
3. Data references
|
|
|
124 |
|
125 |
+
Use Markdown formatting.
|
126 |
"""
|
127 |
|
128 |
return agent.run(analysis_prompt)
|
|
|
135 |
with gr.Group() as upload_group:
|
136 |
gr.Markdown("""
|
137 |
# Text Data Analyzer
|
138 |
+
Upload unstructured text files to analyze and query their data
|
|
|
139 |
""")
|
140 |
file_input = gr.File(
|
141 |
+
label="Upload Text File",
|
142 |
file_types=[".txt"],
|
143 |
type="filepath"
|
144 |
)
|
|
|
152 |
with gr.Column(scale=2):
|
153 |
gr.Markdown("### Extracted Data Preview")
|
154 |
data_table = gr.Dataframe(
|
155 |
+
label="Structured Data",
|
156 |
+
interactive=False,
|
157 |
+
height=400
|
158 |
)
|
159 |
schema_display = gr.Markdown()
|
160 |
+
refresh_btn = gr.Button("Refresh View")
|
161 |
|
162 |
# Event handlers
|
163 |
file_input.upload(
|