import os
import gradio as gr
from sqlalchemy import text
from smolagents import CodeAgent, HfApiModel
import pandas as pd
from io import StringIO
import tempfile
from database import (
    engine,
    create_dynamic_table,
    clear_database,
    insert_rows_into_table
)
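
# Note: HfApiModel routes requests through the Hugging Face Inference API, so an
# HF token in the environment is typically needed to use this hosted model.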
# Initialize the AI agent
agent = CodeAgent(
    tools=[],
    model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
)


def get_data_table():
    """Fetch and return the current table data as DataFrame"""
    try:
        with engine.connect() as con:
            tables = con.execute(text(
                "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
            )).fetchall()

        if not tables:
            return pd.DataFrame()

        table_name = tables[0][0]

        with engine.connect() as con:
            result = con.execute(text(f"SELECT * FROM {table_name}"))
            rows = result.fetchall()
            columns = result.keys()

        return pd.DataFrame(rows, columns=columns) if rows else pd.DataFrame()
    except Exception as e:
        return pd.DataFrame({"Error": [str(e)]})


def process_txt_file(file_path):
    """Analyze text file and convert to structured table"""
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()

        structure_prompt = f"""
        Convert this text into valid CSV format:
        {content}

        Requirements:
        1. First row must be headers
        2. Consistent columns per row
        3. Quote fields containing commas
        4. Maintain original data relationships

        Return ONLY the CSV content.
        """

        # Ask the agent to restructure the raw text into CSV
        csv_output = agent.run(structure_prompt)

        try:
            df = pd.read_csv(
                StringIO(csv_output),
                on_bad_lines='warn',
                dtype=str,
                encoding_errors='ignore'
            ).dropna(how='all')
        except pd.errors.ParserError as pe:
            return False, f"CSV Parsing Error: {str(pe)}", pd.DataFrame()

        if df.empty or len(df.columns) == 0:
            return False, "No structured data found", pd.DataFrame()

        # Rebuild the database table from the extracted rows
        clear_database()
        table = create_dynamic_table(df)
        insert_rows_into_table(df.to_dict('records'), table)

        return True, "Text analyzed successfully!", df.head(10)
    except Exception as e:
        return False, f"Processing error: {str(e)}", pd.DataFrame()


def handle_upload(file_obj):
    """Handle file upload and processing"""
    if file_obj is None:
        return [
            "Please upload a text file.",
            None,
            "No schema",
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False)
        ]

    success, message, df = process_txt_file(file_obj)
    if success:
        schema = "\n".join([f"- {col} (text)" for col in df.columns])
        return [
            message,
            df,
            f"### Detected Schema:\n```\n{schema}\n```",
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(visible=True)
        ]

    return [
        message,
        None,
        "No schema",
        gr.update(visible=True),
        gr.update(visible=False),
        gr.update(visible=False)
    ]


def query_analysis(user_query: str) -> str:
    """Handle natural language queries about the data"""
    try:
        df = get_data_table()
        if df.empty:
            return "Please upload and process a file first."

        analysis_prompt = f"""
        Analyze this data:
        {df.head().to_csv()}

        Question: {user_query}

        Provide:
        1. Direct answer
        2. Numerical formatting
        3. Data references

        Use Markdown formatting.
        """

        return agent.run(analysis_prompt)
    except Exception as e:
        return f"Query error: {str(e)}"


def download_csv():
    """Generate CSV file for download"""
    df = get_data_table()
    if not df.empty:
        temp_dir = tempfile.gettempdir()
        file_path = os.path.join(temp_dir, "processed_data.csv")
        df.to_csv(file_path, index=False)
        return file_path
    return None


# Gradio interface setup
with gr.Blocks() as demo:
    with gr.Group() as upload_group:
        gr.Markdown("""
        # Text Data Analyzer
        Upload unstructured text files to analyze and query their data
        """)
        file_input = gr.File(
            label="Upload Text File",
            file_types=[".txt"],
            type="filepath"
        )
        status = gr.Textbox(label="Processing Status", interactive=False)

    with gr.Group(visible=False) as query_group:
        with gr.Row():
            with gr.Column(scale=1):
                with gr.Row():
                    user_input = gr.Textbox(label="Ask about the data", scale=4)
                    submit_btn = gr.Button("Submit", scale=1)
                query_output = gr.Markdown(label="Analysis Results")
            with gr.Column(scale=2):
                gr.Markdown("### Extracted Data Preview")
                data_table = gr.Dataframe(
                    label="Structured Data",
                    interactive=False
                )
                download_btn = gr.DownloadButton(
                    "Download as CSV",
                    visible=False
                )
                schema_display = gr.Markdown()
                refresh_btn = gr.Button("Refresh View")

    # Event handlers
    file_input.upload(
        fn=handle_upload,
        inputs=file_input,
        outputs=[status, data_table, schema_display, upload_group, query_group, download_btn]
    )

    submit_btn.click(
        fn=query_analysis,
        inputs=user_input,
        outputs=query_output
    )

    user_input.submit(
        fn=query_analysis,
        inputs=user_input,
        outputs=query_output
    )

    refresh_btn.click(
        fn=lambda: (get_data_table().head(10), "Schema refreshed"),
        outputs=[data_table, schema_display]
    )

    # Clicking the button runs download_csv and updates the button's
    # downloadable value with the generated file path.
    download_btn.click(
        fn=download_csv,
        outputs=download_btn
    )


if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )