TXTAgent

Sleeping

App Files Files Community

Quazim0t0 committed on 19 days ago

Commit

e1e2089

verified ·

1 Parent(s): 8550c12

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -46

app.py CHANGED Viewed

@@ -12,9 +12,9 @@ from database import (
     get_table_schema
 )
-# Initialize the AI agent
 agent = CodeAgent(
-    tools=[],  # Required parameter even if empty
     model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
 )
@@ -30,7 +30,7 @@ def get_data_table():
             return pd.DataFrame()
         table_name = tables[0][0]
         with engine.connect() as con:
             result = con.execute(text(f"SELECT * FROM {table_name}"))
             rows = result.fetchall()
@@ -42,35 +42,67 @@ def get_data_table():
         return pd.DataFrame({"Error": [str(e)]})
 def process_txt_file(file_path):
-    """Analyze text file and convert to structured table"""
     try:
-        with open(file_path, 'r') as f:
             content = f.read()
-        # First pass: Structure detection
         structure_prompt = f"""
-        Analyze this text and convert it into a structured table format:
         {content}
-        Return ONLY valid CSV format with appropriate headers.
-        Maintain original data types and relationships.
         """
         csv_output = agent.run(structure_prompt)
-        # Convert to DataFrame
-        df = pd.read_csv(StringIO(csv_output))
-        # Second pass: Data validation
-        validation_prompt = f"""
-        Validate this structured data:
         {df.head().to_csv()}
-        Fix any formatting issues and return corrected CSV.
         """
-        corrected_csv = agent.run(validation_prompt)
-        df = pd.read_csv(StringIO(corrected_csv))
-        # Clear existing data and create new table
         clear_database()
         table = create_dynamic_table(df)
         insert_rows_into_table(df.to_dict('records'), table)
@@ -78,7 +110,7 @@ def process_txt_file(file_path):
         return True, "Text analyzed successfully!", df
     except Exception as e:
-        return False, f"Error: {str(e)}", pd.DataFrame()
 def handle_upload(file_obj):
     """Handle file upload and processing"""
@@ -87,11 +119,11 @@ def handle_upload(file_obj):
     success, message, df = process_txt_file(file_obj)
     if success:
-        column_info = {col: {'type': str(df[col].dtype)} for col in df.columns}
-        schema = "\n".join([f"- {col} ({info['type']})" for col, info in column_info.items()])
         return (
             message,
-            df,
             f"### Detected Schema:\n```\n{schema}\n```",
             gr.update(visible=False),
             gr.update(visible=True)
@@ -99,47 +131,45 @@ def handle_upload(file_obj):
     return message, None, "No schema", gr.update(visible=True), gr.update(visible=False)
 def query_analysis(user_query: str) -> str:
-    """Handle natural language queries about the data"""
     try:
         df = get_data_table()
         if df.empty:
-            return "No data available. Upload a text file first."
         analysis_prompt = f"""
-        Analyze this dataset:
         {df.head().to_csv()}
         Question: {user_query}
-        Provide a detailed answer considering:
-        - Data patterns and relationships
-        - Statistical measures where applicable
-        - Clear numerical formatting
-        - Natural language explanations
-        Structure your response with:
         1. Direct answer first
-        2. Supporting analysis
         3. Data references
         """
         return agent.run(analysis_prompt)
     except Exception as e:
-        return f"Analysis error: {str(e)}"
-# Create Gradio interface
 with gr.Blocks() as demo:
     with gr.Group() as upload_group:
         gr.Markdown("""
         # Text Data Analyzer
-        Upload any text document containing structured information:
-        - Reports
-        - Log files
-        - Research data
-        - Meeting notes with tabular content
         """)
-        file_input = gr.File(label="Upload Text File", file_types=[".txt"], type="filepath")
         status = gr.Textbox(label="Processing Status", interactive=False)
     with gr.Group(visible=False) as query_group:
@@ -149,9 +179,13 @@ with gr.Blocks() as demo:
                 query_output = gr.Markdown(label="Analysis Results")
             with gr.Column(scale=2):
                 gr.Markdown("### Extracted Data Preview")
-                data_table = gr.Dataframe(interactive=False)
         schema_display = gr.Markdown()
-        refresh_btn = gr.Button("Refresh View")
     # Event handlers
     file_input.upload(
@@ -167,9 +201,13 @@ with gr.Blocks() as demo:
     )
     refresh_btn.click(
-        fn=lambda: (get_data_table(), "Schema refreshed"),
         outputs=[data_table, schema_display]
     )
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

     get_table_schema
 )
+# Initialize the AI agent with empty tools list
 agent = CodeAgent(
+    tools=[],
     model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
 )
             return pd.DataFrame()
         table_name = tables[0][0]
         with engine.connect() as con:
             result = con.execute(text(f"SELECT * FROM {table_name}"))
             rows = result.fetchall()
         return pd.DataFrame({"Error": [str(e)]})
 def process_txt_file(file_path):
+    """Analyze text file and convert to structured table with enhanced error handling"""
     try:
+        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
             content = f.read()
+        # Structure detection with strict CSV formatting rules
         structure_prompt = f"""
+        Convert this text into valid CSV format:
         {content}
+        Requirements:
+        1. First row must be headers
+        2. Consistent columns per row
+        3. Quote fields with commas
+        4. Maintain original data types
+        5. Handle missing values as 'N/A'
+        Return ONLY the CSV content.
         """
         csv_output = agent.run(structure_prompt)
+        # Flexible CSV parsing
+        try:
+            df = pd.read_csv(
+                StringIO(csv_output),
+                on_bad_lines='warn',
+                quotechar='"',
+                encoding_errors='ignore',
+                dtype=str
+            ).dropna(how='all')
+        except pd.errors.ParserError as pe:
+            return False, f"CSV Parsing Error: {str(pe)}", pd.DataFrame()
+        if df.empty or len(df.columns) < 1:
+            return False, "No structured data found", pd.DataFrame()
+        # Data validation and correction
+        correction_prompt = f"""
+        Clean and validate this data:
         {df.head().to_csv()}
+        Fix:
+        1. Column consistency
+        2. Data type mismatches
+        3. Formatting errors
+        4. Missing values
+        Return corrected CSV.
         """
+        corrected_csv = agent.run(correction_prompt)
+        try:
+            df = pd.read_csv(
+                StringIO(corrected_csv),
+                keep_default_na=False,
+                dtype=str
+            )
+        except Exception as e:
+            return False, f"Validation failed: {str(e)}", pd.DataFrame()
+        # Database operations
         clear_database()
         table = create_dynamic_table(df)
         insert_rows_into_table(df.to_dict('records'), table)
         return True, "Text analyzed successfully!", df
     except Exception as e:
+        return False, f"Processing error: {str(e)}", pd.DataFrame()
 def handle_upload(file_obj):
     """Handle file upload and processing"""
     success, message, df = process_txt_file(file_obj)
     if success:
+        column_info = {col: {'type': 'text'} for col in df.columns}
+        schema = "\n".join([f"- {col} ({'text'})" for col in df.columns])
         return (
             message,
+            df.head(10),
             f"### Detected Schema:\n```\n{schema}\n```",
             gr.update(visible=False),
             gr.update(visible=True)
     return message, None, "No schema", gr.update(visible=True), gr.update(visible=False)
 def query_analysis(user_query: str) -> str:
+    """Handle natural language queries with data context"""
     try:
         df = get_data_table()
         if df.empty:
+            return "Please upload and process a file first."
         analysis_prompt = f"""
+        Analyze this data:
         {df.head().to_csv()}
         Question: {user_query}
+        Response requirements:
         1. Direct answer first
+        2. Numerical formatting (e.g., 1,000)
         3. Data references
+        4. Error checking
+        Return in Markdown format.
         """
         return agent.run(analysis_prompt)
     except Exception as e:
+        return f"Query error: {str(e)}"
+# Gradio interface setup
 with gr.Blocks() as demo:
     with gr.Group() as upload_group:
         gr.Markdown("""
         # Text Data Analyzer
+        Upload text documents containing structured information:
+        - Reports - Logs - Research data - Meeting notes
         """)
+        file_input = gr.File(
+            label="Upload Text File (.txt)",
+            file_types=[".txt"],
+            type="filepath"
+        )
         status = gr.Textbox(label="Processing Status", interactive=False)
     with gr.Group(visible=False) as query_group:
                 query_output = gr.Markdown(label="Analysis Results")
             with gr.Column(scale=2):
                 gr.Markdown("### Extracted Data Preview")
+                data_table = gr.Dataframe(
+                    max_rows=10,
+                    wrap=True,
+                    interactive=False
+                )
         schema_display = gr.Markdown()
+        refresh_btn = gr.Button("Refresh Data View")
     # Event handlers
     file_input.upload(
     )
     refresh_btn.click(
+        fn=lambda: (get_data_table().head(10), "Schema refreshed"),
         outputs=[data_table, schema_display]
     )
 if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True
+    )