TXTAgent

Sleeping

App Files Community

Quazim0t0 committed 19 days ago

Commit

04b6f60

verified ·

1 Parent(s): e1e2089

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -51

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ from database import (
     get_table_schema
 )
-# Initialize the AI agent with empty tools list
 agent = CodeAgent(
     tools=[],
     model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
@@ -42,12 +42,12 @@ def get_data_table():
         return pd.DataFrame({"Error": [str(e)]})
 def process_txt_file(file_path):
-    """Analyze text file and convert to structured table with enhanced error handling"""
     try:
         with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
             content = f.read()
-        # Structure detection with strict CSV formatting rules
         structure_prompt = f"""
         Convert this text into valid CSV format:
         {content}
@@ -55,59 +55,33 @@ def process_txt_file(file_path):
         Requirements:
         1. First row must be headers
         2. Consistent columns per row
-        3. Quote fields with commas
-        4. Maintain original data types
-        5. Handle missing values as 'N/A'
         Return ONLY the CSV content.
         """
         csv_output = agent.run(structure_prompt)
-        # Flexible CSV parsing
         try:
             df = pd.read_csv(
                 StringIO(csv_output),
                 on_bad_lines='warn',
-                quotechar='"',
-                encoding_errors='ignore',
-                dtype=str
             ).dropna(how='all')
         except pd.errors.ParserError as pe:
             return False, f"CSV Parsing Error: {str(pe)}", pd.DataFrame()
-        if df.empty or len(df.columns) < 1:
             return False, "No structured data found", pd.DataFrame()
-        # Data validation and correction
-        correction_prompt = f"""
-        Clean and validate this data:
-        {df.head().to_csv()}
-        Fix:
-        1. Column consistency
-        2. Data type mismatches
-        3. Formatting errors
-        4. Missing values
-        Return corrected CSV.
-        """
-        corrected_csv = agent.run(correction_prompt)
-        try:
-            df = pd.read_csv(
-                StringIO(corrected_csv),
-                keep_default_na=False,
-                dtype=str
-            )
-        except Exception as e:
-            return False, f"Validation failed: {str(e)}", pd.DataFrame()
         # Database operations
         clear_database()
         table = create_dynamic_table(df)
         insert_rows_into_table(df.to_dict('records'), table)
-        return True, "Text analyzed successfully!", df
     except Exception as e:
         return False, f"Processing error: {str(e)}", pd.DataFrame()
@@ -120,10 +94,10 @@ def handle_upload(file_obj):
     success, message, df = process_txt_file(file_obj)
     if success:
         column_info = {col: {'type': 'text'} for col in df.columns}
-        schema = "\n".join([f"- {col} ({'text'})" for col in df.columns])
         return (
             message,
-            df.head(10),
             f"### Detected Schema:\n```\n{schema}\n```",
             gr.update(visible=False),
             gr.update(visible=True)
@@ -131,7 +105,7 @@ def handle_upload(file_obj):
     return message, None, "No schema", gr.update(visible=True), gr.update(visible=False)
 def query_analysis(user_query: str) -> str:
-    """Handle natural language queries with data context"""
     try:
         df = get_data_table()
         if df.empty:
@@ -143,13 +117,12 @@ def query_analysis(user_query: str) -> str:
         Question: {user_query}
-        Response requirements:
-        1. Direct answer first
-        2. Numerical formatting (e.g., 1,000)
         3. Data references
-        4. Error checking
-        Return in Markdown format.
         """
         return agent.run(analysis_prompt)
@@ -162,11 +135,10 @@ with gr.Blocks() as demo:
     with gr.Group() as upload_group:
         gr.Markdown("""
         # Text Data Analyzer
-        Upload text documents containing structured information:
-        - Reports - Logs - Research data - Meeting notes
         """)
         file_input = gr.File(
-            label="Upload Text File (.txt)",
             file_types=[".txt"],
             type="filepath"
         )
@@ -180,12 +152,12 @@ with gr.Blocks() as demo:
             with gr.Column(scale=2):
                 gr.Markdown("### Extracted Data Preview")
                 data_table = gr.Dataframe(
-                    max_rows=10,
-                    wrap=True,
-                    interactive=False
                 )
         schema_display = gr.Markdown()
-        refresh_btn = gr.Button("Refresh Data View")
     # Event handlers
     file_input.upload(

     get_table_schema
 )
+# Initialize the AI agent with required parameters
 agent = CodeAgent(
     tools=[],
     model=HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"),
         return pd.DataFrame({"Error": [str(e)]})
 def process_txt_file(file_path):
+    """Analyze text file and convert to structured table"""
     try:
         with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
             content = f.read()
+        # Structure detection with enhanced prompts
         structure_prompt = f"""
         Convert this text into valid CSV format:
         {content}
         Requirements:
         1. First row must be headers
         2. Consistent columns per row
+        3. Quote fields containing commas
+        4. Maintain original data relationships
         Return ONLY the CSV content.
         """
         csv_output = agent.run(structure_prompt)
+        # Robust CSV parsing
         try:
             df = pd.read_csv(
                 StringIO(csv_output),
                 on_bad_lines='warn',
+                dtype=str,
+                encoding_errors='ignore'
             ).dropna(how='all')
         except pd.errors.ParserError as pe:
             return False, f"CSV Parsing Error: {str(pe)}", pd.DataFrame()
+        if df.empty or len(df.columns) == 0:
             return False, "No structured data found", pd.DataFrame()
         # Database operations
         clear_database()
         table = create_dynamic_table(df)
         insert_rows_into_table(df.to_dict('records'), table)
+        return True, "Text analyzed successfully!", df.head(10)
     except Exception as e:
         return False, f"Processing error: {str(e)}", pd.DataFrame()
     success, message, df = process_txt_file(file_obj)
     if success:
         column_info = {col: {'type': 'text'} for col in df.columns}
+        schema = "\n".join([f"- {col} (text)" for col in df.columns])
         return (
             message,
+            df,
             f"### Detected Schema:\n```\n{schema}\n```",
             gr.update(visible=False),
             gr.update(visible=True)
     return message, None, "No schema", gr.update(visible=True), gr.update(visible=False)
 def query_analysis(user_query: str) -> str:
+    """Handle natural language queries about the data"""
     try:
         df = get_data_table()
         if df.empty:
         Question: {user_query}
+        Provide:
+        1. Direct answer
+        2. Numerical formatting
         3. Data references
+        Use Markdown formatting.
         """
         return agent.run(analysis_prompt)
     with gr.Group() as upload_group:
         gr.Markdown("""
         # Text Data Analyzer
+        Upload unstructured text files to analyze and query their data
         """)
         file_input = gr.File(
+            label="Upload Text File",
             file_types=[".txt"],
             type="filepath"
         )
             with gr.Column(scale=2):
                 gr.Markdown("### Extracted Data Preview")
                 data_table = gr.Dataframe(
+                    label="Structured Data",
+                    interactive=False,
+                    height=400
                 )
         schema_display = gr.Markdown()
+        refresh_btn = gr.Button("Refresh View")
     # Event handlers
     file_input.upload(