# app.py
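"""
Gradio app: upload one or more invoice files, run them through OCR (ocr_request),
have GPT-4 flatten the OCR output into tabular JSON, and return the result as a
pandas DataFrame together with a downloadable CSV.
"""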
import gradio as gr
import pandas as pd
from ocr_request import ocr_request
import os
from dotenv import load_dotenv
import openai
import json

def process_file(files):
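    """
    OCR each uploaded file, ask GPT-4 to normalise the combined output into flat
    JSON records (invoice number, product description, predicted material,
    confidence), and return a pandas DataFrame plus a CSV file for download.
    """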
    response_arr = []
    # Run each uploaded file through the OCR helper in ocr_request.py
    for file in files:
        response = ocr_request(file.name)
        response_arr.append(response)

    print("OCR responses:", response_arr)
 
    load_dotenv()
    # Initialize OpenAI with your API key
    openai.api_key = os.getenv("OPENAI_API_KEY")

    prompt = f"""
    You are an excellent programmer and analyst. Given a JSON object or a JSON array, analyse it and convert it into a JSON format that can easily be loaded into a pandas DataFrame.
    You have a single task:
    Once you have thought it through, produce a JSON, easily convertible to a DataFrame in Python, containing invoice number, product description, predicted material, and confidence.
    Remember: share ONLY the output JSON, with no thought process, extra words, or anything else.
    If it is a nested structure, flatten it. The output must be JSON only, not JSON wrapped inside a list.

    Here is the JSON array/object: {json.dumps(response_arr)}
    """
    messages = [{"role": "user", "content": prompt}]
    # Ask GPT-4 to flatten the OCR output (uses the pre-1.0 openai SDK's ChatCompletion API)
    response = openai.ChatCompletion.create(
        model="gpt-4",
        max_tokens=5000,
        temperature=0,
        messages=messages,
    )
    # Extract the model's reply and parse it as JSON
    result = response["choices"][0]["message"]["content"]
    print("GPT-4 output:", result)

    records = json.loads(result)
    df = pd.DataFrame(records)
    print("Final dataframe:\n", df)
    # Write the dataframe to a CSV file so Gradio can offer it as a download
    csv_filename = "categories.csv"
    df.to_csv(csv_filename, index=False)

    return df, csv_filename  # Gradio renders the dataframe as a table and the CSV as a download link



# Uses the legacy gr.inputs / gr.outputs namespace (removed in newer Gradio releases);
# outputs are a dataframe preview plus a downloadable CSV file.
interface = gr.Interface(fn=process_file,
                         inputs=gr.inputs.File(label="Upload a File", file_count="multiple"),
                         outputs=["dataframe", gr.outputs.File(label="Download CSV")])

interface.launch(share=True)