Spaces:
Runtime error
Runtime error
# app.py | |
import gradio as gr | |
import pandas as pd # Import pandas | |
from ocr_request import ocr_request | |
import os | |
from dotenv import load_dotenv | |
import openai | |
import json | |
def _parse_json_reply(reply):
    """Decode the model's reply text as JSON, tolerating a Markdown fence.

    The prompt asks for bare JSON, but the reply is still free text. If it is
    wrapped in a ``` fence (optionally tagged, e.g. ```json), the fence is
    removed before decoding.

    Raises:
        json.JSONDecodeError: if the remaining text is not valid JSON.
    """
    text = reply.strip()
    if text.startswith("```"):
        # Drop the opening fence line (it may carry a language tag) and
        # everything from the closing fence onward.
        _, _, text = text.partition("\n")
        text = text.rsplit("```", 1)[0]
    return json.loads(text)


def _to_dataframe(parsed):
    """Build a DataFrame from decoded JSON, accepting a single flat object."""
    if (
        isinstance(parsed, dict)
        and parsed
        and not any(isinstance(value, (list, dict)) for value in parsed.values())
    ):
        # pandas cannot infer an index from a dict made only of scalars and
        # raises ValueError, so treat a lone flat object as a one-row table.
        parsed = [parsed]
    return pd.DataFrame(parsed)


def process_file(files):
    """Run OCR on the uploaded files and tabulate the result via GPT-4.

    Each upload is passed to ``ocr_request``; the collected responses are
    embedded in a prompt asking GPT-4 for a flat JSON structure, which is then
    loaded into a pandas DataFrame and also saved as ``categories.csv``.

    Args:
        files: The uploads handed over by Gradio. Each item may be an object
            exposing its path as ``.name`` or a plain path string.

    Returns:
        A ``(DataFrame, csv_path)`` pair: the table to display and the path of
        the CSV file offered for download.

    Raises:
        ValueError: if no files were uploaded.
        json.JSONDecodeError: if the model's reply is not valid JSON.
    """
    if not files:
        # Fail fast instead of spending an API call on an empty payload (or
        # crashing with an opaque TypeError when Gradio passes None).
        raise ValueError("No files were uploaded.")

    # Send every uploaded file to the function from ocr_request.py. Fall back
    # to the item itself when it is already a path string.
    response_arr = [ocr_request(getattr(file, "name", file)) for file in files]
    print("Main file :", response_arr)

    load_dotenv()
    # Initialize OpenAI with the API key from the environment / .env file.
    openai.api_key = os.getenv("OPENAI_API_KEY")

    prompt = f"""
you are an excellent programmer and an anlyst. Given a json array or a json, you need to analyse it and convert into a json format which can be converted in dataframe of pandas easily.
You have a singular task :
Once you have thought through, produce a json, easily convertible to a dataframe in python, which would contain invoice number, product description, predicted material, confidence.
Remember:You just have to share the output json, NO thought process or extra words or anything else.
If it is a nested structure, flatten it. ONLY JSON should be in the output, not json within a list.
Here is the json array/json : {json.dumps(response_arr)}
"""
    messages = [{"role": "user", "content": prompt}]

    # NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 interface of the
    # openai package; confirm the pinned version before upgrading.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        max_tokens=5000,
        temperature=0,
        messages=messages,
    )

    # Extract the reply text and decode it exactly once.
    result = response.choices[0]["message"]["content"]
    print(result)
    print("After in min gpt")
    parsed = _parse_json_reply(result)
    print(parsed)

    df = _to_dataframe(parsed)
    print("Df final : ", df)

    # Let pandas write the file itself so newline and encoding handling are
    # correct on every platform (no string round-trip through a text handle).
    csv_filename = "categories.csv"
    df.to_csv(csv_filename, index=False)

    return df, csv_filename  # Gradio shows the table and offers the CSV
# Web UI: accept one or more uploaded files, show the extracted table, and
# offer the generated CSV for download. The unified ``gr.File`` component is
# used for both input and output because the ``gr.inputs`` / ``gr.outputs``
# namespaces are deprecated and absent from newer Gradio releases.
interface = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload a File", file_count="multiple"),
    outputs=["dataframe", gr.File(label="Download CSV")],  # "dataframe" renders the table
)
interface.launch(share=True)