omdivyatej's picture
Update app.py
a7b12e1
raw
history blame
2.41 kB
# app.py
import gradio as gr
import pandas as pd # Import pandas
from ocr_request import ocr_request
import os
from dotenv import load_dotenv
import openai
import json
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown ``` fence, if present."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (it may carry a tag such as "json") ...
        _, _, cleaned = cleaned.partition("\n")
        # ... and everything from the closing fence onwards.
        cleaned = cleaned.rsplit("```", 1)[0]
    return cleaned.strip()


def _json_to_dataframe(parsed):
    """Build a DataFrame from decoded JSON, accepting a single flat record."""
    is_flat_record = (
        isinstance(parsed, dict)
        and bool(parsed)
        and not any(isinstance(value, (list, dict)) for value in parsed.values())
    )
    if is_flat_record:
        # pandas rejects a dict made only of scalars ("you must pass an
        # index"), so treat a lone flat object as a one-row table.
        parsed = [parsed]
    return pd.DataFrame(parsed)


def process_file(files):
    """Run OCR on the uploaded files and tabulate the results via GPT-4.

    Each file is passed to ``ocr_request`` by path, the collected responses
    are embedded in a prompt asking the model for a flat JSON document, and
    the model's reply is loaded into a pandas DataFrame that is also written
    to ``categories.csv`` in the current working directory.

    Args:
        files: Uploaded file objects exposing a ``.name`` path (this function
            is wired to the Gradio file input). ``None`` or an empty list is
            treated as "nothing uploaded".

    Returns:
        A ``(dataframe, csv_path)`` tuple: the parsed table and the path of
        the CSV file written from it.

    Raises:
        json.JSONDecodeError: If the model's reply is not valid JSON, even
            after removing a surrounding Markdown code fence.
    """
    csv_filename = "categories.csv"

    if not files:
        # Nothing to analyse: skip the OCR and the model call entirely, but
        # still hand back a real file so the download output has a target.
        empty_df = pd.DataFrame()
        empty_df.to_csv(csv_filename, index=False)
        return empty_df, csv_filename

    # Send every uploaded file to the helper from ocr_request.py. The
    # responses must be JSON-serialisable because they go through json.dumps.
    response_arr = [ocr_request(uploaded.name) for uploaded in files]
    print("Main file :", response_arr)

    load_dotenv()
    # Initialize OpenAI with the API key from the environment / .env file.
    openai.api_key = os.getenv("OPENAI_API_KEY")

    prompt = f"""
you are an excellent programmer and an anlyst. Given a json array or a json, you need to analyse it and convert into a json format which can be converted in dataframe of pandas easily.
You have a singular task :
Once you have thought through, produce a json, easily convertible to a dataframe in python, which would contain invoice number, product description, predicted material, confidence.
Remember:You just have to share the output json, NO thought process or extra words or anything else.
If it is a nested structure, flatten it. ONLY JSON should be in the output, not json within a list.
Here is the json array/json : {json.dumps(response_arr)}
"""
    messages = [{"role": "user", "content": prompt}]

    # temperature=0 keeps the structured output as repeatable as possible.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        max_tokens=5000,
        temperature=0,
        messages=messages,
    )

    # Extract the reply text of the first (only) choice.
    result = response.choices[0]["message"]["content"]
    print(result)
    print("After in min gpt")

    # Chat models sometimes wrap JSON in a ``` fence despite instructions;
    # strip it, then decode once and reuse the decoded value.
    parsed = json.loads(_strip_code_fence(result))
    print(parsed)

    df = _json_to_dataframe(parsed)
    print("Df final : ", df)

    # Let pandas write the file itself rather than re-writing pre-rendered
    # CSV text through a locale-encoded, newline-translating text handle.
    df.to_csv(csv_filename, index=False)
    return df, csv_filename  # Gradio shows the table and offers the CSV
# Wire process_file into a Gradio UI: several files may be uploaded at once,
# and the two return values are rendered as a table plus a downloadable file.
interface = gr.Interface(
    fn=process_file,
    inputs=gr.inputs.File(label="Upload a File", file_count="multiple"),
    outputs=[
        "dataframe",  # first return value: the parsed table
        gr.outputs.File(label="Download CSV"),  # second: the CSV path
    ],
)

# Start the app; share=True additionally requests a public share link.
interface.launch(share=True)