Spaces:
Runtime error
Runtime error
Commit
·
c8ec340
1
Parent(s):
6277da1
update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,10 @@
|
|
2 |
import gradio as gr
|
3 |
import pandas as pd # Import pandas
|
4 |
from ocr_request import ocr_request
|
5 |
-
import
|
|
|
|
|
|
|
6 |
|
7 |
def process_file(files):
|
8 |
response_arr = []
|
@@ -14,28 +17,52 @@ def process_file(files):
|
|
14 |
print("Main file :", response_arr)
|
15 |
|
16 |
#i= [[{'invoice_number': '349136', 'product_description': '1ST FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '3495565136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}],[{'invoice_number': '349136', 'product_description': ' FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}]]
|
17 |
-
flat_list = []
|
18 |
|
19 |
-
for item in response_arr:
|
20 |
-
|
21 |
|
22 |
-
|
23 |
-
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
print("Df final : ", df)
|
41 |
# Save the dataframe to a CSV in-memory
|
@@ -54,6 +81,6 @@ interface = gr.Interface(fn=process_file,
|
|
54 |
inputs=gr.inputs.File(label="Upload a File", file_count='multiple'),
|
55 |
outputs=["dataframe",gr.outputs.File(label="Download CSV")]) # Specify "dataframe" as output type
|
56 |
|
57 |
-
interface.launch()
|
58 |
|
59 |
|
|
|
2 |
import gradio as gr
|
3 |
import pandas as pd # Import pandas
|
4 |
from ocr_request import ocr_request
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
import openai
|
8 |
+
import json
|
9 |
|
10 |
def process_file(files):
|
11 |
response_arr = []
|
|
|
17 |
print("Main file :", response_arr)
|
18 |
|
19 |
#i= [[{'invoice_number': '349136', 'product_description': '1ST FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '3495565136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}],[{'invoice_number': '349136', 'product_description': ' FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}]]
|
20 |
+
# flat_list = []
|
21 |
|
22 |
+
# for item in response_arr:
|
23 |
+
# invoice_number = item['invoice_number']
|
24 |
|
25 |
+
# # Extracting product descriptions
|
26 |
+
# products = item.get('predictions', []) or item.get('product_description', [])
|
27 |
|
28 |
+
# for product in products:
|
29 |
+
# # Rename 'description' key to 'product_description' for uniformity across all products
|
30 |
+
# product_description = product.get('product_description', product.get('description'))
|
31 |
+
# predicted_material = product['predicted_material']
|
32 |
+
# confidence = product['confidence']
|
33 |
|
34 |
+
# flat_list.append({
|
35 |
+
# 'invoice_number': invoice_number,
|
36 |
+
# 'product_description': product_description,
|
37 |
+
# 'predicted_material': predicted_material,
|
38 |
+
# 'confidence': confidence
|
39 |
+
# })
|
40 |
|
41 |
+
load_dotenv()
|
42 |
+
# Initialize OpenAI with your API key
|
43 |
+
openai.api_key = os.getenv("OPENAI_API_KEY")
|
44 |
+
|
45 |
+
prompt =f"""
|
46 |
+
you are an excellent programmer and an anlyst. Given a json array or a json, you need to analyse it and convert into a json format which can be converted in dataframe of pandas easily. You have a singular task :
|
47 |
+
Once you have thought through, produce a json, easily convertible to a dataframe in python, which would contain invoice number, product description, predicted material, confidence. Remember: You just have to share the o/p json, no thought process or anything else.
|
48 |
+
|
49 |
+
Here is the json array/json : {json.dumps(response_arr)}
|
50 |
+
"""
|
51 |
+
messages=[{"role": "user", "content":prompt}]
|
52 |
+
# Use OpenAI to generate a chat completion with GPT-4 (change the `model` argument below to use a different model ID)
|
53 |
+
response = openai.ChatCompletion.create(
|
54 |
+
model="gpt-4",
|
55 |
+
max_tokens=5000,
|
56 |
+
temperature=0,
|
57 |
+
messages = messages
|
58 |
+
)
|
59 |
+
# Extracting the result
|
60 |
+
result = response.choices[0]["message"]["content"]
|
61 |
+
print("After in min gpt")
|
62 |
+
print(json.loads(result))
|
63 |
+
|
64 |
+
df = pd.DataFrame(json.loads(result))
|
65 |
+
# df = pd.DataFrame(flat_list)
|
66 |
|
67 |
print("Df final : ", df)
|
68 |
# Save the dataframe to a CSV in-memory
|
|
|
81 |
inputs=gr.inputs.File(label="Upload a File", file_count='multiple'),
|
82 |
outputs=["dataframe",gr.outputs.File(label="Download CSV")]) # Specify "dataframe" as output type
|
83 |
|
84 |
+
interface.launch(share=True)
|
85 |
|
86 |
|