Spaces: Runtime error
Commit · 85c096c
Parent(s): 9d15510
requirements

Files changed:
- .env +1 -0
- .gitignore +5 -0
- ai_json.py +35 -0
- app.py +59 -0
- categories.csv +10 -0
- example_pg3.json +274 -0
- ocr_request.py +44 -0
- requirements.txt +0 -0
.env
ADDED
@@ -0,0 +1 @@
OPENAI_API_KEY = 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
.gitignore
ADDED
@@ -0,0 +1,5 @@
venv
flagged
__pycache__
.gitignore
.env
ai_json.py
ADDED
@@ -0,0 +1,35 @@
import openai
import json
from dotenv import load_dotenv
import os


def handle_creating_json(output_1):
    print("Before GPT: ", output_1)
    # output_1 = [{'invoice_number': '349136', 'table_data': [{'label': 'Product_Code', 'text': ''}, {'label': 'Description', 'text': '1ST FLOOR WALLS'}, {'label': 'Price', 'text': ''}, {'label': 'Line_Amount', 'text': ''}, {'label': 'Product_Code', 'text': 'CPL1216'}, {'label': 'Description', 'text': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )"}, {'label': 'Price', 'text': '139.09'}, {'label': 'Line_Amount', 'text': '1,251.81'}, {'label': 'Product_Code', 'text': 'CPL1210'}, {'label': 'Description', 'text': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75"}, {'label': 'Price', 'text': '87.56'}, {'label': 'Line_Amount', 'text': '525.36'}, {'label': 'Product_Code', 'text': 'CPCB35558'}, {'label': 'Description', 'text': "Power Column 3 1/2 X 5 1/2 - 08 '"}, {'label': 'Price', 'text': '82.51'}, {'label': 'Line_Amount', 'text': '330.04'}]}]
    load_dotenv()
    # Initialize OpenAI with your API key
    openai.api_key = os.getenv("OPENAI_API_KEY")

    prompt = f"""You are an excellent programmer and specialize in the construction industry, knowing everything about building a house. Given a JSON which resembles a table, you have two tasks:
    1. Extract each product description and determine or predict whether it is Exterior Door/Finish/Framing/Siding/Windows or Roofing. Think well and do some self reflection. Do not share your thought process with me though.
    2. Once you have thought it through, produce a JSON, easily convertible to a dataframe in Python, which contains invoice number, product description, predicted material, and confidence (between 0 and 1, a score showing how sure you are about your prediction).
    Remember: you only have to share the output JSON, with no thought process, extra words, or anything else. If you are not able to identify the invoice number, just write NA.
    No apologies or regret. Always produce an output.


    Here is the json: {json.dumps(output_1)}
    """
    messages = [{"role": "user", "content": prompt}]
    # Use OpenAI to generate a chat completion with GPT-4
    response = openai.ChatCompletion.create(
        model="gpt-4",
        max_tokens=5000,
        temperature=0,
        messages=messages
    )
    # Extract the result (the model is asked to reply with JSON only)
    result = response.choices[0]["message"]["content"]
    print("After gpt")
    print(json.loads(result))
    return json.loads(result)
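
Note: a minimal standalone sketch (not part of the commit) of how handle_creating_json can be exercised on its own. The payload mirrors the commented-out example above; the call hits the live ChatCompletion endpoint, so it needs a valid OPENAI_API_KEY in .env, and it assumes the model honours the prompt and returns a list of row dicts.

    # hypothetical smoke test for ai_json.handle_creating_json
    from ai_json import handle_creating_json

    sample = {
        'invoice_number': '349136',
        'table_data': [
            {'label': 'Description', 'text': '1ST FLOOR WALLS'},
            {'label': 'Product_Code', 'text': 'CPL1216'},
            {'label': 'Price', 'text': '139.09'},
        ],
    }

    rows = handle_creating_json(sample)  # json.loads of the model's reply
    for row in rows:
        print(row['invoice_number'], row['product_description'],
              row['predicted_material'], row['confidence'])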
app.py
ADDED
@@ -0,0 +1,59 @@
# app.py
import gradio as gr
import pandas as pd  # Import pandas
from ocr_request import ocr_request
import io

def process_file(files):
    response_arr = []
    # Send the uploaded file to the function from ocr_request.py
    for file in files:
        response = ocr_request(file.name)
        response_arr.append(response)

    print("Main file :", response_arr)

    #i= [[{'invoice_number': '349136', 'product_description': '1ST FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '3495565136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}],[{'invoice_number': '349136', 'product_description': ' FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}]]
    flat_list = []

    for item in response_arr:
        invoice_number = item['invoice_number']

        # Extracting product descriptions
        products = item.get('predictions', []) or item.get('product_description', [])

        for product in products:
            # Rename 'description' key to 'product_description' for uniformity across all products
            product_description = product.get('product_description', product.get('description'))
            predicted_material = product['predicted_material']
            confidence = product['confidence']

            flat_list.append({
                'invoice_number': invoice_number,
                'product_description': product_description,
                'predicted_material': predicted_material,
                'confidence': confidence
            })

    df = pd.DataFrame(flat_list)

    print("Df final : ", df)
    # Save the dataframe to a CSV in-memory
    result_csv = df.to_csv(index=False)

    csv_filename = "categories.csv"
    with open(csv_filename, "w") as f:
        f.write(result_csv)

    return df, csv_filename  # Gradio will display this as a table


interface = gr.Interface(fn=process_file,
                         inputs=gr.inputs.File(label="Upload a File", file_count='multiple'),
                         outputs=["dataframe", gr.outputs.File(label="Download CSV")])  # Specify "dataframe" as output type

interface.launch(share=True)
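
For reference, a minimal offline sketch (not part of the commit) of producing the same DataFrame/CSV output without the Gradio UI or live OCR calls, assuming each response is already a flat list of row dicts per uploaded file (the shape shown in the commented-out sample above). It builds the same columns that end up in categories.csv.

    import pandas as pd

    responses = [
        [  # rows produced for the first uploaded invoice
            {'invoice_number': '349136', 'product_description': '1ST FLOOR WALLS',
             'predicted_material': 'Framing', 'confidence': 0.8},
        ],
        [  # rows produced for the second uploaded invoice
            {'invoice_number': '351500', 'product_description': "11.875 X 18 ' Pro Lam 2.0 LVL 1.75",
             'predicted_material': 'Framing', 'confidence': 0.9},
        ],
    ]

    flat = [row for per_file in responses for row in per_file]  # flatten file-by-file rows
    df = pd.DataFrame(flat)
    df.to_csv("categories.csv", index=False)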
categories.csv
ADDED
@@ -0,0 +1,10 @@
invoice_number,product_description,predicted_material,confidence
349136,1ST FLOOR WALLS,Framing,0.8
349136,"11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @
8 ' )",Framing,0.9
349136,"COLUMN
11.875 X 10 ' Pro Lam 2.0 LVL 1.75",Framing,0.9
349136,Power Column 3 1/2 X 5 1/2 - 08 ',Framing,0.9
351500,QUOTE # 4323800,NA,0.0
351500,11.875 X 18 ' Pro Lam 2.0 LVL 1.75,Framing,0.9
351500,11.875 X 22 ' Pro Lam 2.0 LVL 1.75,Framing,0.9
example_pg3.json
ADDED
@@ -0,0 +1,274 @@
{
  "message": "Success",
  "result": [
    {
      "message": "Success",
      "input": "page-3.pdf",
      "prediction": [
        {
          "id": "d263e93d-be6b-4642-b246-5bb8a90b8ba2",
          "label": "table",
          "xmin": 216,
          "ymin": 1137,
          "xmax": 2445,
          "ymax": 1594,
          "score": 1,
          "ocr_text": "table",
          "type": "table",
          "cells": [],
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": ""
        },
        {
          "id": "57ef19ae-73b9-4e60-92af-5d15bbe13e32",
          "label": "buyer_address",
          "xmin": 100,
          "ymin": 582,
          "xmax": 491,
          "ymax": 663,
          "score": 0.766381,
          "ocr_text": "174 DORCHESTER ST\nBOSTON , MA , 02127",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "12808214-6c3c-4c6f-bcff-01ce5a7596c6"
        },
        {
          "id": "615d6c14-60fe-4f7c-9e84-9c5cbbc2649e",
          "label": "subtotal",
          "xmin": 2304,
          "ymin": 2611,
          "xmax": 2416,
          "ymax": 2638,
          "score": 0.48327795,
          "ocr_text": "2,107.21",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "0ea778c5-d7e9-46cc-b212-577f099babd8"
        },
        {
          "id": "fb669362-edbe-4f49-a4f9-126f7fe255d6",
          "label": "total_tax",
          "xmin": 2313,
          "ymin": 2707,
          "xmax": 2419,
          "ymax": 2732,
          "score": 0.8847264,
          "ocr_text": "$ 131.70",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "cc9cf54f-e3f6-4898-b82b-3a10002bfdc6"
        },
        {
          "id": "aa4c1de7-7c31-4b21-bd8a-8930adcf6cac",
          "label": "invoice_date",
          "xmin": 2299,
          "ymin": 307,
          "xmax": 2446,
          "ymax": 329,
          "score": 0.9948618,
          "ocr_text": "04/12/2023",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "ded9e5da-118c-48dd-bae1-97cb95df0976"
        },
        {
          "id": "d99687df-518a-4e7a-bbb3-38fcb4a192f7",
          "label": "net_d",
          "xmin": 2323,
          "ymin": 358,
          "xmax": 2444,
          "ymax": 379,
          "score": 0.47389212,
          "ocr_text": "2nd EOM",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "790b65b6-25ef-41d7-a8ad-a93a36d02dd4"
        },
        {
          "id": "6e73864b-4d34-4e49-9f4f-a010b2b771a7",
          "label": "shipto_address",
          "xmin": 850,
          "ymin": 485,
          "xmax": 1228,
          "ymax": 562,
          "score": 0.42755184,
          "ocr_text": "BENNINGTON ST\n490 BENNINGTON ST",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "fc485f02-f2f1-4975-9096-579508235896"
        },
        {
          "id": "3b148335-3c00-4559-8e45-901f66700ebe",
          "label": "shipto_address",
          "xmin": 1004,
          "ymin": 581,
          "xmax": 1013,
          "ymax": 613,
          "score": 0.4113207,
          "ocr_text": ",",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "fc485f02-f2f1-4975-9096-579508235896"
        },
        {
          "id": "2d7f8055-8a9d-435e-9c68-6bc86deb44bd",
          "label": "currency",
          "xmin": 0,
          "ymin": 0,
          "xmax": 0,
          "ymax": 0,
          "score": 1,
          "ocr_text": "USD",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "7f166b2d-ed55-47eb-bc4b-510121dd1772"
        },
        {
          "id": "e11052f5-eb47-473c-914b-ced66972a18c",
          "label": "seller_name",
          "xmin": 951,
          "ymin": 145,
          "xmax": 1420,
          "ymax": 183,
          "score": 0.97490394,
          "ocr_text": "Dartmouth Building Supply",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "47b8254f-2bdf-4834-89d2-8c4e824f5351"
        },
        {
          "id": "4b4b2b95-50ac-4ca2-bba4-2366b6cd7d15",
          "label": "invoice_number",
          "xmin": 2252,
          "ymin": 231,
          "xmax": 2446,
          "ymax": 274,
          "score": 0.99667007,
          "ocr_text": "349136",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "59b94f5f-ac8f-4cb6-8c54-41cb14528f58"
        },
        {
          "id": "0a9f5423-bfd7-4f0a-a904-fd87fe98d8ad",
          "label": "payment_due_date",
          "xmin": 461,
          "ymin": 2612,
          "xmax": 620,
          "ymax": 2635,
          "score": 0.98759484,
          "ocr_text": "06/30/2023",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "cd67bda9-5ec4-44b6-a93f-67319f4b9105"
        },
        {
          "id": "14c59f5a-86bc-420b-95b6-e199c9c4a7e1",
          "label": "buyer_name",
          "xmin": 100,
          "ymin": 485,
          "xmax": 460,
          "ymax": 561,
          "score": 0.95362574,
          "ocr_text": "Elevated Construction\nLLC",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "59ca9238-c882-4108-842b-3ac09ea1ec32"
        },
        {
          "id": "ab0bff8d-8d25-42b1-8d45-ada931b131f1",
          "label": "invoice_amount",
          "xmin": 2288,
          "ymin": 2614,
          "xmax": 2303,
          "ymax": 2638,
          "score": 0.42010704,
          "ocr_text": "$",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "39a22a11-370c-401c-bf6e-b4f4fb14bdea"
        },
        {
          "id": "ffa71a20-d784-48bb-9f5e-0af113c22375",
          "label": "invoice_amount",
          "xmin": 2289,
          "ymin": 2808,
          "xmax": 2416,
          "ymax": 2833,
          "score": 0.80036026,
          "ocr_text": "$ 2,238.91",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "39a22a11-370c-401c-bf6e-b4f4fb14bdea"
        },
        {
          "id": "e3c8f3db-b8d1-426e-a0f6-3e793a52da25",
          "label": "seller_address",
          "xmin": 949,
          "ymin": 211,
          "xmax": 1425,
          "ymax": 288,
          "score": 0.72980326,
          "ocr_text": "958 Reed Road\nNorth Dartmouth , MA 02747",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "22eda91b-4613-4122-afbe-e1db97103f83"
        },
        {
          "id": "7c41042b-42a4-4e6b-83ea-ecb3aab57f87",
          "label": "seller_phone",
          "xmin": 949,
          "ymin": 308,
          "xmax": 1181,
          "ymax": 334,
          "score": 0.6113706,
          "ocr_text": "508-990-2389",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "5e5d4507-bdab-4d74-b215-b1abf1834caf"
        },
        {
          "id": "b1f5e03d-8d9a-4637-b261-01017200f3ab",
          "label": "client_id",
          "xmin": 2383,
          "ymin": 408,
          "xmax": 2449,
          "ymax": 429,
          "score": 0.81289464,
          "ocr_text": "6529",
          "type": "field",
          "status": "correctly_predicted",
          "page_no": 0,
          "label_id": "913a5f4d-30f0-41fa-80fd-fe5cd49c8660"
        }
      ],
      "page": 0,
      "request_file_id": "6f6d4af0-9b10-4df4-96ab-d2d60273d9bc",
      "filepath": "uploadedfiles/99a96f48-fa67-461d-a17d-8475af701b17/PredictionImages/8f144b01-7377-4fd9-8cd3-474ce8fa5ec1-1.jpeg",
      "id": "12d51dd1-6155-11ee-b209-f2f6095ec3b8",
      "rotation": 0,
      "file_url": "uploadedfiles/99a96f48-fa67-461d-a17d-8475af701b17/RawPredictions/6f6d4af0-9b10-4df4-96ab-d2d60273d9bc.pdf",
      "request_metadata": "",
      "processing_type": "sync",
      "size": { "width": 2550, "height": 3300 }
    }
  ]
}
ocr_request.py
ADDED
@@ -0,0 +1,44 @@
# ocr_request.py
import requests
import openai
import json
import ai_json


def ocr_request(file_path):
    url = 'https://app.nanonets.com/api/v2/OCR/Model/99a96f48-fa67-461d-a17d-8475af701b17/LabelFile/?async=false'
    data = {'file': open(file_path, 'rb')}
    response = requests.post(url, auth=requests.auth.HTTPBasicAuth('12ac2745-5e44-11ee-bb98-ea6b2bf28c31', ''), files=data)
    response = response.json()
    response_data = response["result"][0]["prediction"]

    for element in response_data:
        if element['label'] == 'table':
            table_data = element['cells']
        elif element['label'] == 'invoice_number':
            invoice_number = element['ocr_text']

    output_1 = {
        'invoice_number': invoice_number,
        'table_data': table_data
    }

    keys_to_remove = [
        'xmin', 'ymin', 'xmax', 'ymax', 'id', 'label_id', 'verification_status',
        'failed_validation', 'status', 'score', 'row_label', 'col_span',
        'row_span', 'row', 'col']

    for item in output_1["table_data"]:
        for key in keys_to_remove:
            item.pop(key, None)

    print("Before sending to gpt", output_1)
    gpt_response = ai_json.handle_creating_json(output_1)

    return gpt_response
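
For reference, a minimal offline sketch (not part of the commit) that replays the same field selection against the bundled example_pg3.json instead of a live Nanonets call; it assumes the JSON file sits in the working directory.

    import json

    with open("example_pg3.json") as f:
        response = json.load(f)

    prediction = response["result"][0]["prediction"]
    table_data = next(p["cells"] for p in prediction if p["label"] == "table")
    invoice_number = next(p["ocr_text"] for p in prediction if p["label"] == "invoice_number")

    print(invoice_number)   # 349136
    print(len(table_data))  # 0, since the table in this sample has no cell annotations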
requirements.txt
ADDED
Binary file (2.44 kB).