omdivyatej committed on
Commit
c8ec340
·
1 Parent(s): 6277da1

update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -19
app.py CHANGED
@@ -2,7 +2,10 @@
2
  import gradio as gr
3
  import pandas as pd # Import pandas
4
  from ocr_request import ocr_request
5
- import io
 
 
 
6
 
7
  def process_file(files):
8
  response_arr = []
@@ -14,28 +17,52 @@ def process_file(files):
14
  print("Main file :", response_arr)
15
 
16
  #i= [[{'invoice_number': '349136', 'product_description': '1ST FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '3495565136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}],[{'invoice_number': '349136', 'product_description': ' FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}]]
17
- flat_list = []
18
 
19
- for item in response_arr:
20
- invoice_number = item['invoice_number']
21
 
22
- # Extracting product descriptions
23
- products = item.get('predictions', []) or item.get('product_description', [])
24
 
25
- for product in products:
26
- # Rename 'description' key to 'product_description' for uniformity across all products
27
- product_description = product.get('product_description', product.get('description'))
28
- predicted_material = product['predicted_material']
29
- confidence = product['confidence']
30
 
31
- flat_list.append({
32
- 'invoice_number': invoice_number,
33
- 'product_description': product_description,
34
- 'predicted_material': predicted_material,
35
- 'confidence': confidence
36
- })
37
 
38
- df = pd.DataFrame(flat_list)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  print("Df final : ", df)
41
  # Save the dataframe to a CSV in-memory
@@ -54,6 +81,6 @@ interface = gr.Interface(fn=process_file,
54
  inputs=gr.inputs.File(label="Upload a File", file_count='multiple'),
55
  outputs=["dataframe",gr.outputs.File(label="Download CSV")]) # Specify "dataframe" as output type
56
 
57
- interface.launch()
58
 
59
 
 
2
  import gradio as gr
3
  import pandas as pd # Import pandas
4
  from ocr_request import ocr_request
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import openai
8
+ import json
9
 
10
  def process_file(files):
11
  response_arr = []
 
17
  print("Main file :", response_arr)
18
 
19
  #i= [[{'invoice_number': '349136', 'product_description': '1ST FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '3495565136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}],[{'invoice_number': '349136', 'product_description': ' FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}]]
20
+ # flat_list = []
21
 
22
+ # for item in response_arr:
23
+ # invoice_number = item['invoice_number']
24
 
25
+ # # Extracting product descriptions
26
+ # products = item.get('predictions', []) or item.get('product_description', [])
27
 
28
+ # for product in products:
29
+ # # Rename 'description' key to 'product_description' for uniformity across all products
30
+ # product_description = product.get('product_description', product.get('description'))
31
+ # predicted_material = product['predicted_material']
32
+ # confidence = product['confidence']
33
 
34
+ # flat_list.append({
35
+ # 'invoice_number': invoice_number,
36
+ # 'product_description': product_description,
37
+ # 'predicted_material': predicted_material,
38
+ # 'confidence': confidence
39
+ # })
40
 
41
+ load_dotenv()
42
+ # Initialize OpenAI with your API key
43
+ openai.api_key = os.getenv("OPENAI_API_KEY")
44
+
45
+ prompt =f"""
46
+ you are an excellent programmer and an anlyst. Given a json array or a json, you need to analyse it and convert into a json format which can be converted in dataframe of pandas easily. You have a singular task :
47
+ Once you have thought through, produce a json, easily convertible to a dataframe in python, which would contain invoice number, product description, predicted material, confidence. Remember: You just have to share the o/p json, no thought process or anything else.
48
+
49
+ Here is the json array/json : {json.dumps(response_arr)}
50
+ """
51
+ messages=[{"role": "user", "content":prompt}]
52
+ # Request a chat completion from GPT-4 (update the `model` value below if a different model ID should be used)
53
+ response = openai.ChatCompletion.create(
54
+ model="gpt-4",
55
+ max_tokens=5000,
56
+ temperature=0,
57
+ messages = messages
58
+ )
59
+ # Extracting the result
60
+ result = response.choices[0]["message"]["content"]
61
+ print("After in min gpt")
62
+ print(json.loads(result))
63
+
64
+ df = pd.DataFrame(json.loads(result))
65
+ # df = pd.DataFrame(flat_list)
66
 
67
  print("Df final : ", df)
68
  # Save the dataframe to a CSV in-memory
 
81
  inputs=gr.inputs.File(label="Upload a File", file_count='multiple'),
82
  outputs=["dataframe",gr.outputs.File(label="Download CSV")]) # Specify "dataframe" as output type
83
 
84
+ interface.launch(share=True)
85
 
86