MrFeelgoood committed on
Commit
5d57f7a
1 Parent(s): 4b491a9

Modified graphics of the UI

Browse files
Files changed (1) hide show
  1. app.py +12 -20
app.py CHANGED
@@ -205,17 +205,6 @@ def extractor_clean(text, k_words, transformer, question, total_kwords, return_t
205
 
206
 
207
 
208
- def format_output(extracted_values):
209
- output = f"Valori: {extracted_values[0][0]}\n"
210
- output += f"Totale: {extracted_values[0][1]}\n"
211
- if extracted_values[1] == True:
212
- output += "-------------------\n"
213
- output += f"Rif. Testo:\n{extracted_values[2]}"
214
- return output
215
-
216
-
217
-
218
-
219
  def pdf_ocr(file):
220
  # Convert PDF to image
221
  with tempfile.TemporaryDirectory() as path:
@@ -243,22 +232,25 @@ def pdf_ocr(file):
243
  # Call extractor_clean and format_output functions
244
  ks = ('mq', 'metri quadri', 'm2')
245
  tra = 'it5/it5-base-question-answering'
246
- quest = "Quanti metri quadri misura l'immobile?"
247
  totalK = ['totale', 'complessivo', 'complessiva']
248
 
249
  extracted_values = extractor_clean(text=text, k_words=ks, transformer=tra, question=quest, total_kwords=totalK, return_text=True)
250
- output = format_output(extracted_values=extracted_values)
251
-
252
- return output
253
 
 
254
 
255
  def ocr_interface(pdf_file):
256
  # Call the pdf_ocr function
257
- ocr_output = pdf_ocr(pdf_file.name)
258
- return ocr_output
259
 
260
 
261
  pdf_input = gr.inputs.File(label="PDF File")
262
- output_text = gr.outputs.Textbox(label="Output")
263
- iface = gr.Interface(fn=ocr_interface, inputs=pdf_input, outputs=output_text)
264
- iface.launch()
 
 
 
205
 
206
 
207
 
 
 
 
 
 
 
 
 
 
 
 
208
  def pdf_ocr(file):
209
  # Convert PDF to image
210
  with tempfile.TemporaryDirectory() as path:
 
232
  # Call extractor_clean and format_output functions
233
  ks = ('mq', 'metri quadri', 'm2')
234
  tra = 'it5/it5-base-question-answering'
235
+ quest = "Quanti metri quadri misura la superficie?"
236
  totalK = ['totale', 'complessivo', 'complessiva']
237
 
238
  extracted_values = extractor_clean(text=text, k_words=ks, transformer=tra, question=quest, total_kwords=totalK, return_text=True)
239
+ values_output = extracted_values[0][0] # Join values with '\n'
240
+ total_output = extracted_values[0][1]
241
+ text_output = extracted_values[2]
242
 
243
+ return values_output, total_output, text_output
244
 
245
  def ocr_interface(pdf_file):
246
  # Call the pdf_ocr function
247
+ values, total, text = pdf_ocr(pdf_file.name)
248
+ return values, total, text
249
 
250
 
251
  pdf_input = gr.inputs.File(label="PDF File")
252
+ values_output = gr.outputs.Textbox(label="Mq. Values")
253
+ total_output = gr.outputs.Textbox(label="Total")
254
+ text_output = gr.outputs.Textbox(label="Ref. Text")
255
+ iface = gr.Interface(fn=ocr_interface, inputs=pdf_input, title="PDF MQ EXTRACTOR", outputs=[values_output, total_output, text_output], preprocess=format_output)
256
+ iface.launch()