mkaramb committed on
Commit
eb822d4
·
verified ·
1 Parent(s): 8bea076

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -70
app.py CHANGED
@@ -1,79 +1,34 @@
1
- import pandas as pd
2
  import gradio as gr
3
- from google.api_core.client_options import ClientOptions
4
- from google.cloud import documentai_v1 as documentai
5
- from google.cloud.documentai_v1.types import RawDocument
6
- from google.cloud import translate_v2 as translate
7
  import zipfile
8
- import os
9
- import io
10
-
11
- os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
12
-
13
- # Global DataFrame declaration
14
- results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
15
-
16
- # Set your Google Cloud Document AI processor details here
17
- project_id = "herbaria-ai"
18
- location = "us"
19
- processor_id = "4307b078717a399a"
20
-
21
- def translate_text(text, target_language="en"):
22
- translate_client = translate.Client()
23
- result = translate_client.translate(text, target_language=target_language)
24
- return result["translatedText"]
25
-
26
- def batch_process_documents(file_stream, file_mime_type="image/jpeg") -> tuple:
27
- opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")
28
- client = documentai.DocumentProcessorServiceClient(client_options=opts)
29
- raw_document = RawDocument(content=file_stream.read(), mime_type=file_mime_type)
30
-
31
- name = client.processor_path(project_id, location, processor_id)
32
- request = documentai.ProcessRequest(name=name, raw_document=raw_document)
33
- result = client.process_document(request=request)
34
-
35
- extracted_text = result.document.text
36
- translated_text = translate_text(extracted_text)
37
- return extracted_text, translated_text
38
-
39
- def find_images(directory, extensions=('.jpeg', '.jpg')):
40
- for root, _, filenames in os.walk(directory):
41
- for filename in filenames:
42
- if filename.lower().endswith(extensions) and not filename.startswith('.'):
43
- yield os.path.join(root, filename)
44
-
45
- def process_zip_file(file_info):
46
- global results_df
47
- results_df = results_df.iloc[0:0] # Clear the DataFrame if re-running this cell
48
-
49
- with zipfile.ZipFile(io.BytesIO(file_info["content"]), 'r') as zip_ref:
50
- zip_ref.extractall("extracted_files")
51
-
52
- image_files = list(find_images("extracted_files"))
53
-
54
- for file_path in image_files:
55
- try:
56
- extracted_text, translated_text = batch_process_documents(open(file_path, "rb"))
57
- new_row = pd.DataFrame([{
58
- "Filename": os.path.basename(file_path),
59
- "Extracted Text": extracted_text,
60
- "Translated Text": translated_text
61
- }])
62
- results_df = pd.concat([results_df, new_row], ignore_index=True)
63
- except Exception as e:
64
- return f"An error occurred while processing {file_path}: {e}"
65
-
66
- return results_df
67
-
68
- interface = gr.Interface(fn=process_zip_file,
69
- inputs=gr.File(label="Upload ZIP File"),
70
- outputs=gr.Dataframe(label="Processed Results"),
71
- title="Document Processing and Translation")
72
-
73
  interface.launch()
74
 
75
 
76
 
 
77
  # def greet(name):
78
  # return "Hello " + name + "!!"
79
 
 
 
1
  import gradio as gr
 
 
 
 
2
  import zipfile
3
+ import pandas as pd
4
+ from io import BytesIO
5
+
6
def extract_image_names(zip_file):
    """List the image files contained in an uploaded zip archive.

    Args:
        zip_file: The uploaded archive. Accepted forms are a binary
            file-like object exposing ``read()``, raw ``bytes``/``bytearray``,
            or a filesystem path to the archive.
            NOTE(review): which of these forms Gradio passes depends on the
            File component's ``type`` setting and the Gradio version --
            confirm against the installed release.

    Returns:
        pandas.DataFrame: A single column, ``"Image Name"``, holding the
        archive member names whose extension is ``.png``, ``.jpg`` or
        ``.jpeg`` (compared case-insensitively), in archive order. The frame
        is empty when the archive contains no matching members.

    Raises:
        zipfile.BadZipFile: If the input is not a valid zip archive.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    if isinstance(zip_file, (bytes, bytearray)):
        archive_source = BytesIO(zip_file)
    elif hasattr(zip_file, "read"):
        # Buffer the upload in memory so ZipFile always gets a seekable stream.
        archive_source = BytesIO(zip_file.read())
    else:
        # Anything else is treated as a path; ZipFile opens paths itself.
        archive_source = zip_file

    with zipfile.ZipFile(archive_source, 'r') as z:
        # Lower-case before matching so names such as "SCAN.JPG" are not
        # silently skipped; the original member name is what gets reported.
        image_names = [
            info.filename
            for info in z.infolist()
            if info.filename.lower().endswith(image_suffixes)
        ]

    # Convert the list of names to a one-column DataFrame
    return pd.DataFrame(image_names, columns=["Image Name"])
18
+
19
+ # Define the Gradio interface
20
interface = gr.Interface(
    fn=extract_image_names,
    # Use the top-level File component: the `gr.inputs` namespace is
    # deprecated, and the earlier revision of this app already used `gr.File`.
    inputs=gr.File(label="Upload a Zip File"),
    outputs="dataframe",
    title="Zip Image Extractor",
    description="Upload a zip file to extract the names of the images.",
)

# Run the Gradio app
interface.launch()
28
 
29
 
30
 
31
+
32
  # def greet(name):
33
  # return "Hello " + name + "!!"
34