Amith Adiraju committed on
Commit
9a0f501
·
1 Parent(s): ca2b51e

Copied the entire codebase from a working Streamlit application in my personal Git repository. This application lets users upload images of item menus (only Indian item menus are supported for now); using OpenCV and LLMs, it explains each item in the menu in a specific format.

Browse files
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.37.1
2
+ pandas==2.2.2
3
+ altair
4
+ easyocr==1.6.2
5
+ matplotlib==3.7.1
6
+ numpy==1.24.2
7
+ lorem==0.1.1
8
+ Pillow==9.5.0
9
+ nltk==3.9.1
10
+ torch==2.1.0
11
+ transformers==4.44.2
12
+ sentencepiece
src/__init__.py ADDED
File without changes
src/inference/__init__.py ADDED
File without changes
src/inference/config.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Few-shot prompt that is prepended to every menu item before it is sent to
# the summarizer model. It shows three worked examples and then asks for the
# same five sections for the item appended after the trailing "Item ->".
# The section names listed in the final instruction must match the labels
# used in the examples exactly ("How it is made", not "How is it made").
INSTRUCTION_PROMPT = """
The following text contains examples of three items and their corresponding explanations in the required format.\n

Item -> palak paneer.\n
Explanation -> Major Ingredients here: paneer ( a.k.a cottage cheese ) , palak ( spinach ).\n
How it is made: It's a savory item, made like a gravy; usually made by sauteing spices and mixing saute with boiled paneer and palak.\n
It goes well with: White basmati rice or Indian flat bread.\n
Allergens: Paneer may cause digestive discomfort and intolerance to some.\n
Food Category: Vegetarian, Vegans may not like it, as paneer is usually made from cow milk.


Item -> rumali roti.\n
Explanation -> Major Ingredients here: roti.\n
How it is made: A small soft bread, made to size of a napkin ( a.k.a 'rumal' in hindi ); usually made with a combination of whole wheat and all purpose flour.\n
It goes well with: Most indian gravies such as palak paneer, tomato curry etc.\n
Allergens: May contain gluten, which is known to cause digestive discomfort and intolerance to some.\n
Food Category: Vegetarian, Vegan.


Item -> nizami handi.\n
Explanation -> Major Ingredients here: Different veggies, makhani sauce (skimmed milk, tomato and cashew paste , indian spices), combination of nuts.\n
How it is made: Makhani sauce is added to onion-tomato based paste and brought to a boil; a Medley of veggies and gently flavored whole spices are added and boiled for small time.\n
It goes well with: Different kinds of indian flat breads, white basmati and sonamasoori rice.\n
Allergens: Presence of nuts, butter cream and makhani sauce are known to cause digestive discomfort and intolerance to some.\n
Food Category: Usually vegetarian, may include chicken or animal meat sometimes, please check with hotel.


Based on Item and explanation pairs provided above, provide similar explanation ('Major Ingredients', 'How it is made', 'It goes well with', 'Allergens' and 'Food Category') to the below item.\n
Item ->
"""

# When True, the app prints timing information for each pipeline stage.
DEBUG_MODE = True

# Compute device; any value other than 'cpu' makes the OCR reader use the GPU.
DEVICE = 'cpu'
src/inference/preprocess_image.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import numpy as np
3
+ from typing import List, Tuple, Optional, AnyStr
4
+ import nltk
5
+ nltk.download("stopwords")
6
+ nltk.download('punkt')
7
+
8
+ from nltk.tokenize import RegexpTokenizer
9
+ from nltk.corpus import stopwords
10
+ import re
11
+
12
+
13
def preprocess_text(sentence: AnyStr) -> AnyStr:
    """
    Normalise one piece of OCR text into a space-separated string of keywords.

    The text is lower-cased, then stripped of the literal ``{html}`` marker,
    anything that looks like an HTML tag, URLs and digits. What remains is
    split into ``\\w+`` tokens; tokens of two characters or fewer and English
    stop words are dropped.

    Args:
        sentence: Raw text to clean.

    Returns:
        The surviving tokens joined by single spaces (may be an empty string).
    """
    # Build the stop-word lookup once per call. Calling stopwords.words()
    # inside the filter re-evaluated it for every token and then did a
    # linear scan over the resulting list.
    stop_words = set(stopwords.words('english'))

    text = sentence.lower().replace('{html}', "")
    text = re.sub(r'<.*?>', '', text)      # HTML-like tags
    text = re.sub(r'http\S+', '', text)    # URLs
    text = re.sub(r'[0-9]+', '', text)     # digits (prices, quantities, ...)

    tokens = RegexpTokenizer(r'\w+').tokenize(text)

    return " ".join(
        word for word in tokens
        if len(word) > 2 and word not in stop_words
    )
28
+
29
def image_to_np_arr(image) -> np.ndarray:
    """
    Convert an image-like object into a NumPy array.

    Args:
        image: Any object ``np.array`` accepts (the app passes a PIL image).

    Returns:
        A new ``np.ndarray`` holding the image data.
    """
    # np.array is the constructor function; the resulting type is np.ndarray.
    return np.array(image)
31
+
32
def process_extracted_text(raw_extrc_text: List[Tuple]) -> List[AnyStr]:
    """
    Clean every detected text fragment and keep only the longer ones.

    Args:
        raw_extrc_text: Sequence of 3-tuples whose middle element is the
            detected text; the first and last elements are ignored.

    Returns:
        Cleaned strings that still contain more than two space-separated
        words after ``preprocess_text`` has been applied.
    """
    # Strip numbers / special characters from each fragment lazily ...
    cleaned_fragments = (
        preprocess_text(fragment) for _, fragment, _ in raw_extrc_text
    )
    # ... and drop anything that ends up with two words or fewer.
    return [
        cleaned for cleaned in cleaned_fragments
        if len(cleaned.split(" ")) > 2
    ]
src/inference/translate.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ from inference.preprocess_image import (
4
+ image_to_np_arr,
5
+ process_extracted_text
6
+ )
7
+
8
+ from inference.config import INSTRUCTION_PROMPT, DEVICE
9
+ from typing import List, Tuple, Optional, AnyStr, Dict
10
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
11
+ import easyocr
12
+ import time
13
+
14
# The OCR reader runs on GPU for every configured device except 'cpu'.
use_gpu = DEVICE != 'cpu'
16
+
17
+
18
# Define your extract_filter_img function
def extract_filter_img(image, text_extractor) -> List[AnyStr]:
    """
    Run the image-to-menu-text pipeline while reporting progress in the UI.

    Each step receives the previous step's output:

    1. Convert Image to numpy array
    2. Detect & Extract Text from Image - List of Tuples
    3. Process text , to filter out irrelevant text
    4. Classify only menu-related strings from detected text

    Args:
        image: The uploaded image; it is handed to ``image_to_np_arr``.
        text_extractor: Object exposing a ``readtext`` method.

    Returns:
        Whatever the last step (``classify_menu_text``) returns, i.e. the
        list of cleaned text strings.
    """
    progress_bar = st.progress(0)
    status_message = st.empty()

    # (callable, message shown before the step, message shown after it)
    pipeline_steps = [
        (image_to_np_arr, 'Converting Image to required format', 'Done Converting !'),
        (text_extractor.readtext, 'Extracting text from inp image', 'Done Extracting !'),
        (process_extracted_text, 'Clean Raw Extracted text', 'Done Cleaning !'),
        (classify_menu_text, 'Removing non-menu related text', 'Done removing !'),
    ]
    total_steps = len(pipeline_steps)

    # Steps that get a short pause before and after running, presumably so
    # their status text stays visible long enough to read. The text
    # extraction step (index 1) is not padded.
    delayed_steps = {0, 2, 3}

    result = image
    try:
        for step_idx, (func, start_message, end_message) in enumerate(pipeline_steps):
            status_message.write(start_message)
            if step_idx in delayed_steps:
                time.sleep(0.5)

            result = func(result)

            status_message.write(end_message)
            progress_bar.progress((step_idx + 1) / total_steps)
            if step_idx in delayed_steps:
                time.sleep(0.5)
    finally:
        # Always remove the temporary widgets, even when a step raises, so a
        # stale progress bar / status line is not left on the page.
        progress_bar.empty()
        status_message.empty()

    return result
64
+
65
+
66
def transcribe_menu_model(menu_texts: List[AnyStr],
                          text_summarizer = None,
                          text_tokenizer = None) -> Dict:
    """
    Generate an explanation for every menu text using the summarizer model.

    Each text is appended to ``INSTRUCTION_PROMPT``, tokenized, passed to
    ``text_summarizer.generate`` and the first generated sequence is decoded.

    Args:
        menu_texts: Menu strings to explain.
        text_summarizer: Model exposing ``generate``.
        text_tokenizer: Tokenizer that is callable on the prompt and exposes
            ``decode``.

    Returns:
        Dict mapping each input text to its generated explanation. Duplicate
        texts overwrite one another. An empty input yields an empty dict.

    Raises:
        NotImplementedError: If there are texts to process but no model or
            tokenizer was supplied (no fallback is implemented).
    """
    summarized_menu_items = {}

    # Nothing to do; keeps the "empty in, empty out" behaviour even when no
    # model was passed.
    if not menu_texts:
        return summarized_menu_items

    # Validate once, up front, instead of on every loop iteration.
    if text_summarizer is None or text_tokenizer is None:
        raise NotImplementedError(
            "transcribe_menu_model requires both text_summarizer and "
            "text_tokenizer; no fallback summarizer is implemented."
        )

    for menu_item in menu_texts:
        prompt_item = INSTRUCTION_PROMPT + " " + menu_item + "\n\n\n"
        input_ids = text_tokenizer(prompt_item, return_tensors="pt").input_ids

        outputs = text_summarizer.generate(input_ids, max_new_tokens=512)

        summarized_menu_items[menu_item] = text_tokenizer.decode(
            outputs[0],
            skip_special_tokens=True,
        )

    return summarized_menu_items
93
+
94
# Streamlit re-executes the calling script on every user interaction; caching
# keeps one copy of the heavy OCR reader and T5 weights per model name instead
# of reloading them on each rerun.
@st.cache_resource
def load_models(item_summarizer: AnyStr) -> Tuple:
    """
    Load the OCR reader and the T5 tokenizer/model used by the app.

    Args:
        item_summarizer: Name or path passed to ``from_pretrained`` for both
            the tokenizer and the model.

    Returns:
        ``(text_extractor, tokenizer, model)`` where ``text_extractor`` is an
        English ``easyocr.Reader`` (GPU use follows the module-level
        ``use_gpu`` flag).
    """
    text_extractor = easyocr.Reader(['en'], gpu=use_gpu)
    tokenizer = T5Tokenizer.from_pretrained(item_summarizer)
    model = T5ForConditionalGeneration.from_pretrained(item_summarizer)

    return (text_extractor, tokenizer, model)
102
+
103
def classify_menu_text(extrc_str: List[AnyStr]) -> List[AnyStr]:
    """
    Placeholder for the menu / non-menu text classifier.

    No filtering is implemented: the input list is handed back unchanged
    (the very same object, not a copy).
    """
    return extrc_str
105
+
src/main.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ from inference.translate import (
4
+ extract_filter_img,
5
+ transcribe_menu_model,
6
+ load_models
7
+ )
8
+
9
+ from inference.config import DEBUG_MODE
10
+ from PIL import Image
11
+ import time
12
+
13
# Streamlit app
st.title("Image Upload and Processing")


# Open-source text detector plus the LLM (and its tokenizer) that explains items.
text_extractor, item_tokenizer, item_summarizer = load_models(
    item_summarizer="google/flan-t5-large"
)

# Streamlit widget to upload an image from any device.
uploaded_file = st.file_uploader("Choose an image...",
                                 type=["jpg", "jpeg", "png"])


# Everything below needs an uploaded image.
if uploaded_file is not None:
    # Normalise to 3-channel RGB so palette, alpha or CMYK uploads reach the
    # OCR step in one consistent layout.
    image = Image.open(uploaded_file).convert("RGB")

    # Only show the image if the user wants to see it.
    if st.checkbox('Show Uploaded Image'):
        st.image(image,
                 caption='Uploaded Image',
                 use_column_width=True)

    # Submit button: run the pipeline only on an explicit click.
    if st.button("Submit"):

        msg1 = st.empty()
        msg1.write("Pre-processing and extracting text out of your image ....")
        st_filter = time.perf_counter()
        # Call the extract_filter_img function
        filtered_text = extract_filter_img(image, text_extractor)
        en_filter = time.perf_counter()

        msg2 = st.empty()
        msg2.write("All pre-processing done, transcribing your menu items now ....")
        st_trans_llm = time.perf_counter()
        translated_text_dict = transcribe_menu_model(menu_texts=filtered_text,
                                                     text_tokenizer=item_tokenizer,
                                                     text_summarizer=item_summarizer
                                                     )
        # Stop the timer right after the model call so UI updates are not
        # counted as LLM time.
        en_trans_llm = time.perf_counter()

        msg3 = st.empty()
        msg3.write("Done transcribing ... ")

        msg1.empty()
        msg2.empty()
        msg3.empty()

        if translated_text_dict:
            st.success("Image processed successfully! ")
        else:
            # Nothing survived OCR + filtering; say so instead of reporting
            # success next to an empty table.
            st.warning("No menu items could be detected in the uploaded image.")

        if DEBUG_MODE:
            filter_time_sec = en_filter - st_filter
            llm_time_sec = en_trans_llm - st_trans_llm
            total_time_sec = filter_time_sec + llm_time_sec

            st.write("Time took to extract and filter text {}".format(filter_time_sec))
            st.write("Time took to summarize by LLM {}".format(llm_time_sec))
            st.write('Overall time taken in seconds: {}'.format(total_time_sec))

        if translated_text_dict:
            st.table(translated_text_dict)
71
+
src/requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ sentencepiece==0.2.0
2
+ transformers==4.44.2
3
+ streamlit==1.37.1
4
+ pandas==2.2.2
5
+ altair
6
+ easyocr==1.6.2
7
+ matplotlib==3.7.1
8
+ numpy==1.24.2
9
+ Pillow==9.5.0
10
+ nltk==3.9.1
11
+ torch==2.1.0