pr1 #2 by AmithAdiraju1694 - opened

Files changed:
- .gitignore +1 -2
- app.py +65 -51
- inference/config.py +27 -16
- inference/preprocess_image.py +5 -80
- inference/translate.py +39 -57
- pages.py +0 -214
- utils.py +0 -15
.gitignore
CHANGED
@@ -1,4 +1,3 @@
 misc.txt
 test_cas.py
-test_train_llm.py
-redir_app.py
+test_train_llm.py
app.py
CHANGED
@@ -1,64 +1,78 @@
-from utils import navigate_to
-from pages import manual_input_page, image_input_page, model_inference_page
-
 import streamlit as st
-from streamlit import session_state as sst
-import asyncio
 
-# function to remove all sesion variables from sst, except page.
-def reset_sst():
-    for key in list(sst.keys()):
-        if key != "page":
-            sst.pop(key, None)
 
-st.title("We will explain your menu like never before!")
-st.write("\n")
-st.write("\n")
-st.write("\n")
-
-c1, c2= st.columns(2)
 
-with c2:
-    # Navigate to image input page if user clicks on the button
-    st.button("Upload Items from Image", on_click=navigate_to, args=("ImageInput",))
 
-elif sst["page"] == "ImageInput":
-    reset_sst()  # reset all session state variables before navigating to the landing page
-    await image_input_page()  # Call the image input page function
 
+from inference.translate import (
+    extract_filter_img,
+    transcribe_menu_model,
+    load_models
+)
+
+from inference.config import DEBUG_MODE
+from PIL import Image
+import time
+
+# Streamlit app
+st.title("Image Upload and Processing")
+
+# Using open source text detector, LLM for explaining items
+text_extractor, \
+item_tokenizer, item_summarizer = load_models(item_summarizer="google/flan-t5-large")
+
+# Streamlit function to upload an image from any device
+uploaded_file = st.file_uploader("Choose an image...",
+                                 type=["jpg", "jpeg", "png"])
+
+if uploaded_file is not None:
+    image = Image.open(uploaded_file)
+
+    # Only show if user wants to see
+    if st.checkbox('Show Uploaded Image'):
+        st.image(image,
+                 caption='Uploaded Image',
+                 use_column_width=True)
+
+    # Submit button
+    if st.button("Submit"):
+
+        msg1 = st.empty()
+        msg1.write("Pre-processing and extracting text out of your image ....")
+        st_filter = time.perf_counter()
+        # Call the extract_filter_img function
+        filtered_text = extract_filter_img(image, text_extractor)
+        en_filter = time.perf_counter()
+
+        num_items_detected = len(filtered_text)
+        if num_items_detected == 0:
+            st.write("We couldn't detect any menu items ( indian for now ) from your image, please try a different image.")
+
+        elif num_items_detected > 0:
+            st.write(f"Detected {num_items_detected} menu items ( indian ) from your input image ... ")
+
+            msg2 = st.empty()
+            msg2.write("All pre-processing done, transcribing your menu items now ....")
+            st_trans_llm = time.perf_counter()
+            translated_text_dict = transcribe_menu_model(menu_texts=filtered_text,
+                                                         text_tokenizer=item_tokenizer,
+                                                         text_summarizer=item_summarizer
+                                                         )
+
+            msg3 = st.empty()
+            msg3.write("Done transcribing ... ")
+            en_trans_llm = time.perf_counter()
+
+            msg1.empty(); msg2.empty(); msg3.empty()
+            st.success("Image processed successfully!")
+
+            if DEBUG_MODE:
+                filter_time_sec = en_filter - st_filter
+                llm_time_sec = en_trans_llm - st_trans_llm
+                total_time_sec = filter_time_sec + llm_time_sec
+
+                st.write("Time took to extract and filter text {}".format(filter_time_sec))
+                st.write("Time took to summarize by LLM {}".format(llm_time_sec))
+                st.write('Overall time taken in seconds: {}'.format(total_time_sec))
+
+            st.table(translated_text_dict)
inference/config.py
CHANGED
@@ -1,23 +1,34 @@
-def get_device():
-    if torch.cuda.is_available():
-        device = torch.device("cuda")
-        print(f"Using GPU: {torch.cuda.get_device_name(0)}")  # get the name of the GPU being used.
-    else:
-        device = torch.device("cpu")
-        print("Using CPU")
-
-DEVICE =
+INSTRUCTION_PROMPT = """
+The following text contains examples of three items and their corresponding explanations in the required format.\n
+
+Item -> palak paneer.\n
+Explanation -> Major Ingredients here: paneer ( a.k.a cottage cheese ), palak ( spinach ).\n
+How it is made: It's a savory item, made like a gravy; usually made by sauteing spices and mixing saute with boiled paneer and palak.\n
+It goes well with: White basmati rice or Indian flat bread.\n
+Allergens: Paneer may cause digestive discomfort and intolerance to some.\n
+Food Category: Vegetarian, Vegans may not like it, as paneer is usually made from cow milk.
+
+Item -> rumali roti.\n
+Explanation -> Major Ingredients here: roti.\n
+How it is made: A small soft bread, made to size of a napkin ( a.k.a 'rumal' in hindi ); usually made with a combination of whole wheat and all purpose flour.\n
+It goes well with: Most indian gravies such as palak paneer, tomato curry etc.\n
+Allergens: May contain gluten, which is known to cause digestive discomfort and intolerance to some.\n
+Food Category: Vegetarian, Vegan.
+
+Item -> nizami handi.\n
+Explanation -> Major Ingredients here: Different veggies, makhani sauce (skimmed milk, tomato and cashew paste, indian spices), combination of nuts.\n
+How it is made: Makhani sauce is added to onion-tomato based paste and brought to a boil; a medley of veggies and gently flavored whole spices are added and boiled for a short time.\n
+It goes well with: Different kinds of indian flat breads, white basmati and sonamasoori rice.\n
+Allergens: Presence of nuts, butter cream and makhani sauce are known to cause digestive discomfort and intolerance to some.\n
+Food Category: Usually vegetarian, may include chicken or animal meat sometimes, please check with hotel.
+
+Based on Item and explanation pairs provided above, provide similar explanation ('Major Ingredients', 'How is it made', 'It goes well with', 'Allergens' and 'Food Category') to the below item.\n
+Item ->
+"""
+
+DEBUG_MODE = True
+
+DEVICE = 'cpu'
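For context on how this prompt is consumed downstream, here is a minimal sketch (not part of the diff) mirroring transcribe_menu_model in inference/translate.py; "palak paneer" is just an illustrative item:

    from inference.config import INSTRUCTION_PROMPT

    # INSTRUCTION_PROMPT ends with "Item -> ", so appending a menu item name
    # completes the few-shot pattern the model is asked to continue.
    item = "palak paneer"
    prompt_item = INSTRUCTION_PROMPT + " " + item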
inference/preprocess_image.py
CHANGED
@@ -1,6 +1,6 @@
 
 import numpy as np
-from typing import List, Tuple, Optional, AnyStr
+from typing import List, Tuple, Optional, AnyStr
 import nltk
 nltk.download("stopwords")
 nltk.download('punkt')
@@ -11,18 +11,6 @@ import re
 
 
 def preprocess_text(sentence: AnyStr) -> AnyStr:
-
-    """
-    Function that pre-processes input text by removing special characters, hyper links,
-    numbers and by removing stop words
-
-    Parameters:
-        sentence: str, required -> A raw string which may have stop words, special chars etc.
-
-    Returns:
-        return_txt: str -> A clean string with all aforementioned, removed.
-    """
-
     sentence = sentence.lower().replace('{html}', "")
     cleanr = re.compile('<.*?>')
     cleantext = re.sub(cleanr, '', sentence)
@@ -39,78 +27,15 @@ def preprocess_text(sentence: AnyStr) -> AnyStr:
     return return_txt
 
 def image_to_np_arr(image) -> np.array:
-
-    """
-    Function that converts a byte array image into a floating pointer numpy array.
-
-    Parameters:
-        inp_texts: List[str], required -> List of strings, containing item names of a menu in english.
-
-    Returns:
-        np.ndarray
-    """
-
     return np.array(image)
 
-    Function that processes extracted text by removing numbers and special characters,
-    and filters out text with less than 2 words.
-
-    Parameters:
-        raw_extrc_text: List[Tuple], required -> A list of tuples containing extracted text.
-
-    Returns:
-        List[AnyStr] -> A list of processed text strings.
-    """
+def process_extracted_text(raw_extrc_text: List[Tuple]) -> List[AnyStr]:
+
     output_texts = []
     for _, extr_text, _ in raw_extrc_text:
         # remove all numbers, special characters from a string
         prcsd_txt = preprocess_text(extr_text)
-        if len(prcsd_txt.split(" ")) >= 2:
-            output_texts.append(prcsd_txt)
-
-    return output_texts
-
-headers = ["Item Name", "Major Ingredients", "Making Process", "Portion and Spice Level", "Pairs With", "Allergens", "Food Type"]
-
-# Function to clean the strings
-def clean_string(input_string):
-    parts = input_string.split(',')
-    cleaned_parts = [part.strip() for part in parts if part.strip()]
-    return ', '.join(cleaned_parts)
-
-for i in range(len(gen_output)):
-    # Find all matches
-    matches = re.findall(header_pattern, gen_output[i])
-
-    # Since re.findall returns a list of tuples, we need to extract the first tuple
-    if matches:
-        result = dict(zip(headers, matches[0]))
-        result['Major Ingredients'] = clean_string(result['Major Ingredients'])
-
-        # if any of dictionary values strings are emtpy, replace it with string "Sorry, can't explain this."
-        for k in result.keys():
-            if len(result[k]) < 3 or any(header in result[k] for header in headers):
-                result[k] = "Sorry, can't explain this."
-
-        gen_output[i] = result
-
-    else:
-        if headers[1] in gen_output[i]:
-
-            gen_output[i] = {"May contain misleading explanation":
-                                 dots_pattern.sub('',
-                                                  gen_output[i].split(headers[1])[1].strip().replace('</s>', ''))
-                             }
-        else:
-            gen_output[i] = {"Sorry, can't explain this item": "NA"}
-
-    gen_output[i].pop('Item Name', None)
-
-    return gen_output
+
+        if len(prcsd_txt.split(" ") ) > 2: output_texts.append(prcsd_txt)
+
+    return output_texts
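A note on the input shape process_extracted_text expects: easyocr.Reader.readtext returns (bounding_box, text, confidence) triples, which is why the loop unpacks for _, extr_text, _. A small sketch with invented values:

    # Illustrative only; boxes and confidence scores are made up.
    raw_extrc_text = [
        ([[10, 10], [180, 10], [180, 40], [10, 40]], "Palak Paneer 12.99", 0.91),
        ([[10, 50], [80, 50], [80, 80], [10, 80]], "Rice", 0.88),
    ]
    # preprocess_text strips numbers, special characters and stop words;
    # only strings with more than two remaining words pass the new `> 2`
    # filter (the old code kept two-word items with `>= 2`).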
inference/translate.py
CHANGED
@@ -2,54 +2,21 @@ import streamlit as st
 
 from inference.preprocess_image import (
     image_to_np_arr,
-    process_extracted_text,
-    post_process_gen_outputs
+    process_extracted_text
 )
 
-from inference.config import (
-    model_inf_inp_prompt,
-    header_pattern,
-    dots_pattern,
-    DEVICE,
-    model_name
-)
+from inference.config import INSTRUCTION_PROMPT, DEVICE
 from typing import List, Tuple, Optional, AnyStr, Dict
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import T5Tokenizer, T5ForConditionalGeneration
 import easyocr
 import time
 
 use_gpu = True
-if DEVICE
+if DEVICE == 'cpu': use_gpu = False
 
-@st.cache_resource
-def load_models(item_summarizer: AnyStr) -> Tuple:
-
-    """
-    Function to load the models required for the inference process. Cached to avoid loading the models, every time the function is called.
-
-    Parameters:
-        item_summarizer: str, required -> The LLM model name to be used for item summarization.
-
-    Returns:
-        Tuple -> Tuple containing the required models for the inference process.
-    """
-
-    # model to extract text from image
-    text_extractor = easyocr.Reader(['en'],
-                                    gpu = use_gpu
-                                    )
-
-    # tokenizer and model to generate item summary
-    tokenizer = AutoTokenizer.from_pretrained(item_summarizer)
-    model = AutoModelForCausalLM.from_pretrained(item_summarizer)
-
-    return (text_extractor, tokenizer, model)
-
-text_extractor, item_tokenizer, item_summarizer = load_models(item_summarizer = model_name)
 
 
 # Define your extract_filter_img function
-async def extract_filter_img(image) -> Dict:
+def extract_filter_img(image, text_extractor) -> Dict:
 
     """
     1. Convert Image to numpy array
@@ -81,8 +48,7 @@ async def extract_filter_img(image) -> Dict:
         if i in ind_add_delays:
             time.sleep(0.5)
 
-
-        else: result = func(result)
+        result = func(result)
 
     status_message.write(end_message)
@@ -97,26 +63,42 @@ async def extract_filter_img(image) -> Dict:
     return result
 
 
-def transcribe_menu_model(
 
+def transcribe_menu_model(menu_texts: List[AnyStr],
+                          text_summarizer = None,
+                          text_tokenizer = None) -> Dict:
+
+    summarized_menu_items = {}
+
+    for mi in menu_texts:
+        if not text_summarizer:
+            raise NotImplementedError(""" """)
+
+        else:
+            prompt_item = INSTRUCTION_PROMPT + " " + mi + """
+
+
+            """
+            input_ids = text_tokenizer(prompt_item, return_tensors="pt").input_ids
+
+            outputs = text_summarizer.generate(input_ids,
+                                               max_new_tokens = 512
+                                               )
+
+            summarized_menu_items[mi] = text_tokenizer.decode(
+                outputs[0],
+                skip_special_tokens = True
+            )
+
+    return summarized_menu_items
+
+def load_models(item_summarizer: AnyStr) -> Tuple:
+    text_extractor = easyocr.Reader(['en'],
+                                    gpu = use_gpu
+                                    )
+    tokenizer = T5Tokenizer.from_pretrained(item_summarizer)
+    model = T5ForConditionalGeneration.from_pretrained(item_summarizer)
+
+    return (text_extractor, tokenizer, model)
 
 def classify_menu_text(extrc_str: List[AnyStr]) -> List[AnyStr]:
     return extrc_str
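Taken together, the refactor exposes a plain function API in place of the deleted multi-page flow. A minimal end-to-end sketch mirroring the new app.py (a running Streamlit session is assumed, since extract_filter_img writes status messages via st; "menu.jpg" is a hypothetical input file):

    from PIL import Image
    from inference.translate import load_models, extract_filter_img, transcribe_menu_model

    text_extractor, item_tokenizer, item_summarizer = load_models(
        item_summarizer="google/flan-t5-large")

    image = Image.open("menu.jpg")  # hypothetical local image
    items = extract_filter_img(image, text_extractor)
    explanations = transcribe_menu_model(menu_texts=items,
                                         text_tokenizer=item_tokenizer,
                                         text_summarizer=item_summarizer)
    # explanations maps each detected item name to its generated text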
pages.py
DELETED
@@ -1,214 +0,0 @@
-import streamlit as st
-from streamlit import session_state as sst
-
-
-from utils import navigate_to
-from inference.config import DEBUG_MODE
-
-from inference.translate import extract_filter_img, transcribe_menu_model, classify_menu_text
-from inference.preprocess_image import preprocess_text
-
-import os
-import time
-import pandas as pd
-from PIL import Image
-from typing import List
-import json
-from concurrent.futures import ThreadPoolExecutor, as_completed
-
-# Setting workers to be 70% of all available virtual cpus in system
-cpu_count = os.cpu_count()
-pool = ThreadPoolExecutor(max_workers=int(cpu_count*0.7))
-
-# Function that handles logic of explaining menu items from manual input
-async def manual_input_page():
-
-    """
-    Function that takes text input from user in input box of streamlit, user can add multiple text boxes and submit finally.
-
-    Parameters:
-        None
-
-    Returns:
-        List[str]: List of strings, containing item names of a menu in english.
-    """
-
-    st.write("This is the Manual Input Page.")
-    st.write("Once done, click on 'Explain My Menu' button to get explanations for each item ... ")
-
-    inp_texts = []
-    num_text_boxes = st.number_input("Number of text boxes", min_value=1, step=1)
-    for i in range(num_text_boxes):
-        text_box = st.text_input(f"Food item {i+1}")
-        if text_box:
-            inp_texts.append(text_box)
-
-    if len(inp_texts) > 0:
-
-        # Show user submit button only if they have entered some text and set text in session state
-        sst["user_entered_items"] = inp_texts
-        st.button("Explain My Menu", on_click=navigate_to, args=("Inference",))
-
-    else:
-        st.write("Please enter some items to proceed ...")
-
-    st.button("Go back Home", on_click=navigate_to, args=("Home",))
-
-
-# Function that handles logic of explaining menu items from image uploads
-async def image_input_page():
-    """
-    Function that contains content of main page i.e., image uploader and submit button to navigate to next page.
-    Upon submit, control goes to model inference 'page'.
-
-    Parameters:
-        None
-
-    Returns:
-        None
-    """
-
-    st.write("This is the Image Input Page.")
-
-    # Streamlit function to upload an image from any device
-    uploaded_file = st.file_uploader("Choose an image...",
-                                     type=["jpg", "jpeg", "png"])
-
-    # Remove preivous states' value of input image if it exists
-    sst.pop('input_image', None)
-
-    # Submit button
-    if uploaded_file is not None:
-        image = Image.open(uploaded_file)
-
-        # Only show if user wants to see
-        if st.checkbox('Show Uploaded Image'):
-            st.image(image,
-                     caption='Uploaded Image',
-                     use_column_width=True)
-
-        sst["input_image"] = image
-
-        # Show user submit button only if they have uploaded an image
-        st.button("Translate My Menu",
-                  on_click=navigate_to,
-                  args=("Inference",))
-
-    # Warning message to user
-    st.info("""This application is for education purposes only. It uses AI, hence it's dietary
-            recommendations are not to be taken as medical advice, author doesn't bear responsibility
-            for incorrect dietary recommendations. Please proceed with caution.
-            """)
-
-    # if user wants to go back, make sure to reset the session state
-    st.button("Go back Home", on_click=navigate_to, args=("Home",))
-
-
-# Function that handles model inference
-async def model_inference_page():
-
-    """
-    Function that pre-processes input text from state variables, does concurrent inference
-    and toggles state between pages if needed.
-
-    Parameters:
-        None
-    Returns:
-        None
-    """
-
-    second_title = st.empty()
-    second_title.title(" Using ML to explain your menu items ... ")
-
-    # User can either upload an image or enter text manually, we check for both
-    if "input_image" in sst:
-        image = sst["input_image"]
-
-        msg1 = st.empty()
-        msg1.write("Pre-processing and extracting text out of your image ....")
-        # Call the extract_filter_img function
-        filtered_text = await extract_filter_img(image)
-        num_items_detected = len(filtered_text)
-
-    if "user_entered_items" in sst:
-        user_text = sst["user_entered_items"]
-        st.write("Pre-processing and filtering text from user input ....")
-
-        filtered_text = [preprocess_text(ut) for ut in user_text]
-
-        num_items_detected = len(filtered_text)
-
-    # irrespective of source of user entry, we check if we have any items to process
-    if num_items_detected == 0:
-        st.write("We couldn't detect any menu items ( indian for now ) from your image, please try a different image by going back.")
-
-    elif num_items_detected > 0:
-        st.write(f"Detected {num_items_detected} menu items from your input image ... ")
-
-        msg2 = st.empty()
-        msg2.write("All pre-processing done, transcribing your menu items now ....")
-        st_trans_llm = time.perf_counter()
-
-        await dist_llm_inference(filtered_text)
-
-        msg3 = st.empty()
-        msg3.write("Done transcribing ... ")
-        en_trans_llm = time.perf_counter()
-
-        msg2.empty(); msg3.empty()
-        st.success("Image processed successfully!")
-
-        # Some basic stats for debug mode
-        if DEBUG_MODE:
-            llm_time_sec = en_trans_llm - st_trans_llm
-            st.write("Time took to summarize by LLM {}".format(llm_time_sec))
-
-    # If user clicked in "translate_another" button reset all session state variables and go back to home
-    st.button("Go back Home", on_click=navigate_to, args=("Home",))
-
-
-# Function that performs LLM inference on a single item
-async def dist_llm_inference(inp_texts: List[str]) -> None:
-
-    """
-    Function that performs concurrent LLM inference using threadpool. It displays
-    results of those threads that are done with execution, as a dynamic row to streamlit table, rather than
-    waiting for all threads to be done.
-
-    Parameters:
-        inp_texts: List[str], required -> List of strings, containing item names of a menu in english.
-
-    Returns:
-        None
-    """
-
-    df = pd.DataFrame([('ITEM NAME', 'EXPLANATION')])
-
-    sl_table = st.table(df)
-    tp_futures = {pool.submit(transcribe_menu_model, mi): mi for mi in inp_texts}
-
-    for tpftr in as_completed(tp_futures):
-
-        item = tp_futures[tpftr]
-
-        try:
-            exp = tpftr.result()
-
-            sl_table.add_rows([(item, str(exp))])
-
-        except Exception as e:
-            print("Could not add a new row dynamically, because of this error:", e)
-
-    return
utils.py
DELETED
@@ -1,15 +0,0 @@
-
-from streamlit import session_state as sst
-def navigate_to(page: str) -> None:
-    """
-    Function to set the current page in the state of streamlit. A helper for
-    simulating navigation in streamlit.
-
-    Parameters:
-        page: str, required.
-
-    Returns:
-        None
-    """
-
-    sst["page"] = page