deadshot2003 committed on
Commit
bb887c7
·
verified ·
1 Parent(s): 3be1b51

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +391 -0
app.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from pymongo import MongoClient
3
+ import hashlib
4
+ import os
5
+ import easyocr
6
+ import difflib
7
+ from llama_index.core import VectorStoreIndex, Document, Settings
8
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
9
+ from llama_index.llms.mistralai import MistralAI
10
+ from translate import Translator
11
+ from dotenv import load_dotenv
12
+ import re
13
+ import ast
14
+
15
# Load variables from a local .env file (if present) before any os.getenv()
# call below; without this the imported load_dotenv was never used.
load_dotenv()


# Streamlit re-executes this script on every interaction. The factories below
# are wrapped in st.cache_resource so the database client and the heavy
# OCR / embedding models are built once per server process instead of on
# every rerun.
@st.cache_resource
def _get_mongo_client():
    """Create the MongoDB client from the MONGODB_URI environment variable."""
    return MongoClient(os.getenv("MONGODB_URI"))


@st.cache_resource
def _get_ocr_reader():
    """Create the EasyOCR reader for English text."""
    return easyocr.Reader(['en'])


@st.cache_resource
def _get_embedding_model():
    """Create the HuggingFace embedding model used by LlamaIndex."""
    return HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")


# Connect to MongoDB
client = _get_mongo_client()
db = client.Health
customer_collection = db.customer
product_collection = db.product

# Initialize OCR and LlamaIndex models
reader = _get_ocr_reader()
embedding_model = _get_embedding_model()
llm = MistralAI(api_key=os.getenv("MISTRAL_API_KEY"))

# Set the embedding model and LLM globally
Settings.embed_model = embedding_model
Settings.llm = llm
29
+
30
# Function to hash passwords
def hash_password(password):
    """Return the hex-encoded SHA-256 digest of *password*.

    NOTE(review): this is an unsalted, fast hash. Moving to a salted KDF
    (e.g. hashlib.scrypt) would be safer but needs a migration of the
    digests already stored, so the format is left unchanged here.
    """
    return hashlib.sha256(password.encode()).hexdigest()


# Function to check hashed passwords
def check_password(stored_password, provided_password):
    """Return True when *provided_password* hashes to *stored_password*.

    A missing or non-string stored value never matches. The comparison uses
    hmac.compare_digest so its duration does not depend on how many leading
    characters of the two digests agree.
    """
    import hmac

    if not isinstance(stored_password, str) or not isinstance(provided_password, str):
        return False
    expected = hash_password(provided_password)
    # Compare as bytes: compare_digest only accepts ASCII-only str values.
    return hmac.compare_digest(stored_password.encode(), expected.encode())
37
+
38
# Function to calculate BMI
def calculate_bmi(weight, height):
    """Return the body-mass index rounded to two decimals.

    Args:
        weight: Body weight in kilograms.
        height: Body height in centimetres.

    Raises:
        ValueError: If *height* or *weight* is not a positive number. This
            replaces the bare ZeroDivisionError a zero height used to cause.
    """
    if height <= 0:
        raise ValueError("height must be a positive number of centimetres")
    if weight <= 0:
        raise ValueError("weight must be a positive number of kilograms")
    height_in_meters = height / 100
    bmi = weight / (height_in_meters ** 2)
    return round(bmi, 2)
43
+
44
# Function to correct OCR mistakes
nutritional_terms = [
    "calories", "fat", "trans fat", "saturated fat", "cholesterol",
    "sodium", "carbohydrates", "sugar", "protein", "fiber", "vitamin", "iron"
]


def correct_ocr_mistakes(text):
    """Replace misread nutrition words in *text* with their canonical spelling.

    The text is split on whitespace. Each token is compared, case-insensitively
    and without its surrounding punctuation, against ``nutritional_terms`` using
    difflib with a 0.7 similarity cutoff. A matching token is replaced by the
    canonical (lower-case) term; punctuation around it (e.g. the colon in
    "Sodlum:") is kept so separators on the label are not lost. Tokens without
    a close match are returned unchanged. Tokens are re-joined with single
    spaces.
    """
    import string

    corrected_tokens = []
    for token in text.split():
        core = token.strip(string.punctuation)
        if not core:
            # Pure punctuation: nothing to correct.
            corrected_tokens.append(token)
            continue

        closest_match = difflib.get_close_matches(
            core.lower(), nutritional_terms, n=1, cutoff=0.7
        )
        if not closest_match:
            corrected_tokens.append(token)
            continue

        # Re-attach whatever punctuation surrounded the matched core.
        start = token.find(core)
        prefix, suffix = token[:start], token[start + len(core):]
        corrected_tokens.append(f"{prefix}{closest_match[0]}{suffix}")
    return ' '.join(corrected_tokens)
56
+
57
# Function to fetch user details
def fetch_user_details(email):
    """Look up one customer record by its "email" field.

    Returns whatever ``customer_collection.find_one`` yields for that filter;
    callers in this file treat a falsy result as "no such user".
    """
    lookup = {"email": email}
    return customer_collection.find_one(lookup)
60
+
61
# Function to analyze food label and user profile
def prepare_data_for_rag(ocr_text, user_profile):
    """Wrap the label text and the user profile in two LlamaIndex documents.

    Returns a two-element list: the food-label document first, then the
    profile document (the profile is embedded via its string form).
    """
    label_document = Document(text=f"OCR corrected text from food label: {ocr_text}")
    profile_document = Document(text=f"User Profile: {user_profile}")
    return [label_document, profile_document]
68
+
69
def analyze_with_llama_index(ocr_text, user_profile):
    """Ask the configured LLM to rate a food label for a specific user.

    The label text and user profile are indexed as two documents in an
    in-memory VectorStoreIndex, which is then queried with a fixed prompt
    asking for a 1-10 health rating, a nutritional breakdown and advice.

    Args:
        ocr_text: Text read from the food label.
        user_profile: Profile data of the user; embedded via its string form.

    Returns:
        The ``response`` attribute of the query-engine result.
    """
    documents = prepare_data_for_rag(ocr_text, user_profile)
    index = VectorStoreIndex.from_documents(documents)

    # The original assigned this as ``query = query = ...``; the duplicate
    # assignment target was redundant and has been dropped.
    query = """
    You are tasked with analyzing the contents of a food label and evaluating its healthiness for a specific user.

    1. **Health Rating**:
       - Based on the corrected food label and the user's profile, assign a health rating on a scale from **1 to 10** (where **10** is the healthiest).
       - Consider the user's dietary preferences, health goals, allergies, and activity level in your rating.
       - **If the food contains any ingredients to which the user is allergic (e.g., peanuts for a nut allergy), assign a health rating of 0/10 and include a clear warning and also before doing this be double sure that the food has a substance to which the user is allergic **.
       - If the user should avoid the food altogether, assign a rating from **1 to 4**.
       - If the user should consume the food in moderation, assign a rating from **5 to 7**.
       - If the user can consume the food frequently, assign a rating from **8 to 10**.

    2. **Health Analysis**:
       - **Detailed Breakdown**: Present a statistical breakdown of the food's nutritional content (e.g., "The food contains 2% saturated fat, 12g sugar, and 10g protein"). Ensure all terms and values are correctly spelled and reflect the accurate content of the food item.
       - **Personalized Evaluation**: Explain why the food item is either good or bad for the user based on their specific health profile. Identify any ingredients or nutritional aspects that align well or poorly with the user's dietary needs (e.g., "This food is high in sugar, which may not align with your goal of maintaining stable blood sugar levels"). **If the food contains an allergen, make sure to emphasize the risk for the user**.
       - **Advice**: Provide guidance on whether the user should consume this food frequently, in moderation, or avoid it altogether, considering their health goals, dietary restrictions, and any allergens. **If the food contains an allergen, recommend avoiding it entirely and issue a warning in the conclusion**.

    Ensure that the output is free from spelling mistakes and important points or warnings are clearly communicated with **bold keywords** to highlight relevant details.
    """

    query_engine = index.as_query_engine()
    response = query_engine.query(query)

    return response.response
96
+
97
def _format_allergies(allergies):
    """Render stored allergies as one comma-separated string.

    The forms in this app store allergies as free text, so a string is
    returned as-is (stripped). Joining it directly, as the original did,
    would put a comma between every character. Lists/tuples are joined;
    None or empty values become an empty string.
    """
    if not allergies:
        return ""
    if isinstance(allergies, str):
        return allergies.strip()
    return ', '.join(str(item) for item in allergies)


# Function for food label analysis
def analyze_food_label(image_path, email):
    """Run OCR on a label image and have it analysed for the given user.

    Args:
        image_path: Path of the image file passed to the OCR reader.
        email: E-mail address used to look up the user's profile.

    Returns:
        The analysis text produced by ``analyze_with_llama_index``, or None
        when no user is found for *email*.
    """
    # Look the user up first so OCR is not run when there is nobody to
    # analyse the label for.
    user = fetch_user_details(email)
    if not user:
        return None

    result = reader.readtext(image_path)
    # Each OCR result's second element is the recognised text.
    ocr_text = ' '.join([res[1] for res in result])
    corrected_text = correct_ocr_mistakes(ocr_text)

    user_profile = {
        "BMI": user.get("bmi", "Not provided"),
        "Allergies": _format_allergies(user.get("allergies")),
        "Health Conditions": user.get("health_conditions", "None"),
        "Dietary Preferences": user.get("dietary_preferences", "None"),
        "Activity Level": user.get("activity_level", "Moderate"),
        "Health Goals": user.get("health_goals", "General well-being")
    }

    return analyze_with_llama_index(corrected_text, user_profile)
118
+
119
def update_user_profile(email, updated_data):
    """Apply *updated_data* as a ``$set`` to the customer whose "email" matches."""
    selector = {"email": email}
    change = {"$set": updated_data}
    customer_collection.update_one(selector, change)
121
+
122
def _split_into_chunks(text, max_length):
    """Split *text* into pieces of at most *max_length* characters.

    A piece ends just after the last space or newline inside its window when
    there is one, so words are not cut in half; text with no whitespace in
    the window is cut at exactly *max_length*. Joining the pieces
    reproduces *text*.
    """
    chunks = []
    start = 0
    total = len(text)
    while start < total:
        end = min(start + max_length, total)
        if end < total:
            cut = max(text.rfind(" ", start, end), text.rfind("\n", start, end))
            if cut > start:
                end = cut + 1
        chunks.append(text[start:end])
        start = end
    return chunks


# Function to translate text using the translate library
def translate_text(text, target_lang):
    """Translate *text* into *target_lang* with the ``translate`` package.

    The text is sent in chunks of at most 500 characters, split on whitespace
    where possible. Whitespace at the edges of each chunk is kept outside the
    translated part so chunks do not run together when re-joined.

    Args:
        text: The text to translate.
        target_lang: Language code passed to ``Translator(to_lang=...)``.

    Returns:
        The translated text, or a string starting with "Translation error: "
        if anything fails (callers check for that prefix).
    """
    translator = Translator(to_lang=target_lang)
    max_length = 500

    try:
        translated_parts = []
        for chunk in _split_into_chunks(text, max_length):
            body = chunk.strip()
            if not body:
                # Whitespace only: keep it verbatim, nothing to translate.
                translated_parts.append(chunk)
                continue
            leading = chunk[:len(chunk) - len(chunk.lstrip())]
            trailing = chunk[len(chunk.rstrip()):]
            translated_parts.append(leading + translator.translate(body) + trailing)
        return "".join(translated_parts)
    except Exception as e:
        # Deliberately broad: any failure is reported to the UI as text.
        return f"Translation error: {str(e)}"
141
+
142
+
143
# Function to update product database
def update_product_database(ocr_text, product_type, consumption_frequency):
    """Structure a label's OCR text with the LLM and store it as a product.

    The LLM is asked to return a Python dictionary describing the product.
    The first ``{...}`` span of the reply is parsed, tagged with
    *product_type* and *consumption_frequency*, and inserted into
    ``product_collection``. Progress and errors are reported through
    Streamlit messages.

    Args:
        ocr_text: Raw text read from the food label.
        product_type: Value stored under the "product_type" key.
        consumption_frequency: Value stored under "consumption_frequency".

    Returns:
        True when the product was inserted, False when the reply could not
        be turned into a dictionary. (The original returned None in both
        cases; callers that ignore the result are unaffected.)
    """
    import json

    documents = [Document(text=f"OCR text from food label: {ocr_text}")]
    index = VectorStoreIndex.from_documents(documents)
    query_engine = index.as_query_engine()

    query = """
    You are tasked with correcting and structuring the OCR text from a food label. Please:
    1. Correct any spelling mistakes or grammatical errors in the OCR text.
    2. Extract and structure the following information:
       - Product Name
       - Brand Name
       - Weight in Grams/ML
       - Nutritional information: Include the serving size (e.g., "per 100g", "per 200ml") as specified on the label. If multiple serving sizes are given, use the one that provides the most comprehensive nutritional breakdown.
       - Ingredients
       - Product Category
       - Proprietary Claims: Include any claims such as "sugar-free", "low-fat", etc. If no such claims are present, leave this field empty.
    3. Present the information as a Python dictionary. The 'Nutritional information' should be a nested dictionary with the serving size as the key and the nutritional details as the value. Do not include any additional text, markdown formatting, or code blocks. Just return the dictionary.

    Example format:
    {
        "Product Name": "Example Cereal",
        "Brand Name": "HealthyBrands",
        "Weight": "500g",
        "Nutritional information": {
            "per 100g": {
                "Energy": "370kcal",
                "Protein": "8g",
                "Carbohydrates": "80g",
                "Fat": "2g"
            }
        },
        "Ingredients": "Whole grain wheat, sugar, salt",
        "Product Category": "Breakfast Cereal",
        "Proprietary Claims": "High in fiber, Low in fat"
    }

    If certain information is not available in the OCR text, use "Not specified" as the value for that key.
    """

    response = query_engine.query(query)

    # Extract the dictionary from the response
    dict_match = re.search(r'\{.*\}', response.response or "", re.DOTALL)
    if not dict_match:
        st.error("Could not extract product information from the AI response. Please try again.")
        return False

    candidate = dict_match.group()
    product_info = None
    try:
        # literal_eval only evaluates literals, so LLM output is never executed.
        product_info = ast.literal_eval(candidate)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # The model sometimes answers with JSON (null/true/false) instead of
        # Python literals; give that form a chance before giving up.
        try:
            product_info = json.loads(candidate)
        except ValueError:
            product_info = None

    # A brace-delimited literal can also be a set; only a dict can be stored.
    if not isinstance(product_info, dict):
        st.error("Failed to parse the AI response. Please try again.")
        return False

    product_info['product_type'] = product_type
    product_info['consumption_frequency'] = consumption_frequency
    product_collection.insert_one(product_info)
    st.success("Product information successfully added to the database.")
    return True
201
# Streamlit UI
st.title("Health & Nutrition Analyzer")

# Initialize session state: each key is created with its starting value only
# if it is not already present, so values survive script reruns.
for _state_key, _initial_value in (
    ("logged_in", False),
    ("user_email", None),
    ("analysis_result", None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
211
+
212
# Main application logic
ACTIVITY_LEVELS = ["Low", "Moderate", "High"]
DIETARY_OPTIONS = ["Vegetarian", "Vegan", "Gluten-Free", "Keto", "Paleo", "No preference"]
HEALTH_GOALS = ["Lose weight", "Gain muscle", "Maintain weight", "Improve stamina", "General well-being"]


def _clamp_int(value, low, high):
    """Coerce a stored number to an int within [low, high]; use low if invalid.

    st.number_input raises when its initial value lies outside the
    min/max bounds, which the original defaults of 0 did.
    """
    try:
        number = int(value)
    except (TypeError, ValueError):
        return low
    return max(low, min(high, number))


def _option_index(options, value, fallback):
    """Return the position of *value* in *options*, or that of *fallback*."""
    return options.index(value) if value in options else options.index(fallback)


def _as_text(value):
    """Render a stored field as text for st.text_input (lists are joined)."""
    if value is None:
        return ""
    if isinstance(value, (list, tuple)):
        return ", ".join(str(item) for item in value)
    return str(value)


def _save_upload(upload):
    """Write an uploaded file into ./temp and return the path written.

    Only the base name of the client-supplied file name is used so it
    cannot point outside the temp directory.
    """
    os.makedirs("temp", exist_ok=True)
    target = os.path.join("temp", os.path.basename(upload.name))
    with open(target, "wb") as f:
        f.write(upload.getbuffer())
    return target


if st.session_state.logged_in:
    st.success(f"Welcome back, {st.session_state.user_email}!")

    if st.button("Logout"):
        st.session_state.logged_in = False
        st.session_state.user_email = None
        st.session_state.analysis_result = None
        st.rerun()

    # Profile Update Section
    st.subheader("Update Your Profile")
    update_profile = st.checkbox("Edit Profile")

    if update_profile:
        user = fetch_user_details(st.session_state.user_email)
        if not user:
            # The account may have been removed while the session was open.
            st.error("Could not load your profile. Please log out and log in again.")
        else:
            stored_preferences = user.get('dietary_preferences', [])
            if not isinstance(stored_preferences, (list, tuple)):
                stored_preferences = []

            with st.form("profile_update_form"):
                name = st.text_input("Name", value=_as_text(user.get('name', '')))
                age = st.number_input("Age", value=_clamp_int(user.get('age'), 1, 120), min_value=1, max_value=120)
                height = st.number_input("Height (in cm)", value=_clamp_int(user.get('height'), 50, 250), min_value=50, max_value=250)
                weight = st.number_input("Weight (in kg)", value=_clamp_int(user.get('weight'), 10, 300), min_value=10, max_value=300)
                allergies = st.text_input("Allergies", value=_as_text(user.get('allergies', '')))
                health_conditions = st.text_input("Health Conditions", value=_as_text(user.get('health_conditions', '')))
                activity_level = st.selectbox(
                    "Activity Level",
                    ACTIVITY_LEVELS,
                    index=_option_index(ACTIVITY_LEVELS, user.get('activity_level'), 'Moderate'),
                )
                dietary_preferences = st.multiselect(
                    "Dietary Preferences",
                    DIETARY_OPTIONS,
                    # Defaults that are not valid options make multiselect raise.
                    default=[p for p in stored_preferences if p in DIETARY_OPTIONS],
                )
                health_goals = st.selectbox(
                    "Health Goals",
                    HEALTH_GOALS,
                    index=_option_index(HEALTH_GOALS, user.get('health_goals'), 'General well-being'),
                )

                update_submitted = st.form_submit_button("Update Profile")

            if update_submitted:
                bmi = calculate_bmi(weight, height)
                updated_data = {
                    "name": name,
                    "age": age,
                    "height": height,
                    "weight": weight,
                    "bmi": bmi,
                    "allergies": allergies,
                    "health_conditions": health_conditions,
                    "activity_level": activity_level,
                    "dietary_preferences": dietary_preferences,
                    "health_goals": health_goals
                }
                update_user_profile(st.session_state.user_email, updated_data)
                st.success(f"Profile updated successfully! Your new BMI is {bmi}.")
                st.rerun()

    # Food Label Analysis Section
    st.subheader("Upload Food Label for Analysis")
    uploaded_file = st.file_uploader("Upload Food Label Image", type=["jpg", "jpeg", "png"])

    if uploaded_file:
        image_path = _save_upload(uploaded_file)

        st.image(image_path, caption="Uploaded Food Label", use_column_width=True)

        if st.button("Analyze Food Label"):
            with st.spinner("Analyzing food label..."):
                result = analyze_food_label(image_path, st.session_state.user_email)
                if result:
                    # Kept in session state so the translation section below
                    # still has it after the next rerun.
                    st.session_state.analysis_result = result
                    st.write(result)
                else:
                    st.error("Error analyzing food label. Please try again.")

    # Translation Section
    if st.session_state.analysis_result:
        st.subheader("Translate Analysis")
        languages = {
            "Hindi": "hi", "Bengali": "bn", "Telugu": "te", "Marathi": "mr", "Tamil": "ta",
            "Urdu": "ur", "Gujarati": "gu", "Kannada": "kn", "Odia": "or", "Malayalam": "ml",
            "Spanish": "es", "French": "fr", "German": "de", "Chinese": "zh", "Japanese": "ja"
        }
        target_lang = st.selectbox("Select language for translation:", list(languages.keys()))

        if st.button("Translate"):
            with st.spinner("Translating..."):
                translated_result = translate_text(st.session_state.analysis_result, languages[target_lang])
                # translate_text reports failures as text with this prefix.
                if translated_result.startswith("Translation error"):
                    st.error(translated_result)
                else:
                    st.subheader(f"Translated Analysis ({target_lang}):")
                    st.write(translated_result)

else:
    # Login and Registration Section
    st.subheader("Login or Register")

    tab1, tab2 = st.tabs(["Login", "Register"])

    with tab1:
        st.subheader("User Login")
        email = st.text_input("Email", key="login_email")
        password = st.text_input("Password", type="password", key="login_password")

        if st.button("Login"):
            user = fetch_user_details(email)
            # .get() so a record without a password simply fails the check.
            if user and check_password(user.get('password'), password):
                st.session_state.logged_in = True
                st.session_state.user_email = email
                st.success(f"Welcome back, {user.get('name', email)}!")
                st.rerun()
            else:
                st.error("Invalid email or password!")

    with tab2:
        st.subheader("User Registration")
        with st.form("registration_form"):
            name = st.text_input("Name *", key="register_name")
            email = st.text_input("Email *", key="register_email")
            password = st.text_input("Password *", type="password", key="register_password")
            confirm_password = st.text_input("Confirm Password *", type="password", key="confirm_password")
            age = st.number_input("Age *", min_value=1, max_value=120, key="register_age")
            height = st.number_input("Height (in cm) *", min_value=50, max_value=250, key="register_height")
            weight = st.number_input("Weight (in kg) *", min_value=10, max_value=300, key="register_weight")
            allergies = st.text_input("Allergies (if any)", key="register_allergies")
            health_conditions = st.text_input("Health Conditions (if any)", key="register_health_conditions")
            activity_level = st.selectbox("Activity Level", ACTIVITY_LEVELS, key="register_activity_level")
            dietary_preferences = st.multiselect("Dietary Preferences", DIETARY_OPTIONS, key="register_dietary_preferences")
            health_goals = st.selectbox("Health Goals", HEALTH_GOALS, key="register_health_goals")

            submitted = st.form_submit_button("Register")

        if submitted:
            if not (name.strip() and email.strip() and password):
                # The fields marked * were previously not enforced.
                st.error("Please fill in all required fields (marked with *).")
            elif password != confirm_password:
                st.error("Passwords do not match!")
            elif fetch_user_details(email):
                # Login looks users up by email, so a second record with the
                # same address would be unreachable.
                st.error("An account with this email already exists. Please log in instead.")
            else:
                bmi = calculate_bmi(weight, height)
                user_data = {
                    "name": name,
                    "email": email,
                    "password": hash_password(password),
                    "age": age,
                    "height": height,
                    "weight": weight,
                    "bmi": bmi,
                    "allergies": allergies,
                    "health_conditions": health_conditions,
                    "activity_level": activity_level,
                    "dietary_preferences": dietary_preferences,
                    "health_goals": health_goals
                }
                customer_collection.insert_one(user_data)
                st.success(f"Registration successful! Your BMI is {bmi}.")
359
+
360
# Database Contribution Section
# Shown regardless of login state: lets anyone upload a label image whose
# OCR text is structured by the LLM and stored in the product collection.

contribute_toggle = st.toggle("Help us? Contribute to our database")

if contribute_toggle:
    st.subheader("Contribute to Food Label Database")

    # Use st.form to prevent rerun on every input change
    with st.form("contribution_form"):
        uploaded_file = st.file_uploader("Upload Food Label Image", type=["jpg", "jpeg", "png"], key="contribute_upload")
        product_type = st.selectbox("Product Type", ["Nutritional", "Regular", "Recreational"])
        consumption_frequency = st.selectbox("Consumption Frequency", ["Daily", "Weekly", "Monthly"])
        submit_button = st.form_submit_button("Process and Submit Label")

    if submit_button and uploaded_file:
        with st.spinner("Processing and submitting food label..."):
            # Use only the base name of the client-supplied file name so the
            # image cannot be written outside the temp directory.
            image_path = os.path.join("temp", os.path.basename(uploaded_file.name))
            os.makedirs("temp", exist_ok=True)
            with open(image_path, "wb") as f:
                f.write(uploaded_file.getbuffer())

            # Perform OCR
            result = reader.readtext(image_path)
            ocr_text = ' '.join([res[1] for res in result])

            # Update database
            update_product_database(ocr_text, product_type, consumption_frequency)
            # NOTE(review): this message is shown even when
            # update_product_database reported a parsing error above it.
            st.success("Thank you for contributing to our database!")
    elif submit_button:
        # Previously a submit without a file did nothing at all.
        st.warning("Please upload a food label image before submitting.")
388
+
389
+ # Clear the form
390
+ #st.session_state['contribute_upload'] = None
391
+ #st.experimental_rerun()