# File-viewer header (not code): deadshot2003 · "Create app.py" · commit bb887c7 (verified) · 18.4 kB
import streamlit as st
from pymongo import MongoClient
import hashlib
import os
import easyocr
import difflib
from llama_index.core import VectorStoreIndex, Document, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.mistralai import MistralAI
from translate import Translator
from dotenv import load_dotenv
import re
import ast
# Load variables from a local .env file (if one exists) so the os.getenv()
# lookups below can see MONGODB_URI and MISTRAL_API_KEY. Variables that are
# already set in the environment are left untouched.
load_dotenv()


# Streamlit re-executes this script on every user interaction.
# st.cache_resource keeps these expensive objects alive across reruns instead
# of reconnecting / reloading model weights each time.
@st.cache_resource
def _get_mongo_client():
    """Create the MongoDB client from the MONGODB_URI environment variable."""
    return MongoClient(os.getenv("MONGODB_URI"))


@st.cache_resource
def _get_ocr_reader():
    """Create the English EasyOCR reader."""
    return easyocr.Reader(['en'])


@st.cache_resource
def _get_embedding_model():
    """Create the HuggingFace embedding model used by LlamaIndex."""
    return HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")


# Connect to MongoDB
client = _get_mongo_client()
db = client.Health
customer_collection = db.customer
product_collection = db.product
# Initialize OCR and LlamaIndex models
reader = _get_ocr_reader()
embedding_model = _get_embedding_model()
llm = MistralAI(api_key=os.getenv("MISTRAL_API_KEY"))
# Set the embedding model and LLM globally
Settings.embed_model = embedding_model
Settings.llm = llm
# Function to hash passwords
def hash_password(password):
    """Return the hexadecimal SHA-256 digest of ``password``.

    Args:
        password: Plain-text password (str); it is UTF-8 encoded before hashing.

    Returns:
        A 64-character lowercase hex string.

    NOTE(review): this is a single unsalted SHA-256 pass. The algorithm is
    kept as-is because stored hashes are compared against its output;
    moving to a salted KDF would need a migration of existing records.
    """
    digest = hashlib.sha256()
    digest.update(password.encode())
    return digest.hexdigest()
# Function to check hashed passwords
def check_password(stored_password, provided_password):
    """Tell whether ``provided_password`` hashes to ``stored_password``.

    Args:
        stored_password: Hex digest previously produced by ``hash_password``.
            Any non-string value (e.g. ``None`` for a record without a
            password) never matches.
        provided_password: Plain-text password to verify.

    Returns:
        True when the digests are equal, False otherwise.
    """
    # Local import keeps this block self-contained.
    import hmac

    if not isinstance(stored_password, str):
        return False
    candidate = hash_password(provided_password)
    # compare_digest runs in time independent of where the inputs differ,
    # unlike ``==`` on strings. Bytes are used so non-ASCII input cannot
    # raise TypeError.
    return hmac.compare_digest(
        stored_password.encode("utf-8"), candidate.encode("utf-8")
    )
# Function to calculate BMI
def calculate_bmi(weight, height):
    """Compute the body-mass index, rounded to two decimals.

    Args:
        weight: Body weight in kilograms.
        height: Body height in centimetres; must be greater than zero.

    Returns:
        ``weight / (height in metres) ** 2`` rounded to 2 decimal places.

    Raises:
        ValueError: If ``height`` is zero or negative.
    """
    if height <= 0:
        # Zero would divide by zero; a negative height would silently give a
        # plausible-looking but meaningless result after squaring.
        raise ValueError("height must be a positive number of centimetres")
    height_in_meters = height / 100
    return round(weight / (height_in_meters ** 2), 2)
# Function to correct OCR mistakes
# Vocabulary that OCR output is snapped to by correct_ocr_mistakes().
nutritional_terms = [
    "calories", "fat", "trans fat", "saturated fat", "cholesterol",
    "sodium", "carbohydrates", "sugar", "protein", "fiber", "vitamin", "iron"
]


def correct_ocr_mistakes(text):
    """Replace words that closely resemble a nutritional term with that term.

    The text is split on whitespace. Each word is lower-cased and compared
    against ``nutritional_terms`` with ``difflib.get_close_matches``
    (similarity cutoff 0.7). A word with a match is replaced by the best
    matching term (which is lower-case); any other word is kept unchanged.

    Args:
        text: Raw OCR text.

    Returns:
        The words joined back together with single spaces.
    """
    def _best_term(word):
        # n=1: only the single best candidate is needed.
        candidates = difflib.get_close_matches(
            word.lower(), nutritional_terms, n=1, cutoff=0.7
        )
        return candidates[0] if candidates else word

    return ' '.join(_best_term(word) for word in text.split())
# Function to fetch user details
def fetch_user_details(email):
    """Look up a customer document by e-mail address.

    Args:
        email: Value matched against the ``email`` field.

    Returns:
        Whatever ``customer_collection.find_one`` returns for the filter
        ``{"email": email}``.
    """
    lookup = {"email": email}
    return customer_collection.find_one(lookup)
# Function to analyze food label and user profile
def prepare_data_for_rag(ocr_text, user_profile):
    """Wrap the label text and the user profile in two LlamaIndex documents.

    Args:
        ocr_text: Corrected OCR text of the food label.
        user_profile: Profile data; it is embedded via its ``str()`` form.

    Returns:
        A two-element list: the label document followed by the profile
        document.
    """
    label_document = Document(text=f"OCR corrected text from food label: {ocr_text}")
    profile_document = Document(text=f"User Profile: {user_profile}")
    return [label_document, profile_document]
def analyze_with_llama_index(ocr_text, user_profile):
    """Ask the configured LLM to rate a food label for a specific user.

    An in-memory ``VectorStoreIndex`` is built from the two documents made
    by ``prepare_data_for_rag`` and queried with a fixed prompt that asks
    for a health rating, a nutritional breakdown and personalised advice.

    Args:
        ocr_text: Corrected OCR text of the food label.
        user_profile: Profile data of the user the rating is for.

    Returns:
        The ``response`` attribute of the query engine's answer.
    """
    documents = prepare_data_for_rag(ocr_text, user_profile)
    index = VectorStoreIndex.from_documents(documents)
    # The prompt body is deliberately left-aligned: its text is sent to the
    # model verbatim, so leading indentation would become part of the prompt.
    query = """
You are tasked with analyzing the contents of a food label and evaluating its healthiness for a specific user.
1. **Health Rating**:
- Based on the corrected food label and the user's profile, assign a health rating on a scale from **1 to 10** (where **10** is the healthiest).
- Consider the user's dietary preferences, health goals, allergies, and activity level in your rating.
- **If the food contains any ingredients to which the user is allergic (e.g., peanuts for a nut allergy), assign a health rating of 0/10 and include a clear warning and also before doing this be double sure that the food has a substance to which the user is allergic **.
- If the user should avoid the food altogether, assign a rating from **1 to 4**.
- If the user should consume the food in moderation, assign a rating from **5 to 7**.
- If the user can consume the food frequently, assign a rating from **8 to 10**.
2. **Health Analysis**:
- **Detailed Breakdown**: Present a statistical breakdown of the food's nutritional content (e.g., "The food contains 2% saturated fat, 12g sugar, and 10g protein"). Ensure all terms and values are correctly spelled and reflect the accurate content of the food item.
- **Personalized Evaluation**: Explain why the food item is either good or bad for the user based on their specific health profile. Identify any ingredients or nutritional aspects that align well or poorly with the user's dietary needs (e.g., "This food is high in sugar, which may not align with your goal of maintaining stable blood sugar levels"). **If the food contains an allergen, make sure to emphasize the risk for the user**.
- **Advice**: Provide guidance on whether the user should consume this food frequently, in moderation, or avoid it altogether, considering their health goals, dietary restrictions, and any allergens. **If the food contains an allergen, recommend avoiding it entirely and issue a warning in the conclusion**.
Ensure that the output is free from spelling mistakes and important points or warnings are clearly communicated with **bold keywords** to highlight relevant details.
"""
    query_engine = index.as_query_engine()
    response = query_engine.query(query)
    return response.response
# Function for food label analysis
def analyze_food_label(image_path, email):
    """Run OCR on a label image and have it evaluated for the given user.

    Args:
        image_path: Path of the label image handed to the OCR reader.
        email: E-mail address identifying the user profile to evaluate for.

    Returns:
        The analysis text from ``analyze_with_llama_index``, or ``None`` when
        no user is found for ``email``.
    """
    user = fetch_user_details(email)
    if not user:
        # Nothing to personalise against; skip the expensive OCR pass.
        return None

    # The registration and profile forms store allergies as free text, so
    # the value is usually a string. Joining a string with ', ' would split
    # it into single characters ("p, e, a, ..."), which garbles the allergy
    # data sent to the model. Only join real collections.
    allergies = user.get("allergies", [])
    if isinstance(allergies, str):
        allergies_text = allergies
    else:
        allergies_text = ', '.join(str(item) for item in (allergies or []))

    user_profile = {
        "BMI": user.get("bmi", "Not provided"),
        "Allergies": allergies_text,
        "Health Conditions": user.get("health_conditions", "None"),
        "Dietary Preferences": user.get("dietary_preferences", "None"),
        "Activity Level": user.get("activity_level", "Moderate"),
        "Health Goals": user.get("health_goals", "General well-being")
    }

    ocr_results = reader.readtext(image_path)
    # The recognised string is taken from index 1 of each OCR result entry.
    ocr_text = ' '.join(res[1] for res in ocr_results)
    corrected_text = correct_ocr_mistakes(ocr_text)
    return analyze_with_llama_index(corrected_text, user_profile)
def update_user_profile(email, updated_data):
    """Overwrite profile fields of the customer identified by ``email``.

    Args:
        email: Value matched against the ``email`` field.
        updated_data: Mapping of field names to new values; applied with
            MongoDB's ``$set`` operator.
    """
    selector = {"email": email}
    changes = {"$set": updated_data}
    customer_collection.update_one(selector, changes)
def _split_for_translation(text, max_length):
    """Split ``text`` into pieces of at most ``max_length`` characters.

    Cuts are made at the last whitespace inside each window so words are
    not broken in half; a single run longer than ``max_length`` without any
    whitespace is hard-cut. Whitespace at the cut points is dropped.
    """
    pieces = []
    remaining = text.strip()
    while len(remaining) > max_length:
        # Look one character past the limit so a separator sitting exactly
        # at the boundary can be used as the cut point.
        window = remaining[:max_length + 1]
        cut = max(window.rfind(" "), window.rfind("\n"), window.rfind("\t"))
        if cut <= 0:
            cut = max_length
        pieces.append(remaining[:cut].rstrip())
        remaining = remaining[cut:].lstrip()
    if remaining:
        pieces.append(remaining)
    return pieces


# Function to translate text using the translate library
def translate_text(text, target_lang):
    """Translate ``text`` into ``target_lang`` in chunks of up to 500 characters.

    Args:
        text: Text to translate.
        target_lang: Target language code passed to ``Translator(to_lang=...)``.

    Returns:
        The translated chunks joined with single spaces, or a string starting
        with ``"Translation error: "`` if anything goes wrong (callers test
        for that prefix).
    """
    translator = Translator(to_lang=target_lang)
    max_length = 500
    try:
        # Chunks end on word boundaries, so each request carries whole words.
        chunks = _split_for_translation(text, max_length)
        return " ".join(translator.translate(chunk) for chunk in chunks)
    except Exception as e:
        # Deliberate catch-all: failures are reported in the returned string
        # instead of being raised.
        return f"Translation error: {str(e)}"
# Function to update product database
def update_product_database(ocr_text, product_type, consumption_frequency):
    """Structure OCR label text with the LLM and store it as a product.

    The LLM is asked to return a Python-dictionary literal describing the
    product. The first ``{ ... }`` span of the answer is parsed, tagged with
    ``product_type`` and ``consumption_frequency`` and inserted into
    ``product_collection``. Progress and errors are reported through
    Streamlit messages.

    Args:
        ocr_text: Raw OCR text of the food label.
        product_type: Stored under the ``product_type`` key.
        consumption_frequency: Stored under the ``consumption_frequency`` key.

    Returns:
        True when a document was inserted, False when the answer could not
        be extracted or parsed.
    """
    # Local import keeps this block self-contained.
    import json

    documents = [Document(text=f"OCR text from food label: {ocr_text}")]
    index = VectorStoreIndex.from_documents(documents)
    query_engine = index.as_query_engine()
    # The prompt body is deliberately left-aligned: its text is sent to the
    # model verbatim.
    query = """
You are tasked with correcting and structuring the OCR text from a food label. Please:
1. Correct any spelling mistakes or grammatical errors in the OCR text.
2. Extract and structure the following information:
- Product Name
- Brand Name
- Weight in Grams/ML
- Nutritional information: Include the serving size (e.g., "per 100g", "per 200ml") as specified on the label. If multiple serving sizes are given, use the one that provides the most comprehensive nutritional breakdown.
- Ingredients
- Product Category
- Proprietary Claims: Include any claims such as "sugar-free", "low-fat", etc. If no such claims are present, leave this field empty.
3. Present the information as a Python dictionary. The 'Nutritional information' should be a nested dictionary with the serving size as the key and the nutritional details as the value. Do not include any additional text, markdown formatting, or code blocks. Just return the dictionary.
Example format:
{
"Product Name": "Example Cereal",
"Brand Name": "HealthyBrands",
"Weight": "500g",
"Nutritional information": {
"per 100g": {
"Energy": "370kcal",
"Protein": "8g",
"Carbohydrates": "80g",
"Fat": "2g"
}
},
"Ingredients": "Whole grain wheat, sugar, salt",
"Product Category": "Breakfast Cereal",
"Proprietary Claims": "High in fiber, Low in fat"
}
If certain information is not available in the OCR text, use "Not specified" as the value for that key.
"""
    response = query_engine.query(query)
    # Extract the dictionary from the response. ``or ""`` guards against an
    # answer object without text.
    dict_match = re.search(r'\{.*\}', response.response or "", re.DOTALL)
    if not dict_match:
        st.error("Could not extract product information from the AI response. Please try again.")
        return False

    raw_literal = dict_match.group()
    product_info = None
    try:
        # literal_eval only evaluates literals, so model output cannot run code.
        product_info = ast.literal_eval(raw_literal)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Models often answer in JSON (true/false/null), which is not a
        # valid Python literal. Try that before giving up.
        try:
            product_info = json.loads(raw_literal)
        except (ValueError, RecursionError):
            product_info = None

    # A braces-delimited literal can also be a set; only a dict can be tagged
    # and stored.
    if not isinstance(product_info, dict):
        st.error("Failed to parse the AI response. Please try again.")
        return False

    product_info['product_type'] = product_type
    product_info['consumption_frequency'] = consumption_frequency
    product_collection.insert_one(product_info)
    st.success("Product information successfully added to the database.")
    return True
# Streamlit UI
st.title("Health & Nutrition Analyzer")
# Initialize session state: each key is created once with its default and is
# left untouched if it already exists.
_session_defaults = {
    "logged_in": False,        # whether a user is currently signed in
    "user_email": None,        # e-mail of the signed-in user
    "analysis_result": None,   # last food-label analysis text
}
for _state_key, _state_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
def _clamp_int(value, low, high):
    """Coerce ``value`` to an int within [low, high]; use ``low`` if it is not numeric."""
    try:
        number = int(value)
    except (TypeError, ValueError, OverflowError):
        return low
    return max(low, min(high, number))


def _option_index(options, stored, fallback):
    """Return the position of ``stored`` in ``options``, or of ``fallback`` if it is not listed."""
    return options.index(stored if stored in options else fallback)


# Main application logic
# NOTE(review): the reviewed text carried no indentation, so the nesting
# below is reconstructed from the control flow. The translation section is
# placed at the logged-in level (gated on the stored analysis) — confirm.
if st.session_state.logged_in:
    st.success(f"Welcome back, {st.session_state.user_email}!")
    if st.button("Logout"):
        st.session_state.logged_in = False
        st.session_state.user_email = None
        st.session_state.analysis_result = None
        st.rerun()

    # Profile Update Section
    st.subheader("Update Your Profile")
    update_profile = st.checkbox("Edit Profile")
    if update_profile:
        # ``or {}``: keep the form usable even if the record cannot be found.
        user = fetch_user_details(st.session_state.user_email) or {}
        activity_options = ["Low", "Moderate", "High"]
        diet_options = ["Vegetarian", "Vegan", "Gluten-Free", "Keto", "Paleo", "No preference"]
        goal_options = ["Lose weight", "Gain muscle", "Maintain weight", "Improve stamina", "General well-being"]

        # Allergies may be stored as text or as a list; the text box needs text.
        stored_allergies = user.get('allergies', '')
        if not isinstance(stored_allergies, str):
            stored_allergies = ', '.join(str(item) for item in (stored_allergies or []))
        # Only pre-select preferences that are still valid options.
        stored_diet = [
            choice for choice in (user.get('dietary_preferences') or [])
            if choice in diet_options
        ]

        with st.form("profile_update_form"):
            name = st.text_input("Name", value=user.get('name', ''))
            # Stored numbers are clamped into the widget range: a default
            # outside [min_value, max_value] makes number_input raise.
            age = st.number_input("Age", value=_clamp_int(user.get('age'), 1, 120), min_value=1, max_value=120)
            height = st.number_input("Height (in cm)", value=_clamp_int(user.get('height'), 50, 250), min_value=50, max_value=250)
            weight = st.number_input("Weight (in kg)", value=_clamp_int(user.get('weight'), 10, 300), min_value=10, max_value=300)
            allergies = st.text_input("Allergies", value=stored_allergies)
            health_conditions = st.text_input("Health Conditions", value=user.get('health_conditions', ''))
            activity_level = st.selectbox(
                "Activity Level", activity_options,
                index=_option_index(activity_options, user.get('activity_level', 'Moderate'), 'Moderate'),
            )
            dietary_preferences = st.multiselect("Dietary Preferences", diet_options, default=stored_diet)
            health_goals = st.selectbox(
                "Health Goals", goal_options,
                index=_option_index(goal_options, user.get('health_goals', 'General well-being'), 'General well-being'),
            )
            update_submitted = st.form_submit_button("Update Profile")

        if update_submitted:
            bmi = calculate_bmi(weight, height)
            updated_data = {
                "name": name,
                "age": age,
                "height": height,
                "weight": weight,
                "bmi": bmi,
                "allergies": allergies,
                "health_conditions": health_conditions,
                "activity_level": activity_level,
                "dietary_preferences": dietary_preferences,
                "health_goals": health_goals
            }
            update_user_profile(st.session_state.user_email, updated_data)
            st.success(f"Profile updated successfully! Your new BMI is {bmi}.")
            # NOTE(review): the immediate rerun replaces the page, so the
            # message above is shown only briefly.
            st.rerun()

    # Food Label Analysis Section
    st.subheader("Upload Food Label for Analysis")
    uploaded_file = st.file_uploader("Upload Food Label Image", type=["jpg", "jpeg", "png"])
    if uploaded_file:
        os.makedirs("temp", exist_ok=True)
        # basename(): a client-supplied file name must never point outside temp/.
        image_path = os.path.join("temp", os.path.basename(uploaded_file.name))
        with open(image_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.image(image_path, caption="Uploaded Food Label", use_column_width=True)
        if st.button("Analyze Food Label"):
            with st.spinner("Analyzing food label..."):
                result = analyze_food_label(image_path, st.session_state.user_email)
            if result:
                # Kept in session state so the translation section can use it
                # on later reruns.
                st.session_state.analysis_result = result
                st.write(result)
            else:
                st.error("Error analyzing food label. Please try again.")

    # Translation Section
    if st.session_state.analysis_result:
        st.subheader("Translate Analysis")
        languages = {
            "Hindi": "hi", "Bengali": "bn", "Telugu": "te", "Marathi": "mr", "Tamil": "ta",
            "Urdu": "ur", "Gujarati": "gu", "Kannada": "kn", "Odia": "or", "Malayalam": "ml",
            "Spanish": "es", "French": "fr", "German": "de", "Chinese": "zh", "Japanese": "ja"
        }
        target_lang = st.selectbox("Select language for translation:", list(languages.keys()))
        if st.button("Translate"):
            with st.spinner("Translating..."):
                translated_result = translate_text(st.session_state.analysis_result, languages[target_lang])
            # translate_text reports failures through this prefix.
            if translated_result.startswith("Translation error"):
                st.error(translated_result)
            else:
                st.subheader(f"Translated Analysis ({target_lang}):")
                st.write(translated_result)
else:
    # Login and Registration Section
    st.subheader("Login or Register")
    tab1, tab2 = st.tabs(["Login", "Register"])
    with tab1:
        st.subheader("User Login")
        email = st.text_input("Email", key="login_email")
        password = st.text_input("Password", type="password", key="login_password")
        if st.button("Login"):
            user = fetch_user_details(email)
            # .get(): a record without a password field simply fails the check
            # instead of raising KeyError.
            if user and check_password(user.get('password'), password):
                st.session_state.logged_in = True
                st.session_state.user_email = email
                st.success(f"Welcome back, {user.get('name', email)}!")
                st.rerun()
            else:
                st.error("Invalid email or password!")
    with tab2:
        st.subheader("User Registration")
        with st.form("registration_form"):
            name = st.text_input("Name *", key="register_name")
            email = st.text_input("Email *", key="register_email")
            password = st.text_input("Password *", type="password", key="register_password")
            confirm_password = st.text_input("Confirm Password *", type="password", key="confirm_password")
            age = st.number_input("Age *", min_value=1, max_value=120, key="register_age")
            height = st.number_input("Height (in cm) *", min_value=50, max_value=250, key="register_height")
            weight = st.number_input("Weight (in kg) *", min_value=10, max_value=300, key="register_weight")
            allergies = st.text_input("Allergies (if any)", key="register_allergies")
            health_conditions = st.text_input("Health Conditions (if any)", key="register_health_conditions")
            activity_level = st.selectbox("Activity Level", ["Low", "Moderate", "High"], key="register_activity_level")
            dietary_preferences = st.multiselect("Dietary Preferences", ["Vegetarian", "Vegan", "Gluten-Free", "Keto", "Paleo", "No preference"], key="register_dietary_preferences")
            health_goals = st.selectbox("Health Goals", ["Lose weight", "Gain muscle", "Maintain weight", "Improve stamina", "General well-being"], key="register_health_goals")
            submitted = st.form_submit_button("Register")

        if submitted:
            if not (name.strip() and email.strip() and password):
                # Fields labelled with * are mandatory.
                st.error("Please fill in all required fields (marked with *).")
            elif password != confirm_password:
                st.error("Passwords do not match!")
            elif fetch_user_details(email):
                # Login looks accounts up by e-mail, so a second record with
                # the same address could never be signed into reliably.
                st.error("An account with this email already exists. Please log in instead.")
            else:
                bmi = calculate_bmi(weight, height)
                user_data = {
                    "name": name,
                    "email": email,
                    "password": hash_password(password),
                    "age": age,
                    "height": height,
                    "weight": weight,
                    "bmi": bmi,
                    "allergies": allergies,
                    "health_conditions": health_conditions,
                    "activity_level": activity_level,
                    "dietary_preferences": dietary_preferences,
                    "health_goals": health_goals
                }
                customer_collection.insert_one(user_data)
                st.success(f"Registration successful! Your BMI is {bmi}.")
# Database Contribution Section
# NOTE(review): the reviewed text carried no indentation; this section is
# reconstructed at top level, i.e. shown to signed-in and anonymous visitors
# alike — confirm that is the intended placement.
contribute_toggle = st.toggle("Help us? Contribute to our database")
if contribute_toggle:
    st.subheader("Contribute to Food Label Database")
    # Use st.form to prevent rerun on every input change
    with st.form("contribution_form"):
        uploaded_file = st.file_uploader("Upload Food Label Image", type=["jpg", "jpeg", "png"], key="contribute_upload")
        product_type = st.selectbox("Product Type", ["Nutritional", "Regular", "Recreational"])
        consumption_frequency = st.selectbox("Consumption Frequency", ["Daily", "Weekly", "Monthly"])
        submit_button = st.form_submit_button("Process and Submit Label")

    if submit_button and not uploaded_file:
        # Previously a submit without a file did nothing at all.
        st.warning("Please upload a food label image before submitting.")
    elif submit_button:
        with st.spinner("Processing and submitting food label..."):
            os.makedirs("temp", exist_ok=True)
            # basename(): a client-supplied file name must never point outside temp/.
            image_path = os.path.join("temp", os.path.basename(uploaded_file.name))
            with open(image_path, "wb") as f:
                f.write(uploaded_file.getbuffer())
            # Perform OCR; the recognised string is taken from index 1 of
            # each result entry.
            result = reader.readtext(image_path)
            ocr_text = ' '.join(res[1] for res in result)
            # Update database
            outcome = update_product_database(ocr_text, product_type, consumption_frequency)
        # A None outcome (no status reported) still counts as success; only
        # an explicit False suppresses the thank-you message.
        if outcome is not False:
            st.success("Thank you for contributing to our database!")
        # TODO: clear the form after a successful submission. An earlier
        # attempt reset st.session_state['contribute_upload'] and called
        # st.experimental_rerun(); it was left disabled.