"""Streamlit app: upload an image, type a question, and show a Llama 3.2 Vision reply."""

import os

# from dotenv import load_dotenv
import streamlit as st
import PIL.Image
import google.generativeai as genai
from langchain.prompts import ChatPromptTemplate
from langchain_community.llms import Ollama
from transformers import (
    AutoModelForPreTraining,
    AutoProcessor,
    MllamaForConditionalGeneration,
)
import torch
from accelerate import init_empty_weights

# Prompt prefix handed to the processor together with the uploaded image.
prompt = "<|image|><|begin_of_text|>You are a helpful assistant. Please respond to the user's queries."

# Hugging Face checkpoint used for both the processor and the model.
model_id = "meta-llama/Llama-3.2-11B-Vision"


# show_spinner=False keeps the loader from emitting any Streamlit element
# before main() calls st.set_page_config().
@st.cache_resource(show_spinner=False)
def _load_processor_and_model(checkpoint):
    """Load the processor and model for *checkpoint* once per server process.

    Streamlit re-executes this script on every widget interaction; caching the
    loaded objects avoids re-instantiating the checkpoint on each rerun.

    Args:
        checkpoint: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(processor, model)`` tuple.
    """
    loaded_processor = AutoProcessor.from_pretrained(checkpoint)
    loaded_model = AutoModelForPreTraining.from_pretrained(checkpoint)
    return loaded_processor, loaded_model


processor, model = _load_processor_and_model(model_id)


def get_gemin_response(input_text, img):
    """Run the model on an image plus the user's question.

    Args:
        input_text: The user's question. When non-empty it is appended to the
            module-level ``prompt`` as ``" Question: <text>"``; otherwise the
            base prompt is used unchanged.
        img: The image forwarded to the processor (``main`` passes the object
            returned by ``PIL.Image.open``).

    Returns:
        Whatever ``model.generate`` returns for at most 30 new tokens;
        ``main`` decodes element ``0`` of it.
    """
    # The original ignored input_text entirely, so the question never reached
    # the model even though the UI requires one.
    full_prompt = f"{prompt} Question: {input_text}" if input_text else prompt
    inputs = processor(images=img, text=full_prompt, return_tensors="pt").to(model.device)
    return model.generate(**inputs, max_new_tokens=30)


def main():
    """Render the page, collect the text and image inputs, and show the reply."""
    st.set_page_config(page_title='Gemini Image & Text')
    st.header('Gemini LLM Application')

    input_text = st.text_input("Input :", key='input')
    imgupload = st.file_uploader('Choose an image file', type=['jpg', 'jpeg', 'png'])

    # Must be bound even when nothing is uploaded; otherwise the check below
    # raises NameError instead of reaching the st.error branch.
    img = None
    if imgupload is not None:
        img = PIL.Image.open(imgupload)
        st.image(img, caption='Uploaded Image', use_column_width=True)

    if st.button('Generate Response'):
        if img is not None and input_text:
            response = get_gemin_response(input_text, img)
            # Drop special tokens so markers from the prompt are not shown.
            st.write(processor.decode(response[0], skip_special_tokens=True))
        else:
            st.error("Please provide both input text and an image before generating a response.")


if __name__ == "__main__":
    main()