Spaces:
Running
Running
import os | |
# from dotenv import load_dotenv | |
import streamlit as st | |
import PIL.Image | |
import google.generativeai as genai | |
from langchain.prompts import ChatPromptTemplate | |
from langchain_community.llms import Ollama | |
from transformers import MllamaForConditionalGeneration, AutoProcessor | |
import torch | |
from accelerate import init_empty_weights | |
# Load environment variables | |
from transformers import AutoProcessor, AutoModelForPreTraining | |
# Fixed instruction text sent with every request.  It starts with the
# "<|image|><|begin_of_text|>" prefix that is passed to the processor together
# with the uploaded image.
prompt = "<|image|><|begin_of_text|>You are a helpful assistant. Please respond to the user's queries."

# Hugging Face Hub identifier shared by the processor and the model.
model_id = "meta-llama/Llama-3.2-11B-Vision"


@st.cache_resource(show_spinner=False)
def _load_processor_and_model(checkpoint):
    """Load the processor and model for *checkpoint* once and reuse them.

    Streamlit re-executes this script from the top on every widget
    interaction, so bare module-level ``from_pretrained`` calls would reload
    the checkpoint on each rerun.  ``st.cache_resource`` keeps the loaded
    objects alive across reruns.  ``show_spinner=False`` prevents the cached
    call from emitting a Streamlit element before ``main()`` calls
    ``st.set_page_config``.

    Args:
        checkpoint: Model identifier passed to both ``from_pretrained`` calls.

    Returns:
        A ``(processor, model)`` tuple.
    """
    loaded_processor = AutoProcessor.from_pretrained(checkpoint)
    # NOTE(review): weights are loaded with the library's default dtype and
    # device placement; pass torch_dtype / device_map here if memory is a
    # concern -- confirm against the deployment hardware.
    loaded_model = AutoModelForPreTraining.from_pretrained(checkpoint)
    return loaded_processor, loaded_model


# Module-level handles used by get_gemin_response() and main().
processor, model = _load_processor_and_model(model_id)
# Define function to get a response from the model
def get_gemin_response(input_text, img):
    """Generate output tokens for an uploaded image and the user's question.

    Args:
        input_text: The user's question.  When it contains non-whitespace
            text it is appended to the module-level ``prompt`` as a
            ``Question:`` line; when empty or ``None`` the bare ``prompt``
            is used.
        img: Image handed to the processor as ``images``.

    Returns:
        The value returned by ``model.generate`` (at most 30 new tokens are
        requested).  The caller decodes ``response[0]`` with the processor.
    """
    question = (input_text or "").strip()
    # Previously the question was dropped and only the fixed prompt was sent,
    # so the model never saw what the user asked.
    full_prompt = f"{prompt}\nQuestion: {question}" if question else prompt

    # Move the encoded inputs to wherever the model lives before generating.
    inputs = processor(images=img, text=full_prompt, return_tensors="pt").to(model.device)
    return model.generate(**inputs, max_new_tokens=30)
# Define the main function for the Streamlit app
def main():
    """Render the page: text input, image uploader, and a generate button.

    When the button is pressed with both a question and an image present,
    the model response is generated via ``get_gemin_response`` and the
    decoded first sequence is written to the page.  Otherwise an error
    message asks the user to supply both inputs.
    """
    st.set_page_config(page_title='Gemini Image & Text')
    st.header('Gemini LLM Application')

    # Input text
    input_text = st.text_input("Input :", key='input')

    # Image uploader
    imgupload = st.file_uploader('Choose an image file', type=['jpg', 'jpeg', 'png'])

    # Start from None so the check below works when nothing was uploaded;
    # without this, pressing the button with no image raised NameError
    # instead of showing the error message.
    img = None
    if imgupload is not None:
        img = PIL.Image.open(imgupload)
        st.image(img, caption='Uploaded Image', use_column_width=True)

    if st.button('Generate Response'):
        # Ensure both inputs are provided
        if img is not None and input_text:
            response = get_gemin_response(input_text, img)
            st.write(processor.decode(response[0]))
        else:
            st.error("Please provide both input text and an image before generating a response.")
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()