import os

import streamlit as st
import torch
from huggingface_hub import login
from PIL import Image
from transformers import AutoProcessor, MllamaForConditionalGeneration
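# Authenticate with Hugging Face: the Llama 3.2 checkpoints are gated, so a
# valid access token is required to download the weights.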
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
if huggingface_token:
    login(token=huggingface_token)
else:
    st.error("Hugging Face token not found. Please set it in the Secrets section.")
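# Llama 3.2 Vision is a multimodal model: it pairs an AutoProcessor (joint
# image/text preprocessing) with MllamaForConditionalGeneration, rather than
# a plain tokenizer and a text-only causal LM.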
model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"

@st.cache_resource
def load_model():
    # Cache the weights so Streamlit does not reload the 11B-parameter model
    # on every rerun of the script.
    processor = AutoProcessor.from_pretrained(model_name)
    model = MllamaForConditionalGeneration.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,  # half precision; fp32 would need ~44 GB
        device_map="auto",  # place weights on GPU(s) when available
    )
    return processor, model

try:
    processor, model = load_model()
    st.success("Model loaded successfully!")
except Exception as e:
    st.error(f"Error loading model: {str(e)}")
def main():
    st.title("Llama 3.2 11B Vision Model")
    st.write("Upload an image and enter a prompt to generate output.")
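    # Collect the two inputs the model needs: an image and a text prompt.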
    image_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    prompt = st.text_area("Enter your prompt here:")
    if st.button("Generate Output"):
        if image_file and prompt:
            # The processor expects an RGB image, so normalize the upload.
            image = Image.open(image_file).convert("RGB")
            st.image(image, caption="Uploaded Image", use_container_width=True)
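            # Llama 3.2 Vision consumes a chat-formatted conversation in which
            # an image placeholder precedes the user's text; the processor then
            # pairs that placeholder with the actual PIL image when tensorizing.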
            try:
                messages = [
                    {
                        "role": "user",
                        "content": [
                            {"type": "image"},
                            {"type": "text", "text": prompt},
                        ],
                    }
                ]
                input_text = processor.apply_chat_template(
                    messages, add_generation_prompt=True
                )
                inputs = processor(
                    image, input_text, add_special_tokens=False, return_tensors="pt"
                ).to(model.device)

                with torch.no_grad():
                    # max_new_tokens is a sensible default; raise it for longer answers.
                    model_output = model.generate(**inputs, max_new_tokens=256)

                output_text = processor.decode(model_output[0], skip_special_tokens=True)
                st.write("Generated Output:", output_text)
            except Exception as e:
                st.error(f"Error during prediction: {str(e)}")
        else:
            st.warning("Please upload an image and enter a prompt.")
if __name__ == "__main__":
    main()