# Hugging Face Space (status shown on the hosting page: build error).
import streamlit as st
import torch
from PIL import Image as PILImage
from transformers import AutoModelForCausalLM, AutoProcessor
# Custom image-to-text pipeline
def custom_image_to_text_pipeline(image, processor, model, device, **generate_kwargs):
    """Generate text for a single image with a processor/model pair.

    Args:
        image: Image handed to ``processor`` through its ``images=`` argument.
        processor: Callable invoked as ``processor(images=..., return_tensors="pt")``
            whose result exposes ``.items()``; it must also provide
            ``decode(ids, skip_special_tokens=True)``.
        model: Object exposing ``generate(**inputs)``.
        device: Target passed to ``.to()`` on every value the processor returns.
        **generate_kwargs: Optional extra keyword arguments forwarded unchanged
            to ``model.generate`` (for example ``max_new_tokens``). When none
            are given the call is exactly ``model.generate(**inputs)``.

    Returns:
        The decoded text of the first generated sequence, with special tokens
        skipped.
    """
    # Preprocess the image and move every returned value to the target device.
    encoded = processor(images=image, return_tensors="pt")
    model_inputs = {name: value.to(device) for name, value in encoded.items()}

    # Generate predictions; caller-supplied options ride along with the inputs.
    generated = model.generate(**model_inputs, **generate_kwargs)

    # Only the first sequence of the batch is decoded and returned.
    return processor.decode(generated[0], skip_special_tokens=True)
# Load the model and processor
MODEL_ID = "HuggingFaceM4/VLM_WebSight_finetuned"

# Fall back to CPU so the app still starts on hosts without a CUDA device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


@st.cache_resource
def _load_processor_and_model(model_id, device_name):
    """Load the processor and model once and reuse them across script reruns.

    Streamlit re-executes the whole script on every interaction; caching the
    loaded objects avoids re-reading the checkpoint each time.

    Args:
        model_id: Hub repository id passed to both ``from_pretrained`` calls.
        device_name: Device string (e.g. ``"cuda"`` or ``"cpu"``) the model is
            moved to; passed as a plain string so it can serve as a cache key.

    Returns:
        A ``(processor, model)`` tuple.
    """
    loaded_processor = AutoProcessor.from_pretrained(model_id)
    # NOTE: trust_remote_code=True executes Python code shipped in the model
    # repository; keep it only for repositories you trust.
    loaded_model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
    ).to(torch.device(device_name))
    return loaded_processor, loaded_model


processor, model = _load_processor_and_model(MODEL_ID, str(device))
# Streamlit UI: upload an image, show it, run the pipeline, display the result.
st.title("Image to Code Converter")
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png"])

# The uploader returns None until the user has picked a file.
if uploaded_file is not None:
    # Display the uploaded image
    st.image(uploaded_file, caption='Uploaded Image.', use_column_width=True)
    st.write("")

    # Decode the upload with PIL and run it through the pipeline while the
    # image is still open; the context manager closes it afterwards.
    with PILImage.open(uploaded_file) as image:
        st.write("Converting image to code...")
        code_result = custom_image_to_text_pipeline(image, processor, model, device)

    # Display the generated code
    st.code(code_result)