from transformers import AutoProcessor, BlipForConditionalGeneration
from PIL import Image
import requests
import gradio as gr

# Load the BLIP image-captioning processor and model once at startup,
# instead of re-downloading them on every Gradio request.
processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Sanity check: caption a sample COCO image before launching the app.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
inputs = processor(image, return_tensors="pt")
generated_ids = model.generate(**inputs)
generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
print(generated_text)


def launch(url):
    # Fetch the image from the submitted URL and convert it to RGB.
    image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
    # Preprocess the image and generate a caption with the shared model.
    inputs = processor(image, return_tensors="pt")
    generated_ids = model.generate(**inputs)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    return generated_text


# Text-in/text-out interface: paste an image URL to get its caption back.
iface = gr.Interface(fn=launch, inputs="text", outputs="text")
iface.launch()