# NOTE: this file appears to have been captured from a Hugging Face Spaces file viewer.
# Space status at capture time: Runtime error
# File size: 4,451 Bytes
# Revisions listed in the viewer's blame gutter: 05e7387 106a6dd 7732f66 37204e2 c514b03 8e769ae bdc2a1b bcd72e1
# (The viewer's 1-130 line-number gutter was extraction residue and is not part of the program.)
import io

import gradio as gr
from transformers import LlavaNextForConditionalGeneration, LlavaNextProcessor
from PIL import Image
import requests
import torch
import spaces
# Markdown heading rendered at the top of the demo page.
# The heading must stay on a single line: a line break inside it would push
# the rest of the title out of the "#" heading.
# NOTE(review): the emoji were reconstructed from mis-decoded UTF-8. The eagle
# is certain from the surviving bytes; the greeting and eye emoji are
# best-fit guesses - confirm against the upstream file.
title = """ # 🙋🏻‍♂️Welcome to Tonic's🦅Falcon Vision👁️Language Model !
"""
# Markdown body shown under the title: a short model summary followed by
# community links.
# NOTE(review): the emoji were reconstructed from mis-decoded UTF-8 and should
# be confirmed against the upstream file. The Discord badge image URL was lost
# in extraction, so the entry is now a plain text link to the same invite.
description = """
Falcon2-11B-vlm is an 11B parameters causal decoder-only model built by TII and trained on over 5,000B tokens of RefinedWeb enhanced with curated corpora. To bring vision capabilities, we integrate the pretrained CLIP ViT-L/14 vision encoder with our Falcon2-11B chat-finetuned model and train with image-text data. For enhancing the VLM's perception of fine-grained details w.r.t small objects in images, we employ a dynamic encoding mechanism at high-resolution for image inputs.
### Join us :
🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [Join us on Discord](https://discord.gg/GWpVpekp) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) Math 🔍 [introspector](https://huggingface.co/introspector) On 🌟Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [MultiTonic](https://github.com/multitonic/)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""
# Hub checkpoint id shared by the processor and the model weights.
_FALCON_VLM_CHECKPOINT = "tiiuae/falcon-11B-vlm"

# Processor that prepares the prompt text and image for the model.
processor = LlavaNextProcessor.from_pretrained(
    _FALCON_VLM_CHECKPOINT,
    tokenizer_class='PreTrainedTokenizerFast',
)

# Load the weights in bfloat16, then move the model to the first CUDA device.
model = LlavaNextForConditionalGeneration.from_pretrained(
    _FALCON_VLM_CHECKPOINT,
    torch_dtype=torch.bfloat16,
)
model = model.to('cuda:0')
# Seconds to wait on the image host; without a timeout a stalled server would
# block the request indefinitely.
_IMAGE_FETCH_TIMEOUT = 30


def _load_image(image_url):
    """Download *image_url* and open the response body as a PIL image."""
    response = requests.get(image_url, timeout=_IMAGE_FETCH_TIMEOUT)
    # Fail on 4xx/5xx instead of handing an HTML error page to PIL.
    response.raise_for_status()
    # Read the fully decoded body rather than ``response.raw``: the raw stream
    # skips content decoding and leaves the connection open.
    return Image.open(io.BytesIO(response.content))


# NOTE(review): ``spaces.GPU`` presumably requests a GPU for the duration of
# the call on Hugging Face Spaces - confirm against the ``spaces`` docs.
@spaces.GPU
def generate_paragraph(image_url):
    """Describe the image found at *image_url* with the Falcon VLM.

    Args:
        image_url: HTTP(S) address of the image to describe.

    Returns:
        The decoded model output with special tokens removed and surrounding
        whitespace stripped.

    Raises:
        gr.Error: if *image_url* is blank, or the image cannot be downloaded
            or opened.
    """
    image_url = (image_url or "").strip()
    if not image_url:
        raise gr.Error("Please provide an image URL.")

    try:
        image = _load_image(image_url)
    except (requests.RequestException, OSError) as err:
        # PIL's "cannot identify image" error is an OSError subclass.
        raise gr.Error(f"Could not load an image from this URL: {err}") from err

    instruction = 'Write a long paragraph about this picture.'
    prompt = f"User:<image>\n{instruction} Falcon:"
    inputs = processor(prompt, images=image, return_tensors="pt", padding=True).to('cuda:0')
    output = model.generate(**inputs, max_new_tokens=256)
    return processor.decode(output[0], skip_special_tokens=True).strip()
def set_and_generate(url):
    """Generate a paragraph for *url* and return it together with the URL.

    Returns:
        A ``(url, paragraph)`` tuple, where ``paragraph`` is the result of
        ``generate_paragraph(url)``.
    """
    return url, generate_paragraph(url)
# Create the Gradio Blocks interface
# NOTE(review): no component in this layout carries the "thumbnail" class, so
# the rule passed to ``css=`` does not appear to match anything - confirm.
with gr.Blocks(css=".thumbnail { width: 150px; height: 150px; object-fit: cover; }") as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    with gr.Row():
        with gr.Column():
            image_url_input = gr.Textbox(label="Image URL")
            generate_button = gr.Button("Generate Paragraph")
            # Example buttons; their elem_id values are the hooks for the
            # per-button background-image CSS rules.
            example_1 = gr.Button("Types of Falcons", elem_id="example_1")
            example_2 = gr.Button("Camel Racing - Saudi Arabia", elem_id="example_2")
            example_3 = gr.Button("Urban Scene - India", elem_id="example_3")
        with gr.Column():
            # The label must be a single-line literal; a line break inside a
            # single-quoted string is a syntax error.
            # NOTE(review): emoji reconstructed from mis-decoded UTF-8 - confirm.
            generated_paragraph_output = gr.Textbox(label="🦅Falcon Vision👁️")
    # Wire click events
    generate_button.click(generate_paragraph, inputs=image_url_input, outputs=generated_paragraph_output)
    # Each example takes no inputs: it fills the URL box with a fixed image
    # address and shows the paragraph generated for that image.
    example_1.click(
        lambda: set_and_generate("https://www.animalspot.net/wp-content/uploads/2020/01/Types-of-Falcons.jpg"),
        outputs=[image_url_input, generated_paragraph_output]
    )
    example_2.click(
        lambda: set_and_generate("https://www.leaders-mena.com/leaders/uploads/2023/01/The-Traditional-Camel-Racing-In-Saudi-Arabia-Unique-Sport-Activity-1024x576.jpg"),
        outputs=[image_url_input, generated_paragraph_output]
    )
    example_3.click(
        lambda: set_and_generate("http://embed.robertharding.com/embed/1161-4342.jpg"),
        outputs=[image_url_input, generated_paragraph_output]
    )
# Configure the CSS for thumbnails: one rule per example button, keyed by the
# button's elem_id, that paints the example image as the button background.
_EXAMPLE_BACKGROUNDS = (
    ("example_1", "https://www.animalspot.net/wp-content/uploads/2020/01/Types-of-Falcons.jpg"),
    ("example_2", "https://www.leaders-mena.com/leaders/uploads/2023/01/The-Traditional-Camel-Racing-In-Saudi-Arabia-Unique-Sport-Activity-1024x576.jpg"),
    ("example_3", "http://embed.robertharding.com/embed/1161-4342.jpg"),
)
_background_rules = [
    f'#{elem_id} {{\nbackground: url("{image_url}") no-repeat center center;\nbackground-size: cover;\n}}\n'
    for elem_id, image_url in _EXAMPLE_BACKGROUNDS
]
# Append the generated rules to the stylesheet given when the Blocks was created.
demo.css = demo.css + "\n" + "".join(_background_rules)
# Launch the Gradio interface
demo.launch()