Tonic committed
Commit 106a6dd · verified
1 Parent(s): 01a426b

Update app.py

Files changed (1)
  1. app.py +46 -39
app.py CHANGED
@@ -1,19 +1,21 @@
 import gradio as gr
-
 from transformers import LlavaNextForConditionalGeneration, LlavaNextProcessor
-
 from PIL import Image
-
 import requests
-
 import torch
-
 import spaces
 
-# Load the processor and model
 
-processor = LlavaNextProcessor.from_pretrained("tiiuae/falcon-11B-vlm", tokenizer_class='PreTrainedTokenizerFast')
+title = """ # 🙋🏻‍♂️ Welcome to Tonic's 🦅 Falcon Vision 👁️ Language Model!
+"""
+
+description = """
+Falcon2-11B-vlm is an 11B-parameter causal decoder-only model built by TII and trained on over 5,000B tokens of RefinedWeb enhanced with curated corpora. To bring vision capabilities, we integrate the pretrained CLIP ViT-L/14 vision encoder with our Falcon2-11B chat-finetuned model and train with image-text data. To enhance the VLM's perception of fine-grained details in small objects in images, we employ a dynamic encoding mechanism at high resolution for image inputs.
+
+Join us: 🌟 TeamTonic 🌟 is always making cool demos! Join our active builders' 🛠️ community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗 Hugging Face: [MultiTransformer](https://huggingface.co/MultiTransformer) Math 🔍 [introspector](https://huggingface.co/introspector) On 🌐 GitHub: [Tonic-AI](https://github.com/tonic-ai) & contribute to 🌟 [MultiTonic](https://github.com/multitonic/) 🤗 Big thanks to Yuvi Sharma and all the folks at Hugging Face for the community grant 🤗
+"""
 
+processor = LlavaNextProcessor.from_pretrained("tiiuae/falcon-11B-vlm", tokenizer_class='PreTrainedTokenizerFast')
 model = LlavaNextForConditionalGeneration.from_pretrained("tiiuae/falcon-11B-vlm", torch_dtype=torch.bfloat16).to('cuda:0')
 
 
@@ -21,45 +23,50 @@ model = LlavaNextForConditionalGeneration.from_pretrained("tiiuae/falcon-11B-vlm
 def generate_paragraph(image_url):
 
     cats_image = Image.open(requests.get(image_url, stream=True).raw)
-
     instruction = 'Write a long paragraph about this picture.'
-
-
-
     prompt = f"User:<image>\n{instruction} Falcon:"
-
     inputs = processor(prompt, images=cats_image, return_tensors="pt", padding=True).to('cuda:0')
-
-
-
     output = model.generate(**inputs, max_new_tokens=256)
-
     generated_captions = processor.decode(output[0], skip_special_tokens=True).strip()
 
-
-
     return generated_captions
 
-
-
-# Define the Gradio interface
-
-interface = gr.Interface(
-
-    fn=generate_paragraph,
-
-    inputs=gr.Textbox(label="Image URL"),
-
-    outputs=gr.Textbox(label="Generated Paragraph"),
-
-    title="Image to Paragraph Generation",
-
-    description="Enter the URL of an image, and the model will generate a descriptive paragraph about the image."
-
-)
-
-
+# Function to set the URL and generate the paragraph
+def set_and_generate(url):
+    generated_paragraph = generate_paragraph(url)
+    return url, generated_paragraph
+
+# Create the Gradio Blocks interface
+with gr.Blocks() as demo:
+    gr.Markdown(title)
+    gr.Markdown(description)
+
+    with gr.Row():
+        with gr.Column():
+            image_url_input = gr.Textbox(label="Image URL")
+            generate_button = gr.Button("Generate Paragraph")
+
+            example_1 = gr.Button("Example 1")
+            example_2 = gr.Button("Example 2")
+            example_3 = gr.Button("Example 3")
+
+        with gr.Column():
+            generated_paragraph_output = gr.Textbox(label="Generated Paragraph")
+
+    generate_button.click(generate_paragraph, inputs=image_url_input, outputs=generated_paragraph_output)
+
+    example_1.click(
+        lambda: set_and_generate("https://example.com/image1.jpg"),
+        outputs=[image_url_input, generated_paragraph_output]
+    )
+    example_2.click(
+        lambda: set_and_generate("https://example.com/image2.jpg"),
+        outputs=[image_url_input, generated_paragraph_output]
+    )
+    example_3.click(
+        lambda: set_and_generate("https://example.com/image3.jpg"),
+        outputs=[image_url_input, generated_paragraph_output]
+    )
 
 # Launch the Gradio interface
-
-interface.launch()
+demo.launch()
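
A side note on the new example wiring, not part of this commit: Gradio also ships a built-in `gr.Examples` component that can register the three sample URLs directly against the Image URL textbox, which would make the per-button `set_and_generate` lambdas unnecessary. A minimal sketch under that assumption, reusing the placeholder URLs and component labels from the diff and stubbing out the model call:

```python
import gradio as gr


def generate_paragraph(image_url: str) -> str:
    # Stand-in for the real function in app.py, which runs Falcon2-11B-vlm;
    # kept trivial here so the sketch is self-contained.
    return f"(generated paragraph for {image_url})"


with gr.Blocks() as demo:
    image_url_input = gr.Textbox(label="Image URL")
    generate_button = gr.Button("Generate Paragraph")
    generated_paragraph_output = gr.Textbox(label="Generated Paragraph")

    # Clicking an example row copies the placeholder URL into the textbox above.
    gr.Examples(
        examples=[
            ["https://example.com/image1.jpg"],
            ["https://example.com/image2.jpg"],
            ["https://example.com/image3.jpg"],
        ],
        inputs=image_url_input,
    )

    generate_button.click(
        generate_paragraph,
        inputs=image_url_input,
        outputs=generated_paragraph_output,
    )

demo.launch()
```

In this sketch clicking an example only fills the textbox; running generation automatically on click (for instance via the `fn`, `outputs`, and `cache_examples` options of `gr.Examples`) would be a separate design choice.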