Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,9 +1,9 @@
 import gradio as gr
 from transformers import AutoProcessor, AutoModelForVision2Seq, TextIteratorStreamer
+from transformers.image_utils import load_image
 from threading import Thread
 import re
 import time
-from PIL import Image
 import torch
 import spaces
 #import subprocess
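
For context on the new import: `load_image` accepts a URL, a local path, or an already-open PIL image and always returns an RGB `PIL.Image`, which is why `from PIL import Image` can be dropped in this hunk. A minimal sketch of that behavior (the in-memory image is just for illustration):

```python
from PIL import Image
from transformers.image_utils import load_image

# load_image normalizes its input to an RGB PIL.Image:
img = load_image(Image.new("RGBA", (8, 8)))  # a PIL image passes through, converted to RGB
print(img.mode)                              # "RGB"
# load_image("photo.jpg") and load_image("https://.../photo.jpg") work the same way,
# opening the file or fetching the URL before decoding.
```
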
@@ -18,15 +18,14 @@ model = AutoModelForVision2Seq.from_pretrained("HuggingFaceTB/SmolVLM-Instruct-2
 
 @spaces.GPU
 def model_inference(
-    input_dict, history, decoding_strategy, temperature, max_new_tokens,
-    repetition_penalty, top_p
+    input_dict, history
 ):
     text = input_dict["text"]
     print(input_dict["files"])
     if len(input_dict["files"]) > 1:
-        images = [Image.open(image).convert("RGB") for image in input_dict["files"]]
+        images = [load_image(image) for image in input_dict["files"]]
     elif len(input_dict["files"]) == 1:
-        images = [Image.open(input_dict["files"][0]).convert("RGB")]
+        images = [load_image(input_dict["files"][0])]
     else:
         images = []
 
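
The slimmed-down two-argument signature matches what a multimodal `gr.ChatInterface` with no `additional_inputs` passes to its `fn`: the textbox payload and the chat history. A sketch of the payload shape, with illustrative values:

```python
from transformers.image_utils import load_image

# Shape of the payload a multimodal gr.ChatInterface delivers to fn:
input_dict = {
    "text": "What is in these images?",
    "files": [],  # e.g. ["/tmp/gradio/a.png", "/tmp/gradio/b.png"] for two uploads
}
history = []      # prior chat turns; unused by model_inference

# The three branches in the hunk above collapse to a single comprehension:
images = [load_image(f) for f in input_dict["files"]]
```
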
@@ -52,26 +51,19 @@ def model_inference(
     inputs = processor(text=prompt, images=[images], return_tensors="pt")
     inputs = {k: v.to("cuda") for k, v in inputs.items()}
     generation_args = {
-        "max_new_tokens": max_new_tokens,
-        "repetition_penalty": repetition_penalty,
-
+        "input_ids": inputs.input_ids,
+        "pixel_values": inputs.pixel_values,
+        "attention_mask": inputs.attention_mask,
+        "num_return_sequences": 1,
+        "no_repeat_ngram_size": 2,
+        "temperature": 0.7,
+        "max_new_tokens": 500,
+        "min_new_tokens": 10,
     }
 
-    assert decoding_strategy in [
-        "Greedy",
-        "Top P Sampling",
-    ]
-    if decoding_strategy == "Greedy":
-        generation_args["do_sample"] = False
-    elif decoding_strategy == "Top P Sampling":
-        generation_args["temperature"] = temperature
-        generation_args["do_sample"] = True
-        generation_args["top_p"] = top_p
-
-    generation_args.update(inputs)
     # Generate
-    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-    generation_args = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
+    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+    generation_args = dict(inputs, streamer=streamer, max_new_tokens=500)
     generated_text = ""
 
     thread = Thread(target=model.generate, kwargs=generation_args)
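
The streamer-plus-worker-thread pattern this hunk keeps is the standard way to stream tokens into a Gradio chat: `generate()` blocks, so it runs on a background thread while the handler iterates the streamer and yields partial text. A minimal sketch, assuming `model`, `processor`, and a device-mapped `inputs` dict as in the app (`stream_reply` is a hypothetical standalone helper):

```python
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(model, processor, inputs, max_new_tokens=500):
    # The streamer is a queue that yields decoded text chunks as generate() produces them.
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    thread = Thread(target=model.generate,
                    kwargs=dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens))
    thread.start()
    buffer = ""
    for chunk in streamer:  # blocks until the next decoded chunk arrives
        buffer += chunk
        yield buffer        # each yield redraws the chat bubble in place
    thread.join()
```
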
@@ -99,48 +91,8 @@ examples=[
 demo = gr.ChatInterface(fn=model_inference, title="SmolVLM: Small yet Mighty 💫",
                 description="Play with [HuggingFaceTB/SmolVLM-Instruct](https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct) in this demo. To get started, upload an image and text or try one of the examples. This checkpoint works best with single turn conversations, so clear the conversation after a single turn.",
                 examples=examples,
-                textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"), stop_btn="Stop Generation", multimodal=True,
-                additional_inputs=[gr.Radio(["Top P Sampling",
-                        "Greedy"],
-                        value="Greedy",
-                        label="Decoding strategy",
-                        #interactive=True,
-                        info="Higher values is equivalent to sampling more low-probability tokens.",
-
-                ), gr.Slider(
-                        minimum=0.0,
-                        maximum=5.0,
-                        value=0.4,
-                        step=0.1,
-                        interactive=True,
-                        label="Sampling temperature",
-                        info="Higher values will produce more diverse outputs.",
-                ),
-                gr.Slider(
-                        minimum=8,
-                        maximum=1024,
-                        value=512,
-                        step=1,
-                        interactive=True,
-                        label="Maximum number of new tokens to generate",
-                ), gr.Slider(
-                        minimum=0.01,
-                        maximum=5.0,
-                        value=1.2,
-                        step=0.01,
-                        interactive=True,
-                        label="Repetition penalty",
-                        info="1.0 is equivalent to no penalty",
-                ),
-                gr.Slider(
-                        minimum=0.01,
-                        maximum=0.99,
-                        value=0.8,
-                        step=0.01,
-                        interactive=True,
-                        label="Top P",
-                        info="Higher values is equivalent to sampling more low-probability tokens.",
-                )],cache_examples=False
+                textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"), stop_btn="Stop Generation", multimodal=True,
+                ],cache_examples=False
 )
 
 
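With `additional_inputs` removed, the UI keeps only the multimodal textbox, which is consistent with `model_inference` now taking just `(input_dict, history)`. A minimal standalone sketch of that wiring (the echo handler is a placeholder, not the app's function):

```python
import gradio as gr

def fn(input_dict, history):
    # A multimodal ChatInterface with no additional_inputs calls fn
    # with exactly these two arguments.
    return f"text={input_dict['text']!r}, files={len(input_dict['files'])}"

demo = gr.ChatInterface(
    fn=fn,
    multimodal=True,
    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"),
)

if __name__ == "__main__":
    demo.launch()
```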