Spaces: Running on Zero
changed examples
- .gitattributes +1 -0
- app.py +10 -8
- example_images/campeones.jpg +0 -0
- example_images/document.jpg +0 -0
- example_images/docvqa_example.png +0 -3
- example_images/dogs.jpg +0 -0
- example_images/examples_wat_arun.jpg +0 -0
- example_images/math.jpg +0 -0
- example_images/newyork.jpg +0 -0
.gitattributes CHANGED
@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 example_images/docvqa_example.png filter=lfs diff=lfs merge=lfs -text
 example_images/gaulois.png filter=lfs diff=lfs merge=lfs -text
 example_images/rococo.jpg filter=lfs diff=lfs merge=lfs -text
+*.DS_Store filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -56,7 +56,6 @@ def model_inference(
         "attention_mask": inputs.attention_mask,
         "num_return_sequences": 1,
         "no_repeat_ngram_size": 2,
-        "temperature": 0.7,
         "max_new_tokens": 500,
         "min_new_tokens": 10,
     }
@@ -82,14 +81,17 @@ def model_inference(
 
 
 examples=[
-    [{"text": "
-    [{"text": "
-    [{"text":
-    [{"text": "What
-    [{"text":
+    [{"text": "Describe this image.", "files": ["example_images/newyork.jpg"]}],
+    [{"text": "Describe this image.", "files": ["example_images/dogs.jpg"]}],
+    [{"text": "Where do the severe droughts happen according to this diagram?", "files": ["example_images/examples_weather_events.png"]}],
+    [{"text": "What art era do these artpieces belong to?", "files": ["example_images/rococo.jpg", "example_images/rococo_1.jpg"]}],
+    [{"text": "Describe this image.", "files": ["example_images/campeones.jpg"]}],
+    [{"text": "What does this say?", "files": ["example_images/math.jpg"]}],
+    [{"text": "What is the date in this document?", "files": ["example_images/document.jpg"]}],
+    [{"text": "What is this UI about?", "files": ["example_images/s2w_example.png"]}],
 ]
-demo = gr.ChatInterface(fn=model_inference, title="SmolVLM:
-    description="Play with [HuggingFaceTB/SmolVLM-Instruct](https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct) in this demo. To get started, upload an image and text or try one of the examples. This
+demo = gr.ChatInterface(fn=model_inference, title="SmolVLM-250M: The Smollest VLM ever 💫",
+    description="Play with [HuggingFaceTB/SmolVLM-Instruct-250M](https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct-250M) in this demo. To get started, upload an image and text or try one of the examples. This demo doesn't use history for the chat, so every chat you start is a new conversation.",
 examples=examples,
 textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="multiple"), stop_btn="Stop Generation", multimodal=True,
 cache_examples=False
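For readers following the first app.py hunk: the diff shows only the generation kwargs, not the code that consumes them. Below is a minimal sketch of how a dict like this is typically passed to transformers' generate() for SmolVLM; the processor/model loading and prompt construction follow the SmolVLM model card and are assumptions, not code taken from this Space.

import torch
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor

# Assumption: the Space loads SmolVLM roughly the way the model card shows.
processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
model = AutoModelForVision2Seq.from_pretrained(
    "HuggingFaceTB/SmolVLM-Instruct", torch_dtype=torch.bfloat16
)

image = Image.open("example_images/newyork.jpg")
messages = [{
    "role": "user",
    "content": [{"type": "image"},
                {"type": "text", "text": "Describe this image."}],
}]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(text=prompt, images=[image], return_tensors="pt")

# Generation arguments as they stand after this commit; "temperature": 0.7 was
# removed, so generate() falls back to its default decoding settings.
generation_args = {
    "attention_mask": inputs.attention_mask,
    "num_return_sequences": 1,
    "no_repeat_ngram_size": 2,
    "max_new_tokens": 500,
    "min_new_tokens": 10,
}
generated_ids = model.generate(
    input_ids=inputs.input_ids,
    pixel_values=inputs.pixel_values,
    **generation_args,
)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])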
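Likewise, a self-contained sketch of the UI wiring the second hunk ends up with. The real model_inference body is not part of this diff, so the function below is a placeholder that only echoes the query; the ChatInterface/MultimodalTextbox arguments and the example format mirror the added lines above, and the example file paths assume the Space's example_images/ directory.

import gradio as gr

def model_inference(input_dict, history):
    # Placeholder only: the real Space runs SmolVLM here. With multimodal=True,
    # the multimodal textbox hands fn a dict with "text" and "files" keys.
    files = input_dict.get("files", [])
    return f"(placeholder) got text={input_dict['text']!r} and {len(files)} file(s)"

examples = [
    [{"text": "Describe this image.", "files": ["example_images/newyork.jpg"]}],
    [{"text": "What is the date in this document?", "files": ["example_images/document.jpg"]}],
]

demo = gr.ChatInterface(
    fn=model_inference,
    title="SmolVLM-250M: The Smollest VLM ever 💫",
    description="Multimodal chat demo; every chat starts a new conversation (no history).",
    examples=examples,
    textbox=gr.MultimodalTextbox(label="Query Input",
                                 file_types=["image"],
                                 file_count="multiple"),
    stop_btn="Stop Generation",
    multimodal=True,
    cache_examples=False,
)

if __name__ == "__main__":
    demo.launch()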
example_images/campeones.jpg ADDED
example_images/document.jpg ADDED
example_images/docvqa_example.png DELETED (Git LFS file)
example_images/dogs.jpg ADDED
example_images/examples_wat_arun.jpg DELETED (binary file, 786 kB)
example_images/math.jpg ADDED
example_images/newyork.jpg ADDED