# moondream-05 / app.py
import gradio as gr
from PIL import Image
import moondream as md
from huggingface_hub import hf_hub_download
# Download model at runtime
model_path = hf_hub_download(
    repo_id="andito/moondream05",
    filename="moondream-0_5b-int8.mf",
)
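# Load the downloaded int8 Moondream 0.5B weights with the moondream client library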
model = md.vl(model=model_path)

def model_inference(input_dict, history):
    # Extract image from message if present
    if input_dict.get("files"):
        image_path = input_dict["files"][0]
        if isinstance(image_path, dict) and "path" in image_path:
            image_path = image_path["path"]
        image = Image.open(image_path)
        encoded_image = model.encode_image(image)

        # If there's a question, use query
        text = input_dict.get("text", "")
        if text not in ["", "Caption"]:
            response = model.query(encoded_image, text)["answer"]
        # Otherwise generate a caption
        else:
            response = model.caption(encoded_image)["caption"]
        return response
    else:
        return "Please provide an image to analyze."
examples = [
    [{"text": "Caption", "files": ["example_images/demo-1.jpg"]}, []],
    [{"text": "Caption", "files": ["example_images/demo-2.jpg"]}, []],
    [{"text": "What art era does this artwork belong to?", "files": ["example_images/rococo.jpg"]}, []],
    [{"text": "Caption", "files": ["example_images/rococo.jpg"]}, []],
    [{"text": "I'm planning a visit to this temple, give me travel tips.", "files": ["example_images/examples_wat_arun.jpg"]}, []],
    [{"text": "Caption", "files": ["example_images/examples_wat_arun.jpg"]}, []],
    [{"text": "Caption", "files": ["example_images/aaron.jpeg"]}, []],
]
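
# Build the multimodal chat UI; the MultimodalTextbox accepts a single image
# alongside the text prompt.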
demo = gr.ChatInterface(
    fn=model_inference,
    title="Moondream 0.5B: The World's Smallest Vision-Language Model",
    description="Play with [Moondream 0.5B](https://huggingface.co/vikhyatk/moondream2) in this demo. To get started, upload an image and enter some text, or try one of the examples.",
    examples=examples,
    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="single"),
    stop_btn="Stop Generation",
    multimodal=True,
    additional_inputs=[],
    cache_examples=False,
)
demo.launch(debug=True)