# moondream-05 / app.py
import gradio as gr
from PIL import Image
import spaces
import moondream as md
#import subprocess
#subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
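
# Load Moondream 0.5B from a local int8 .mf weights file (the file is expected to sit next to app.py in the Space)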
model = md.vl(model="moondream-0_5b-int8.mf")

def model_inference(input_dict, history):
    # Extract image from message if present
    if input_dict.get("files"):
        image_path = input_dict["files"][0]
        if isinstance(image_path, dict) and "path" in image_path:
            image_path = image_path["path"]
        image = Image.open(image_path)
        encoded_image = model.encode_image(image)

        # If there's a question, use query
        text = input_dict.get("text", "")
        if text not in ["", "Caption"]:
            response = model.query(encoded_image, text)["answer"]
        # Otherwise generate a caption
        else:
            response = model.caption(encoded_image)["caption"]

        return response
    else:
        return "Please provide an image to analyze."
examples = [
    [{"text": "What art era does this artwork belong to?", "files": ["example_images/rococo.jpg"]}, []],
    [{"text": "Caption", "files": ["example_images/rococo.jpg"]}, []],
    [{"text": "I'm planning a visit to this temple. Give me travel tips.", "files": ["example_images/examples_wat_arun.jpg"]}, []],
    [{"text": "Caption", "files": ["example_images/examples_wat_arun.jpg"]}, []],
    [{"text": "What is the due date and the invoice date?", "files": ["example_images/examples_invoice.png"]}, []],
    [{"text": "Caption", "files": ["example_images/examples_invoice.png"]}, []],
    [{"text": "What is this UI about?", "files": ["example_images/s2w_example.png"]}, []],
    [{"text": "Caption", "files": ["example_images/s2w_example.png"]}, []],
    [{"text": "Where do severe droughts happen according to this diagram?", "files": ["example_images/examples_weather_events.png"]}, []],
    [{"text": "Caption", "files": ["example_images/examples_weather_events.png"]}, []],
]
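
# Multimodal chat UI: each message takes a single image plus optional text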
demo = gr.ChatInterface(
    fn=model_inference,
    title="Moondream 0.5B: The World's Smallest Vision-Language Model",
    description="Play with [Moondream 0.5B](https://huggingface.co/vikhyatk/moondream2) in this demo. To get started, upload an image and ask a question, or try one of the examples.",
    examples=examples,
    textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="single"),
    stop_btn="Stop Generation", multimodal=True,
    additional_inputs=[], cache_examples=False,
)
demo.launch(debug=True)