nhatipoglu committed
Commit: 753c319
Parent(s): 7bae4a0
Update app.py
app.py
CHANGED
@@ -11,8 +11,7 @@ import re
 
 models = {
     "Qwen/Qwen2-VL-7B-Instruct": Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"),
-    "Qwen/Qwen2-VL-2B-Instruct": Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", torch_dtype="auto", device_map="auto"),
-
+    "Qwen/Qwen2-VL-2B-Instruct": Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", torch_dtype="auto", device_map="auto"),
     "openai/clip-vit-base-patch32": CLIPModel.from_pretrained("openai/clip-vit-base-patch32"),
     "Salesforce/blip-image-captioning-base": BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 }
@@ -109,22 +108,18 @@ css = """
 with gr.Blocks(css=css) as demo:
     gr.Markdown(
     """
-    # Qwen2-VL
-    Use the Qwen2-VL models to detect objects in an image. The 7B variant seems to work much better.
-    **Usage**: Use the keyword "detect" and a description of the target (see examples below).
+    # Qwen2-VL Demo
     """)
     with gr.Tab(label="Qwen2-VL Input"):
         with gr.Row():
             with gr.Column():
                 input_img = gr.Image(label="Input Image", type="pil")
-                model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Qwen/Qwen2-VL-
-
-                text_input = gr.Textbox(label="User Prompt")
+                model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Qwen/Qwen2-VL-2B-Instruct")
+                text_input = gr.Textbox(label="Prompt")
                 submit_btn = gr.Button(value="Submit")
             with gr.Column():
                 model_output_text = gr.Textbox(label="Model Output Text")
-
-                #annotated_image = gr.Image(label="Annotated Image")
+
 
         gr.Examples(
             examples=[