nhatipoglu committed
Commit: 753c319
Parent(s): 7bae4a0
Update app.py
app.py
CHANGED
@@ -11,8 +11,7 @@ import re
 
 models = {
     "Qwen/Qwen2-VL-7B-Instruct": Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto"),
-    "Qwen/Qwen2-VL-2B-Instruct": Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", torch_dtype="auto", device_map="auto"),
-
+    "Qwen/Qwen2-VL-2B-Instruct": Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct", torch_dtype="auto", device_map="auto"),
     "openai/clip-vit-base-patch32": CLIPModel.from_pretrained("openai/clip-vit-base-patch32"),
     "Salesforce/blip-image-captioning-base": BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 }
@@ -109,22 +108,18 @@ css = """
 with gr.Blocks(css=css) as demo:
     gr.Markdown(
     """
-    # Qwen2-VL
-    Use the Qwen2-VL models to detect objects in an image. The 7B variant seems to work much better.
-    **Usage**: Use the keyword "detect" and a description of the target (see examples below).
+    # Qwen2-VL Demo
     """)
     with gr.Tab(label="Qwen2-VL Input"):
         with gr.Row():
             with gr.Column():
                 input_img = gr.Image(label="Input Image", type="pil")
-                model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Qwen/Qwen2-VL-
-
-                text_input = gr.Textbox(label="User Prompt")
+                model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Qwen/Qwen2-VL-2B-Instruct")
+                text_input = gr.Textbox(label="Prompt")
                 submit_btn = gr.Button(value="Submit")
             with gr.Column():
                 model_output_text = gr.Textbox(label="Model Output Text")
-
-                #annotated_image = gr.Image(label="Annotated Image")
+
 
         gr.Examples(
             examples=[