Niki Zhang committed
Update app.py
Update prompt
Using selected objects as image input
app.py
CHANGED
@@ -173,7 +173,7 @@ def chat_input_callback(*args):
 
 
 
-def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None):
+def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None,language="English"):
     if isinstance(image_input, dict): # if upload from sketcher_input, input contains image and mask
         image_input, mask = image_input['image'], image_input['mask']
 
@@ -207,7 +207,7 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
         parsed_data = json.loads(parsed_data.replace("'", "\""))
         name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["material"]
         artwork_info = f"<div>Painting: {name}<br>Artist name: {artist}<br>Year: {year}<br>Material: {material}</div>"
-        paragraph = get_image_gpt(openai_api_key, new_image_path,"
+        paragraph = get_image_gpt(openai_api_key, new_image_path,f"What's going on in this picture? in {language}")
 
         state = [(None, 'Received new image, resize it to width {} and height {}: '.format(image_input.size[0], image_input.size[1]))]
 
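Note: get_image_gpt itself is not part of this diff. A minimal sketch of what such a helper might look like, assuming it wraps OpenAI's vision-capable chat completions endpoint and inlines the local image as a base64 data URL (the model name and max_tokens below are assumptions, not the app's actual values):

# Hypothetical sketch of get_image_gpt; the real helper lives elsewhere in app.py.
import base64
import requests

def get_image_gpt(api_key, image_path, prompt, model="gpt-4-vision-preview"):
    # Encode the local image so it can be inlined as a data URL.
    with open(image_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    resp = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers={"Authorization": f"Bearer {api_key}"},
        json={
            "model": model,  # assumed model name
            "messages": [{
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url",
                     "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
                ],
            }],
            "max_tokens": 300,  # assumed cap
        },
        timeout=60,
    )
    return resp.json()["choices"][0]["message"]["content"]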
@@ -272,15 +272,16 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
 
     generated_caption = text
     print(generated_caption)
+    print("new crop save",new_crop_save_path)
 
-    yield state, state, click_state, image_input, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state
+    yield state, state, click_state, image_input, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path
 
 
 
 
 def submit_caption(image_input, state, generated_caption, text_refiner, visual_chatgpt, enable_wiki, length, sentiment, factuality, language,
                    out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
-                   input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key):
+                   input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key,new_crop_save_path):
     print("state",state)
 
     click_index = click_index_state
@@ -305,10 +306,10 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
     }
 
     prompt_list = [
-
-
-
-
+        'Wiki_caption: {Wiki_caption}, you have to generate a caption according to the image and wiki caption. Around {length} words of {sentiment} sentiment in {language}.',
+        'Wiki_caption: {Wiki_caption}, you have to select sentences from wiki caption that describe the surrounding objects that may be associated with the picture object. Around {length} words of {sentiment} sentiment in {language}.',
+        'Wiki_caption: {Wiki_caption}. You have to choose sentences from the wiki caption that describe unrelated objects to the image. Around {length} words of {sentiment} sentiment in {language}.'
+    ]
 
 
     if mapped_value != -1:
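The three templates above are plain str.format strings. Only the tail of the formatting call is visible in the next hunk, so the following is a sketch of how they appear to be consumed, assuming mapped_value selects a template (whether it is 0- or 1-based is not visible in this diff) and the precomputed paragraph supplies {Wiki_caption}:

# Sketch of the selection-and-format step (assumptions noted inline).
if mapped_value != -1:
    prompt = prompt_list[mapped_value - 1].format(  # assumed 1-based index
        Wiki_caption=paragraph,          # assumed source of {Wiki_caption}
        length=controls['length'],       # assumed key, by analogy with the others
        sentiment=controls['sentiment'],
        language=controls['language'],
    )
else:
    prompt = "Invalid focus type."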
@@ -319,14 +320,13 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
             sentiment=controls['sentiment'],
             language=controls['language']
         )
-        prompt+="You should generate a descriptive, coherent and human-like paragraph"
 
     else:
         print("error prompting")
         prompt = "Invalid focus type."
 
     if controls['factuality'] == "Imagination":
-        prompt += "
+        prompt += "Assuming that I am someone who has viewed a lot of art and has a lot of experience viewing art. Explain artistic features (composition, color, style, or use of light) and discuss the symbolism of the content and its influence on later artistic movements"
 
     print("Prompt:", prompt)
     print("click",click_index)
@@ -343,7 +343,9 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
 
 
     if not args.disable_gpt and text_refiner:
-
+        print("new crop save",new_crop_save_path)
+        focus_info=get_image_gpt(openai_api_key,new_crop_save_path,prompt)
+
         state = state + [(None, f"Wiki: {paragraph}")]
         state = state + [(None, f"Focus_Caption: {focus_info}")]
         print("new_cap",focus_info)
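This hunk is the commit's headline change: the focus caption is now generated from the cropped selected object rather than the full frame. The crop written to new_crop_save_path is produced outside this diff (presumably when the clicked object is segmented in inference_click). A hypothetical sketch of that step, assuming a PIL image and a binary SAM-style mask:

# Hypothetical crop-and-save step; the actual code is outside this diff.
import numpy as np
from PIL import Image

def save_mask_crop(image: Image.Image, mask: np.ndarray, path: str) -> str:
    # Bounding box of the selected object from the binary mask.
    ys, xs = np.nonzero(mask)
    left, top, right, bottom = xs.min(), ys.min(), xs.max() + 1, ys.max() + 1
    image.crop((left, top, right, bottom)).save(path)
    return path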
@@ -601,6 +603,7 @@ def create_ui():
         input_mask_state = gr.State(np.zeros((1, 1)))
         input_points_state = gr.State([])
         input_labels_state = gr.State([])
+        new_crop_save_path = gr.State(None)
 
 
 
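The gr.State added above is Gradio's standard way to pass a per-session value between callbacks without rendering it: one event writes it by listing it in outputs (as the inference_click binding below now does; the last element of the yield tuple lands here because outputs order matches the tuple), and another reads it via inputs (as the submit_caption binding now does). A minimal, self-contained illustration of the pattern, not the app's code:

import gradio as gr

with gr.Blocks() as demo:
    path_state = gr.State(None)          # hidden per-session value
    produce = gr.Button("produce")
    consume = gr.Button("consume")
    out = gr.Textbox()

    # Writer: whatever the callback returns is stored in path_state.
    produce.click(lambda: "/tmp/crop.png", inputs=[], outputs=[path_state])
    # Reader: the stored value arrives as a plain argument.
    consume.click(lambda p: f"got {p}", inputs=[path_state], outputs=[out])

demo.launch()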
@@ -874,7 +877,7 @@ def create_ui():
             image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
             out_state, click_index_state, input_mask_state, input_points_state, input_labels_state
         ],
-        outputs=[chatbot, state, click_state, image_input, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state],
+        outputs=[chatbot, state, click_state, image_input, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path],
         show_progress=False, queue=True
     )
 
@@ -884,7 +887,7 @@ def create_ui():
         inputs=[
             image_input, state, generated_caption, text_refiner, visual_chatgpt, enable_wiki, length, sentiment, factuality, language,
             out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
-            input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key
+            input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key,new_crop_save_path
         ],
         outputs=[
             chatbot, state, image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,