Spaces:
Running
Running
Niki Zhang
committed on
Update app.py
Browse files — Fixed the issue of overlapping captions
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import os
|
2 |
import base64
|
3 |
import json
|
@@ -26,7 +27,7 @@ import tts
|
|
26 |
###############################################################################
|
27 |
|
28 |
|
29 |
-
|
30 |
|
31 |
import os
|
32 |
import imageio
|
@@ -280,7 +281,7 @@ def make3d(images):
|
|
280 |
|
281 |
|
282 |
gpt_state = 0
|
283 |
-
|
284 |
article = """
|
285 |
<div style='margin:20px auto;'>
|
286 |
<p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
|
@@ -532,7 +533,8 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
|
|
532 |
print(generated_caption)
|
533 |
print("new crop save",new_crop_save_path)
|
534 |
|
535 |
-
yield state, state, click_state, image_input_nobackground, image_input_withbackground, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path
|
|
|
536 |
|
537 |
|
538 |
|
@@ -541,11 +543,27 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
|
|
541 |
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
542 |
input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key,new_crop_save_path):
|
543 |
print("state",state)
|
544 |
-
|
|
|
545 |
click_index = click_index_state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
546 |
input_mask = input_mask_state
|
547 |
input_points = input_points_state
|
548 |
input_labels = input_labels_state
|
|
|
|
|
|
|
549 |
focus_map = {
|
550 |
"CFV-D":0,
|
551 |
"CFV-DA":1,
|
@@ -604,10 +622,13 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
|
|
604 |
if not args.disable_gpt and text_refiner:
|
605 |
print("new crop save",new_crop_save_path)
|
606 |
focus_info=get_image_gpt(openai_api_key,new_crop_save_path,prompt)
|
|
|
|
|
607 |
|
608 |
# state = state + [(None, f"Wiki: {paragraph}")]
|
609 |
state = state + [(None, f"Focus_Caption: {focus_info}")]
|
610 |
print("new_cap",focus_info)
|
|
|
611 |
refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
|
612 |
input_points=input_points, input_labels=input_labels)
|
613 |
try:
|
@@ -774,7 +795,7 @@ def export_chat_log(chat_state):
|
|
774 |
return None
|
775 |
chat_log = "\n".join(f"{entry[0]}\n{entry[1]}" for entry in chat_state if entry)
|
776 |
print("export log...")
|
777 |
-
print("chat_log",chat_log)
|
778 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
|
779 |
temp_file.write(chat_log.encode('utf-8'))
|
780 |
temp_file_path = temp_file.name
|
@@ -881,6 +902,7 @@ def create_ui():
|
|
881 |
input_points_state = gr.State([])
|
882 |
input_labels_state = gr.State([])
|
883 |
new_crop_save_path = gr.State(None)
|
|
|
884 |
|
885 |
|
886 |
|
@@ -1028,6 +1050,7 @@ def create_ui():
|
|
1028 |
submit_tts = gr.Button(value="Submit", interactive=True)
|
1029 |
clear_tts = gr.Button(value="Clear", interactive=True)
|
1030 |
|
|
|
1031 |
|
1032 |
###############################################################################
|
1033 |
# this part is for 3d generate.
|
@@ -1276,9 +1299,9 @@ def create_ui():
|
|
1276 |
inputs=[
|
1277 |
origin_image, point_prompt, click_mode, enable_wiki, language, sentiment, factuality, length,
|
1278 |
image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
|
1279 |
-
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state
|
1280 |
],
|
1281 |
-
outputs=[chatbot, state, click_state, image_input, input_image, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path],
|
1282 |
show_progress=False, queue=True
|
1283 |
)
|
1284 |
|
@@ -1297,6 +1320,15 @@ def create_ui():
|
|
1297 |
show_progress=True,
|
1298 |
queue=True
|
1299 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1300 |
|
1301 |
|
1302 |
|
|
|
1 |
+
from math import inf
|
2 |
import os
|
3 |
import base64
|
4 |
import json
|
|
|
27 |
###############################################################################
|
28 |
|
29 |
|
30 |
+
import spaces
|
31 |
|
32 |
import os
|
33 |
import imageio
|
|
|
281 |
|
282 |
|
283 |
gpt_state = 0
|
284 |
+
pre_click_index=(inf, inf)
|
285 |
article = """
|
286 |
<div style='margin:20px auto;'>
|
287 |
<p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
|
|
|
533 |
print(generated_caption)
|
534 |
print("new crop save",new_crop_save_path)
|
535 |
|
536 |
+
yield state, state, click_state, image_input_nobackground, image_input_withbackground, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
|
537 |
+
|
538 |
|
539 |
|
540 |
|
|
|
543 |
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
544 |
input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key,new_crop_save_path):
|
545 |
print("state",state)
|
546 |
+
|
547 |
+
global pre_click_index
|
548 |
click_index = click_index_state
|
549 |
+
|
550 |
+
# if pre_click_index==click_index:
|
551 |
+
# click_index = (click_index[0] - 1, click_index[1] - 1)
|
552 |
+
# pre_click_index = click_index
|
553 |
+
# else:
|
554 |
+
# pre_click_index = click_index
|
555 |
+
print("click_index",click_index)
|
556 |
+
print("pre_click_index",pre_click_index)
|
557 |
+
print("input_points_state",input_points_state)
|
558 |
+
print("input_labels_state",input_labels_state)
|
559 |
+
|
560 |
+
|
561 |
input_mask = input_mask_state
|
562 |
input_points = input_points_state
|
563 |
input_labels = input_labels_state
|
564 |
+
|
565 |
+
|
566 |
+
|
567 |
focus_map = {
|
568 |
"CFV-D":0,
|
569 |
"CFV-DA":1,
|
|
|
622 |
if not args.disable_gpt and text_refiner:
|
623 |
print("new crop save",new_crop_save_path)
|
624 |
focus_info=get_image_gpt(openai_api_key,new_crop_save_path,prompt)
|
625 |
+
if focus_info.startswith('"') and focus_info.endswith('"'):
|
626 |
+
focus_info=focus_info[1:-1]
|
627 |
|
628 |
# state = state + [(None, f"Wiki: {paragraph}")]
|
629 |
state = state + [(None, f"Focus_Caption: {focus_info}")]
|
630 |
print("new_cap",focus_info)
|
631 |
+
|
632 |
refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
|
633 |
input_points=input_points, input_labels=input_labels)
|
634 |
try:
|
|
|
795 |
return None
|
796 |
chat_log = "\n".join(f"{entry[0]}\n{entry[1]}" for entry in chat_state if entry)
|
797 |
print("export log...")
|
798 |
+
print("chat_log", chat_log)
|
799 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
|
800 |
temp_file.write(chat_log.encode('utf-8'))
|
801 |
temp_file_path = temp_file.name
|
|
|
902 |
input_points_state = gr.State([])
|
903 |
input_labels_state = gr.State([])
|
904 |
new_crop_save_path = gr.State(None)
|
905 |
+
image_input_nobackground = gr.State(None)
|
906 |
|
907 |
|
908 |
|
|
|
1050 |
submit_tts = gr.Button(value="Submit", interactive=True)
|
1051 |
clear_tts = gr.Button(value="Clear", interactive=True)
|
1052 |
|
1053 |
+
|
1054 |
|
1055 |
###############################################################################
|
1056 |
# this part is for 3d generate.
|
|
|
1299 |
inputs=[
|
1300 |
origin_image, point_prompt, click_mode, enable_wiki, language, sentiment, factuality, length,
|
1301 |
image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
|
1302 |
+
out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
|
1303 |
],
|
1304 |
+
outputs=[chatbot, state, click_state, image_input, input_image, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground],
|
1305 |
show_progress=False, queue=True
|
1306 |
)
|
1307 |
|
|
|
1320 |
show_progress=True,
|
1321 |
queue=True
|
1322 |
)
|
1323 |
+
|
1324 |
+
|
1325 |
+
focus_type.change(
|
1326 |
+
lambda x: ([[], [], []], x),
|
1327 |
+
[image_input_nobackground],
|
1328 |
+
[click_state, image_input],
|
1329 |
+
queue=False,
|
1330 |
+
show_progress=False
|
1331 |
+
)
|
1332 |
|
1333 |
|
1334 |
|