Niki Zhang committed on
Commit 7cbc2fd · verified · 1 Parent(s): 4377520

Update app.py


Fixed the issue of overlapping captions
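As the diff below shows, the fix threads image_input_nobackground through as extra Gradio state, yields it from inference_click, and wires a focus_type.change handler that clears click_state and swaps that stored image back into image_input, so a new focus caption is drawn on a clean frame rather than on top of the previous bubble. The commit also introduces a global pre_click_index sentinel initialised to (inf, inf) (the nudge logic that would use it is left commented out) and strips surrounding quotes from the GPT focus caption before rendering it.

A minimal, self-contained sketch of that reset pattern, with hypothetical component names rather than the demo's real UI, assuming the Gradio 3-style event API already used in app.py; the actual diff does the same thing with an inline lambda:

import gradio as gr

def reset_on_focus_change(clean_image):
    # Clear the accumulated click state and show the stored, un-annotated image
    # again, so the next caption bubble is not drawn over the previous one.
    return [[], [], []], clean_image

with gr.Blocks() as demo:
    clean_image_state = gr.State(None)        # plays the role of image_input_nobackground
    click_state = gr.State([[], [], []])
    image_view = gr.Image(label="Image")      # plays the role of image_input
    focus_type = gr.Radio(["CFV-D", "CFV-DA"], label="Focus type")

    focus_type.change(
        reset_on_focus_change,
        inputs=[clean_image_state],
        outputs=[click_state, image_view],
        queue=False,
        show_progress=False,
    )

demo.launch()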

Files changed (1)
  1. app.py +39 -7
app.py CHANGED
@@ -1,3 +1,4 @@
+from math import inf
 import os
 import base64
 import json
@@ -26,7 +27,7 @@ import tts
 ###############################################################################
 
 
-#import spaces
+import spaces
 
 import os
 import imageio
@@ -280,7 +281,7 @@ def make3d(images):
 
 
 gpt_state = 0
-
+pre_click_index=(inf, inf)
 article = """
 <div style='margin:20px auto;'>
 <p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
@@ -532,7 +533,8 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
 print(generated_caption)
 print("new crop save",new_crop_save_path)
 
-yield state, state, click_state, image_input_nobackground, image_input_withbackground, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path
+yield state, state, click_state, image_input_nobackground, image_input_withbackground, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
+
 
 
 
@@ -541,11 +543,27 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
 out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
 input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key,new_crop_save_path):
 print("state",state)
-
+
+global pre_click_index
 click_index = click_index_state
+
+# if pre_click_index==click_index:
+#     click_index = (click_index[0] - 1, click_index[1] - 1)
+#     pre_click_index = click_index
+# else:
+#     pre_click_index = click_index
+print("click_index",click_index)
+print("pre_click_index",pre_click_index)
+print("input_points_state",input_points_state)
+print("input_labels_state",input_labels_state)
+
+
 input_mask = input_mask_state
 input_points = input_points_state
 input_labels = input_labels_state
+
+
+
 focus_map = {
 "CFV-D":0,
 "CFV-DA":1,
@@ -604,10 +622,13 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
 if not args.disable_gpt and text_refiner:
 print("new crop save",new_crop_save_path)
 focus_info=get_image_gpt(openai_api_key,new_crop_save_path,prompt)
+if focus_info.startswith('"') and focus_info.endswith('"'):
+    focus_info=focus_info[1:-1]
 
 # state = state + [(None, f"Wiki: {paragraph}")]
 state = state + [(None, f"Focus_Caption: {focus_info}")]
 print("new_cap",focus_info)
+
 refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
 input_points=input_points, input_labels=input_labels)
 try:
@@ -774,7 +795,7 @@ def export_chat_log(chat_state):
 return None
 chat_log = "\n".join(f"{entry[0]}\n{entry[1]}" for entry in chat_state if entry)
 print("export log...")
-print("chat_log",chat_log)
+print("chat_log", chat_log)
 with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
 temp_file.write(chat_log.encode('utf-8'))
 temp_file_path = temp_file.name
@@ -881,6 +902,7 @@ def create_ui():
 input_points_state = gr.State([])
 input_labels_state = gr.State([])
 new_crop_save_path = gr.State(None)
+image_input_nobackground = gr.State(None)
 
 
 
@@ -1028,6 +1050,7 @@ def create_ui():
 submit_tts = gr.Button(value="Submit", interactive=True)
 clear_tts = gr.Button(value="Clear", interactive=True)
 
+
 
 ###############################################################################
 # this part is for 3d generate.
@@ -1276,9 +1299,9 @@ def create_ui():
 inputs=[
 origin_image, point_prompt, click_mode, enable_wiki, language, sentiment, factuality, length,
 image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
-out_state, click_index_state, input_mask_state, input_points_state, input_labels_state
+out_state, click_index_state, input_mask_state, input_points_state, input_labels_state,
 ],
-outputs=[chatbot, state, click_state, image_input, input_image, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path],
+outputs=[chatbot, state, click_state, image_input, input_image, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground],
 show_progress=False, queue=True
 )
 
@@ -1297,6 +1320,15 @@ def create_ui():
 show_progress=True,
 queue=True
 )
+
+
+focus_type.change(
+lambda x: ([[], [], []], x),
+[image_input_nobackground],
+[click_state, image_input],
+queue=False,
+show_progress=False
+)
 
 
 
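A side note on the block that stays commented out in submit_caption: the idea is that when the same point is clicked twice in a row, the click index is shifted by one pixel so the second caption bubble cannot land at exactly the same coordinates. A stand-alone sketch of that logic, using a hypothetical helper name (the diff keeps it inline and disabled):

from math import inf

pre_click_index = (inf, inf)  # sentinel meaning "no previous click yet"

def nudge_repeated_click(click_index):
    # Mirrors the commented-out block: offset a repeated click by one pixel so
    # a second caption bubble is not placed exactly over the first one.
    global pre_click_index
    if pre_click_index == click_index:
        click_index = (click_index[0] - 1, click_index[1] - 1)
    pre_click_index = click_index
    return click_index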