Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -567,8 +567,8 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
|
|
567 |
focus_map = {
|
568 |
"CFV-D":0,
|
569 |
"CFV-DA":1,
|
570 |
-
"
|
571 |
-
"PFV-
|
572 |
}
|
573 |
|
574 |
mapped_value = focus_map.get(focus_type, -1)
|
@@ -632,18 +632,18 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
|
|
632 |
focus_info=focus_info[1:-1]
|
633 |
|
634 |
# state = state + [(None, f"Wiki: {paragraph}")]
|
635 |
-
state = state + [(None, f"
|
636 |
print("new_cap",focus_info)
|
637 |
|
638 |
refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
|
639 |
input_points=input_points, input_labels=input_labels)
|
640 |
try:
|
641 |
-
waveform_visual, audio_output = tts.predict(focus_info, input_language, input_audio, input_mic, use_mic, agree)
|
642 |
return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
|
643 |
except Exception as e:
|
644 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
645 |
print(f"Error during TTS prediction: {str(e)}")
|
646 |
-
return state, state,
|
647 |
|
648 |
else:
|
649 |
try:
|
@@ -652,7 +652,7 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
|
|
652 |
except Exception as e:
|
653 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
654 |
print(f"Error during TTS prediction: {str(e)}")
|
655 |
-
return state, state,
|
656 |
|
657 |
|
658 |
def encode_image(image_path):
|
@@ -941,7 +941,7 @@ def create_ui():
|
|
941 |
material_label = gr.Button(value="Material: ")
|
942 |
with gr.Row(scale=1.0):
|
943 |
focus_type = gr.Radio(
|
944 |
-
choices=["CFV-D", "CFV-DA", "
|
945 |
value="CFV-D",
|
946 |
label="Focus Type",
|
947 |
interactive=True)
|
@@ -1320,7 +1320,7 @@ def create_ui():
|
|
1320 |
input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key,new_crop_save_path
|
1321 |
],
|
1322 |
outputs=[
|
1323 |
-
chatbot, state,
|
1324 |
output_waveform, output_audio
|
1325 |
],
|
1326 |
show_progress=True,
|
|
|
567 |
focus_map = {
|
568 |
"CFV-D":0,
|
569 |
"CFV-DA":1,
|
570 |
+
"CFV-DAI":2,
|
571 |
+
"PFV-DDA":3
|
572 |
}
|
573 |
|
574 |
mapped_value = focus_map.get(focus_type, -1)
|
|
|
632 |
focus_info=focus_info[1:-1]
|
633 |
|
634 |
# state = state + [(None, f"Wiki: {paragraph}")]
|
635 |
+
state = state + [(None, f"{focus_info}")]
|
636 |
print("new_cap",focus_info)
|
637 |
|
638 |
refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
|
639 |
input_points=input_points, input_labels=input_labels)
|
640 |
try:
|
641 |
+
waveform_visual, audio_output = tts.predict(focus_info.replace('#', ''), input_language, input_audio, input_mic, use_mic, agree)
|
642 |
return state, state, refined_image_input, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, waveform_visual, audio_output
|
643 |
except Exception as e:
|
644 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
645 |
print(f"Error during TTS prediction: {str(e)}")
|
646 |
+
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
|
647 |
|
648 |
else:
|
649 |
try:
|
|
|
652 |
except Exception as e:
|
653 |
state = state + [(None, f"Error during TTS prediction: {str(e)}")]
|
654 |
print(f"Error during TTS prediction: {str(e)}")
|
655 |
+
return state, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state, None, None
|
656 |
|
657 |
|
658 |
def encode_image(image_path):
|
|
|
941 |
material_label = gr.Button(value="Material: ")
|
942 |
with gr.Row(scale=1.0):
|
943 |
focus_type = gr.Radio(
|
944 |
+
choices=["CFV-D", "CFV-DA", "CFV-DAI","PFV-DDA"],
|
945 |
value="CFV-D",
|
946 |
label="Focus Type",
|
947 |
interactive=True)
|
|
|
1320 |
input_text, input_language, input_audio, input_mic, use_mic, agree,paragraph,focus_type,openai_api_key,new_crop_save_path
|
1321 |
],
|
1322 |
outputs=[
|
1323 |
+
chatbot, state, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,
|
1324 |
output_waveform, output_audio
|
1325 |
],
|
1326 |
show_progress=True,
|