Spaces:

fffiloni
/

EchoMimic

Sleeping

fffiloni committed on Oct 30, 2024

Commit

f0478d8

verified ·

1 Parent(s): a644739

make sure image input is squared 512

Files changed (1) hide show

webgui.py CHANGED Viewed

@@ -160,6 +160,7 @@ def process_video(uploaded_img, uploaded_audio, width, height, length, seed, fac
     #### face musk prepare
     face_img = cv2.imread(uploaded_img)
     face_mask = np.zeros((face_img.shape[0], face_img.shape[1])).astype('uint8')
     det_bboxes, probs = face_detector.detect(face_img)
     select_bbox = select_face(det_bboxes, probs)
@@ -340,6 +341,8 @@ with gr.Blocks() as demo:
         if is_shared_ui:
             gr.Info("Trimming audio to max 10 seconds. Duplicate the space for unlimited audio length.")
             uploaded_audio = trim_audio(uploaded_audio, "trimmed_audio.wav")
         final_output_path = process_video(
             uploaded_img, uploaded_audio, width, height, length, seed, facemask_dilation_ratio, facecrop_dilation_ratio, context_frames, context_overlap, cfg, steps, sample_rate, fps, device

     #### face musk prepare
     face_img = cv2.imread(uploaded_img)
+    face_img = cv2.resize(face_img, (512, 512)) # make sure image is squared 512
     face_mask = np.zeros((face_img.shape[0], face_img.shape[1])).astype('uint8')
     det_bboxes, probs = face_detector.detect(face_img)
     select_bbox = select_face(det_bboxes, probs)
         if is_shared_ui:
             gr.Info("Trimming audio to max 10 seconds. Duplicate the space for unlimited audio length.")
             uploaded_audio = trim_audio(uploaded_audio, "trimmed_audio.wav")
         final_output_path = process_video(
             uploaded_img, uploaded_audio, width, height, length, seed, facemask_dilation_ratio, facecrop_dilation_ratio, context_frames, context_overlap, cfg, steps, sample_rate, fps, device