Spaces:

tanbw
/

CosyVoice

Running on Zero

CosyVoice committed on Jul 11, 2024

Commit

6cebcb3

1 Parent(s): 0fd15bb

move use_spk_embedding to processor

Files changed (4) hide show

cosyvoice/dataset/processor.py CHANGED Viewed

@@ -308,7 +308,7 @@ def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000, m
             logging.fatal('Unsupported batch type {}'.format(batch_type))
-def padding(data, mode='train'):
     """ Padding the data into training data
         Args:
@@ -362,4 +362,8 @@ def padding(data, mode='train'):
                           'tts_index': tts_index,
                           'tts_text_token': tts_text_token,
                           'tts_text_token_len': tts_text_token_len})
         yield batch

             logging.fatal('Unsupported batch type {}'.format(batch_type))
+def padding(data, use_spk_embedding, mode='train'):
     """ Padding the data into training data
         Args:
                           'tts_index': tts_index,
                           'tts_text_token': tts_text_token,
                           'tts_text_token_len': tts_text_token_len})
+        if use_spk_embedding is True:
+            batch["embedding"] = batch["spk_embedding"]
+        else:
+            batch["embedding"] = batch["utt_embedding"]
         yield batch

cosyvoice/utils/executor.py CHANGED Viewed

@@ -52,10 +52,6 @@ class Executor:
                 info_dict["batch_idx"] = batch_idx
                 if cosyvoice_join(group_join, info_dict):
                     break
-                if info_dict["use_spk_embedding"] is True:
-                    batch_dict["embedding"] = batch_dict["spk_embedding"]
-                else:
-                    batch_dict["embedding"] = batch_dict["utt_embedding"]
                 # Disable gradient synchronizations across DDP processes.
                 # Within this context, gradients will be accumulated on module

                 info_dict["batch_idx"] = batch_idx
                 if cosyvoice_join(group_join, info_dict):
                     break
                 # Disable gradient synchronizations across DDP processes.
                 # Within this context, gradients will be accumulated on module

examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml CHANGED Viewed

@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
     batch_type: 'dynamic'
     max_frames_in_batch: 12000
 padding: !name:cosyvoice.dataset.processor.padding
 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
     scheduler: warmuplr
     scheduler_conf:
         warmup_steps: 25000
-    use_spk_embedding: False # change to True during sft
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2

     batch_type: 'dynamic'
     max_frames_in_batch: 12000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft
 # dataset processor pipeline
 data_pipeline: [
     scheduler: warmuplr
     scheduler_conf:
         warmup_steps: 25000
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2

examples/libritts/cosyvoice/conf/cosyvoice.yaml CHANGED Viewed

@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
     batch_type: 'dynamic'
     max_frames_in_batch: 2000
 padding: !name:cosyvoice.dataset.processor.padding
 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
     scheduler: warmuplr # change to constantlr during sft
     scheduler_conf:
         warmup_steps: 2500
-    use_spk_embedding: False # change to True during sft
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2

     batch_type: 'dynamic'
     max_frames_in_batch: 2000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft
 # dataset processor pipeline
 data_pipeline: [
     scheduler: warmuplr # change to constantlr during sft
     scheduler_conf:
         warmup_steps: 2500
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2