CosyVoice committed
Commit 6cebcb3 · 1 Parent(s): 0fd15bb

move use_spk_embedding to processor

cosyvoice/dataset/processor.py CHANGED
@@ -308,7 +308,7 @@ def batch(data, batch_type='static', batch_size=16, max_frames_in_batch=12000, m
         logging.fatal('Unsupported batch type {}'.format(batch_type))
 
 
-def padding(data, mode='train'):
+def padding(data, use_spk_embedding, mode='train'):
     """ Padding the data into training data
 
     Args:
@@ -362,4 +362,8 @@ def padding(data, mode='train'):
                       'tts_index': tts_index,
                       'tts_text_token': tts_text_token,
                       'tts_text_token_len': tts_text_token_len})
+        if use_spk_embedding is True:
+            batch["embedding"] = batch["spk_embedding"]
+        else:
+            batch["embedding"] = batch["utt_embedding"]
         yield batch
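
With this change the embedding choice happens once per batch inside the dataset pipeline. A minimal runnable sketch of the new contract, where toy one-element lists stand in for the real embedding tensors and functools.partial stands in for the YAML binding shown further below (the real padding function also pads and collates, which is omitted here):

    from functools import partial

    def padding(data, use_spk_embedding, mode='train'):
        # Trimmed stand-in for the patched processor.padding: only the new
        # selection logic is kept; the actual padding/collation is omitted.
        for batch in data:
            if use_spk_embedding is True:
                batch["embedding"] = batch["spk_embedding"]   # speaker-level, for sft
            else:
                batch["embedding"] = batch["utt_embedding"]   # utterance-level, for pretraining
            yield batch

    # The config pre-binds the flag, so downstream code still calls padding(data):
    padding_fn = partial(padding, use_spk_embedding=False)

    batches = [{"spk_embedding": [0.1], "utt_embedding": [0.2]}]
    print(next(padding_fn(batches))["embedding"])  # [0.2], the utterance embedding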
cosyvoice/utils/executor.py CHANGED
@@ -52,10 +52,6 @@ class Executor:
             info_dict["batch_idx"] = batch_idx
             if cosyvoice_join(group_join, info_dict):
                 break
-            if info_dict["use_spk_embedding"] is True:
-                batch_dict["embedding"] = batch_dict["spk_embedding"]
-            else:
-                batch_dict["embedding"] = batch_dict["utt_embedding"]
 
             # Disable gradient synchronizations across DDP processes.
             # Within this context, gradients will be accumulated on module
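
With those four lines gone, the executor is flag-agnostic: every batch_dict reaching the training step already carries a resolved "embedding" key, and the speaker-vs-utterance choice travels with the dataset configuration instead of with info_dict.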
examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml CHANGED
@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
     batch_type: 'dynamic'
     max_frames_in_batch: 12000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft
 
 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
     scheduler: warmuplr
     scheduler_conf:
         warmup_steps: 25000
-    use_spk_embedding: False # change to True during sft
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2
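
The flag leaves train_conf and attaches to the padding entry, whose !name: tag hyperpyyaml loads as a partially applied function. A minimal sketch of that binding, using builtins.sorted as a stand-in target so the snippet runs without the CosyVoice package installed (the mapping-under-tag-becomes-kwargs behavior is hyperpyyaml's; everything else here is illustrative):

    from hyperpyyaml import load_hyperpyyaml

    # builtins.sorted stands in for cosyvoice.dataset.processor.padding; the
    # mapping under the !name: tag becomes pre-bound keyword arguments, which
    # is how use_spk_embedding now reaches padding() at dataset-build time.
    conf = load_hyperpyyaml("""
    padding: !name:builtins.sorted
        reverse: True
    """)
    print(conf["padding"]([2, 0, 1]))  # [2, 1, 0]: the kwarg was bound at load time

Flipping the YAML value to True for sft changes the bound keyword argument without touching any code, which is the point of moving it out of train_conf.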
examples/libritts/cosyvoice/conf/cosyvoice.yaml CHANGED
@@ -167,6 +167,7 @@ batch: !name:cosyvoice.dataset.processor.batch
     batch_type: 'dynamic'
     max_frames_in_batch: 2000
 padding: !name:cosyvoice.dataset.processor.padding
+    use_spk_embedding: False # change to True during sft
 
 # dataset processor pipeline
 data_pipeline: [
@@ -190,7 +191,6 @@ train_conf:
     scheduler: warmuplr # change to constantlr during sft
     scheduler_conf:
         warmup_steps: 2500
-    use_spk_embedding: False # change to True during sft
     max_epoch: 200
     grad_clip: 5
     accum_grad: 2