Katock committed on
Commit
13b8440
·
1 Parent(s): ee4a2a4

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +108 -55
utils.py CHANGED
@@ -66,6 +66,84 @@ def plot_data_to_numpy(x, y):
66
  plt.close()
67
  return data
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  def f0_to_coarse(f0):
71
  is_torch = isinstance(f0, torch.Tensor)
@@ -78,6 +156,35 @@ def f0_to_coarse(f0):
78
  assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, (f0_coarse.max(), f0_coarse.min())
79
  return f0_coarse
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  def get_content(cmodel, y):
82
  with torch.no_grad():
83
  c = cmodel.extract_features(y.squeeze(1))[0]
@@ -174,7 +281,6 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, skip_optimizer=False
174
  checkpoint_path, iteration))
175
  return model, optimizer, learning_rate, iteration
176
 
177
-
178
  def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path):
179
  logger.info("Saving model and optimizer state at iteration {} to {}".format(
180
  iteration, checkpoint_path))
@@ -292,47 +398,6 @@ def load_filepaths_and_text(filename, split="|"):
292
  return filepaths_and_text
293
 
294
 
295
def get_hparams(init=True):
    """Build the run's hyper-parameters from command-line arguments.

    Parses ``-c/--config`` (path of the JSON configuration file, default
    ``./configs/config.json``) and the required ``-m/--model`` (model name)
    from the process arguments, and makes sure ``./logs/<model>`` exists.

    Args:
        init: When true, the file given by ``--config`` is read and its text
            is copied to ``./logs/<model>/config.json``. When false, the
            previously saved copy is read instead.

    Returns:
        An ``HParams`` built from the JSON content, with ``model_dir`` set to
        the model's log directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', type=str, default="./configs/config.json",
                        help='JSON file for configuration')
    parser.add_argument('-m', '--model', type=str, required=True,
                        help='Model name')

    args = parser.parse_args()
    model_dir = os.path.join("./logs", args.model)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(model_dir, exist_ok=True)

    config_save_path = os.path.join(model_dir, "config.json")
    # Read/write the JSON as UTF-8 explicitly so non-ASCII values do not
    # depend on the platform's locale encoding.
    if init:
        with open(args.config, "r", encoding="utf-8") as f:
            data = f.read()
        with open(config_save_path, "w", encoding="utf-8") as f:
            f.write(data)
    else:
        with open(config_save_path, "r", encoding="utf-8") as f:
            data = f.read()

    hparams = HParams(**json.loads(data))
    hparams.model_dir = model_dir
    return hparams
323
-
324
-
325
def get_hparams_from_dir(model_dir):
    """Load the hyper-parameters saved in a model directory.

    Args:
        model_dir: Directory that contains a ``config.json`` file.

    Returns:
        An ``HParams`` built from the JSON content, with ``model_dir`` set to
        the given directory.
    """
    config_save_path = os.path.join(model_dir, "config.json")
    # Decode as UTF-8 explicitly so non-ASCII values do not depend on the
    # platform's locale encoding.
    with open(config_save_path, "r", encoding="utf-8") as f:
        config = json.load(f)

    hparams = HParams(**config)
    hparams.model_dir = model_dir
    return hparams
334
-
335
-
336
  def get_hparams_from_file(config_path):
337
  with open(config_path, "r") as f:
338
  data = f.read()
@@ -393,19 +458,7 @@ def repeat_expand_2d(content, target_len):
393
  return target
394
 
395
 
396
def mix_model(model_paths, mix_rate, mode):
    """Blend several checkpoints into one weighted-sum checkpoint.

    Every entry of the ``"model"`` mapping of the first checkpoint is
    replaced by the weighted sum of the same entry across all checkpoints.
    All other top-level entries of the first checkpoint are kept as loaded.
    The result is written to ``output.pth`` in the current directory.

    Args:
        model_paths: Paths of the checkpoints to blend; each must load to a
            mapping with a ``"model"`` entry holding tensors.
        mix_rate: One weight per checkpoint, expressed in percent (it is
            divided by 100 before use).
        mode: When equal to 0 the scaled weights are additionally passed
            through a softmax; any other value uses them as they are.

    Returns:
        The path of the written checkpoint.
    """
    weights = torch.FloatTensor(mix_rate) / 100
    if mode == 0:
        weights = F.softmax(weights, dim=0)

    # NOTE: torch.load unpickles the files; only use trusted checkpoints.
    # Each file is loaded exactly once; the first one doubles as the template.
    checkpoints = [torch.load(path) for path in model_paths]
    template = checkpoints[0]
    state_dicts = [checkpoint["model"] for checkpoint in checkpoints]

    for key in template["model"].keys():
        reference = template["model"][key]
        keeps_dtype = reference.is_floating_point() or reference.is_complex()
        # Integer/bool tensors cannot receive a float product in place, so
        # they are accumulated in float32 and converted back afterwards.
        acc_dtype = reference.dtype if keeps_dtype else torch.float32
        blended = torch.zeros_like(reference, dtype=acc_dtype)
        for i, state in enumerate(state_dicts):
            blended += state[key] * weights[i]
        if not keeps_dtype:
            blended = blended.round().to(reference.dtype)
        # Safe to overwrite now: this key is not read again.
        template["model"][key] = blended

    output_path = os.path.join(os.path.curdir, "output.pth")
    torch.save(template, output_path)
    return output_path
408
-
409
  def change_rms(data1, sr1, data2, sr2, rate): # 1是输入音频,2是输出音频,rate是2的占比 from RVC
410
  # print(data1.max(),data2.max())
411
  rms1 = librosa.feature.rms(
 
66
  plt.close()
67
  return data
68
 
69
def interpolate_f0(f0):
    """Fill the unvoiced gaps of an F0 contour by interpolation.

    Frames whose value is ``<= 0`` are treated as unvoiced. The input array
    is left untouched; the work is done on a flattened copy.

    Fill rules (kept identical to the historical behaviour):
      * a gap before the first voiced frame takes the value of the next
        voiced frame;
      * a gap between two voiced frames is filled with a linear ramp whose
        step is ``(next - previous) / gap_length``, so the last filled frame
        equals the next voiced value;
      * when the next voiced frame is the very last frame, or there is none,
        the rest of the contour (including that last frame) is set to the
        last voiced value seen, or to 0 when there was none.

    Args:
        f0: Array of per-frame F0 values.

    Returns:
        A tuple ``(ip_data, vuv_vector)`` of 1-D arrays: the interpolated
        contour (same dtype as the input) and a float32 mask that is 1.0
        where the input was ``> 0`` and 0.0 elsewhere.
    """
    # np.array copies, so the caller's buffer is never modified.
    ip_data = np.array(f0).reshape(-1)
    frame_number = ip_data.size

    # The mask is taken before any gap is filled.
    vuv_vector = (ip_data > 0.0).astype(np.float32)

    last_value = 0.0
    i = 0
    while i < frame_number:
        if not ip_data[i] <= 0.0:
            # Voiced frame: keep it and remember it for later fills.
            last_value = ip_data[i]
            i += 1
            continue

        # Find the first voiced frame after the gap that starts at i.
        j = i + 1
        while j < frame_number and not ip_data[j] > 0.0:
            j += 1

        if j >= frame_number - 1:
            # No usable voiced frame ahead: hold the last voiced value.
            ip_data[i:] = last_value
            break

        if last_value > 0.0:
            # Length-1 slices keep the arithmetic in the array's own dtype.
            previous = ip_data[i - 1:i]
            step = (ip_data[j:j + 1] - previous) / float(j - i)
            ramp = np.arange(1, j - i + 1, dtype=ip_data.dtype)
            ip_data[i:j] = previous + step * ramp
        else:
            ip_data[i:j] = ip_data[j]
        i = j

    return ip_data, vuv_vector
106
+
107
def compute_f0_parselmouth(wav_numpy, p_len=None, sampling_rate=44100, hop_length=512):
    """Estimate an F0 contour with parselmouth's ``to_pitch_ac``.

    Args:
        wav_numpy: Waveform array; its first dimension is divided by
            ``hop_length`` to get the expected frame count.
        p_len: Wanted number of frames. Defaults to the expected frame
            count; otherwise it must be within 4 frames of it.
        sampling_rate: Sampling rate handed to ``parselmouth.Sound``.
        hop_length: Hop size in samples between frames.

    Returns:
        The ``'frequency'`` array of the selected pitch candidates,
        zero-padded on both sides when it is shorter than ``p_len``. It is
        returned as is when it is not shorter.
    """
    import parselmouth

    expected_frames = wav_numpy.shape[0] // hop_length
    if p_len is None:
        p_len = expected_frames
    else:
        assert abs(p_len - expected_frames) < 4, "pad length error"

    time_step_ms = hop_length / sampling_rate * 1000
    sound = parselmouth.Sound(wav_numpy, sampling_rate)
    pitch = sound.to_pitch_ac(
        time_step=time_step_ms / 1000,
        voicing_threshold=0.6,
        pitch_floor=50,
        pitch_ceiling=1100,
    )
    f0 = pitch.selected_array['frequency']

    # Split the missing frames between both ends, the extra one in front.
    missing = p_len - len(f0)
    pad_front = (missing + 1) // 2
    pad_back = missing - pad_front
    if pad_front > 0 or pad_back > 0:
        f0 = np.pad(f0, [[pad_front, pad_back]], mode='constant')
    return f0
125
+
126
def resize_f0(x, target_len):
    """Resample an F0 contour to ``target_len`` frames by linear interpolation.

    Values below 0.001 are replaced by NaN before interpolating, and every
    NaN in the interpolated result is turned back into 0.

    Args:
        x: Sequence or array of F0 values. It is copied and converted to
            float, so integer and list input are accepted and the argument
            is never modified.
        target_len: Number of frames in the result.

    Returns:
        A float array of length ``target_len``. An empty input yields
        ``target_len`` zeros.
    """
    # A float copy is required: NaN cannot be stored in an integer array.
    source = np.array(x, dtype=float)
    if source.size == 0:
        # Nothing to interpolate from.
        return np.zeros(target_len)

    source[source < 0.001] = np.nan
    source_len = len(source)
    positions = np.arange(0, source_len * target_len, source_len) / target_len
    target = np.interp(positions, np.arange(0, source_len), source)
    return np.nan_to_num(target)
132
+
133
def compute_f0_dio(wav_numpy, p_len=None, sampling_rate=44100, hop_length=512):
    """Estimate an F0 contour with pyworld's ``dio`` and ``stonemask``.

    Args:
        wav_numpy: Waveform array; it is converted to double precision
            before being handed to pyworld.
        p_len: Wanted number of frames. Defaults to
            ``wav_numpy.shape[0] // hop_length``.
        sampling_rate: Sampling rate handed to pyworld.
        hop_length: Hop size in samples; the frame period passed to ``dio``
            is ``1000 * hop_length / sampling_rate``.

    Returns:
        The refined contour, rounded to one decimal and resized to
        ``p_len`` frames with ``resize_f0``.
    """
    import pyworld

    if p_len is None:
        p_len = wav_numpy.shape[0] // hop_length

    # Convert once and share the buffer between both pyworld calls.
    wav = wav_numpy.astype(np.double)
    f0, t = pyworld.dio(
        wav,
        fs=sampling_rate,
        f0_ceil=800,
        frame_period=1000 * hop_length / sampling_rate,
    )
    f0 = pyworld.stonemask(wav, f0, t, sampling_rate)
    # Vectorised rounding replaces the per-frame Python loop.
    f0 = np.round(f0, 1)
    return resize_f0(f0, p_len)
147
 
148
  def f0_to_coarse(f0):
149
  is_torch = isinstance(f0, torch.Tensor)
 
156
  assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, (f0_coarse.max(), f0_coarse.min())
157
  return f0_coarse
158
 
159
def get_hubert_model():
    """Load the checkpoint at ``hubert/checkpoint_best_legacy_500.pt``.

    The path is printed, the checkpoint is loaded through fairseq's
    ``load_model_ensemble_and_task``, and the first model of the returned
    ensemble is switched to eval mode.

    Returns:
        The first loaded model, in eval mode.
    """
    vec_path = "hubert/checkpoint_best_legacy_500.pt"
    print("load model(s) from {}".format(vec_path))
    # Imported here, after the message, so fairseq is only needed on use.
    from fairseq import checkpoint_utils

    # The configuration and task returned with the ensemble are not needed.
    ensemble, _, _ = checkpoint_utils.load_model_ensemble_and_task(
        [vec_path],
        suffix="",
    )
    hubert = ensemble[0]
    hubert.eval()
    return hubert
170
+
171
def get_hubert_content(hmodel, wav_16k_tensor):
    """Run a waveform through ``hmodel`` and return its projected features.

    Args:
        hmodel: Object exposing ``extract_features(source, padding_mask,
            output_layer)`` and ``final_proj``.
        wav_16k_tensor: 1-D waveform tensor, or a 2-D tensor that is first
            averaged over its last dimension (the "double channels" case).

    Returns:
        ``final_proj`` applied to the first item returned by
        ``extract_features`` (called with ``output_layer=9``), with
        dimensions 1 and 2 swapped.
    """
    device = wav_16k_tensor.device

    signal = wav_16k_tensor
    if signal.dim() == 2:
        # Collapse the two channels into one.
        signal = signal.mean(-1)
    assert signal.dim() == 1, signal.dim()

    # Add a leading batch dimension of size 1; the mask marks no padding.
    batch = signal.view(1, -1)
    no_padding = torch.zeros(batch.shape, dtype=torch.bool)
    source = batch.to(device)
    padding_mask = no_padding.to(device)

    with torch.no_grad():
        extracted = hmodel.extract_features(
            source=source,
            padding_mask=padding_mask,
            output_layer=9,
        )
        projected = hmodel.final_proj(extracted[0])
    return projected.transpose(1, 2)
187
+
188
  def get_content(cmodel, y):
189
  with torch.no_grad():
190
  c = cmodel.extract_features(y.squeeze(1))[0]
 
281
  checkpoint_path, iteration))
282
  return model, optimizer, learning_rate, iteration
283
 
 
284
  def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path):
285
  logger.info("Saving model and optimizer state at iteration {} to {}".format(
286
  iteration, checkpoint_path))
 
398
  return filepaths_and_text
399
 
400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  def get_hparams_from_file(config_path):
402
  with open(config_path, "r") as f:
403
  data = f.read()
 
458
  return target
459
 
460
 
461
+
 
 
 
 
 
 
 
 
 
 
 
 
462
  def change_rms(data1, sr1, data2, sr2, rate): # 1是输入音频,2是输出音频,rate是2的占比 from RVC
463
  # print(data1.max(),data2.max())
464
  rms1 = librosa.feature.rms(