Update main.py
Browse files
main.py
CHANGED
@@ -21,7 +21,7 @@ from pydub import AudioSegment
|
|
21 |
from mdx import run_mdx
|
22 |
from rvc import Config, load_hubert, get_vc, rvc_infer
|
23 |
|
24 |
-
BASE_DIR = os.path.dirname(os.path.
|
25 |
|
26 |
mdxnet_models_dir = os.path.join(BASE_DIR, 'mdxnet_models')
|
27 |
rvc_models_dir = os.path.join(BASE_DIR, 'rvc_models')
|
@@ -30,11 +30,7 @@ output_dir = os.path.join(BASE_DIR, 'song_output')
|
|
30 |
|
31 |
def get_youtube_video_id(url, ignore_playlist=True):
|
32 |
"""
|
33 |
-
|
34 |
-
http://youtu.be/SA2iWivDJiE
|
35 |
-
http://www.youtube.com/watch?v=_oPAwA_Udwc&feature=feedu
|
36 |
-
http://www.youtube.com/embed/SA2iWivDJiE
|
37 |
-
http://www.youtube.com/v/SA2iWivDJiE?version=3&hl=en_US
|
38 |
"""
|
39 |
query = urlparse(url)
|
40 |
if query.hostname == 'youtu.be':
|
@@ -44,7 +40,6 @@ def get_youtube_video_id(url, ignore_playlist=True):
|
|
44 |
|
45 |
if query.hostname in {'www.youtube.com', 'youtube.com', 'music.youtube.com'}:
|
46 |
if not ignore_playlist:
|
47 |
-
# use case: get playlist id not current video in playlist
|
48 |
with suppress(KeyError):
|
49 |
return parse_qs(query.query)['list'][0]
|
50 |
if query.path == '/watch':
|
@@ -55,12 +50,17 @@ def get_youtube_video_id(url, ignore_playlist=True):
|
|
55 |
return query.path.split('/')[2]
|
56 |
if query.path[:3] == '/v/':
|
57 |
return query.path.split('/')[2]
|
|
|
|
|
58 |
|
59 |
-
# returns None for invalid YouTube url
|
60 |
return None
|
61 |
|
62 |
|
|
|
63 |
def yt_download(link):
|
|
|
|
|
|
|
64 |
ydl_opts = {
|
65 |
'format': 'bestaudio',
|
66 |
'outtmpl': '%(title)s',
|
@@ -125,7 +125,6 @@ def get_audio_paths(song_dir):
|
|
125 |
def convert_to_stereo(audio_path):
|
126 |
wave, sr = librosa.load(audio_path, mono=False, sr=44100)
|
127 |
|
128 |
-
# check if mono
|
129 |
if type(wave[0]) != np.ndarray:
|
130 |
stereo_path = f'{os.path.splitext(audio_path)[0]}_stereo.wav'
|
131 |
command = shlex.split(f'ffmpeg -y -loglevel error -i "{audio_path}" -ac 2 -f wav "{stereo_path}"')
|
@@ -197,7 +196,6 @@ def voice_change(voice_model, vocals_path, output_path, pitch_change, f0_method,
|
|
197 |
hubert_model = load_hubert(device, config.is_half, os.path.join(rvc_models_dir, 'hubert_base.pt'))
|
198 |
cpt, version, net_g, tgt_sr, vc = get_vc(device, config.is_half, config, rvc_model_path)
|
199 |
|
200 |
-
# convert main vocals
|
201 |
rvc_infer(rvc_index_path, index_rate, vocals_path, output_path, pitch_change, f0_method, cpt, version, net_g, filter_radius, tgt_sr, rms_mix_rate, protect, crepe_hop_length, vc, hubert_model)
|
202 |
del hubert_model, cpt
|
203 |
gc.collect()
|
@@ -206,18 +204,16 @@ def voice_change(voice_model, vocals_path, output_path, pitch_change, f0_method,
|
|
206 |
def add_audio_effects(audio_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping):
|
207 |
output_path = f'{os.path.splitext(audio_path)[0]}_mixed.wav'
|
208 |
|
209 |
-
# Initialize audio effects plugins
|
210 |
board = Pedalboard(
|
211 |
[
|
212 |
HighpassFilter(),
|
213 |
Compressor(ratio=4, threshold_db=-15),
|
214 |
Reverb(room_size=reverb_rm_size, dry_level=reverb_dry, wet_level=reverb_wet, damping=reverb_damping)
|
215 |
-
|
216 |
)
|
217 |
|
218 |
with AudioFile(audio_path) as f:
|
219 |
with AudioFile(output_path, 'w', f.samplerate, f.num_channels) as o:
|
220 |
-
# Read one second of audio at a time, until the file is empty:
|
221 |
while f.tell() < f.frames:
|
222 |
chunk = f.read(int(f.samplerate))
|
223 |
effected = board(chunk, f.samplerate, reset=False)
|
@@ -226,138 +222,91 @@ def add_audio_effects(audio_path, reverb_rm_size, reverb_wet, reverb_dry, reverb
|
|
226 |
return output_path
|
227 |
|
228 |
|
229 |
-
def
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
|
235 |
|
236 |
-
def
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
try:
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
error_msg = 'Invalid YouTube url.'
|
256 |
-
raise_exception(error_msg, is_webui)
|
257 |
-
|
258 |
-
# local audio file
|
259 |
-
else:
|
260 |
-
input_type = 'local'
|
261 |
-
song_input = song_input.strip('\"')
|
262 |
-
if os.path.exists(song_input):
|
263 |
-
song_id = get_hash(song_input)
|
264 |
-
else:
|
265 |
-
error_msg = f'{song_input} does not exist.'
|
266 |
-
song_id = None
|
267 |
-
raise_exception(error_msg, is_webui)
|
268 |
-
|
269 |
-
song_dir = os.path.join(output_dir, song_id)
|
270 |
-
|
271 |
-
if not os.path.exists(song_dir):
|
272 |
-
os.makedirs(song_dir)
|
273 |
-
orig_song_path, vocals_path, instrumentals_path, main_vocals_path, backup_vocals_path, main_vocals_dereverb_path = preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type, progress)
|
274 |
-
|
275 |
-
else:
|
276 |
-
vocals_path, main_vocals_path = None, None
|
277 |
-
paths = get_audio_paths(song_dir)
|
278 |
-
|
279 |
-
# if any of the audio files aren't available or keep intermediate files, rerun preprocess
|
280 |
-
if any(path is None for path in paths) or keep_files:
|
281 |
-
orig_song_path, vocals_path, instrumentals_path, main_vocals_path, backup_vocals_path, main_vocals_dereverb_path = preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type, progress)
|
282 |
-
else:
|
283 |
-
orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path = paths
|
284 |
-
|
285 |
-
pitch_change = pitch_change + pitch_change_all
|
286 |
-
ai_vocals_path = os.path.join(song_dir, f'{os.path.splitext(os.path.basename(orig_song_path))[0]}_lead_{voice_model}_p{pitch_change}_i{index_rate}_fr{filter_radius}_rms{rms_mix_rate}_pro{protect}_{f0_method}{"" if f0_method != "mangio-crepe" else f"_{crepe_hop_length}"}.wav')
|
287 |
-
ai_backing_path = os.path.join(song_dir, f'{os.path.splitext(os.path.basename(orig_song_path))[0]}_backing_{voice_model}_p{pitch_change}_i{index_rate}_fr{filter_radius}_rms{rms_mix_rate}_pro{protect}_{f0_method}{"" if f0_method != "mangio-crepe" else f"_{crepe_hop_length}"}.wav')
|
288 |
-
|
289 |
-
ai_cover_path = os.path.join(song_dir, f'normal {os.path.splitext(os.path.basename(orig_song_path))[0]} ({voice_model} Ver).{output_format}')
|
290 |
-
ai_cover_backing_path = os.path.join(song_dir, f'with backing {os.path.splitext(os.path.basename(orig_song_path))[0]} ({voice_model} Ver).{output_format}')
|
291 |
-
|
292 |
-
if not os.path.exists(ai_vocals_path):
|
293 |
-
display_progress('[~] Converting lead voice using RVC...', 0.5, is_webui, progress)
|
294 |
-
voice_change(voice_model, main_vocals_dereverb_path, ai_vocals_path, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui)
|
295 |
-
|
296 |
-
display_progress('[~] Converting backing voice using RVC...', 0.65, is_webui, progress)
|
297 |
-
voice_change(voice_model, backup_vocals_path, ai_backing_path, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui)
|
298 |
-
|
299 |
-
display_progress('[~] Applying audio effects to Vocals...', 0.8, is_webui, progress)
|
300 |
-
ai_vocals_mixed_path = add_audio_effects(ai_vocals_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping)
|
301 |
-
ai_backing_mixed_path = add_audio_effects(ai_backing_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping)
|
302 |
-
|
303 |
-
if pitch_change_all != 0:
|
304 |
-
display_progress('[~] Applying overall pitch change', 0.85, is_webui, progress)
|
305 |
-
instrumentals_path = pitch_shift(instrumentals_path, pitch_change_all)
|
306 |
-
backup_vocals_path = pitch_shift(backup_vocals_path, pitch_change_all)
|
307 |
-
|
308 |
-
display_progress('[~] Combining AI Vocals and Instrumentals...', 0.9, is_webui, progress)
|
309 |
-
combine_audio([ai_vocals_mixed_path, backup_vocals_path, instrumentals_path], ai_cover_path, main_gain, backup_gain, inst_gain, output_format)
|
310 |
-
combine_audio([ai_vocals_mixed_path, ai_backing_mixed_path, instrumentals_path], ai_cover_backing_path, main_gain, backup_gain, inst_gain, output_format)
|
311 |
-
|
312 |
-
if not keep_files:
|
313 |
-
display_progress('[~] Removing intermediate audio files...', 0.95, is_webui, progress)
|
314 |
-
intermediate_files = [vocals_path, main_vocals_path, ai_vocals_mixed_path, ai_backing_mixed_path]
|
315 |
-
if pitch_change_all != 0:
|
316 |
-
intermediate_files += [instrumentals_path, backup_vocals_path]
|
317 |
-
for file in intermediate_files:
|
318 |
-
if file and os.path.exists(file):
|
319 |
-
os.remove(file)
|
320 |
-
|
321 |
-
return ai_cover_path, ai_cover_backing_path
|
322 |
|
|
|
323 |
except Exception as e:
|
324 |
-
raise_exception(str(e), is_webui)
|
|
|
|
|
|
|
|
|
|
|
325 |
|
326 |
|
327 |
if __name__ == '__main__':
|
328 |
-
parser = argparse.ArgumentParser(description='
|
329 |
-
parser.add_argument('
|
330 |
-
parser.add_argument('
|
331 |
-
parser.add_argument('
|
332 |
-
parser.add_argument('
|
333 |
-
parser.add_argument('
|
334 |
-
parser.add_argument('
|
335 |
-
parser.add_argument('
|
336 |
-
parser.add_argument('
|
337 |
-
parser.add_argument('
|
338 |
-
parser.add_argument('
|
339 |
-
parser.add_argument('
|
340 |
-
parser.add_argument('
|
341 |
-
parser.add_argument('
|
342 |
-
parser.add_argument('
|
343 |
-
|
344 |
-
parser.add_argument('-rwet', '--reverb-wetness', type=float, default=0.2, help='Reverb wet level between 0 and 1')
|
345 |
-
parser.add_argument('-rdry', '--reverb-dryness', type=float, default=0.8, help='Reverb dry level between 0 and 1')
|
346 |
-
parser.add_argument('-rdamp', '--reverb-damping', type=float, default=0.7, help='Reverb damping between 0 and 1')
|
347 |
-
parser.add_argument('-oformat', '--output-format', type=str, default='mp3', help='Output format of audio file. mp3 for smaller file size, wav for best quality')
|
348 |
args = parser.parse_args()
|
349 |
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
|
|
|
|
|
|
|
21 |
from mdx import run_mdx
|
22 |
from rvc import Config, load_hubert, get_vc, rvc_infer
|
23 |
|
24 |
+
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
25 |
|
26 |
mdxnet_models_dir = os.path.join(BASE_DIR, 'mdxnet_models')
|
27 |
rvc_models_dir = os.path.join(BASE_DIR, 'rvc_models')
|
|
|
30 |
|
31 |
def get_youtube_video_id(url, ignore_playlist=True):
|
32 |
"""
|
33 |
+
Extracts the video ID from a YouTube URL.
|
|
|
|
|
|
|
|
|
34 |
"""
|
35 |
query = urlparse(url)
|
36 |
if query.hostname == 'youtu.be':
|
|
|
40 |
|
41 |
if query.hostname in {'www.youtube.com', 'youtube.com', 'music.youtube.com'}:
|
42 |
if not ignore_playlist:
|
|
|
43 |
with suppress(KeyError):
|
44 |
return parse_qs(query.query)['list'][0]
|
45 |
if query.path == '/watch':
|
|
|
50 |
return query.path.split('/')[2]
|
51 |
if query.path[:3] == '/v/':
|
52 |
return query.path.split('/')[2]
|
53 |
+
if query.path[:8] == '/shorts/':
|
54 |
+
return query.path.split('/')[2]
|
55 |
|
|
|
56 |
return None
|
57 |
|
58 |
|
59 |
+
|
60 |
def yt_download(link):
|
61 |
+
"""
|
62 |
+
Downloads the best audio format from a YouTube link.
|
63 |
+
"""
|
64 |
ydl_opts = {
|
65 |
'format': 'bestaudio',
|
66 |
'outtmpl': '%(title)s',
|
|
|
125 |
def convert_to_stereo(audio_path):
|
126 |
wave, sr = librosa.load(audio_path, mono=False, sr=44100)
|
127 |
|
|
|
128 |
if type(wave[0]) != np.ndarray:
|
129 |
stereo_path = f'{os.path.splitext(audio_path)[0]}_stereo.wav'
|
130 |
command = shlex.split(f'ffmpeg -y -loglevel error -i "{audio_path}" -ac 2 -f wav "{stereo_path}"')
|
|
|
196 |
hubert_model = load_hubert(device, config.is_half, os.path.join(rvc_models_dir, 'hubert_base.pt'))
|
197 |
cpt, version, net_g, tgt_sr, vc = get_vc(device, config.is_half, config, rvc_model_path)
|
198 |
|
|
|
199 |
rvc_infer(rvc_index_path, index_rate, vocals_path, output_path, pitch_change, f0_method, cpt, version, net_g, filter_radius, tgt_sr, rms_mix_rate, protect, crepe_hop_length, vc, hubert_model)
|
200 |
del hubert_model, cpt
|
201 |
gc.collect()
|
|
|
204 |
def add_audio_effects(audio_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping):
|
205 |
output_path = f'{os.path.splitext(audio_path)[0]}_mixed.wav'
|
206 |
|
|
|
207 |
board = Pedalboard(
|
208 |
[
|
209 |
HighpassFilter(),
|
210 |
Compressor(ratio=4, threshold_db=-15),
|
211 |
Reverb(room_size=reverb_rm_size, dry_level=reverb_dry, wet_level=reverb_wet, damping=reverb_damping)
|
212 |
+
]
|
213 |
)
|
214 |
|
215 |
with AudioFile(audio_path) as f:
|
216 |
with AudioFile(output_path, 'w', f.samplerate, f.num_channels) as o:
|
|
|
217 |
while f.tell() < f.frames:
|
218 |
chunk = f.read(int(f.samplerate))
|
219 |
effected = board(chunk, f.samplerate, reset=False)
|
|
|
222 |
return output_path
|
223 |
|
224 |
|
225 |
+
def merge_audios(audio_paths, output_path):
    """Mix several audio files into a single WAV file.

    Every file in ``audio_paths`` is loaded and layered on top of the others,
    all starting at position 0, and the mix is exported to ``output_path``
    in WAV format.

    Args:
        audio_paths: Sequence of paths to the audio files to mix. Must
            contain at least one path.
        output_path: Destination path of the mixed WAV file.

    Raises:
        ValueError: If ``audio_paths`` is empty.
    """
    if not audio_paths:
        raise ValueError('merge_audios requires at least one audio path.')

    segments = [AudioSegment.from_file(path) for path in audio_paths]

    # overlay() returns a segment as long as the one it is called on, so the
    # longest track has to be the base; otherwise the tail of any longer
    # track would be cut off at the length of the first file.
    base_index = max(range(len(segments)), key=lambda i: len(segments[i]))
    combined = segments[base_index]
    for index, segment in enumerate(segments):
        if index != base_index:
            combined = combined.overlay(segment)

    combined.export(output_path, format='wav')
|
230 |
|
231 |
|
232 |
+
def process_and_save_song(song_input, input_type, voice_model, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping, progress, is_webui=False):
    """Run the full conversion pipeline for one song and return the result path.

    Steps, in order: preprocess the input with ``preprocess_song``, pitch-shift
    the de-reverbed main vocals, convert them with ``voice_change``, apply
    ``add_audio_effects``, and finally mix the processed vocals with the
    instrumentals via ``merge_audios``.

    Args:
        song_input: Path to a local audio file, or a YouTube URL.
        input_type: ``'yt'`` for a YouTube URL; anything else is treated as a
            local file.
        voice_model: Name of the voice model handed to ``voice_change``.
        pitch_change: Pitch change passed to both ``pitch_shift`` and
            ``voice_change``.
        f0_method, index_rate, filter_radius, rms_mix_rate, protect,
        crepe_hop_length: Forwarded unchanged to ``voice_change``.
        reverb_rm_size, reverb_wet, reverb_dry, reverb_damping: Forwarded
            unchanged to ``add_audio_effects``.
        progress: Progress handle forwarded to ``display_progress`` and
            ``preprocess_song``.
        is_webui: Forwarded to the helpers that report progress and errors.

    Returns:
        Path of the mixed output WAV file inside ``output_dir``.
    """
    # Validate the input before hashing it or creating any directories, so an
    # invalid link leaves nothing behind on disk.
    if input_type == 'yt':
        # A URL is not a file on disk, so the video id identifies the song
        # instead of a hash.
        # NOTE(review): assumes get_hash() reads the file at the given path - confirm.
        song_id = get_youtube_video_id(song_input)
        if not song_id:
            raise_exception('[!] Invalid YouTube link.', is_webui)
    else:
        song_id = get_hash(song_input)

    # makedirs creates output_dir as well when it does not exist yet.
    song_output_dir = os.path.join(output_dir, song_id)
    os.makedirs(song_output_dir, exist_ok=True)

    mdx_model_params = {
        'demucs_model_path': os.path.join(mdxnet_models_dir, 'models_demucs.h5'),
        'mdx_model_path': os.path.join(mdxnet_models_dir, 'models_mdx.h5'),
        'output_path': output_dir,
        'noise_protect': 0.33,
        'voc_model_path': os.path.join(mdxnet_models_dir, 'models_vocal.h5')
    }

    # Bound up front so the finally block and the return statement never hit
    # an unbound name when preprocessing fails early.
    orig_song_path = None
    final_output_path = None

    try:
        orig_song_path, vocals_path, instrumentals_path, main_vocals_path, backup_vocals_path, main_vocals_dereverb_path = preprocess_song(song_input, mdx_model_params, song_id, is_webui, input_type, progress)

        display_progress('[~] Changing Main Vocals to Target Voice...', 0.4, is_webui, progress)
        # NOTE(review): pitch_change is applied here and passed to
        # voice_change() again below - confirm the double shift is intended.
        pitch_shifted_main_vocals_path = pitch_shift(main_vocals_dereverb_path, pitch_change)
        output_vocals_path = os.path.join(song_output_dir, 'main_vocals_changed.wav')
        voice_change(voice_model, pitch_shifted_main_vocals_path, output_vocals_path, pitch_change, f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui)

        display_progress('[~] Adding Audio Effects...', 0.5, is_webui, progress)
        final_output_vocals_path = add_audio_effects(output_vocals_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping)

        display_progress('[~] Merging Vocal and Instrumental Tracks...', 0.6, is_webui, progress)
        final_output_path = os.path.join(output_dir, f'{os.path.basename(orig_song_path)}_{voice_model}_vocal_conversion.wav')
        merge_audios([final_output_vocals_path, instrumentals_path], final_output_path)

        display_progress('[~] Done!', 1.0, is_webui, progress)
    except Exception as e:
        raise_exception(f'[!] Processing failed: {str(e)}', is_webui)
    finally:
        # Remove the working copy of the song, but never the file the caller
        # passed in: preprocess_song may hand the input path straight back.
        if orig_song_path and os.path.abspath(orig_song_path) != os.path.abspath(song_input):
            with suppress(FileNotFoundError):
                os.remove(orig_song_path)

    return final_output_path
|
275 |
|
276 |
|
277 |
if __name__ == '__main__':
    # Command-line entry point: parse the options and run one conversion.
    parser = argparse.ArgumentParser(description='Process song with RVC.')

    # Mandatory arguments describing what to convert and with which model.
    parser.add_argument('--input', type=str, required=True, help='Path to the input song file or YouTube link.')
    parser.add_argument('--type', type=str, required=True, choices=['local', 'yt'], help='Type of input: "local" for a file, "yt" for a YouTube link.')
    parser.add_argument('--voice_model', type=str, required=True, help='Name of the voice model to use.')

    # Optional tuning knobs as (flag, type, default, help), registered in the
    # same order as before so the --help output is unchanged.
    optional_arguments = (
        ('--pitch_change', float, 0, 'Pitch change amount in semitones.'),
        ('--f0_method', str, 'crepe', 'F0 method to use.'),
        ('--index_rate', float, 1.0, 'Index rate.'),
        ('--filter_radius', float, 3.0, 'Filter radius.'),
        ('--rms_mix_rate', float, 0.25, 'RMS mix rate.'),
        ('--protect', float, 0.33, 'Protection rate.'),
        ('--crepe_hop_length', int, 128, 'Crepe hop length.'),
        ('--reverb_rm_size', float, 0.3, 'Reverb room size.'),
        ('--reverb_wet', float, 0.25, 'Reverb wet level.'),
        ('--reverb_dry', float, 0.75, 'Reverb dry level.'),
        ('--reverb_damping', float, 0.5, 'Reverb damping.'),
    )
    for flag, value_type, default_value, help_text in optional_arguments:
        parser.add_argument(flag, type=value_type, required=False, default=default_value, help=help_text)

    args = parser.parse_args()

    # No progress handle on the command line, so progress is None.
    process_and_save_song(
        song_input=args.input,
        input_type=args.type,
        voice_model=args.voice_model,
        pitch_change=args.pitch_change,
        f0_method=args.f0_method,
        index_rate=args.index_rate,
        filter_radius=args.filter_radius,
        rms_mix_rate=args.rms_mix_rate,
        protect=args.protect,
        crepe_hop_length=args.crepe_hop_length,
        reverb_rm_size=args.reverb_rm_size,
        reverb_wet=args.reverb_wet,
        reverb_dry=args.reverb_dry,
        reverb_damping=args.reverb_damping,
        progress=None,
    )
|