Politrees committed on
Commit
ecb00bc
·
verified ·
1 Parent(s): 6542672

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +604 -2
app.py CHANGED
@@ -1,4 +1,606 @@
1
  import os
 
 
 
 
2
 
3
- os.system("PolUVR --list_models")
4
- os.system("python PolUVR_app.py")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import torch
3
+ import shutil
4
+ import logging
5
+ import gradio as gr
6
 
7
+ from PolUVR.separator import Separator
8
+
9
# Select the compute device once at import time; autocast (mixed precision)
# is only enabled when running on CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
use_autocast = device == "cuda"

#=========================#
#     Roformer Models     #
#=========================#
# Display name (shown in the UI dropdown) -> checkpoint filename passed to
# Separator.load_model().
ROFORMER_MODELS = {
    'BS-Roformer-Viperx-1053': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
    'BS-Roformer-Viperx-1296': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
    'BS-Roformer-Viperx-1297': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
    'BS-Roformer-De-Reverb': 'deverb_bs_roformer_8_384dim_10depth.ckpt',
    'Mel-Roformer-Viperx-1143': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt',
    'Mel-Roformer-Crowd-Aufr33-Viperx': 'mel_band_roformer_crowd_aufr33_viperx_sdr_8.7144.ckpt',
    'Mel-Roformer-Karaoke-Aufr33-Viperx': 'mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt',
    'Mel-Roformer-Denoise-Aufr33': 'denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt',
    'Mel-Roformer-Denoise-Aufr33-Aggr': 'denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt',
    'MelBand Roformer Kim | Inst V1 by Unwa': 'melband_roformer_inst_v1.ckpt',
    'MelBand Roformer Kim | Inst V2 by Unwa': 'melband_roformer_inst_v2.ckpt',
    'MelBand Roformer Kim | InstVoc Duality V1 by Unwa': 'melband_roformer_instvoc_duality_v1.ckpt',
    # NOTE(review): "instvox" below differs from "instvoc" above — confirm this
    # filename is correct upstream and not a typo.
    'MelBand Roformer Kim | InstVoc Duality V2 by Unwa': 'melband_roformer_instvox_duality_v2.ckpt',
}
#=========================#
#      MDX23C Models      #
#=========================#
# Checkpoint filenames used directly as both the UI label and the model id.
MDX23C_MODELS = [
    'MDX23C-8KFFT-InstVoc_HQ.ckpt',
    'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
    'MDX23C_D1581.ckpt',
]
#=========================#
#     MDXN-NET Models     #
#=========================#
# ONNX model filenames for the MDX-NET architecture.
MDXNET_MODELS = [
    'UVR-MDX-NET-Inst_1.onnx',
    'UVR-MDX-NET-Inst_2.onnx',
    'UVR-MDX-NET-Inst_3.onnx',
    'UVR-MDX-NET-Inst_HQ_1.onnx',
    'UVR-MDX-NET-Inst_HQ_2.onnx',
    'UVR-MDX-NET-Inst_HQ_3.onnx',
    'UVR-MDX-NET-Inst_HQ_4.onnx',
    'UVR-MDX-NET-Inst_HQ_5.onnx',
    'UVR-MDX-NET_Inst_82_beta.onnx',
    'UVR-MDX-NET_Inst_90_beta.onnx',
    'UVR-MDX-NET_Inst_187_beta.onnx',
    'UVR-MDX-NET-Inst_full_292.onnx',
    'UVR-MDX-NET_Main_340.onnx',
    'UVR-MDX-NET_Main_390.onnx',
    'UVR-MDX-NET_Main_406.onnx',
    'UVR-MDX-NET_Main_427.onnx',
    'UVR-MDX-NET_Main_438.onnx',
    'UVR-MDX-NET-Crowd_HQ_1.onnx',
    'UVR-MDX-NET-Voc_FT.onnx',
    'UVR_MDXNET_1_9703.onnx',
    'UVR_MDXNET_2_9682.onnx',
    'UVR_MDXNET_3_9662.onnx',
    'UVR_MDXNET_9482.onnx',
    'UVR_MDXNET_KARA.onnx',
    'UVR_MDXNET_KARA_2.onnx',
    'UVR_MDXNET_Main.onnx',
    'kuielab_a_bass.onnx',
    'kuielab_a_drums.onnx',
    'kuielab_a_other.onnx',
    'kuielab_a_vocals.onnx',
    'kuielab_b_bass.onnx',
    'kuielab_b_drums.onnx',
    'kuielab_b_other.onnx',
    'kuielab_b_vocals.onnx',
    'Kim_Inst.onnx',
    'Kim_Vocal_1.onnx',
    'Kim_Vocal_2.onnx',
    'Reverb_HQ_By_FoxJoy.onnx',
]
#========================#
#     VR-ARCH Models     #
#========================#
# PyTorch weight files for the VR architecture.
VR_ARCH_MODELS = [
    '1_HP-UVR.pth',
    '2_HP-UVR.pth',
    '3_HP-Vocal-UVR.pth',
    '4_HP-Vocal-UVR.pth',
    '5_HP-Karaoke-UVR.pth',
    '6_HP-Karaoke-UVR.pth',
    '7_HP2-UVR.pth',
    '8_HP2-UVR.pth',
    '9_HP2-UVR.pth',
    '10_SP-UVR-2B-32000-1.pth',
    '11_SP-UVR-2B-32000-2.pth',
    '12_SP-UVR-3B-44100.pth',
    '13_SP-UVR-4B-44100-1.pth',
    '14_SP-UVR-4B-44100-2.pth',
    '15_SP-UVR-MID-44100-1.pth',
    '16_SP-UVR-MID-44100-2.pth',
    '17_HP-Wind_Inst-UVR.pth',
    'MGM_HIGHEND_v4.pth',
    'MGM_LOWEND_A_v4.pth',
    'MGM_LOWEND_B_v4.pth',
    'MGM_MAIN_v4.pth',
    'UVR-BVE-4B_SN-44100-1.pth',
    'UVR-DeEcho-DeReverb.pth',
    'UVR-De-Echo-Aggressive.pth',
    'UVR-De-Echo-Normal.pth',
    'UVR-DeNoise-Lite.pth',
    'UVR-DeNoise.pth',
]
#=======================#
#     DEMUCS Models     #
#=======================#
# Demucs model config files; 'htdemucs_6s.yaml' produces six stems,
# the others four (see demucs_separator / update_stems).
DEMUCS_MODELS = [
    'hdemucs_mmi.yaml',
    'htdemucs.yaml',
    'htdemucs_6s.yaml',
    'htdemucs_ft.yaml',
]
123
def print_message(input_file, model_name):
    """Print a console banner describing the upcoming separation run."""
    track = os.path.splitext(os.path.basename(input_file))[0]
    banner = (
        "\n",
        "🎵 PolUVR 🎵",
        f"Input audio: {track}",
        f"Separation Model: {model_name}",
        "Audio Separation Process...",
    )
    for line in banner:
        print(line)
132
def prepare_output_dir(input_file, output_dir):
    """Create a clean per-track output directory and return its path.

    The directory is named after the input file's base name (extension
    stripped) inside *output_dir*. If it already exists it is removed
    first, so each run starts with an empty directory.

    Args:
        input_file: Path to the audio file being processed.
        output_dir: Root directory under which the per-track folder is made.

    Returns:
        The path of the (now empty) per-track output directory.

    Raises:
        RuntimeError: If the directory cannot be removed or created.
    """
    base_name = os.path.splitext(os.path.basename(input_file))[0]
    out_dir = os.path.join(output_dir, base_name)
    try:
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.makedirs(out_dir)
    except Exception as e:
        # Chain the original exception (`from e`) so the underlying OS error
        # is preserved — consistent with the separator functions below.
        raise RuntimeError(f"Failed to prepare output directory {out_dir}: {e}") from e
    return out_dir
144
def rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem):
    """Resolve the user-supplied stem name templates for one input file.

    Each template may contain the placeholder ``{base_name}``, which is
    replaced with the input file's base name (extension stripped).
    Returns a dict mapping stem label -> resolved file name.
    """
    base_name = os.path.splitext(os.path.basename(audio))[0]
    templates = {
        "Vocals": vocals_stem,
        "Instrumental": instrumental_stem,
        "Drums": drums_stem,
        "Bass": bass_stem,
        "Other": other_stem,
        "Guitar": guitar_stem,
        "Piano": piano_stem,
    }
    return {label: tpl.replace("{base_name}", base_name) for label, tpl in templates.items()}
157
def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate audio using Roformer model.

    Args:
        audio: Path to the input audio file.
        model_key: Display name; resolved to a checkpoint via ROFORMER_MODELS.
        seg_size / override_seg_size / overlap / pitch_shift / batch_size:
            MDXC architecture parameters forwarded to Separator.
        model_dir: Directory where model files are cached.
        out_dir: Root output directory (replaced by a per-track subfolder).
        out_format: Output audio format (e.g. "wav", "flac", "mp3").
        norm_thresh / amp_thresh: Normalization/amplification thresholds.
        *_stem: Naming templates resolved by rename_stems().
        progress: Gradio progress tracker (UI only).

    Returns:
        Tuple of two output file paths (the separated stems).

    Raises:
        RuntimeError: If any step of the separation fails (original error chained).
    """
    stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem)
    print_message(audio, model_key)
    # KeyError here (unknown model_key) is intentionally outside the try block.
    model = ROFORMER_MODELS[model_key]
    try:
        out_dir = prepare_output_dir(audio, out_dir)
        separator = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=out_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            # Roformer checkpoints are driven through the MDXC parameter set.
            mdxc_params={
                "segment_size": seg_size,
                "override_model_segment_size": override_seg_size,
                "batch_size": batch_size,
                "overlap": overlap,
                "pitch_shift": pitch_shift,
            }
        )

        # NOTE(review): progress text reads "loaded"/"separated" before the
        # corresponding step actually runs — confirm the wording is intended.
        progress(0.2, desc="Model loaded...")
        separator.load_model(model_filename=model)

        progress(0.7, desc="Audio separated...")
        separation = separator.separate(audio, stemname)
        print(f"Separation complete!\nResults: {', '.join(separation)}")

        stems = [os.path.join(out_dir, file_name) for file_name in separation]
        # Assumes the model yields exactly two stems — TODO confirm for all
        # entries of ROFORMER_MODELS.
        return stems[0], stems[1]
    except Exception as e:
        raise RuntimeError(f"Roformer separation failed: {e}") from e
193
def mdx23c_separator(audio, model, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate audio using MDX23C model.

    Mirrors roformer_separator(), except *model* is already a checkpoint
    filename (from MDX23C_MODELS) rather than a display name.

    Returns:
        Tuple of two output file paths (the separated stems).

    Raises:
        RuntimeError: If any step of the separation fails (original error chained).
    """
    stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem)
    print_message(audio, model)
    try:
        out_dir = prepare_output_dir(audio, out_dir)
        separator = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=out_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            mdxc_params={
                "segment_size": seg_size,
                "override_model_segment_size": override_seg_size,
                "batch_size": batch_size,
                "overlap": overlap,
                "pitch_shift": pitch_shift,
            }
        )

        # NOTE(review): progress text describes each step in the past tense
        # before it runs — confirm the wording is intended.
        progress(0.2, desc="Model loaded...")
        separator.load_model(model_filename=model)

        progress(0.7, desc="Audio separated...")
        separation = separator.separate(audio, stemname)
        print(f"Separation complete!\nResults: {', '.join(separation)}")

        stems = [os.path.join(out_dir, file_name) for file_name in separation]
        # Assumes exactly two output stems — TODO confirm for all MDX23C models.
        return stems[0], stems[1]
    except Exception as e:
        raise RuntimeError(f"MDX23C separation failed: {e}") from e
228
def mdx_separator(audio, model, hop_length, seg_size, overlap, denoise, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate audio using MDX-NET model.

    *model* is an ONNX filename from MDXNET_MODELS. Architecture-specific
    knobs here are hop_length and the post-separation denoise flag.

    Returns:
        Tuple of two output file paths (the separated stems).

    Raises:
        RuntimeError: If any step of the separation fails (original error chained).
    """
    stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem)
    print_message(audio, model)
    try:
        out_dir = prepare_output_dir(audio, out_dir)
        separator = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=out_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            mdx_params={
                "hop_length": hop_length,
                "segment_size": seg_size,
                "overlap": overlap,
                "batch_size": batch_size,
                "enable_denoise": denoise,
            }
        )

        # NOTE(review): progress text describes each step before it runs —
        # confirm the wording is intended.
        progress(0.2, desc="Model loaded...")
        separator.load_model(model_filename=model)

        progress(0.7, desc="Audio separated...")
        separation = separator.separate(audio, stemname)
        print(f"Separation complete!\nResults: {', '.join(separation)}")

        stems = [os.path.join(out_dir, file_name) for file_name in separation]
        # Assumes exactly two output stems — TODO confirm for all MDX-NET models.
        return stems[0], stems[1]
    except Exception as e:
        raise RuntimeError(f"MDX-NET separation failed: {e}") from e
263
def vr_separator(audio, model, window_size, aggression, tta, post_process, post_process_threshold, high_end_process, model_dir, out_dir, out_format, norm_thresh, amp_thresh, batch_size, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate audio using VR ARCH model.

    *model* is a .pth filename from VR_ARCH_MODELS. VR-specific knobs:
    window_size, aggression, TTA, post-processing and high-end mirroring.

    Returns:
        Tuple of two output file paths (the separated stems).

    Raises:
        RuntimeError: If any step of the separation fails (original error chained).
    """
    stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem)
    print_message(audio, model)
    try:
        out_dir = prepare_output_dir(audio, out_dir)
        separator = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=out_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            vr_params={
                "batch_size": batch_size,
                "window_size": window_size,
                "aggression": aggression,
                "enable_tta": tta,
                "enable_post_process": post_process,
                "post_process_threshold": post_process_threshold,
                "high_end_process": high_end_process,
            }
        )

        # NOTE(review): progress text describes each step before it runs —
        # confirm the wording is intended.
        progress(0.2, desc="Model loaded...")
        separator.load_model(model_filename=model)

        progress(0.7, desc="Audio separated...")
        separation = separator.separate(audio, stemname)
        print(f"Separation complete!\nResults: {', '.join(separation)}")

        stems = [os.path.join(out_dir, file_name) for file_name in separation]
        # Assumes exactly two output stems — TODO confirm for all VR models.
        return stems[0], stems[1]
    except Exception as e:
        raise RuntimeError(f"VR ARCH separation failed: {e}") from e
300
def demucs_separator(audio, model, seg_size, shifts, overlap, segments_enabled, model_dir, out_dir, out_format, norm_thresh, amp_thresh, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem, progress=gr.Progress(track_tqdm=True)):
    """Separate audio using Demucs model.

    *model* is a .yaml config name from DEMUCS_MODELS. Unlike the other
    separators there is no batch_size parameter, and the return value is
    always a 6-tuple: six stem paths for 'htdemucs_6s.yaml', otherwise
    four stem paths padded with two Nones (so the UI's six Audio outputs
    always receive a value).

    Raises:
        RuntimeError: If any step of the separation fails (original error chained).
    """
    stemname = rename_stems(audio, vocals_stem, instrumental_stem, other_stem, drums_stem, bass_stem, guitar_stem, piano_stem)
    print_message(audio, model)
    try:
        out_dir = prepare_output_dir(audio, out_dir)
        separator = Separator(
            log_level=logging.WARNING,
            model_file_dir=model_dir,
            output_dir=out_dir,
            output_format=out_format,
            normalization_threshold=norm_thresh,
            amplification_threshold=amp_thresh,
            use_autocast=use_autocast,
            demucs_params={
                "segment_size": seg_size,
                "shifts": shifts,
                "overlap": overlap,
                "segments_enabled": segments_enabled,
            }
        )

        # NOTE(review): progress text describes each step before it runs —
        # confirm the wording is intended.
        progress(0.2, desc="Model loaded...")
        separator.load_model(model_filename=model)

        progress(0.7, desc="Audio separated...")
        separation = separator.separate(audio, stemname)
        print(f"Separation complete!\nResults: {', '.join(separation)}")

        stems = [os.path.join(out_dir, file_name) for file_name in separation]

        # Only the 6-stem model fills all six outputs; others return 4 + 2 Nones.
        if model == "htdemucs_6s.yaml":
            return stems[0], stems[1], stems[2], stems[3], stems[4], stems[5]
        else:
            return stems[0], stems[1], stems[2], stems[3], None, None
    except Exception as e:
        raise RuntimeError(f"Demucs separation failed: {e}") from e
338
def update_stems(model):
    """Toggle visibility of the extra stem row: visible only for the
    six-stem Demucs model ('htdemucs_6s.yaml')."""
    return gr.update(visible=model == "htdemucs_6s.yaml")
344
# ------------------------------------------------------------------
# Gradio UI: one tab per separation architecture plus a Settings tab.
# Widget variables declared here are wired to the separator functions
# via the .click() handlers at the bottom of this block.
# ------------------------------------------------------------------
with gr.Blocks(
    title="🎵 PolUVR 🎵",
    css="footer{display:none !important}",  # hide the default Gradio footer
    theme=gr.themes.Default(
        spacing_size="sm",
        radius_size="lg",
    )
) as app:
    gr.HTML("<h1> 🎵 PolUVR 🎵 </h1>")

    # --- Roformer tab: model choice, MDXC-style knobs, 2 output stems ---
    with gr.Tab("Roformer"):
        with gr.Group():
            with gr.Row():
                roformer_model = gr.Dropdown(label="Select the Model", choices=list(ROFORMER_MODELS.keys()))
            with gr.Row():
                roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
                roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
                roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.")
                roformer_pitch_shift = gr.Slider(minimum=-12, maximum=12, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
            with gr.Row():
                roformer_audio = gr.Audio(label="Input Audio", type="filepath")
            with gr.Row():
                roformer_button = gr.Button("Separate!", variant="primary")
            with gr.Row():
                roformer_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
                roformer_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)

    # --- MDX23C tab ---
    with gr.Tab("MDX23C"):
        with gr.Group():
            with gr.Row():
                mdx23c_model = gr.Dropdown(label="Select the Model", choices=MDX23C_MODELS)
            with gr.Row():
                mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
                mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
                mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
                mdx23c_pitch_shift = gr.Slider(minimum=-12, maximum=12, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
            with gr.Row():
                mdx23c_audio = gr.Audio(label="Input Audio", type="filepath")
            with gr.Row():
                mdx23c_button = gr.Button("Separate!", variant="primary")
            with gr.Row():
                mdx23c_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
                mdx23c_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)

    # --- MDX-NET tab ---
    with gr.Tab("MDX-NET"):
        with gr.Group():
            with gr.Row():
                mdx_model = gr.Dropdown(label="Select the Model", choices=MDXNET_MODELS)
            with gr.Row():
                mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.")
                mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
                mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
                mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
            with gr.Row():
                mdx_audio = gr.Audio(label="Input Audio", type="filepath")
            with gr.Row():
                mdx_button = gr.Button("Separate!", variant="primary")
            with gr.Row():
                mdx_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
                mdx_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)

    # --- VR ARCH tab ---
    with gr.Tab("VR ARCH"):
        with gr.Group():
            with gr.Row():
                vr_model = gr.Dropdown(label="Select the Model", choices=VR_ARCH_MODELS)
            with gr.Row():
                vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.")
                vr_aggression = gr.Slider(minimum=1, maximum=50, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.")
                vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
                vr_post_process = gr.Checkbox(value=False, label="Post Process", info="Identify leftover artifacts within vocal output; may improve separation for some songs.")
                vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.")
                vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
            with gr.Row():
                vr_audio = gr.Audio(label="Input Audio", type="filepath")
            with gr.Row():
                vr_button = gr.Button("Separate!", variant="primary")
            with gr.Row():
                vr_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
                vr_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)

    # --- Demucs tab: up to six stems; stems 5/6 are hidden unless the
    #     6-stem model is selected (see update_stems wiring below) ---
    with gr.Tab("Demucs"):
        with gr.Group():
            with gr.Row():
                demucs_model = gr.Dropdown(label="Select the Model", choices=DEMUCS_MODELS)
            with gr.Row():
                demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.")
                demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
                demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.")
                demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
            with gr.Row():
                demucs_audio = gr.Audio(label="Input Audio", type="filepath")
            with gr.Row():
                demucs_button = gr.Button("Separate!", variant="primary")
            with gr.Row():
                demucs_stem1 = gr.Audio(label="Stem 1", type="filepath", interactive=False)
                demucs_stem2 = gr.Audio(label="Stem 2", type="filepath", interactive=False)
            with gr.Row():
                demucs_stem3 = gr.Audio(label="Stem 3", type="filepath", interactive=False)
                demucs_stem4 = gr.Audio(label="Stem 4", type="filepath", interactive=False)
            with gr.Row(visible=False) as stem6:
                demucs_stem5 = gr.Audio(label="Stem 5", type="filepath", interactive=False)
                demucs_stem6 = gr.Audio(label="Stem 6", type="filepath", interactive=False)

    # --- Settings tab: values shared by every separator ---
    with gr.Tab("Settings"):
        with gr.Accordion("General settings", open=False):
            with gr.Group():
                model_file_dir = gr.Textbox(value="/tmp/PolUVR-models/", label="Directory to cache model files", info="The directory where model files are stored.", placeholder="/tmp/PolUVR-models/")
                with gr.Row():
                    output_dir = gr.Textbox(value="output", label="File output directory", info="The directory where output files will be saved.", placeholder="output")
                    output_format = gr.Dropdown(value="wav", choices=["wav", "flac", "mp3"], label="Output Format", info="The format of the output audio file.")
                with gr.Row():
                    norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
                    amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
                with gr.Row():
                    batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")

        # Templates consumed by rename_stems(); "{base_name}" expands to the
        # input file name without its extension.
        with gr.Accordion("Rename Stems", open=False):
            gr.HTML("<h3> `{base_name}` - input file name </h3>")
            with gr.Row():
                vocals_stem = gr.Textbox(value="{base_name}_(Vocals)", label="Vocals Stem", placeholder="{base_name}_(Vocals)")
                instrumental_stem = gr.Textbox(value="{base_name}_(Instrumental)", label="Instrumental Stem", placeholder="{base_name}_(Instrumental)")
                other_stem = gr.Textbox(value="{base_name}_(Other)", label="Other Stem", placeholder="{base_name}_(Other)")
            with gr.Row():
                drums_stem = gr.Textbox(value="{base_name}_(Drums)", label="Drums Stem", placeholder="{base_name}_(Drums)")
                bass_stem = gr.Textbox(value="{base_name}_(Bass)", label="Bass Stem", placeholder="{base_name}_(Bass)")
            with gr.Row():
                guitar_stem = gr.Textbox(value="{base_name}_(Guitar)", label="Guitar Stem", placeholder="{base_name}_(Guitar)")
                piano_stem = gr.Textbox(value="{base_name}_(Piano)", label="Piano Stem", placeholder="{base_name}_(Piano)")

    # Show/hide the extra Demucs stem row when the model selection changes.
    demucs_model.change(update_stems, inputs=[demucs_model], outputs=stem6)

    # --- Event wiring: each button routes its tab's widgets plus the shared
    #     settings into the matching separator function ---
    roformer_button.click(
        roformer_separator,
        inputs=[
            roformer_audio,
            roformer_model,
            roformer_seg_size,
            roformer_override_seg_size,
            roformer_overlap,
            roformer_pitch_shift,
            model_file_dir,
            output_dir,
            output_format,
            norm_threshold,
            amp_threshold,
            batch_size,
            vocals_stem,
            instrumental_stem,
            other_stem,
            drums_stem,
            bass_stem,
            guitar_stem,
            piano_stem,
        ],
        outputs=[roformer_stem1, roformer_stem2],
    )
    mdx23c_button.click(
        mdx23c_separator,
        inputs=[
            mdx23c_audio,
            mdx23c_model,
            mdx23c_seg_size,
            mdx23c_override_seg_size,
            mdx23c_overlap,
            mdx23c_pitch_shift,
            model_file_dir,
            output_dir,
            output_format,
            norm_threshold,
            amp_threshold,
            batch_size,
            vocals_stem,
            instrumental_stem,
            other_stem,
            drums_stem,
            bass_stem,
            guitar_stem,
            piano_stem,
        ],
        outputs=[mdx23c_stem1, mdx23c_stem2],
    )
    mdx_button.click(
        mdx_separator,
        inputs=[
            mdx_audio,
            mdx_model,
            mdx_hop_length,
            mdx_seg_size,
            mdx_overlap,
            mdx_denoise,
            model_file_dir,
            output_dir,
            output_format,
            norm_threshold,
            amp_threshold,
            batch_size,
            vocals_stem,
            instrumental_stem,
            other_stem,
            drums_stem,
            bass_stem,
            guitar_stem,
            piano_stem,
        ],
        outputs=[mdx_stem1, mdx_stem2],
    )
    vr_button.click(
        vr_separator,
        inputs=[
            vr_audio,
            vr_model,
            vr_window_size,
            vr_aggression,
            vr_tta,
            vr_post_process,
            vr_post_process_threshold,
            vr_high_end_process,
            model_file_dir,
            output_dir,
            output_format,
            norm_threshold,
            amp_threshold,
            batch_size,
            vocals_stem,
            instrumental_stem,
            other_stem,
            drums_stem,
            bass_stem,
            guitar_stem,
            piano_stem,
        ],
        outputs=[vr_stem1, vr_stem2],
    )
    # Demucs has no batch_size input; its separator always returns six values.
    demucs_button.click(
        demucs_separator,
        inputs=[
            demucs_audio,
            demucs_model,
            demucs_seg_size,
            demucs_shifts,
            demucs_overlap,
            demucs_segments_enabled,
            model_file_dir,
            output_dir,
            output_format,
            norm_threshold,
            amp_threshold,
            vocals_stem,
            instrumental_stem,
            other_stem,
            drums_stem,
            bass_stem,
            guitar_stem,
            piano_stem,
        ],
        outputs=[demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4, demucs_stem5, demucs_stem6],
    )
602
def main():
    """Entry point: launch the Gradio app with a public share link."""
    app.launch(share=True)

if __name__ == "__main__":
    main()