Upload app.py
With this change, the function should be executable. The remaining problems are:
- Give each label a descriptive name.
- Set the minimum, maximum, and step of each slider correctly.
- Where gr.Number fits better than gr.Slider, switch to it.
- Where a gr.Textbox is really a fixed set of options, make it a gr.Dropdown or gr.Radio.
- Give each choices parameter the right list of options.
- A few numbers are still passed around as strings; decide how to handle those (a hedged wrapper sketch follows the diff below).
- Decide on the layout, e.g. which components should sit side by side (see the sketch after this list).
- Decide which parts, if any, should live outside the tabs.
and so on.
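
For the component and layout points, a minimal sketch of the direction (the ranges, Radio choices, and gr.Row grouping below are illustrative guesses, not values confirmed against rvc_infer):

```python
import gradio as gr

with gr.Blocks() as demo:
    with gr.Tab("Inference"):
        with gr.Row():  # side-by-side placement of related fields
            model_name = gr.Textbox(label="Model Name")
            input_audio = gr.Audio(label="Input Audio", type="filepath")
        with gr.Row():
            # A slider (or gr.Number) instead of a free-text field;
            # the range and step here are guesses and need checking.
            f0_change = gr.Slider(label="Pitch Change (semitones)",
                                  minimum=-24, maximum=24, step=1, value=0)
            # A fixed choices list instead of a Textbox; the extra
            # method names are placeholders, not confirmed options.
            f0_method = gr.Radio(label="F0 Method",
                                 choices=["rmvpe+", "crepe", "fcpe"],
                                 value="rmvpe+")
        submit_inference = gr.Button("Inference", variant="primary")
    # A component used by more than one tab could sit outside the tabs.
    result_audio = gr.Audio(label="Output Audio", type="filepath")

demo.launch()
```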
app.py
CHANGED
@@ -1,181 +1,65 @@
-import gradio as gr
-from rvc_infer import download_online_model
 ⋮ (lines 3–65 of the old file are not recoverable from the page)
-    }
-
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-        info_dict = ydl.extract_info(url, download=True)
-        file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
-        sample_rate, audio_data = read(file_path)
-        audio_array = np.asarray(audio_data, dtype=np.int16)
-
-    return sample_rate, audio_array
-
-def roformer_separator(roformer_audio, roformer_model, roformer_output_format, roformer_overlap, roformer_segment_size, mdx23c_denoise, mdxnet_denoise, vrarch_tta, vrarch_high_end_process):
-    files_list = []
-    files_list.clear()
-    directory = "./outputs"
-    random_id = str(random.randint(10000, 99999))
-    pattern = f"{random_id}"
-    os.makedirs("outputs", exist_ok=True)
-    write(f'{random_id}.wav', roformer_audio[0], roformer_audio[1])
-    full_roformer_model = roformer_models[roformer_model]
-    prompt = f"audio-separator {random_id}.wav --model_filename {full_roformer_model} --output_dir=./outputs --output_format={roformer_output_format} --normalization=0.9 --mdxc_overlap={roformer_overlap} --mdxc_segment_size={roformer_segment_size}"
-
-    if mdx23c_denoise:
-        prompt += " --mdx_enable_denoise"
-
-    if mdxnet_denoise:
-        prompt += " --mdx_enable_denoise"
-
-    if vrarch_tta:
-        prompt += " --vr_enable_tta"
-    if vrarch_high_end_process:
-        prompt += " --vr_high_end_process"
-
-    os.system(prompt)
-
-    for file in os.listdir(directory):
-        if re.search(pattern, file):
-            files_list.append(os.path.join(directory, file))
-
-    stem1_file = files_list[0]
-    stem2_file = files_list[1]
-
-    return stem1_file, stem2_file
-
-CSS = """
-"""
-
-with gr.Blocks(theme="Hev832/Applio", fill_width=True, css=CSS) as demo:
-
-    with gr.Tabs():
-        with gr.Tab("inferenece"):
-            gr.Markdown("in progress")
-        with gr.Tab("Download model"):
-            gr.Markdown("## Download Model for infernece")
-            url_input = gr.Textbox(label="Model URL", placeholder="Enter the URL of the model")
-            dir_name_input = gr.Textbox(label="Directory Name", placeholder="Enter the directory name")
-
-            download_button = gr.Button("Download Model")
-            download_button.click(download_model, inputs=[url_input, dir_name_input], outputs=url_input)
-
-        with gr.Tab("UVR5"):
-            roformer_model = gr.Dropdown(
-                label = "Select the Model",
-                choices=list(uvr_models.keys()),
-                interactive = True
-            )
-            roformer_output_format = gr.Dropdown(
-                label = "Select the Output Format",
-                choices = output_format,
-                interactive = True
-            )
-            roformer_overlap = gr.Slider(
-                minimum = 2,
-                maximum = 4,
-                step = 1,
-                label = "Overlap",
-                info = "Amount of overlap between prediction windows.",
-                value = 4,
-                interactive = True
-            )
-            roformer_segment_size = gr.Slider(
-                minimum = 32,
-                maximum = 4000,
-                step = 32,
-                label = "Segment Size",
-                info = "Larger consumes more resources, but may give better results.",
-                value = 256,
-                interactive = True
-            )
-            mdx23c_denoise = gr.Checkbox(
-                label = "Denoise",
-                info = "Enable denoising during separation.",
-                value = False,
-                interactive = True
-            )
-        with gr.Tab(" Credits"):
-            gr.Markdown(
-            """
-            this project made by [Blane187](https://huggingface.co/Blane187) with Improvements by [John6666](https://huggingfce.co/John6666)
-            """)
-
-demo.launch(debug=True,show_api=False)
+import gradio as gr
+from rvc_infer import download_online_model, infer_audio
+
+def download_model(url, dir_name):
+    output_models = download_online_model(url, dir_name)
+    return output_models
+
+
+CSS = """
+"""
+
+with gr.Blocks(theme="Hev832/Applio", fill_width=True, css=CSS) as demo:
+    with gr.Tab("Inferenece"):
+        gr.Markdown("in progress")
+        model_name = gr.Textbox(label="Model Name #", lines=1, value="")
+        input_audio = gr.Audio(label="Input Audio #", type="filepath")
+        f0_change = gr.Slider(label="f0 change #", minimum=0, maximum=10, step=1, value=0)
+        f0_method = gr.Dropdown(label="f0 method #", choices=["rmvpe+"], value="rmvpe+")
+        min_pitch = gr.Textbox(label="min pitch #", lines=1, value="50")
+        max_pitch = gr.Textbox(label="max pitch #", lines=1, value="1100")
+        crepe_hop_length = gr.Slider(label="crepe_hop_length #", minimum=0, maximum=256, step=1, value=128)
+        index_rate = gr.Slider(label="index_rate #", minimum=0, maximum=1.0, step=0.01, value=0.75)
+        filter_radius = gr.Slider(label="filter_radius #", minimum=0, maximum=10.0, step=0.01, value=3)
+        rms_mix_rate = gr.Slider(label="rms_mix_rate #", minimum=0, maximum=1.0, step=0.01, value=0.25)
+        protect = gr.Slider(label="protect #", minimum=0, maximum=1.0, step=0.01, value=0.33)
+        split_infer = gr.Checkbox(label="split_infer #", value=False)
+        min_silence = gr.Slider(label="min_silence #", minimum=0, maximum=1000, step=1, value=500)
+        silence_threshold = gr.Slider(label="silence_threshold #", minimum=-1000, maximum=1000, step=1, value=-50)
+        seek_step = gr.Slider(label="seek_step #", minimum=0, maximum=100, step=1, value=0)
+        keep_silence = gr.Slider(label="keep_silence #", minimum=-1000, maximum=1000, step=1, value=100)
+        do_formant = gr.Checkbox(label="do_formant #", value=False)
+        quefrency = gr.Slider(label="quefrency #", minimum=0, maximum=100, step=1, value=0)
+        timbre = gr.Slider(label="timbre #", minimum=0, maximum=100, step=1, value=1)
+        f0_autotune = gr.Checkbox(label="f0_autotune #", value=False)
+        audio_format = gr.Dropdown(label="audio_format #", choices=["wav"], value="wav")
+        resample_sr = gr.Slider(label="resample_sr #", minimum=0, maximum=100, step=1, value=0)
+        hubert_model_path = gr.Textbox(label="hubert_model_pathe #", lines=1, value="hubert_base.pt")
+        rmvpe_model_path = gr.Textbox(label="rmvpe_model_path #", lines=1, value="rmvpe.pt")
+        fcpe_model_path = gr.Textbox(label="fcpe_model_path #", lines=1, value="fcpe.pt")
+        submit_inference = gr.Button('Inference #', variant='primary')
+        result_audio = gr.Audio("Output Audio #", type="filepath")
+
+    with gr.Tab("Download Model"):
+        gr.Markdown("## Download Model for infernece")
+        url_input = gr.Textbox(label="Model URL", placeholder="Enter the URL of the model")
+        dir_name_input = gr.Textbox(label="Directory Name", placeholder="Enter the directory name")
+        output = gr.Textbox(label="Output Models")
+        download_button = gr.Button("Download Model")
+        download_button.click(download_model, inputs=[url_input, dir_name_input], outputs=output)
+
+    gr.on(
+        triggers=[submit_inference.click],
+        fn=infer_audio,
+        inputs=[model_name, input_audio, f0_change, f0_method, min_pitch, max_pitch, crepe_hop_length, index_rate,
+                filter_radius, rms_mix_rate, protect, split_infer, min_silence, silence_threshold, seek_step,
+                keep_silence, do_formant, quefrency, timbre, f0_autotune, audio_format, resample_sr,
+                hubert_model_path, rmvpe_model_path, fcpe_model_path],
+        outputs=[result_audio],
+        queue=True,
+        show_api=True,
+        show_progress="full",
+    )
+
+demo.queue()
+demo.launch()
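
On the strings-that-are-really-numbers point: min_pitch and max_pitch are gr.Textbox components, so their values arrive as the strings "50" and "1100". If infer_audio expects numeric pitch bounds (an assumption; if it also accepts note names like "C5", the strings should stay), a thin wrapper can coerce them and be passed to gr.on as fn in place of infer_audio:

```python
from rvc_infer import infer_audio

def infer_audio_coerced(model_name, input_audio, f0_change, f0_method,
                        min_pitch, max_pitch, *rest):
    # Convert the Textbox strings to numbers before forwarding;
    # assumes infer_audio wants numeric pitch bounds (unverified).
    return infer_audio(model_name, input_audio, f0_change, f0_method,
                       float(min_pitch), float(max_pitch), *rest)
```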