TIMBOVILL committed on
Commit
9bedde5
·
verified ·
1 Parent(s): a5e574f

Update src/UltraSinger.py

Browse files
Files changed (1) hide show
  1. src/UltraSinger.py +984 -130
src/UltraSinger.py CHANGED
@@ -1,140 +1,994 @@
1
- import gradio as gr
2
- import subprocess
3
-
4
def run_ultrasinger(opt_i, youtube_link, opt_o, mode, whisper_model, language, crepe_model, extra, device):
    """Build the UltraSinger command line from the UI values and run it.

    Args:
        opt_i: Uploaded file object; its ``name`` attribute is used as input path.
        youtube_link: URL used as input when no file was uploaded.
        opt_o: Output folder, omitted from the command when empty.
        mode: Mode label; "default" (or an unknown label) adds no mode flag.
        whisper_model: Value for ``--whisper``, omitted when empty.
        language: Language label mapped to the code passed to ``--language``.
        crepe_model: Value for ``--crepe``, omitted when empty.
        extra: Free-form extra options, split with shell-like quoting rules.
        device: Device option string such as ``"--force_cpu True"``.

    Returns:
        A ``(stdout, stderr)`` pair of strings. When the command cannot be
        built or started, the first element carries the error text.
    """
    import shlex

    mode_flags = {
        "Create Ultrastar txt file": "-u",
        "Create MIDI file": "-m",
        "Create sheet file": "-s",
    }
    language_codes = {
        "English": "en", "French": "fr", "German": "de", "Spanish": "es",
        "Italian": "it", "Japanese": "ja", "Chinese": "zh", "Dutch": "nl",
        "Ukrainian": "uk", "Portuguese": "pt",
    }

    cmd = ["python", "UltraSinger.py"]

    # The command is passed as an argument list (no shell), so values must NOT
    # be wrapped in quote characters: the child would receive them literally.
    if opt_i:
        cmd.extend(["-i", opt_i.name])
    elif youtube_link:
        cmd.extend(["-i", youtube_link])
    else:
        return "Error: No input file or YouTube link provided", ""

    if opt_o:
        cmd.extend(["-o", opt_o])

    # "default" and unknown labels intentionally add no flag.
    if mode in mode_flags:
        cmd.append(mode_flags[mode])

    if whisper_model:
        cmd.extend(["--whisper", whisper_model])
    if language in language_codes:
        cmd.extend(["--language", language_codes[language]])

    if crepe_model:
        cmd.extend(["--crepe", crepe_model])

    if extra:
        try:
            # shlex keeps quoted values with spaces together.
            cmd.extend(shlex.split(extra))
        except ValueError as e:
            return f"Error: Could not parse extra options: {e}", ""

    if device:
        cmd.extend(device.split())

    # Debug: print a copy-pasteable form of the command.
    print("Running command:", shlex.join(cmd))

    # UI boundary: report any failure to start the process as text.
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
        return result.stdout, result.stderr
    except Exception as e:
        return str(e), "Error occurred during execution"
60
 
61
def load_text_file(file_path):
    """Return the content of a UTF-8 text file, or the error text on failure.

    The result is rendered directly in the UI, so a read failure is reported
    as the exception message instead of being raised.
    """
    try:
        # Explicit encoding: do not depend on the platform's locale default.
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()
    except (OSError, ValueError, TypeError) as e:
        # ValueError also covers UnicodeDecodeError.
        return str(e)
67
-
68
# Define Gradio inputs and outputs for UltraSinger
# The variable names mirror the parameters of run_ultrasinger.

# Input source: either an uploaded file or a YouTube URL.
opt_i = gr.File(label="Ultrastar.txt or audio file (.mp3, .wav)")
youtube_link = gr.Textbox(label="YouTube Link", placeholder="Enter YouTube URL here")
opt_o = gr.Textbox(label="Output folder")
# Every choice except "default" is a key of mode_flags in run_ultrasinger.
mode = gr.Radio(
    label="Mode options",
    choices=[
        "default", "Create Ultrastar txt file", "Create MIDI file",
        "Create sheet file"
    ],
    value="default"
)
# The selected value is passed to --whisper unchanged.
whisper_model = gr.Dropdown(
    label="Whisper Model",
    choices=[
        "tiny", "base", "small", "medium", "large-v1", "large-v2",
        "tiny.en", "base.en", "small.en", "medium.en"
    ],
    value="large-v2"
)
# The choices are the keys of language_codes in run_ultrasinger.
language = gr.Dropdown(
    label="Language",
    choices=[
        "English", "French", "German", "Spanish", "Italian",
        "Japanese", "Chinese", "Dutch", "Ukrainian", "Portuguese"
    ],
    value="English"
)
crepe_model = gr.Radio(
    label="Crepe Model",
    choices=["full", "tiny"],
    value="full"
)
# Free-form options; run_ultrasinger splits them on whitespace.
extra = gr.Textbox(label="Extra options (e.g., --hyphenation True)")
# Each non-empty choice is a "flag value" pair that run_ultrasinger splits on
# whitespace; the empty string adds nothing to the command.
device = gr.Dropdown(
    label="Device options",
    choices=[
        "", "--force_cpu True", "--force_cpu False",
        "--force_whisper_cpu True", "--force_whisper_cpu False",
        "--force_crepe_cpu True", "--force_crepe_cpu False"
    ],
    value=""
)

# Receive the two strings returned by run_ultrasinger.
output_text = gr.Textbox(label="Standard Output")
error_text = gr.Textbox(label="Error Output")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
# Define Gradio interface for UltraSinger
# The order of `inputs` matches the positional parameters of run_ultrasinger.
ultrasinger_tab = gr.Interface(
    fn=run_ultrasinger,
    inputs=[opt_i, youtube_link, opt_o, mode, whisper_model, language, crepe_model, extra, device],
    outputs=[output_text, error_text],
    title="UltraSinger UI",
    description="Upload an Ultrastar.txt or an audio file, set the options, and run UltraSinger."
)

# Load content for Tab 1 and Tab 2
# load_text_file returns the error text when a file cannot be read, so a
# missing file shows its error message in the tab.
tab1_content = load_text_file("info.txt")
tab2_content = load_text_file("usdb.txt")

# Create Gradio tabs
with gr.Blocks(theme="soft") as demo:
    with gr.Tabs():
        with gr.TabItem("UltraSinger"):
            ultrasinger_tab.render()
        with gr.TabItem("Info"):
            gr.Markdown(tab1_content)
        with gr.TabItem("FOR USDB USERS"):
            gr.Markdown(tab2_content)

# Launch the app
if __name__ == "__main__":
    demo.launch()
 
1
+ """UltraSinger uses AI to automatically create UltraStar song files"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ import copy
4
+ import getopt
5
+ import os
6
+ import sys
7
+ import re
8
+
9
+ import Levenshtein
10
+ import librosa
11
+
12
+ from tqdm import tqdm
13
+ from packaging import version
14
+
15
+ import soundfile as sf
16
+
17
+ from modules import os_helper
18
+ from modules.Audio.denoise import ffmpeg_reduce_noise
19
+ from modules.Audio.separation import separate_audio
20
+ from modules.Audio.vocal_chunks import (
21
+ export_chunks_from_transcribed_data,
22
+ export_chunks_from_ultrastar_data,
23
  )
24
+ from modules.Audio.silence_processing import remove_silence_from_transcription_data, get_silence_sections
25
+ from modules.csv_handler import export_transcribed_data_to_csv
26
+ from modules.Audio.convert_audio import convert_audio_to_mono_wav, convert_wav_to_mp3
27
+ from modules.Audio.youtube import (
28
+ download_youtube_audio,
29
+ download_youtube_thumbnail,
30
+ download_youtube_video,
31
+ get_youtube_title,
32
  )
33
+ from modules.DeviceDetection.device_detection import check_gpu_support
34
+ from modules.console_colors import (
35
+ ULTRASINGER_HEAD,
36
+ blue_highlighted,
37
+ gold_highlighted,
38
+ light_blue_highlighted,
39
+ red_highlighted,
40
  )
41
+ from modules.Midi import midi_creator
42
+ from modules.Midi.midi_creator import (
43
+ convert_frequencies_to_notes,
44
+ create_midi_notes_from_pitched_data,
45
+ most_frequent,
46
  )
47
+ from modules.Pitcher.pitcher import (
48
+ get_frequencies_with_high_confidence,
49
+ get_pitch_with_crepe_file,
 
 
 
 
 
 
50
  )
51
+ from modules.Pitcher.pitched_data import PitchedData
52
+ from modules.Speech_Recognition.hyphenation import hyphenation, language_check, create_hyphenator
53
+ from modules.Speech_Recognition.Whisper import transcribe_with_whisper
54
+ from modules.Ultrastar import ultrastar_score_calculator, ultrastar_writer, ultrastar_converter, ultrastar_parser
55
+ from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue
56
+ from Settings import Settings
57
+ from modules.Speech_Recognition.TranscribedData import TranscribedData
58
+ from modules.plot import plot, plot_spectrogram
59
+ from modules.musicbrainz_client import get_music_infos
60
 
61
+ settings = Settings()
62
+
63
+
64
def convert_midi_notes_to_ultrastar_notes(midi_notes: list[str]) -> list[int]:
    """Translate note names into Ultrastar note numbers, keeping their order.

    Each entry is first converted to a MIDI note number with librosa and then
    mapped to the Ultrastar pitch by ``ultrastar_converter``.
    """
    print(f"{ULTRASINGER_HEAD} Creating Ultrastar notes from midi data")

    # todo: Progress?
    return [
        ultrastar_converter.midi_note_to_ultrastar_note(
            librosa.note_to_midi(midi_note)
        )
        for midi_note in midi_notes
    ]
81
+
82
+
83
def pitch_each_chunk_with_crepe(directory: str) -> list[str]:
    """Return one note per ``.wav`` chunk found in ``directory``.

    Chunks are processed in the order of the integer that follows the first
    underscore of their file name. For every chunk the most frequent note
    among the high-confidence crepe frequencies is kept.
    """
    print(
        f"{ULTRASINGER_HEAD} Pitching each chunk with {blue_highlighted('crepe')}"
    )

    def chunk_number(wav_name: str) -> int:
        return int(wav_name.split("_")[1])

    wav_names = [entry for entry in os.listdir(directory) if entry.endswith(".wav")]
    wav_names.sort(key=chunk_number)

    midi_notes = []
    for wav_name in wav_names:
        # todo: stepsize = duration? then when shorter than "it" it should take the duration. Otherwise there a more notes
        pitched = get_pitch_with_crepe_file(
            os.path.join(directory, wav_name),
            settings.crepe_model_capacity,
            settings.crepe_step_size,
            settings.tensorflow_device,
        )
        confident_frequencies = get_frequencies_with_high_confidence(
            pitched.frequencies, pitched.confidence
        )
        chunk_notes = convert_frequencies_to_notes(confident_frequencies)
        # todo: Progress?
        midi_notes.append(most_frequent(chunk_notes)[0][0])

    return midi_notes
114
+
115
+
116
def add_hyphen_to_data(transcribed_data: list[TranscribedData], hyphen_words: list[list[str]]):
    """Split transcribed words into their hyphenated parts.

    ``hyphen_words[i]`` holds the parts of ``transcribed_data[i]``; a falsy
    entry keeps the word unchanged. Otherwise the word's time span is divided
    evenly between shallow copies of the entry, one per part, each flagged as
    a hyphen and only the last one flagged as the end of the word.

    Returns:
        A new list; the input list is not modified.
    """
    result = []

    for position, entry in enumerate(transcribed_data):
        parts = hyphen_words[position]
        if not parts:
            result.append(entry)
            continue

        part_count = len(parts)
        part_duration = (entry.end - entry.start) / part_count

        part_start = entry.start
        for part_index, part in enumerate(parts):
            remaining_parts = part_count - 1 - part_index
            piece = copy.copy(entry)
            piece.start = part_start
            # Measured back from the word end so the last part ends exactly
            # on entry.end.
            part_start = entry.end - part_duration * remaining_parts
            piece.end = part_start
            piece.word = part
            piece.is_hyphen = True
            piece.is_word_end = part_index == part_count - 1
            result.append(piece)

    return result
145
+
146
+
147
def get_bpm_from_data(data, sampling_rate):
    """Estimate the tempo of audio samples, print it and return it.

    The tempo is taken from the first element of librosa's tempo estimate,
    computed on the onset strength envelope of ``data``.
    """
    onset_envelope = librosa.onset.onset_strength(y=data, sr=sampling_rate)
    tempo_estimate = librosa.beat.tempo(
        onset_envelope=onset_envelope, sr=sampling_rate
    )

    rounded_bpm = str(round(tempo_estimate[0], 2))
    print(f"{ULTRASINGER_HEAD} BPM is {blue_highlighted(rounded_bpm)}")
    return tempo_estimate[0]
156
+
157
+
158
def get_bpm_from_file(wav_file: str) -> float:
    """Load ``wav_file`` at its native sampling rate and estimate its tempo."""
    samples, native_rate = librosa.load(wav_file, sr=None)
    bpm = get_bpm_from_data(samples, native_rate)
    return bpm
162
+
163
+
164
def correct_words(recognized_words, word_list_file):
    """Replace unknown recognized words with the closest word of a word list.

    Args:
        recognized_words: Objects with a ``word`` attribute; modified in place.
        word_list_file: Path of a UTF-8 text file with whitespace-separated
            reference words.

    Returns:
        ``recognized_words``. Words already in the list are left untouched;
        every other word is replaced by the reference word with the smallest
        Levenshtein distance (the first one wins on ties). When the word list
        is empty nothing is changed.
    """
    with open(word_list_file, "r", encoding="utf-8") as file:
        word_list = file.read().split()

    # Nothing to compare against: min() on an empty list would raise.
    if not word_list:
        return recognized_words

    # Set for the membership test; the list keeps the file order for ties.
    known_words = set(word_list)

    for rec_word in recognized_words:
        if rec_word.word in known_words:
            continue

        closest_word = min(
            word_list, key=lambda x: Levenshtein.distance(rec_word.word, x)
        )
        print(rec_word.word + " - " + closest_word)
        rec_word.word = closest_word
    return recognized_words
180
+
181
+
182
def print_help() -> None:
    """Print the command line usage text to standard output."""
    help_string = """
    UltraSinger.py [opt] [mode] [transcription] [pitcher] [extra] [device]

    [opt]
    -h      This help text.
    -i      Ultrastar.txt
            audio like .mp3, .wav, youtube link
    -o      Output folder

    [mode]
    ## INPUT is audio ##
    default  Creates all

    # Single file creation selection is in progress, you are currently getting all!
    (-u      Create ultrastar txt file) # In Progress
    (-m      Create midi file) # In Progress
    (-s      Create sheet file) # In Progress

    ## INPUT is ultrastar.txt ##
    default  Creates all

    # Single selection is in progress, you are currently getting all!
    (-r      repitch Ultrastar.txt (input has to be audio)) # In Progress
    (-p      Check pitch of Ultrastar.txt input) # In Progress
    (-m      Create midi file) # In Progress

    [transcription]
    # Default is whisper
    --whisper               Multilingual model > tiny|base|small|medium|large-v1|large-v2  >> ((default) is large-v2)
                            English-only model > tiny.en|base.en|small.en|medium.en
    --whisper_align_model   Use other languages model for Whisper provided from huggingface.co
    --language              Override the language detected by whisper, does not affect transcription but steps after transcription
    --whisper_batch_size    Reduce if low on GPU mem >> ((default) is 16)
    --whisper_compute_type  Change to "int8" if low on GPU mem (may reduce accuracy) >> ((default) is "float16" for cuda devices, "int8" for cpu)

    [pitcher]
    # Default is crepe
    --crepe            tiny|full >> ((default) is full)
    --crepe_step_size  unit is milliseconds >> ((default) is 10)

    [extra]
    --hyphenation           True|False >> ((default) is True)
    --disable_separation    True|False >> ((default) is False)
    --disable_karaoke       True|False >> ((default) is False)
    --create_audio_chunks   True|False >> ((default) is False)
    --keep_cache            True|False >> ((default) is False)
    --plot                  True|False >> ((default) is False)
    --format_version        0.3.0|1.0.0|1.1.0 >> ((default) is 1.0.0)

    [device]
    --force_cpu             True|False >> ((default) is False)  All steps will be forced to cpu
    --force_whisper_cpu     True|False >> ((default) is False)  Only whisper will be forced to cpu
    --force_crepe_cpu       True|False >> ((default) is False)  Only crepe will be forced to cpu
    """
    print(help_string)
239
+
240
+
241
def remove_unecessary_punctuations(transcribed_data: list[TranscribedData]) -> None:
    """Strip periods and commas from every word, in place.

    Only the ``word`` attribute of each entry is rewritten; other punctuation
    (apostrophes, exclamation marks, ...) is kept.
    """
    # Built once: maps "." and "," to "delete".
    strip_table = str.maketrans("", "", ".,")
    for data in transcribed_data:
        data.word = data.word.translate(strip_table)
248
+
249
+
250
def hyphenate_each_word(language: str, transcribed_data: list[TranscribedData]) -> list[list[str]] | None:
    """Hyphenate each word in the transcribed data.

    Returns:
        One hyphenation result per entry of ``transcribed_data``, in order,
        or ``None`` (after printing an error) when the language is not
        supported or hyphenation fails.
    """

    def report_failure() -> None:
        print(
            f"{ULTRASINGER_HEAD} {red_highlighted('Error in hyphenation for language ')} {blue_highlighted(language)}{red_highlighted(', maybe you want to disable it?')}"
        )

    lang_region = language_check(language)
    if lang_region is None:
        report_failure()
        return None

    try:
        hyphenator = create_hyphenator(lang_region)
        # Iterating the list directly lets tqdm show a total.
        return [
            hyphenation(data.word, hyphenator)
            for data in tqdm(transcribed_data)
        ]
    except Exception:
        # Best effort: hyphenation is optional, so report and carry on.
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        report_failure()
        return None
272
+
273
+
274
def print_support() -> None:
    """Print a blank line followed by the three-line support notice."""
    print()

    appeal = gold_highlighted('Do you like UltraSinger? Want it to be even better? Then help with your')
    keyword = light_blue_highlighted('support')
    print(f"{ULTRASINGER_HEAD} {appeal} {keyword}{gold_highlighted('!')}")

    print(f"{ULTRASINGER_HEAD} See project page -> https://github.com/rakuri255/UltraSinger")

    thanks = gold_highlighted('This will help a lot to keep this project alive and improved.')
    print(f"{ULTRASINGER_HEAD} {thanks}")
286
+
287
def print_version() -> None:
    """Print a blank line and the application version framed by two borders."""
    border = f"{ULTRASINGER_HEAD} {gold_highlighted('*****************************')}"
    version_line = (
        f"{ULTRASINGER_HEAD} {gold_highlighted('UltraSinger Version:')} {light_blue_highlighted(settings.APP_VERSION)}"
    )

    print()
    for line in (border, version_line, border):
        print(line)
299
+
300
def run() -> None:
    """The processing function of this program

    Reads its configuration from the module-level ``settings`` object and
    runs the whole pipeline: resolve the input (Ultrastar txt, YouTube link
    or audio file), separate/denoise/convert/mute the audio, transcribe and
    hyphenate (audio input only), pitch, write the Ultrastar txt, score it,
    optionally create plots and a midi file, and finally remove the cache.
    """
    # NOTE(review): substring test - any path containing ".txt" is treated
    # as Ultrastar txt input, not only paths ending with it.
    is_audio = ".txt" not in settings.input_file_path
    ultrastar_class = None
    real_bpm = None
    (title, artist, year, genre) = (None, None, None, None)

    # Resolve the input. Every branch yields the base name, the song output
    # folder and the path of the audio file to process.
    if not is_audio:  # Parse Ultrastar txt
        print(
            f"{ULTRASINGER_HEAD} {gold_highlighted('re-pitch mode')}"
        )
        (
            basename_without_ext,
            real_bpm,
            song_output,
            ultrastar_audio_input_path,
            ultrastar_class,
        ) = parse_ultrastar_txt()
    elif settings.input_file_path.startswith("https:"):  # Youtube
        print(
            f"{ULTRASINGER_HEAD} {gold_highlighted('full automatic mode')}"
        )
        (
            basename_without_ext,
            song_output,
            ultrastar_audio_input_path,
            (title, artist, year, genre)
        ) = download_from_youtube()
    else:  # Audio File
        print(
            f"{ULTRASINGER_HEAD} {gold_highlighted('full automatic mode')}"
        )
        (
            basename_without_ext,
            song_output,
            ultrastar_audio_input_path,
            (title, artist, year, genre)
        ) = infos_from_audio_input_file()

    # All intermediate files live in a "cache" folder inside the song folder.
    cache_path = os.path.join(song_output, "cache")
    settings.processing_audio_path = os.path.join(
        cache_path, basename_without_ext + ".wav"
    )
    os_helper.create_folder(cache_path)

    # Separate vocal from audio
    audio_separation_path = separate_vocal_from_audio(
        basename_without_ext, cache_path, ultrastar_audio_input_path
    )
    vocals_path = os.path.join(audio_separation_path, "vocals.wav")
    instrumental_path = os.path.join(audio_separation_path, "no_vocals.wav")

    # Move instrumental and vocals
    # Formats before 1.1.0 get a separate "[Karaoke]" mp3 when enabled.
    if settings.create_karaoke and version.parse(settings.format_version) < version.parse("1.1.0"):
        karaoke_output_path = os.path.join(song_output, basename_without_ext + " [Karaoke].mp3")
        convert_wav_to_mp3(instrumental_path, karaoke_output_path)

    # From format 1.1.0 on, instrumental and vocals are both exported.
    # NOTE(review): separate_vocal_from_audio only runs the separation when
    # use_separated_vocal or create_karaoke is set - confirm these wav files
    # exist when both are disabled.
    if version.parse(settings.format_version) >= version.parse("1.1.0"):
        instrumental_output_path = os.path.join(song_output, basename_without_ext + " [Instrumental].mp3")
        convert_wav_to_mp3(instrumental_path, instrumental_output_path)
        vocals_output_path = os.path.join(song_output, basename_without_ext + " [Vocals].mp3")
        convert_wav_to_mp3(vocals_path, vocals_output_path)

    if settings.use_separated_vocal:
        input_path = vocals_path
    else:
        input_path = ultrastar_audio_input_path

    # Denoise vocal audio
    denoised_output_path = os.path.join(
        cache_path, basename_without_ext + "_denoised.wav"
    )
    denoise_vocal_audio(input_path, denoised_output_path)

    # Convert to mono audio
    mono_output_path = os.path.join(
        cache_path, basename_without_ext + "_mono.wav"
    )
    convert_audio_to_mono_wav(denoised_output_path, mono_output_path)

    # Mute silence sections
    mute_output_path = os.path.join(
        cache_path, basename_without_ext + "_mute.wav"
    )
    mute_no_singing_parts(mono_output_path, mute_output_path)

    # Define the audio file to process
    settings.processing_audio_path = mute_output_path

    # Audio transcription
    transcribed_data = None
    language = settings.language
    if is_audio:
        detected_language, transcribed_data = transcribe_audio()
        # A language set in the settings takes precedence over the detected one.
        if language is None:
            language = detected_language

        remove_unecessary_punctuations(transcribed_data)

        if settings.hyphenation:
            hyphen_words = hyphenate_each_word(language, transcribed_data)
            # None means hyphenation failed; keep the words unsplit.
            if hyphen_words is not None:
                transcribed_data = add_hyphen_to_data(transcribed_data, hyphen_words)

        transcribed_data = remove_silence_from_transcription_data(
            settings.processing_audio_path, transcribed_data
        )

        # todo: do we need to correct words?
        # lyric = 'input/faber_lyric.txt'
        # --corrected_words = correct_words(vosk_speech, lyric)

    # Create audio chunks
    if settings.create_audio_chunks:
        create_audio_chunks(
            cache_path,
            is_audio,
            transcribed_data,
            ultrastar_audio_input_path,
            ultrastar_class,
        )

    # Pitch the audio
    midi_notes, pitched_data, ultrastar_note_numbers = pitch_audio(
        is_audio, transcribed_data, ultrastar_class
    )

    # Create plot
    if settings.create_plot:
        vocals_path = os.path.join(audio_separation_path, "vocals.wav")
        plot_spectrogram(vocals_path, song_output, "vocals.wav")
        plot_spectrogram(settings.processing_audio_path, song_output, "processing audio")
        plot(pitched_data, song_output, transcribed_data, ultrastar_class, midi_notes)

    # Write Ultrastar txt
    if is_audio:
        # For audio input the bpm is measured here; in re-pitch mode it was
        # already returned by parse_ultrastar_txt above.
        real_bpm, ultrastar_file_output = create_ultrastar_txt_from_automation(
            basename_without_ext,
            song_output,
            transcribed_data,
            ultrastar_audio_input_path,
            ultrastar_note_numbers,
            language,
            title,
            artist,
            year,
            genre
        )
    else:
        ultrastar_file_output = create_ultrastar_txt_from_ultrastar_data(
            song_output, ultrastar_class, ultrastar_note_numbers
        )

    # Calc Points
    # accurate_score is returned but not used below.
    ultrastar_class, simple_score, accurate_score = calculate_score_points(
        is_audio, pitched_data, ultrastar_class, ultrastar_file_output
    )

    # Add calculated score to Ultrastar txt #Todo: Missing Karaoke
    ultrastar_writer.add_score_to_ultrastar_txt(
        ultrastar_file_output, simple_score
    )

    # Midi
    if settings.create_midi:
        create_midi_file(real_bpm, song_output, ultrastar_class, basename_without_ext)

    # Cleanup
    if not settings.keep_cache:
        remove_cache_folder(cache_path)

    # Print Support
    print_support()
473
+
474
+
475
def mute_no_singing_parts(mono_output_path, mute_output_path):
    """Write a copy of the audio in which the silence sections are zeroed.

    The sections come from ``get_silence_sections``; each one is indexed as
    ``(start, end)`` in seconds and converted to sample indices with the
    file's native sampling rate.
    """
    print(f"{ULTRASINGER_HEAD} Mute audio parts with no singing")

    sections_without_singing = get_silence_sections(mono_output_path)
    samples, sample_rate = librosa.load(mono_output_path, sr=None)

    for section in sections_without_singing:
        first_sample = int(section[0] * sample_rate)
        last_sample = int(section[1] * sample_rate)
        samples[first_sample:last_sample] = 0

    sf.write(mute_output_path, samples, sample_rate)
494
+
495
+
496
def get_unused_song_output_dir(path: str) -> str:
    """Return ``path`` or the first free ``"<path> (n)"`` variant.

    When ``path`` already exists, the suffixes `` (1)`` to `` (999)`` are
    tried in order. The suffix is always appended to the unmodified ``path``
    so a "(n)" that is part of the song name is never rewritten.

    Exits the program with status 1 when all 999 variants exist.
    """
    if not os_helper.check_if_folder_exists(path):
        return path

    for i in range(1, 1000):
        candidate = f"{path} ({i})"
        if not os_helper.check_if_folder_exists(candidate):
            return candidate

    print(
        f"{ULTRASINGER_HEAD} {red_highlighted('Error: Could not create output folder! (999) is the maximum number of tries.')}"
    )
    sys.exit(1)
514
+
515
+
516
def transcribe_audio() -> tuple[str, list[TranscribedData]]:
    """Transcribe ``settings.processing_audio_path`` with the configured AI.

    Returns:
        ``(detected_language, transcribed_data)``.

    Raises:
        NotImplementedError: ``settings.transcriber`` is not ``"whisper"``.
    """
    if settings.transcriber != "whisper":
        raise NotImplementedError(
            f"Transcriber '{settings.transcriber}' is not supported"
        )

    # Whisper can be forced onto the cpu independently of the other steps.
    device = "cpu" if settings.force_whisper_cpu else settings.pytorch_device
    transcribed_data, detected_language = transcribe_with_whisper(
        settings.processing_audio_path,
        settings.whisper_model,
        device,
        settings.whisper_align_model,
        settings.whisper_batch_size,
        settings.whisper_compute_type,
        settings.language,
    )
    return detected_language, transcribed_data
532
+
533
+
534
def separate_vocal_from_audio(
    basename_without_ext: str, cache_path: str, ultrastar_audio_input_path: str
) -> str:
    """Run the audio separation when needed and return its output folder.

    The folder ``<cache_path>/separated/htdemucs/<basename_without_ext>`` is
    returned in every case; the separation itself only runs when separated
    vocals or a karaoke file are requested in the settings.
    """
    separated_dir = os.path.join(
        cache_path, "separated", "htdemucs", basename_without_ext
    )

    if not (settings.use_separated_vocal or settings.create_karaoke):
        return separated_dir

    separate_audio(ultrastar_audio_input_path, cache_path, settings.pytorch_device)
    return separated_dir
546
+
547
def calculate_score_points(
    is_audio: bool, pitched_data: PitchedData, ultrastar_class: UltrastarTxtValue, ultrastar_file_output: str
):
    """Score the written Ultrastar txt against the pitched data.

    For Ultrastar txt input the original data is scored and printed first,
    under its own heading. In both modes the written file is then parsed,
    scored and printed.

    Returns:
        ``(ultrastar_class, simple_score, accurate_score)`` for the parsed
        output file.
    """

    def score_and_report(ultrastar_data):
        simple, accurate = ultrastar_score_calculator.calculate_score(
            pitched_data, ultrastar_data
        )
        ultrastar_score_calculator.print_score_calculation(simple, accurate)
        return simple, accurate

    if not is_audio:
        print(
            f"{ULTRASINGER_HEAD} {blue_highlighted('Score of original Ultrastar txt')}"
        )
        score_and_report(ultrastar_class)
        print(
            f"{ULTRASINGER_HEAD} {blue_highlighted('Score of re-pitched Ultrastar txt')}"
        )

    ultrastar_class = ultrastar_parser.parse_ultrastar_txt(ultrastar_file_output)
    simple_score, accurate_score = score_and_report(ultrastar_class)
    return ultrastar_class, simple_score, accurate_score
593
+
594
+
595
def create_ultrastar_txt_from_ultrastar_data(
    song_output: str, ultrastar_class: UltrastarTxtValue, ultrastar_note_numbers: list[int]
) -> str:
    """Write the re-pitched Ultrastar txt and return its path.

    The file is named after the song title and placed in ``song_output``;
    its content is derived from the input file given in the settings.
    """
    repitched_txt_path = os.path.join(song_output, ultrastar_class.title + ".txt")
    ultrastar_writer.create_repitched_txt_from_ultrastar_data(
        settings.input_file_path, ultrastar_note_numbers, repitched_txt_path
    )
    return repitched_txt_path
608
+
609
+
610
def create_ultrastar_txt_from_automation(
    basename_without_ext: str,
    song_output: str,
    transcribed_data: list[TranscribedData],
    ultrastar_audio_input_path: str,
    ultrastar_note_numbers: list[int],
    language: str,
    title: str,
    artist: str,
    year: str,
    genre: str
):
    """Write the Ultrastar txt for an automatically processed song.

    The header is filled from the base name and overridden by any of
    ``title``, ``artist``, ``year`` and ``genre`` that is not ``None``. For
    formats before 1.1.0 with karaoke enabled a second "[Karaoke]" txt is
    written as well.

    Returns:
        ``(real_bpm, ultrastar_file_output)``: the tempo measured from the
        audio file and the path of the main txt.
    """
    header = UltrastarTxtValue()
    header.version = settings.format_version
    header.title = basename_without_ext
    header.artist = basename_without_ext
    header.mp3 = basename_without_ext + ".mp3"
    header.audio = basename_without_ext + ".mp3"
    header.vocals = basename_without_ext + " [Vocals].mp3"
    header.instrumental = basename_without_ext + " [Instrumental].mp3"
    header.video = basename_without_ext + ".mp4"
    header.language = language

    # Only reference the cover when the file is actually present.
    cover = basename_without_ext + " [CO].jpg"
    if os_helper.check_file_exists(os.path.join(song_output, cover)):
        header.cover = cover
    else:
        header.cover = None

    header.creator = f"{header.creator} {Settings.APP_VERSION}"
    header.comment = f"{header.comment} {Settings.APP_VERSION}"

    # Additional data
    if title is not None:
        header.title = title
    if artist is not None:
        header.artist = artist
    if year is not None:
        header.year = extract_year(year)
    if genre is not None:
        header.genre = format_separated_string(genre)

    real_bpm = get_bpm_from_file(ultrastar_audio_input_path)
    ultrastar_file_output = os.path.join(song_output, basename_without_ext + ".txt")
    ultrastar_writer.create_ultrastar_txt_from_automation(
        transcribed_data,
        ultrastar_note_numbers,
        ultrastar_file_output,
        header,
        real_bpm,
    )

    if settings.create_karaoke and version.parse(settings.format_version) < version.parse("1.1.0"):
        # Same notes again, pointing at the karaoke audio.
        karaoke_title = basename_without_ext + " [Karaoke]"
        header.title = karaoke_title
        header.mp3 = karaoke_title + ".mp3"
        karaoke_txt_output_path = os.path.join(song_output, karaoke_title) + ".txt"
        ultrastar_writer.create_ultrastar_txt_from_automation(
            transcribed_data,
            ultrastar_note_numbers,
            karaoke_txt_output_path,
            header,
            real_bpm,
        )

    return real_bpm, ultrastar_file_output
677
+
678
def extract_year(date: str) -> str:
    """Return the first standalone four-digit number in ``date``.

    When no such number exists, ``date`` is returned unchanged.
    """
    found = re.search(r'\b\d{4}\b', date)
    return found.group(0) if found else date
684
+
685
def format_separated_string(data: str) -> str:
    """Normalize a ``;``/``/``/``,`` separated list into "A, B, C" form.

    Blank entries are dropped. Every entry is trimmed and capitalized; in
    entries containing dashes each dash-separated part is trimmed and
    capitalized on its own.
    """
    entries = re.sub(r'[;/]', ',', data).split(',')

    formatted = []
    for entry in entries:
        if not entry.strip():
            continue
        # An entry without a dash is a single part.
        parts = (part.strip().capitalize() for part in entry.split('-'))
        formatted.append('-'.join(parts))

    return ', '.join(formatted)
705
+
706
def infos_from_audio_input_file() -> tuple[str, str, str, tuple[str, str, str, str]]:
    """Prepare the song folder for an audio input file.

    Artist and title are taken from an "artist - title" file name and
    replaced by the result of ``get_music_infos`` when that lookup returns a
    title. The input file is copied into a new song folder and renamed to
    "<artist> - <title><ext>" when both values are known.

    Returns:
        ``(basename_without_ext, song_output, ultrastar_audio_input_path,
        (title, artist, year, genre))``.
    """
    basename = os.path.basename(settings.input_file_path)
    basename_without_ext, extension = os.path.splitext(basename)

    artist, title = None, None
    if " - " in basename_without_ext:
        artist, title = basename_without_ext.split(" - ", 1)
        search_string = f"{artist} - {title}"
    else:
        search_string = basename_without_ext

    # Get additional data for song
    (title_info, artist_info, year_info, genre_info) = get_music_infos(search_string)

    if title_info is not None:
        title = title_info
        artist = artist_info

    if artist is not None and title is not None:
        # Looked-up names may contain characters such as "/" or "?" that are
        # not valid in file names; sanitize as download_from_youtube does.
        basename_without_ext = sanitize_filename(f"{artist} - {title}")
        basename = f"{basename_without_ext}{extension}"

    song_output = os.path.join(settings.output_file_path, basename_without_ext)
    song_output = get_unused_song_output_dir(song_output)
    os_helper.create_folder(song_output)
    os_helper.copy(settings.input_file_path, song_output)
    # The copy keeps the original file name; rename it to the final one.
    os_helper.rename(
        os.path.join(song_output, os.path.basename(settings.input_file_path)),
        os.path.join(song_output, basename),
    )
    ultrastar_audio_input_path = os.path.join(song_output, basename)
    return basename_without_ext, song_output, ultrastar_audio_input_path, (title, artist, year_info, genre_info)
737
+
738
+
739
FILENAME_REPLACEMENTS = (('?:"', ""), ("<", "("), (">", ")"), ("/\\|*", "-"))

# Per-character translation table derived from FILENAME_REPLACEMENTS so that
# all substitutions happen in a single pass over the name.
_FILENAME_TRANSLATION = str.maketrans(
    {char: new for old, new in FILENAME_REPLACEMENTS for char in old}
)


def sanitize_filename(fname: str) -> str:
    """Return ``fname`` with characters that are unsafe in file names replaced.

    Each character listed in ``FILENAME_REPLACEMENTS`` is swapped for its
    replacement, then trailing spaces and periods are removed.  The trailing
    clean-up is unconditional: Windows rejects or silently drops both trailing
    periods and trailing spaces, so a name such as ``"Song. "`` must be trimmed
    as well, not only names that end in a period.
    """
    return fname.translate(_FILENAME_TRANSLATION).rstrip(" .")
750
+
751
+
752
def download_from_youtube() -> tuple[str, str, str, tuple[str, str, str, str]]:
    """Download the media for the YouTube link stored in the settings.

    Artist and title come from ``get_youtube_title`` and are replaced by the
    ``get_music_infos`` result when that lookup yields a title.  A new output
    folder named after the sanitized ``"<artist> - <title>"`` is created and
    the audio, video and thumbnail download helpers are run against it.

    Returns:
        ``(basename_without_ext, song_output, ultrastar_audio_input_path,
        (title, artist, year_info, genre_info))``.
    """
    (artist, title) = get_youtube_title(settings.input_file_path)

    # Get additional data for song
    (title_info, artist_info, year_info, genre_info) = get_music_infos(f"{artist} - {title}")
    if title_info is not None:
        title, artist = title_info, artist_info

    basename_without_ext = sanitize_filename(f"{artist} - {title}")
    song_output = get_unused_song_output_dir(
        os.path.join(settings.output_file_path, basename_without_ext)
    )
    os_helper.create_folder(song_output)

    # Same call signature for all three helpers; order matters only for logs.
    downloaders = (
        download_youtube_audio,
        download_youtube_video,
        download_youtube_thumbnail,
    )
    for download in downloaders:
        download(settings.input_file_path, basename_without_ext, song_output)

    ultrastar_audio_input_path = os.path.join(song_output, basename_without_ext + ".mp3")
    return basename_without_ext, song_output, ultrastar_audio_input_path, (title, artist, year_info, genre_info)
779
+
780
+
781
def parse_ultrastar_txt() -> tuple[str, float, str, str, UltrastarTxtValue]:
    """Parse the Ultrastar txt given as input and prepare its output folder.

    Returns:
        ``(basename_without_ext, real_bpm, song_output,
        ultrastar_audio_input_path, ultrastar_class)`` where the audio path
        points next to the txt file and ``song_output`` is a newly created
        ``"<artist> - <title>"`` folder below the configured output path.
    """
    txt_path = settings.input_file_path
    ultrastar_class = ultrastar_parser.parse_ultrastar_txt(txt_path)

    # The BPM value may be written with a decimal comma.
    bpm_text = ultrastar_class.bpm.replace(",", ".")
    real_bpm = ultrastar_converter.ultrastar_bpm_to_real_bpm(float(bpm_text))

    mp3_name = ultrastar_class.mp3
    basename_without_ext = os.path.splitext(mp3_name)[0]
    ultrastar_audio_input_path = os.path.join(os.path.dirname(txt_path), mp3_name)

    folder_name = ultrastar_class.artist.strip() + " - " + ultrastar_class.title.strip()
    song_output = get_unused_song_output_dir(
        str(os.path.join(settings.output_file_path, folder_name))
    )
    os_helper.create_folder(song_output)

    return (
        str(basename_without_ext),
        real_bpm,
        song_output,
        str(ultrastar_audio_input_path),
        ultrastar_class,
    )
807
+
808
+
809
def create_midi_file(real_bpm: float,
                     song_output: str,
                     ultrastar_class: UltrastarTxtValue,
                     basename_without_ext: str) -> None:
    """Write ``<basename_without_ext>.mid`` into ``song_output``.

    The Ultrastar data is converted to a single MIDI instrument and handed to
    ``midi_creator.instruments_to_midi`` together with ``real_bpm``.
    """
    print(f"{ULTRASINGER_HEAD} Creating Midi with {blue_highlighted('pretty_midi')}")

    instrument = midi_creator.convert_ultrastar_to_midi_instrument(ultrastar_class)
    midi_path = os.path.join(song_output, f"{basename_without_ext}.mid")
    midi_creator.instruments_to_midi([instrument], real_bpm, midi_path)
825
+
826
+
827
def pitch_audio(is_audio: bool, transcribed_data: list[TranscribedData], ultrastar_class: UltrastarTxtValue) -> tuple[
    list[str], PitchedData, list[int]]:
    """Pitch the processing audio and derive notes for each timed segment.

    Segment boundaries are taken from ``transcribed_data`` when ``is_audio``
    is true, otherwise from ``ultrastar_class``.

    Returns:
        ``(midi_notes, pitched_data, ultrastar_note_numbers)``.
    """
    # todo: chunk pitching as option?
    # midi_notes = pitch_each_chunk_with_crepe(chunk_folder_name)
    if settings.force_crepe_cpu:
        device = "cpu"
    else:
        device = settings.tensorflow_device

    pitched_data = get_pitch_with_crepe_file(
        settings.processing_audio_path,
        settings.crepe_model_capacity,
        settings.crepe_step_size,
        device,
    )

    if is_audio:
        start_times = [segment.start for segment in transcribed_data]
        end_times = [segment.end for segment in transcribed_data]
    else:
        start_times = ultrastar_class.startTimes
        end_times = ultrastar_class.endTimes

    midi_notes = create_midi_notes_from_pitched_data(start_times, end_times, pitched_data)
    ultrastar_note_numbers = convert_midi_notes_to_ultrastar_notes(midi_notes)
    return midi_notes, pitched_data, ultrastar_note_numbers
855
+
856
+
857
def create_audio_chunks(
        cache_path: str,
        is_audio: bool,
        transcribed_data: list[TranscribedData],
        ultrastar_audio_input_path: str,
        ultrastar_class: UltrastarTxtValue
) -> None:
    """Export audio chunks into the chunk folder below ``cache_path``.

    For Ultrastar input the chunks are cut from ``ultrastar_audio_input_path``
    using ``ultrastar_class``.  For audio input they are cut from the
    processing audio using ``transcribed_data``, and the transcription is
    additionally written to ``_chunks.csv`` in the same folder.
    """
    chunk_dir = os.path.join(cache_path, settings.audio_chunk_folder_name)
    os_helper.create_folder(chunk_dir)

    if not is_audio:
        export_chunks_from_ultrastar_data(
            ultrastar_audio_input_path, ultrastar_class, chunk_dir
        )
        return

    # and csv
    export_chunks_from_transcribed_data(
        settings.processing_audio_path, transcribed_data, chunk_dir
    )
    export_transcribed_data_to_csv(
        transcribed_data, os.path.join(chunk_dir, "_chunks.csv")
    )
879
+
880
def denoise_vocal_audio(input_path: str, output_path: str) -> None:
    """Run ``ffmpeg_reduce_noise`` on ``input_path``, targeting ``output_path``."""
    source, target = input_path, output_path
    ffmpeg_reduce_noise(source, target)
883
+
884
+
885
def main(argv: list[str]) -> None:
    """Program entry point.

    Prints the version, loads the settings from ``argv`` (the command-line
    arguments without the program name), runs the pipeline and then exits the
    interpreter.
    """
    print_version()
    init_settings(argv)
    run()
    # Always terminate explicitly once the run has finished.
    sys.exit()
891
+
892
def remove_cache_folder(cache_path: str) -> None:
    """Delete the cache folder at ``cache_path`` via ``os_helper``."""
    folder_to_delete = cache_path
    os_helper.remove_folder(folder_to_delete)
895
+
896
_TRUE_ARG_VALUES = frozenset({"true", "1", "yes", "on"})
_FALSE_ARG_VALUES = frozenset({"false", "0", "no", "off"})


def _parse_bool_arg(opt: str, arg: str) -> bool:
    """Convert the textual value of boolean option ``opt`` into a ``bool``.

    Accepts true/false, 1/0, yes/no and on/off in any letter case.  Any other
    value prints an error and exits with status 1 instead of being silently
    misread.
    """
    value = arg.strip().lower()
    if value in _TRUE_ARG_VALUES:
        return True
    if value in _FALSE_ARG_VALUES:
        return False
    print(
        f"{ULTRASINGER_HEAD} {red_highlighted('Error: Value')} {blue_highlighted(arg)} {red_highlighted('is not a valid boolean for option')} {blue_highlighted(opt)}"
    )
    sys.exit(1)


def init_settings(argv: list[str]) -> None:
    """Populate the global ``settings`` object from command-line arguments.

    Args:
        argv: Command-line arguments without the program name.

    Prints the help text and exits when no option (or ``-h``) is given, and
    exits with status 1 for an unsupported ``--format_version`` or an invalid
    boolean value.  Errors raised by ``getopt.getopt`` for unknown options
    propagate unchanged.

    After parsing, the output path defaults to an ``output`` folder (in the
    working directory for ``https:`` inputs, otherwise next to the input
    file), and the GPU devices are detected unless ``--force_cpu`` is set.
    """
    long, short = arg_options()
    opts, _ = getopt.getopt(argv, short, long)
    if not opts:
        print_help()
        sys.exit()

    # NOTE: option names are compared with ``==``.  ``opt in ("--name")`` is a
    # substring test on a plain string (the parentheses do not make a tuple),
    # which e.g. let the short option "-m" match "--midi".
    for opt, arg in opts:
        if opt == "-h":
            print_help()
            sys.exit()
        elif opt in ("-i", "--ifile"):
            settings.input_file_path = arg
        elif opt in ("-o", "--ofile"):
            settings.output_file_path = arg
        elif opt == "--whisper":
            settings.transcriber = "whisper"
            settings.whisper_model = arg
        elif opt == "--whisper_align_model":
            settings.whisper_align_model = arg
        elif opt == "--whisper_batch_size":
            settings.whisper_batch_size = int(arg)
        elif opt == "--whisper_compute_type":
            settings.whisper_compute_type = arg
        elif opt == "--language":
            settings.language = arg
        elif opt == "--crepe":
            settings.crepe_model_capacity = arg
        elif opt == "--crepe_step_size":
            settings.crepe_step_size = int(arg)
        elif opt == "--plot":
            settings.create_plot = _parse_bool_arg(opt, arg)
        elif opt == "--midi":
            settings.create_midi = _parse_bool_arg(opt, arg)
        elif opt == "--hyphenation":
            # Parsed explicitly; command-line text must never reach eval().
            settings.hyphenation = _parse_bool_arg(opt, arg)
        elif opt == "--disable_separation":
            # ``not arg`` was False for every non-empty string, "False" included.
            settings.use_separated_vocal = not _parse_bool_arg(opt, arg)
        elif opt == "--disable_karaoke":
            settings.create_karaoke = not _parse_bool_arg(opt, arg)
        elif opt == "--create_audio_chunks":
            settings.create_audio_chunks = _parse_bool_arg(opt, arg)
        elif opt == "--force_cpu":
            settings.force_cpu = _parse_bool_arg(opt, arg)
            if settings.force_cpu:
                os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
        elif opt == "--force_whisper_cpu":
            settings.force_whisper_cpu = _parse_bool_arg(opt, arg)
        elif opt == "--force_crepe_cpu":
            settings.force_crepe_cpu = _parse_bool_arg(opt, arg)
        elif opt == "--format_version":
            if arg not in ('0.3.0', '1.0.0', '1.1.0'):
                print(
                    f"{ULTRASINGER_HEAD} {red_highlighted('Error: Format version')} {blue_highlighted(arg)} {red_highlighted('is not supported.')}"
                )
                sys.exit(1)
            settings.format_version = arg
        elif opt == "--keep_cache":
            # Declared without "=" in arg_options(), so getopt passes an empty
            # value: the presence of the flag itself means "keep the cache".
            settings.keep_cache = _parse_bool_arg(opt, arg) if arg else True

    if settings.output_file_path == "":
        if settings.input_file_path.startswith("https:"):
            dirname = os.getcwd()
        else:
            dirname = os.path.dirname(settings.input_file_path)
        settings.output_file_path = os.path.join(dirname, "output")

    if not settings.force_cpu:
        settings.tensorflow_device, settings.pytorch_device = check_gpu_support()
964
+
965
+
966
def arg_options():
    """Return ``(long, short)`` option specifications for ``getopt``.

    ``long`` is the list of long option names (a trailing ``=`` marks options
    that take a value) and ``short`` is the short option string.
    """
    options_with_value = (
        "ifile",
        "ofile",
        "crepe",
        "crepe_step_size",
        "whisper",
        "whisper_align_model",
        "whisper_batch_size",
        "whisper_compute_type",
        "language",
        "plot",
        "midi",
        "hyphenation",
        "disable_separation",
        "disable_karaoke",
        "create_audio_chunks",
        "force_cpu",
        "force_whisper_cpu",
        "force_crepe_cpu",
        "format_version",
    )
    long = [name + "=" for name in options_with_value]
    # Plain flag: takes no value.
    long.append("keep_cache")
    short = "hi:o:amv:"
    return long, short
991
 
 
 
 
 
 
 
 
 
992
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
if __name__ == "__main__":
    # Run only when executed as a script; pass the arguments without the
    # program name.
    cli_arguments = sys.argv[1:]
    main(cli_arguments)