toolkit / audiogen_medium
k4d3's picture
typing on the keyboard still works
308c69e
raw
history blame
722 Bytes
#!/usr/bin/env python
import sys
import torchaudio
from audiocraft.models import AudioGen
from audiocraft.data.audio import audio_write
MODEL_NAME = 'facebook/audiogen-medium'
DURATION_SECONDS = 5  # length of every generated clip, in seconds

# Characters that must not reach the output file name: spaces are replaced
# to keep names shell-friendly, path separators so that a prompt cannot
# redirect the output into another directory.
_STEM_TRANSLATION = str.maketrans({' ': '_', '/': '_', '\\': '_'})


def output_stem(description):
    """Return the output file stem (no extension) for a text prompt.

    Spaces and path separators in ``description`` are replaced by
    underscores; every other character is kept as-is.
    """
    return description.translate(_STEM_TRANSLATION)


def main(argv=None):
    """Generate one audio clip per text prompt and write each to disk.

    Args:
        argv: Prompts to render. Defaults to ``sys.argv[1:]`` when None.

    Returns:
        Process exit status: 0 on success, 1 when no prompt was given.
    """
    descriptions = sys.argv[1:] if argv is None else list(argv)
    if not descriptions:
        # Validate before loading the model: fetching the pretrained
        # checkpoint is by far the most expensive step of this script.
        print('At least one prompt should be provided', file=sys.stderr)
        return 1

    model = AudioGen.get_pretrained(MODEL_NAME)
    model.set_generation_params(duration=DURATION_SECONDS)

    # One sample is generated per prompt; zip() pairs each sample with
    # the prompt it was generated from.
    wav = model.generate(descriptions)
    for one_wav, description in zip(wav, descriptions):
        # The stem is derived from the prompt; audio_write is expected to
        # append the file extension itself. Loudness normalization with
        # the compressor enabled, as in the upstream AudioCraft example.
        audio_write(
            output_stem(description),
            one_wav.cpu(),
            model.sample_rate,
            strategy="loudness",
            loudness_compressor=True,
        )
    return 0


if __name__ == '__main__':
    sys.exit(main())