""" A script to download the InfoRE dataset and textgrid files. """ import shutil from pathlib import Path import pooch from pooch import Unzip from tqdm.cli import tqdm def download_infore_data(): """download infore wav files""" files = pooch.retrieve( url="https://huggingface.co/datasets/ntt123/infore/resolve/main/infore_16k_denoised.zip", known_hash="2445527b345fb0b1816ce3c8f09bae419d6bbe251f16d6c74d8dd95ef9fb0737", processor=Unzip(), progressbar=True, ) data_dir = Path(sorted(files)[0]).parent return data_dir def download_textgrid(): """download textgrid files""" files = pooch.retrieve( url="https://huggingface.co/datasets/ntt123/infore/resolve/main/infore_tg.zip", known_hash="26e4f53025220097ea95dc266657de8d65104b0a17a6ffba778fc016c8dd36d7", processor=Unzip(), progressbar=True, ) data_dir = Path(sorted(files)[0]).parent return data_dir DATA_ROOT = Path("./train_data") DATA_ROOT.mkdir(parents=True, exist_ok=True) wav_dir = download_infore_data() tg_dir = download_textgrid() for path in tqdm(tg_dir.glob("*.TextGrid")): wav_name = path.with_suffix(".wav").name wav_src = wav_dir / wav_name shutil.copy(path, DATA_ROOT) shutil.copy(wav_src, DATA_ROOT)