File size: 1,297 Bytes
12da6cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
"""
A script to download the InfoRE dataset and textgrid files.
"""
import shutil
from pathlib import Path

import pooch
from pooch import Unzip
from tqdm.cli import tqdm


def download_infore_data():
    """Fetch and unzip the InfoRE 16k denoised wav archive.

    The archive is retrieved through ``pooch.retrieve`` (checked against the
    pinned hash) and unpacked by the ``Unzip`` processor.

    Returns:
        The parent directory of the first path, in sorted order, among the
        entries returned by ``pooch.retrieve``.
    """
    archive_url = "https://huggingface.co/datasets/ntt123/infore/resolve/main/infore_16k_denoised.zip"
    archive_hash = "2445527b345fb0b1816ce3c8f09bae419d6bbe251f16d6c74d8dd95ef9fb0737"
    extracted = pooch.retrieve(
        url=archive_url,
        known_hash=archive_hash,
        processor=Unzip(),
        progressbar=True,
    )
    # Use the sorted-first entry as the anchor for locating the folder.
    first_entry = sorted(extracted)[0]
    return Path(first_entry).parent


def download_textgrid():
    """Fetch and unzip the InfoRE TextGrid archive.

    The archive is retrieved through ``pooch.retrieve`` (checked against the
    pinned hash) and unpacked by the ``Unzip`` processor.

    Returns:
        The parent directory of the first path, in sorted order, among the
        entries returned by ``pooch.retrieve``.
    """
    archive_url = "https://huggingface.co/datasets/ntt123/infore/resolve/main/infore_tg.zip"
    archive_hash = "26e4f53025220097ea95dc266657de8d65104b0a17a6ffba778fc016c8dd36d7"
    extracted = pooch.retrieve(
        url=archive_url,
        known_hash=archive_hash,
        processor=Unzip(),
        progressbar=True,
    )
    # Use the sorted-first entry as the anchor for locating the folder.
    first_entry = sorted(extracted)[0]
    return Path(first_entry).parent


# Destination folder that collects every TextGrid next to its matching wav.
DATA_ROOT = Path("./train_data")
DATA_ROOT.mkdir(parents=True, exist_ok=True)
wav_dir = download_infore_data()
tg_dir = download_textgrid()

# Materialise the glob so tqdm knows how many items there are and can show a
# total/percentage (a bare generator has no length). Sorting additionally
# makes the copy order deterministic across runs.
textgrid_paths = sorted(tg_dir.glob("*.TextGrid"))

for path in tqdm(textgrid_paths):
    # The wav shares the TextGrid's base name, only the suffix differs.
    wav_name = path.with_suffix(".wav").name
    wav_src = wav_dir / wav_name
    # Copy the wav first: if it is missing, shutil.copy raises before the
    # TextGrid is written, so DATA_ROOT never ends up holding an alignment
    # file without its audio.
    shutil.copy(wav_src, DATA_ROOT)
    shutil.copy(path, DATA_ROOT)