# Page residue from the code viewer this file was copied from (not part of the
# program): "Spaces: Running on Zero"; file size: 1,975 bytes; revision a84a65c.
# The viewer's line-number gutter (1-65) has been dropped.
import glob
import numpy as np
from tqdm import tqdm
import torchaudio
from typing import Any, Dict, List, Optional, Union
from pathlib import Path
import pandas as pd
import random
import os
import csv
def save_df_to_tsv(dataframe, path: Union[str, Path]):
    """Write ``dataframe`` to ``path`` as a tab-separated UTF-8 text file.

    The header row is written and the index is omitted. Fields are never
    quoted (``csv.QUOTE_NONE``); a backslash is used as the escape character
    instead.

    Args:
        dataframe: Object exposing a pandas-style ``to_csv`` method.
        path: Destination file, given either as a string or as a ``Path``
            (non-string values are converted with ``as_posix()``).
    """
    if isinstance(path, str):
        target = path
    else:
        target = path.as_posix()

    tsv_options = {
        "sep": "\t",
        "header": True,
        "index": False,
        "encoding": "utf-8",
        "escapechar": "\\",
        "quoting": csv.QUOTE_NONE,
    }
    dataframe.to_csv(target, **tsv_options)
def generate(
    root: Union[str, Path] = '/apdcephfs/share_1316500/nlphuang/data/text_to_audio/text_to_audio2/manifest/audioset-music/',
    mel_dir: Union[str, Path] = '/apdcephfs//share_1316500/nlphuang/data/text_to_audio/text_to_audio2/music/mels/audioset',
    input_name: str = 'audioset_new.tsv',
    output_name: str = 'audioset_new_intern.tsv',
):
    """Build a manifest TSV that keeps only the rows whose mel file exists.

    Reads ``<root>/<input_name>`` as an unquoted tab-separated file, derives a
    mel path ``<mel_dir>/<stem of the row's "name">_mel.npy`` for every row,
    drops the rows whose mel file is not present on disk, and writes the
    remaining rows (plus the derived ``mel_path`` column) to
    ``<root>/<output_name>`` via ``save_df_to_tsv``.

    Calling ``generate()`` with no arguments uses the same locations the
    function was originally hard-coded to.

    Args:
        root: Directory holding the input manifest; the output is written
            there as well.
        mel_dir: Directory that is searched for ``*_mel.npy`` files.
        input_name: File name of the input manifest inside ``root``.
        output_name: File name of the manifest to write inside ``root``.

    Raises:
        ValueError: If the input manifest contains no data rows.
        KeyError: If a row lacks one of the required input columns.
        OSError: If the input manifest cannot be opened.
    """
    MANIFEST_COLUMNS = ["name", "dataset", "ori_cap", "audio_path", "mel_path", "duration"]

    input_path = os.path.join(root, input_name)
    output_path = os.path.join(root, output_name)

    # newline='' is what the csv module asks for on files handed to a reader.
    with open(input_path, encoding='utf-8', newline='') as f:
        reader = csv.DictReader(
            f,
            delimiter="\t",
            quotechar=None,
            doublequote=False,
            lineterminator="\n",
            quoting=csv.QUOTE_NONE,
        )
        items = [dict(e) for e in tqdm(reader)]

    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if not items:
        raise ValueError(f"No rows found in {input_path}")

    skip = 0
    manifest = {c: [] for c in MANIFEST_COLUMNS}
    # Iterating the list directly lets tqdm report the total; the index was unused.
    for item in tqdm(items):
        # NOTE: the default ``mel_dir`` keeps its original double slash so the
        # mel_path strings written to the manifest stay byte-identical.
        mel_path = f'{mel_dir}/{Path(item["name"]).stem}_mel.npy'
        if not os.path.exists(mel_path):
            skip += 1
            continue
        # The derived mel_path always wins over any same-named input column.
        row = dict(item, mel_path=mel_path)
        for column in MANIFEST_COLUMNS:
            manifest[column].append(row[column])

    print(f"Writing manifest to {output_path}..., skip: {skip}")
    save_df_to_tsv(pd.DataFrame.from_dict(manifest), output_path)
# Script entry point: build the manifest only when this file is run directly.
if __name__ == '__main__':
    generate()