diffsingerkr / Pattern_Generator.py
codejin's picture
initial commit
67d041f
import numpy as np
import mido, os, pickle, yaml, argparse, math, librosa, hgtk, logging
from tqdm import tqdm
from pysptk.sptk import rapt
from typing import List, Tuple
from argparse import Namespace # for type
import torch
from typing import Dict
from meldataset import mel_spectrogram, spectrogram, spec_energy
from Arg_Parser import Recursive_Parse
def Convert_Feature_Based_Music(
    music: List[Tuple[float, str, int]],
    sample_rate: int,
    frame_shift: int,
    consonant_duration: int= 3,
    equality_duration: bool= False
    ):
    """
    Turn a sequence of (time, lyric, note) entries into token-level lists.

    Each entry's time is multiplied by ``sample_rate``, rounded, and divided
    by ``frame_shift``; the division remainder is carried into the next entry
    so rounding loss does not accumulate across the sequence.
    NOTE(review): presumably ``message_time`` is in seconds and
    ``frame_shift`` is in samples per frame -- confirm against the caller.

    Args:
        music: Entries of ``(message_time, lyric, note)``.
        sample_rate: Multiplier applied to ``message_time``.
        frame_shift: Divisor turning the scaled time into a frame count.
        consonant_duration: Frame count given to the onset and to the coda
            when the entry is long enough.
        equality_duration: When true, always split an entry's frames into
            three near-equal parts instead of using ``consonant_duration``.

    Returns:
        A tuple ``(lyrics, notes, durations)`` of equal-length lists. An
        entry whose lyric is the literal ``'<X>'`` contributes one token;
        any other lyric contributes three tokens (the result of
        ``Decompose``), each carrying the entry's note.
    """
    lyrics, notes, durations = [], [], []
    leftover = 0    # remainder of the previous entry, not yet assigned to a frame

    for message_time, lyric, note in music:
        scaled_length = round(message_time * sample_rate) + leftover
        frame_count, leftover = divmod(scaled_length, frame_shift)

        if lyric == '<X>':
            # The '<X>' marker is kept as a single, undivided token.
            lyrics.append(lyric)
            notes.append(note)
            durations.append(frame_count)
            continue

        lyrics.extend(Decompose(lyric))
        notes.extend([note] * 3)

        split_evenly = equality_duration or frame_count < consonant_duration * 3
        if split_evenly:
            # Three equal parts; whatever does not divide by 3 goes to the middle part.
            third, extra = divmod(frame_count, 3)
            durations.extend([third, third + extra, third])
        else:
            nucleus_length = frame_count - consonant_duration * 2
            durations.extend([
                consonant_duration,     # onset
                nucleus_length,         # nucleus
                consonant_duration,     # coda
                ])

    return lyrics, notes, durations
def Expand_by_Duration(
    lyrics: List[str],
    notes: List[int],
    durations: List[int],
    ):
    """
    Repeat every lyric and note token once per unit of its duration.

    Args:
        lyrics: Token-level lyric symbols.
        notes: Token-level note values, paired positionally with ``durations``.
        durations: Token-level lengths; entry ``i`` is the number of times
            token ``i`` is repeated. A length of 0 drops the token.

    Returns:
        A tuple ``(lyrics, notes, durations)`` of expanded lists. The
        returned ``durations`` holds, for each expanded position, its
        0-based index within its source token (0, 1, ..., length - 1).
    """
    # Flatten with nested comprehensions rather than sum(list_of_lists, []):
    # sum() rebuilds the accumulated list on every addition, which is
    # quadratic in the size of the output.
    expanded_lyrics = [
        repeated
        for lyric, length in zip(lyrics, durations)
        for repeated in [lyric] * length
        ]
    expanded_notes = [
        repeated
        for note, length in zip(notes, durations)
        for repeated in [note] * length
        ]
    # Position of each expanded element inside its own token.
    positions = [index for length in durations for index in range(length)]

    return expanded_lyrics, expanded_notes, positions
def Decompose(syllable: str):
    """
    Split a syllable into three parts via ``hgtk.letter.decompose``.

    Args:
        syllable: The text passed to ``hgtk.letter.decompose``.

    Returns:
        A tuple ``(onset, nucleus, coda)`` where ``coda`` has ``'_'``
        appended. NOTE(review): the suffix presumably keeps coda tokens
        distinct from onset tokens that share the same letter -- confirm
        against the token vocabulary.
    """
    parts = hgtk.letter.decompose(syllable)
    onset, nucleus, coda = parts
    return onset, nucleus, coda + '_'