# Python dependencies, one requirement specifier per line so the list can be
# consumed with `pip install -r <this file>`.
# Entries keep their original order and version constraints; the duplicated
# `natsort>=8.4.0` entry has been collapsed to a single line.

torch==2.3.0
# NOTE(review): torchaudio is unpinned while torch is pinned exactly; consider
# pinning the torchaudio release that matches torch 2.3.0 — confirm before changing.
torchaudio
transformers>=4.35.2
datasets>=2.14.5
lightning>=2.1.0
hydra-core>=1.3.2
tensorboard>=2.14.1
natsort>=8.4.0
einops>=0.7.0
librosa>=0.10.1
rich>=13.5.3
gradio>=4.0.0
wandb>=0.15.11
grpcio>=1.58.0
kui>=1.6.0
zibai-server>=0.9.0
loguru>=0.6.0
loralib>=0.1.2
pyrootutils>=1.0.4
vector_quantize_pytorch==1.14.24
samplerate>=0.2.1
resampy>=0.4.3
spaces>=0.26.1
einx[torch]==0.2.2
opencc
faster-whisper
ormsgpack
# NOTE(review): this installs the PyPI distribution named `ffmpeg`, not the
# ffmpeg command-line binary; verify this is the intended package.
ffmpeg
soundfile