wanna_hear_your_voice / network /models /embedding_model.py
hieugiaosu
Add application file
7596274
raw
history blame contribute delete
545 Bytes
import torch
from resemblyzer import VoiceEncoder
class ResemblyzerVoiceEncoder:
    """Callable wrapper that turns audio tensors into Resemblyzer embeddings.

    The wrapped ``VoiceEncoder`` is fed NumPy arrays; this class handles the
    tensor -> array -> tensor round trip so callers can stay in torch.
    """

    def __init__(self, device) -> None:
        """Create the underlying ``VoiceEncoder`` on ``device``."""
        self.model = VoiceEncoder(device)

    def _embed_one(self, wav: torch.Tensor) -> torch.Tensor:
        """Embed a single utterance and return a float32 CPU tensor."""
        # ``Tensor.numpy()`` raises for tensors that require grad or live on
        # a non-CPU device, so detach and move to host memory first.
        samples = wav.detach().cpu().numpy()
        embedding = self.model.embed_utterance(samples)
        return torch.tensor(embedding).float().cpu()

    def __call__(self, audio: torch.Tensor) -> torch.Tensor:
        """Embed one utterance or a batch of utterances.

        Args:
            audio: Either a 1-D tensor (one utterance) or a tensor whose
                first dimension indexes utterances.
                NOTE(review): presumably raw waveform samples at the rate
                the encoder expects -- confirm against the caller.

        Returns:
            For 1-D input, the embedding of that utterance. Otherwise the
            per-row embeddings stacked along a new leading dimension.
            The result is always a float32 tensor on the CPU.

        Raises:
            RuntimeError: If a batched input has zero rows (``torch.stack``
                rejects an empty list).
        """
        if audio.ndimension() == 1:
            return self._embed_one(audio)
        # Iterating a tensor yields its slices along dim 0, i.e. audio[i, :].
        return torch.stack([self._embed_one(row) for row in audio])