File size: 545 Bytes
7596274 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
import torch
from resemblyzer import VoiceEncoder
class ResemblyzerVoiceEncoder:
    """Callable wrapper that runs Resemblyzer's ``VoiceEncoder`` on torch tensors.

    Calling an instance converts the input tensor(s) to NumPy, passes each
    waveform to ``VoiceEncoder.embed_utterance`` and returns the result as a
    float32 tensor on the CPU.
    """

    def __init__(self, device) -> None:
        """Create the underlying encoder.

        Args:
            device: Forwarded unchanged as the first argument of
                ``VoiceEncoder``.
        """
        self.model = VoiceEncoder(device)

    def _embed_one(self, waveform: torch.Tensor) -> torch.Tensor:
        """Embed a single waveform and return it as a float32 CPU tensor."""
        # ``Tensor.numpy()`` raises for tensors that require grad or that do
        # not live on the CPU, so detach and move to the CPU first. Both calls
        # are no-ops for a plain CPU tensor.
        samples = waveform.detach().cpu().numpy()
        return torch.tensor(self.model.embed_utterance(samples)).float().cpu()

    def __call__(self, audio: torch.Tensor) -> torch.Tensor:
        """Compute embeddings for one waveform or a batch of waveforms.

        Args:
            audio: Either a 1-D tensor (a single waveform) or a tensor whose
                first dimension indexes the waveforms of a batch.
                NOTE(review): presumably batches are 2-D ``(batch, samples)``;
                confirm against callers.

        Returns:
            For a 1-D input, the embedding of that waveform. Otherwise the
            per-row embeddings stacked along a new leading dimension.
        """
        if audio.ndimension() == 1:
            return self._embed_one(audio)
        # Each row is embedded independently; ``torch.stack`` requires every
        # embedding to have the same shape.
        return torch.stack([self._embed_one(row) for row in audio])
|