File size: 545 Bytes
7596274
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import torch
from resemblyzer import VoiceEncoder

class ResemblyzerVoiceEncoder:
    """Callable wrapper that turns waveform tensors into speaker embeddings.

    The heavy lifting is delegated to ``resemblyzer.VoiceEncoder``; this class
    only converts between ``torch.Tensor`` and the array type passed to / 
    returned by ``VoiceEncoder.embed_utterance``.
    """

    def __init__(self, device) -> None:
        """Create the underlying ``VoiceEncoder`` on ``device``.

        Args:
            device: Forwarded unchanged to ``VoiceEncoder``.
        """
        self.model = VoiceEncoder(device)

    def _embed_one(self, wav: torch.Tensor) -> torch.Tensor:
        """Embed a single utterance and return the result as a float32 CPU tensor."""
        # ``Tensor.numpy()`` refuses tensors that require grad or live on a
        # non-CPU device, so drop the autograd graph and move to host first.
        samples = wav.detach().cpu().numpy()
        embedding = self.model.embed_utterance(samples)
        return torch.tensor(embedding).float().cpu()

    def __call__(self, audio: torch.Tensor) -> torch.Tensor:
        """Embed one utterance or a batch of utterances.

        Args:
            audio: Either a 1-D tensor (a single utterance) or a tensor whose
                first dimension indexes utterances. The input may be on any
                device and may require grad; it is detached and copied to the
                CPU before being handed to the encoder.
                NOTE(review): presumably each row is a mono waveform at the
                sample rate Resemblyzer expects -- confirm against callers.

        Returns:
            A float32 CPU tensor: the single embedding for 1-D input, otherwise
            the per-utterance embeddings stacked along a new leading dimension.
        """
        if audio.ndimension() == 1:
            return self._embed_one(audio)
        # Iterating a tensor walks its first dimension, i.e. one utterance
        # per step; each one is embedded independently and then stacked.
        return torch.stack([self._embed_one(utterance) for utterance in audio])