whispervq / models /audio.py
jan-ai's picture
Upload folder using huggingface_hub
6debc39 verified
raw
history blame
760 Bytes
from pydantic import BaseModel
from enum import Enum
class AudioFormat(str, Enum):
    """Audio container/codec identifiers accepted by this module.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value (``AudioFormat.WAV == "wav"``) and can be built from
    that value (``AudioFormat("wav")``).

    The trailing comments record which decoding backend(s) the module-level
    ``FORMAT_BACKENDS`` table associates with each format.
    """

    WAV = "wav"    # soundfile and ffmpeg
    MP3 = "mp3"    # ffmpeg only
    FLAC = "flac"  # soundfile and ffmpeg
    AAC = "aac"    # ffmpeg only
    OGG = "ogg"    # ffmpeg only
    OPUS = "opus"  # ffmpeg only
    PCM = "pcm"    # raw PCM data
# Lookup table: AudioFormat member -> list of backend names registered for it.
# Each value is its own fresh list, built from the pairs below in order.
FORMAT_BACKENDS = {
    audio_format: list(backend_names)
    for audio_format, backend_names in (
        (AudioFormat.WAV, ("soundfile", "ffmpeg")),
        (AudioFormat.MP3, ("ffmpeg",)),
        (AudioFormat.FLAC, ("soundfile", "ffmpeg")),
        (AudioFormat.AAC, ("ffmpeg",)),
        (AudioFormat.OGG, ("ffmpeg",)),
        (AudioFormat.OPUS, ("ffmpeg",)),
        (AudioFormat.PCM, ("soundfile",)),
    )
}
class AudioRequest(BaseModel):
    """Request model pairing an audio payload with its declared format."""

    # Audio payload as a string. The encoding is not visible in this file
    # (presumably base64 -- confirm against the code that consumes it).
    data: str

    # Declared format of ``data``. The default is the enum member rather than
    # the bare string "wav": pydantic does not validate default values unless
    # asked to, so a string default would leave ``format`` as a plain ``str``
    # (no ``.name`` / ``.value``) whenever the caller omits the field.
    # ``AudioFormat.WAV == "wav"`` still holds, so string comparisons by
    # existing callers keep working.
    format: AudioFormat = AudioFormat.WAV