Spaces:
Sleeping
Sleeping
File size: 5,493 Bytes
db2039e ec231a0 cdb03d3 db2039e bbb23df 9679f1e db2039e 48dc726 db60455 db2039e 25f2bab db2039e dcca39f c89be6a 1c725ac 8a27aeb 48dc726 1c725ac c89be6a 8a27aeb 48dc726 c89be6a c8ea3a4 ec231a0 c8ea3a4 9212c90 db2039e 2abc8d7 db2039e b065a65 cdb03d3 db2039e b065a65 6fb5189 db2039e 13698da db2039e 61ac4a7 db2039e 83f9929 227c267 b065a65 db2039e 752e22b b065a65 e3a6426 3a1a0a3 3aeef88 db2039e a5dbf21 db2039e 2abc8d7 b065a65 db2039e 22a07bc db2039e b065a65 c8ea3a4 b065a65 c8ea3a4 25c9e51 b065a65 db2039e c8ea3a4 ec231a0 56b39a5 342afe7 56b39a5 ec231a0 2abc8d7 ec231a0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import os
import time
import numpy as np
import torch
from typing import BinaryIO, Union, Tuple, List
import faster_whisper
from faster_whisper.vad import VadOptions
import ctranslate2
import whisper
import gradio as gr
from argparse import Namespace
import spaces
from modules.whisper.whisper_parameter import *
from modules.whisper.whisper_base import WhisperBase
class FasterWhisperInference(WhisperBase):
    """
    Whisper inference backend that runs transcription through faster-whisper.

    On construction it discovers the selectable models (stock whisper model
    names plus any extra directories found in ``model_dir``), picks a device
    and derives the compute types offered for that device.
    """

    def __init__(self,
                 model_dir: str,
                 output_dir: str,
                 args: Namespace
                 ):
        """
        Parameters
        ----------
        model_dir: str
            Directory that is scanned for models and used as download root.
        output_dir: str
            Forwarded unchanged to the base class.
        args: Namespace
            Forwarded unchanged to the base class.
        """
        super().__init__(
            model_dir=model_dir,
            output_dir=output_dir,
            args=args
        )
        self.model_paths = self.get_model_paths()
        self.device = self.get_device()
        self.available_models = self.model_paths.keys()
        self.available_compute_types = self.get_available_compute_type()

    @spaces.GPU(duration=120)
    def transcribe(self,
                   audio: Union[str, BinaryIO, np.ndarray],
                   progress: gr.Progress,
                   *whisper_params,
                   ) -> Tuple[List[dict], float]:
        """
        transcribe method for faster-whisper.

        Parameters
        ----------
        audio: Union[str, BinaryIO, np.ndarray]
            Audio path or file binary or Audio numpy array
        progress: gr.Progress
            Indicator to show progress directly in gradio.
        *whisper_params: tuple
            Parameters related with whisper. This will be dealt with "WhisperParameters" data class

        Returns
        ----------
        segments_result: List[dict]
            list of dicts that includes start, end timestamps and transcribed text
        elapsed_time: float
            elapsed time for transcription
        """
        start_time = time.time()

        params = WhisperParameters.as_value(*whisper_params)

        # update_model() stores the resolved entry of self.model_paths in
        # self.current_model_size (a path for fine-tuned models), so the
        # requested name has to be resolved the same way before comparing.
        # Comparing the bare name would force a reload on every call for
        # any model whose name differs from its path.
        requested_model = self.model_paths.get(params.model_size, params.model_size)
        needs_reload = (
            requested_model != self.current_model_size
            or self.model is None
            or self.current_compute_type != params.compute_type
        )
        if needs_reload:
            self.update_model(params.model_size, params.compute_type, progress)

        # Translation is only requested when the loaded model is listed as
        # translatable; otherwise fall back to plain transcription.
        if params.is_translate and self.current_model_size in self.translatable_models:
            task = "translate"
        else:
            task = "transcribe"

        segments, info = self.model.transcribe(
            audio=audio,
            language=params.lang,
            task=task,
            beam_size=params.beam_size,
            log_prob_threshold=params.log_prob_threshold,
            no_speech_threshold=params.no_speech_threshold,
            best_of=params.best_of,
            patience=params.patience,
            temperature=params.temperature,
            compression_ratio_threshold=params.compression_ratio_threshold,
        )
        progress(0, desc="Loading audio..")

        # NOTE(review): faster-whisper is understood to yield segments lazily,
        # so the actual decoding would happen inside this loop - confirm.
        total_duration = info.duration
        segments_result = []
        for segment in segments:
            # Guard against a zero/unknown duration to avoid ZeroDivisionError.
            fraction = segment.start / total_duration if total_duration else 0
            progress(fraction, desc="Transcribing..")
            segments_result.append({
                "start": segment.start,
                "end": segment.end,
                "text": segment.text
            })

        elapsed_time = time.time() - start_time
        return segments_result, elapsed_time

    @spaces.GPU(duration=120)
    def update_model(self,
                     model_size: str,
                     compute_type: str,
                     progress: gr.Progress
                     ):
        """
        Update current model setting

        Parameters
        ----------
        model_size: str
            Size of whisper model. Must be a key of ``self.model_paths``.
        compute_type: str
            Compute type for transcription.
            see more info : https://opennmt.net/CTranslate2/quantization.html
        progress: gr.Progress
            Indicator to show progress directly in gradio.

        Raises
        ----------
        KeyError
            If ``model_size`` is not a key of ``self.model_paths``.
        """
        progress(0, desc="Initializing Model..")
        # current_model_size holds the resolved model_paths entry (name or path),
        # which is what gets handed to faster-whisper below.
        self.current_model_size = self.model_paths[model_size]
        self.current_compute_type = compute_type
        self.model = faster_whisper.WhisperModel(
            device=self.device,
            model_size_or_path=self.current_model_size,
            download_root=self.model_dir,
            compute_type=self.current_compute_type
        )

    def get_model_paths(self):
        """
        Get available models from models path including fine-tuned model.

        Returns
        ----------
        Dict mapping model name to the value passed to faster-whisper:
        the name itself for stock whisper models, or a path built from the
        current working directory, ``self.model_dir`` and the entry name
        for any other entry found in ``self.model_dir``.
        """
        # Query the stock model list once instead of on every loop iteration.
        stock_models = whisper.available_models()
        stock_model_names = set(stock_models)
        model_paths = {model: model for model in stock_models}

        faster_whisper_prefix = "models--Systran--faster-whisper-"
        wrong_dirs = {".locks"}
        webui_dir = os.getcwd()

        # Iterate in os.listdir order (skipping non-model entries) so the
        # resulting dict order does not depend on set iteration order.
        for entry in os.listdir(self.model_dir):
            if entry in wrong_dirs:
                continue

            model_name = entry
            # Strip the cache-directory prefix only when it really is a prefix.
            if model_name.startswith(faster_whisper_prefix):
                model_name = model_name[len(faster_whisper_prefix):]

            if model_name not in stock_model_names:
                # NOTE(review): for a prefixed cache directory the path is built
                # from the stripped name, not the on-disk directory name -
                # confirm this is intended.
                model_paths[model_name] = os.path.join(webui_dir, self.model_dir, model_name)
        return model_paths

    def get_available_compute_type(self):
        """
        Return the compute types offered for the current device.

        Returns
        ----------
        List of compute type names: one list when ``self.device`` is "cuda",
        another list for every other device value.
        """
        if self.device == "cuda":
            return ['float32', 'int8_float16', 'float16', 'int8', 'int8_float32']
        return ['int16', 'float32', 'int8', 'int8_float32']

    @staticmethod
    @spaces.GPU(duration=120)
    def get_device():
        """
        Pick the device string handed to faster-whisper.

        Returns
        ----------
        "cuda" when torch reports CUDA, "auto" when torch reports MPS,
        otherwise "cpu".
        """
        if torch.cuda.is_available():
            return "cuda"
        elif torch.backends.mps.is_available():
            # NOTE(review): presumably "auto" is used because the backend has
            # no dedicated MPS device option - confirm against CTranslate2 docs.
            return "auto"
        else:
            return "cpu"
|