Spaces:
Sleeping
Sleeping
File size: 4,118 Bytes
f717773 1494da1 f717773 e3a6426 4ae4c8e 78c9bd5 3a1a0a3 3aeef88 f5deb64 03ff6dd 3a1a0a3 f717773 1f8abba 03ff6dd 1494da1 03ff6dd 1494da1 03ff6dd 1494da1 03ff6dd 1494da1 03ff6dd 1494da1 03ff6dd 1494da1 03ff6dd 1494da1 e3a6426 1494da1 e3a6426 1494da1 78c9bd5 4ae4c8e 78c9bd5 3a1a0a3 3aeef88 f5deb64 3aeef88 0675963 3aeef88 f5deb64 03ff6dd 3a1a0a3 03ff6dd f717773 e3a6426 4ae4c8e 78c9bd5 3a1a0a3 3aeef88 f5deb64 03ff6dd 3a1a0a3 03ff6dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
from dataclasses import dataclass, fields
import gradio as gr
from typing import Optional
@dataclass
class WhisperGradioComponents:
    """
    A data class for Gradio components of the Whisper parameters.

    Holds the components themselves, i.e. the inputs "before" Gradio pre-processing.
    See more about Gradio pre-processing: https://www.gradio.app/docs/components

    Attributes
    ----------
    model_size: gr.Dropdown
        Whisper model size.
    lang: gr.Dropdown
        Source language of the file to transcribe.
    is_translate: gr.Checkbox
        Boolean value that determines whether to translate to English.
        It's Whisper's feature to translate speech from another language directly into English end-to-end.
    beam_size: gr.Number
        Int value that is used for decoding option.
    log_prob_threshold: gr.Number
        If the average log probability over sampled tokens is below this value, treat as failed.
    no_speech_threshold: gr.Number
        If the no_speech probability is higher than this value AND
        the average log probability over sampled tokens is below `log_prob_threshold`,
        consider the segment as silent.
    compute_type: gr.Dropdown
        Compute type for transcription.
        See more info: https://opennmt.net/CTranslate2/quantization.html
    best_of: gr.Number
        Number of candidates when sampling with non-zero temperature.
    patience: gr.Number
        Beam search patience factor.
    condition_on_previous_text: gr.Checkbox
        If True, the previous output of the model is provided as a prompt for the next window;
        disabling may make the text inconsistent across windows, but the model becomes less prone to
        getting stuck in a failure loop, such as repetition looping or timestamps going out of sync.
    initial_prompt: gr.Textbox
        Optional text to provide as a prompt for the first window. This can be used to provide, or
        "prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
        to make it more likely to predict those words correctly.
    temperature: gr.Slider
        Temperature for sampling. It can be a tuple of temperatures,
        which will be successively used upon failures according to either
        `compression_ratio_threshold` or `log_prob_threshold`.
    compression_ratio_threshold: gr.Number
        If the gzip compression ratio is above this value, treat as failed.
    vad_filter: gr.Checkbox
        Enable the voice activity detection (VAD) to filter out parts of the audio
        without speech. This step is using the Silero VAD model
        https://github.com/snakers4/silero-vad.
    """
    # NOTE: the docstring must precede the fields; a string placed after them is
    # just an expression statement and never becomes the class ``__doc__``.
    model_size: gr.Dropdown
    lang: gr.Dropdown
    is_translate: gr.Checkbox
    beam_size: gr.Number
    log_prob_threshold: gr.Number
    no_speech_threshold: gr.Number
    compute_type: gr.Dropdown
    best_of: gr.Number
    patience: gr.Number
    condition_on_previous_text: gr.Checkbox
    initial_prompt: gr.Textbox
    temperature: gr.Slider
    compression_ratio_threshold: gr.Number
    vad_filter: gr.Checkbox

    def to_list(self) -> list:
        """
        Converts the data class attributes into a list. Use "before" Gradio pre-processing.
        See more about Gradio pre-processing: https://www.gradio.app/docs/components

        The list follows the field declaration order of this class.

        Returns
        ----------
        A list of Gradio components
        """
        return [getattr(self, f.name) for f in fields(self)]
@dataclass
class WhisperValues:
    """
    A data class to use Whisper parameters. Use "after" Gradio pre-processing.
    See more about Gradio pre-processing: https://www.gradio.app/docs/components

    Attributes
    ----------
    model_size: str
        Whisper model size.
    lang: str
        Source language of the file to transcribe.
    is_translate: bool
        Whether to translate to English.
    beam_size: int
        Beam size used as a decoding option.
    log_prob_threshold: float
        If the average log probability over sampled tokens is below this value, treat as failed.
    no_speech_threshold: float
        If the no_speech probability is higher than this value AND
        the average log probability over sampled tokens is below `log_prob_threshold`,
        consider the segment as silent.
    compute_type: str
        Compute type for transcription.
    best_of: int
        Number of candidates when sampling with non-zero temperature.
    patience: float
        Beam search patience factor.
    condition_on_previous_text: bool
        If True, the previous output of the model is provided as a prompt for the next window.
    initial_prompt: Optional[str]
        Optional text to provide as a prompt for the first window.
    temperature: float
        Temperature for sampling.
    compression_ratio_threshold: float
        If the gzip compression ratio is above this value, treat as failed.
    vad_filter: bool
        Enable the voice activity detection (VAD) to filter out parts of the audio without speech.
    """
    # NOTE: the docstring must precede the fields; a string placed after them is
    # just an expression statement and never becomes the class ``__doc__``.
    model_size: str
    lang: str
    is_translate: bool
    beam_size: int
    log_prob_threshold: float
    no_speech_threshold: float
    compute_type: str
    best_of: int
    patience: float
    condition_on_previous_text: bool
    initial_prompt: Optional[str]
    temperature: float
    compression_ratio_threshold: float
    vad_filter: bool
|