Transcribe-TG / vosk_handler.py
API-Handler's picture
Update vosk_handler.py
8feb923 verified
from vosk import Model, KaldiRecognizer
import wave
import json
class VoskTranscriber:
    """Offline speech-to-text helper built on a Vosk model.

    The model is loaded once at construction time and reused for every
    call to :meth:`transcribe_audio`.
    """

    # Number of audio frames handed to the recognizer per iteration.
    _FRAMES_PER_CHUNK = 4000

    def __init__(self, model_path: str = "Vosk/vosk-model-small-en-us-0.15"):
        """Load the Vosk model located at *model_path*."""
        self.model = Model(model_path)

    def transcribe_audio(self, audio_data) -> dict:
        """Transcribe a WAV recording and return the recognized text.

        Args:
            audio_data: Anything ``wave.open`` accepts in ``"rb"`` mode,
                i.e. a file path or a binary file-like object.

        Returns:
            ``{"success": True, "text": <transcript>}`` on success, or
            ``{"success": False, "error": <message>}`` if the file cannot
            be read, is not mono 16-bit PCM, or recognition fails.
            This method does not raise; every failure is reported through
            the returned dict.
        """
        try:
            with wave.open(audio_data, "rb") as wf:
                # Vosk's recognizer expects mono 16-bit PCM input; other
                # layouts would be decoded as noise and silently yield a
                # wrong transcript, so reject them explicitly.
                if (
                    wf.getnchannels() != 1
                    or wf.getsampwidth() != 2
                    or wf.getcomptype() != "NONE"
                ):
                    raise ValueError(
                        "Audio must be a mono, 16-bit PCM WAV file"
                    )

                recognizer = KaldiRecognizer(self.model, wf.getframerate())
                recognizer.SetWords(True)

                segments = []
                while data := wf.readframes(self._FRAMES_PER_CHUNK):
                    # AcceptWaveform returns truthy once an utterance is
                    # complete; only then is a finished result available.
                    if recognizer.AcceptWaveform(data):
                        segments.append(
                            json.loads(recognizer.Result()).get("text", "")
                        )
                # Flush whatever audio is still buffered in the recognizer.
                segments.append(
                    json.loads(recognizer.FinalResult()).get("text", "")
                )

            # Skip empty segments so silent stretches do not leave
            # doubled spaces inside the transcript.
            text = " ".join(segment for segment in segments if segment)
            return {"success": True, "text": text.strip()}
        except Exception as e:
            # Boundary handler: callers receive the failure as data.
            return {"success": False, "error": str(e)}
if __name__ == "__main__":
    # Manual smoke run: transcribe "output.wav" from the current working
    # directory using the default model path and print the result dict.
    outcome = VoskTranscriber().transcribe_audio("output.wav")
    print(outcome)