Update app.py
Browse files
app.py
CHANGED
@@ -3,13 +3,23 @@ import subprocess
|
|
3 |
import openai
|
4 |
import gradio as gr
|
5 |
from gtts import gTTS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
8 |
|
9 |
def transcribe(audio):
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
13 |
|
14 |
def generate_response(transcribed_text):
|
15 |
response = openai.ChatCompletion.create(
|
|
|
3 |
import openai
|
4 |
import gradio as gr
|
5 |
from gtts import gTTS
|
6 |
+
import torch
|
7 |
+
import librosa
|
8 |
+
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
|
9 |
+
|
10 |
+
# Single checkpoint id shared by the input processor and the speech-to-text
# model, so the two can never drift apart.
_ASR_CHECKPOINT = "lyimo/whisper-small-sw2"

# Both objects are loaded once at import time and reused by every request.
processor = AutoProcessor.from_pretrained(_ASR_CHECKPOINT)
model = AutoModelForSpeechSeq2Seq.from_pretrained(_ASR_CHECKPOINT)
|
13 |
|
14 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
15 |
|
16 |
def transcribe(audio):
    """Transcribe an audio clip to text with the module-level speech model.

    Args:
        audio: Audio source handed straight to ``librosa.load`` (typically a
            file path). ``None`` or an empty string -- e.g. nothing was
            recorded -- short-circuits to an empty transcription.

    Returns:
        The decoded text of the first generated sequence, with the
        tokenizer's special/control tokens removed. An empty string when no
        audio was supplied.
    """
    # Nothing to transcribe: avoid a confusing failure inside librosa.load.
    if audio is None or audio == "":
        return ""

    # Resample to 16 kHz on load and pass the same rate to the processor.
    audio_data, rate = librosa.load(audio, sr=16000)
    inputs = processor(audio_data, sampling_rate=rate, return_tensors="pt")

    # Inference only, so skip gradient tracking.
    # NOTE(review): max_length=512 may exceed the checkpoint's decoder limit
    # (Whisper configs default to 448 target positions) -- confirm.
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_features"],
            max_length=512,
            num_beams=4,
            early_stopping=True,
        )

    # skip_special_tokens keeps control tokens out of the returned text, so
    # downstream consumers only see the spoken words.
    transcription = processor.batch_decode(outputs, skip_special_tokens=True)
    return transcription[0]
|
23 |
|
24 |
def generate_response(transcribed_text):
|
25 |
response = openai.ChatCompletion.create(
|