Zeeshan42 committed on
Commit 23a324b · verified · 1 Parent(s): b83f2a4

Upload 2 files

Files changed (2)
  1. app.py +47 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,47 @@
+ import os
+ import tempfile
+
+ import gradio as gr
+ import whisper
+ from groq import Groq
+ from gtts import gTTS
+
+ # Load the Whisper model once at startup for transcription
+ whisper_model = whisper.load_model("base")
+
+ # Set up the Groq client for LLM interaction. The API key must come
+ # from the GROQ_API_KEY environment variable (e.g., a Space secret).
+ client = Groq(api_key=os.environ["GROQ_API_KEY"])
+
+ def process_speech_to_speech(audio):
+     # Step 1: Transcribe the recorded audio file using Whisper
+     transcript = whisper_model.transcribe(audio)["text"]
+
+     # Step 2: Send the transcription to the LLM via Groq
+     chat_completion = client.chat.completions.create(
+         messages=[{"role": "user", "content": transcript}],
+         model="llama3-8b-8192",
+     )
+     response_text = chat_completion.choices[0].message.content
+
+     # Step 3: Convert the LLM response to speech using gTTS
+     tts = gTTS(text=response_text, lang="en")
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
+         audio_output = fp.name  # path to the generated speech file
+     tts.save(audio_output)  # save after the handle is closed (Windows-safe)
+
+     return response_text, audio_output
+
+ # Gradio interface to deploy the application
+ iface = gr.Interface(
+     fn=process_speech_to_speech,
+     inputs=gr.Audio(type="filepath", label="Record your audio"),
+     outputs=[
+         gr.Textbox(label="LLM Response"),
+         gr.Audio(type="filepath", label="Spoken Response"),
+     ],
+     live=True,
+ )
+
+ if __name__ == "__main__":
+     iface.launch()
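
With the launch call guarded under `if __name__ == "__main__":`, the pipeline can be imported and exercised without starting the UI. A minimal smoke test, assuming GROQ_API_KEY is set in the environment and a local recording exists at sample.wav (a hypothetical path, not part of this commit):

# Minimal smoke test for the speech-to-speech pipeline (no Gradio UI).
# "sample.wav" is a hypothetical local recording; GROQ_API_KEY must be set.
from app import process_speech_to_speech

text, speech_path = process_speech_to_speech("sample.wav")
print("LLM response:", text)
print("Spoken reply written to:", speech_path)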
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ openai-whisper  # OpenAI's Whisper; the PyPI package named "whisper" is unrelated
+ groq
+ gtts
+ gradio