rahul-appu committed
Commit fa188df · verified · 1 Parent(s): e5eeb6d

Upload 2 files

Files changed (2)
  1. app.py +121 -0
  2. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,121 @@
import io
import os

import gradio as gr
from groq import Groq
import soundfile as sf
from dotenv import load_dotenv
from gradio import ChatMessage
from deepgram import DeepgramClient, SpeakOptions


def get_transcript(audio):
    # Gradio's "numpy" audio type is a (sample_rate, data) tuple;
    # encode it as an in-memory MP3 for the transcription API.
    sample_rate, data = audio
    audio_buffer = io.BytesIO()
    sf.write(audio_buffer, data, samplerate=sample_rate, format="MP3")
    audio_buffer.seek(0)

    # Groq client (reads GROQ_API_KEY from the environment)
    client = Groq()

    transcription = client.audio.transcriptions.create(
        file=("audio.mp3", audio_buffer.read()),
        model="whisper-large-v3-turbo",
        response_format="json",
        temperature=0.0,
    )

    return transcription.text


def generate_response(chat_history: list[ChatMessage]):
    # Groq client (reads GROQ_API_KEY from the environment)
    client = Groq()

    # System prompt first, then the running conversation.
    messages = [
        {
            "role": "system",
            "content": (
                "You are an assistant working in a helpline center. "
                "Answer queries in short and concise sentences. "
                "Keep in mind that the output will be converted to voice, "
                "so use appropriate vocabulary."
            ),
        }
    ]

    messages.extend(
        {"role": message["role"], "content": message["content"]}
        for message in chat_history
    )

    response = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=messages,
    )

    return response.choices[0].message.content


def speech_synthesis(text: str):
    DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")
    TEXT = {"text": text}
    FILENAME = "audio.mp3"

    try:
        deepgram = DeepgramClient(DEEPGRAM_API_KEY)

        options = SpeakOptions(
            model="aura-luna-en",
        )

        # Save the synthesized speech to disk and return the file path;
        # gr.Audio accepts a filepath as output, so there is no need to
        # read the raw bytes back in.
        deepgram.speak.v("1").save(FILENAME, TEXT, options)
        return FILENAME

    except Exception as e:
        print(f"Exception: {e}")
        return None


def process_audio(audio, chat_history: list[ChatMessage]):
    # Nothing recorded yet: leave the output and history unchanged.
    if audio is None:
        return None, chat_history

    # Speech -> text -> LLM reply -> speech.
    transcript = get_transcript(audio)
    chat_history.append({"role": "user", "content": transcript})

    response = generate_response(chat_history)
    chat_history.append({"role": "assistant", "content": response})

    audio_path = speech_synthesis(response)

    return audio_path, chat_history


with gr.Blocks() as demo:
    gr.Markdown(
        "<h1 style='text-align: center;'>Welcome to the Audio Chatbot Demo</h1>"
    )
    with gr.Row():
        with gr.Column():
            input_audio = gr.Audio(
                label="Input Audio", sources=["microphone"], type="numpy"
            )
            output_audio = gr.Audio(label="Output Audio", interactive=False)
        with gr.Column():
            chatbot = gr.Chatbot(label="Chatbot", type="messages")

    process_button = gr.Button("Process Audio")
    process_button.click(
        fn=process_audio,
        inputs=[input_audio, chatbot],
        outputs=[output_audio, chatbot],
    )

if __name__ == "__main__":
    load_dotenv()
    demo.launch()
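
Note: app.py calls load_dotenv() and reads GROQ_API_KEY (implicitly, via Groq()) and DEEPGRAM_API_KEY from the environment. A minimal .env for local runs would presumably look like the sketch below; the key values are placeholders, and on a hosted Space the same variables would be set as repository secrets instead.

    # .env (placeholder values, not part of this commit)
    GROQ_API_KEY=your_groq_key_here
    DEEPGRAM_API_KEY=your_deepgram_key_here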
requirements.txt ADDED
Binary file (7.04 kB).
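
The diff viewer does not render requirements.txt inline, but based on app.py's imports the file would need at least the following packages (a reconstruction from the imports; the actual contents and version pins in this commit are unknown):

    # requirements.txt (assumed from app.py's imports)
    gradio
    groq
    soundfile
    python-dotenv
    deepgram-sdk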