Spaces:

rahul-appu
/

VoiceDemo

Sleeping

App Files Files Community

rahul-appu committed on Oct 16, 2024

Commit

fa188df

verified ·

1 Parent(s): e5eeb6d

Upload 2 files

Browse files

Files changed (2) hide show

app.py +121 -0
requirements.txt +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,121 @@

+import io
+import os
+import gradio as gr
+from groq import Groq
+import soundfile as sf
+from dotenv import load_dotenv
+from gradio import ChatMessage
+from deepgram import DeepgramClient, SpeakOptions
+def get_transcript(audio):
+    # Convert the audio to MP3 format
+    audio_buffer = io.BytesIO()
+    sf.write(audio_buffer, audio[1], samplerate=audio[0], format="MP3")
+    audio_buffer.seek(0)
+    # Groq client
+    client = Groq()
+    translation = client.audio.transcriptions.create(
+        file=("audio.mp3", audio_buffer.read()),
+        model="whisper-large-v3-turbo",
+        response_format="json",
+        temperature=0.0,
+    )
+    return translation.text
+def generate_response(chat_history: list[ChatMessage]):
+    # Groq client
+    client = Groq()
+    messages = [
+        {
+            "role": "system",
+            "content": "You are an assistant working in a helpline center. Answer queries in short and concise sentences. Keep in mind that the output will be converted to voice, so use appropriate vocabulary.",  # noqa
+        }  # noqa
+    ]
+    messages.extend(
+        [
+            {"role": message["role"], "content": message["content"]}
+            for message in chat_history  # noqa
+        ]
+    )
+    response = client.chat.completions.create(
+        model="llama3-8b-8192",
+        messages=messages,
+    )
+    return response.choices[0].message.content
+def speech_synthesis(text: str):
+    DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")
+    TEXT = {"text": text}
+    FILENAME = "audio.mp3"
+    try:
+        deepgram = DeepgramClient(DEEPGRAM_API_KEY)
+        options = SpeakOptions(
+            model="aura-luna-en",
+        )
+        deepgram.speak.v("1").save(FILENAME, TEXT, options)
+        with open(FILENAME, "rb") as audio_file:
+            audio_data = audio_file.read()
+        return audio_data
+    except Exception as e:
+        print(f"Exception: {e}")
+        return None
+def process_audio(audio, chat_history: list[ChatMessage]):
+    # If audio is None, return None and chat history
+    if audio is None:
+        return None, chat_history
+    transcript = get_transcript(audio)
+    chat_history.append({"role": "user", "content": transcript})
+    response = generate_response(chat_history)
+    chat_history.append({"role": "assistant", "content": response})
+    audio_data = speech_synthesis(response)
+    return audio_data, chat_history
+# Build the Gradio UI: a page title, a row with two columns (audio input and
+# output on one side, the chat transcript on the other), and a button that
+# runs one full voice turn through process_audio.
+with gr.Blocks() as demo:
+    gr.Markdown(
+        "<h1 style='text-align: center;'> Welcome to the Audio Chatbot Demo</h1>"  # noqa
+    )
+    with gr.Row():
+        with gr.Column():
+            # type="numpy" matches get_transcript, which indexes the value as
+            # (sample_rate, samples).
+            input_audio = gr.Audio(
+                label="Input Audio", sources="microphone", type="numpy"
+            )
+            # Display-only player for the synthesized reply.
+            output_audio = gr.Audio(label="Output Audio", interactive=False)
+        with gr.Column():
+            # type="messages" matches the {"role", "content"} dicts that
+            # process_audio appends to the history.
+            chatbot = gr.Chatbot(label="Chatbot", type="messages")
+    process_button = gr.Button("Process Audio")
+    # The chatbot is both an input and an output, so the updated history
+    # returned by process_audio replaces what is displayed.
+    process_button.click(
+        fn=process_audio,
+        inputs=[input_audio, chatbot],
+        outputs=[output_audio, chatbot],  # noqa
+    )  # noqa
+# Script entry point: runs only when this file is executed directly, not when
+# it is imported.
+if __name__ == "__main__":
+    # Called before launch so values such as DEEPGRAM_API_KEY (read with
+    # os.getenv in speech_synthesis) are available when requests arrive.
+    load_dotenv()
+    demo.launch()

requirements.txt ADDED Viewed

Binary file (7.04 kB). View file