# Source: WhisperDemo / app.py (hwberry2)
# Commit 70ac71d: "Update app.py"
# This code is based on a YouTube video from
# https://www.youtube.com/@parttimelarry
import os
import gradio as gr
import openai
from gtts import gTTS # Google Text To Speech
# Load the OpenAI API key from the OPEN_AI_KEY environment variable.
# Raises KeyError when the module is loaded if the variable is not set.
openai.api_key = os.environ["OPEN_AI_KEY"]
def transcribe(audio):
    """Transcribe a recorded audio file to text with OpenAI's Whisper model.

    Wired as: input from the microphone Audio component, output to the
    read-only "Audio Translation" textbox.

    Args:
        audio: Filesystem path of the recording (the Audio component is
            created with type="filepath").

    Returns:
        The value of the "text" field of the transcription response.
    """
    # Open the recording in a context manager so the file handle is closed
    # even when the API call raises; previously the handle was never closed.
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript["text"]
# Create a Gradio App using Blocks; the app object is bound to `demo`
# and is launched at the bottom of the file.
with gr.Blocks() as demo:
# Page title, rendered as a Markdown heading.
gr.Markdown(
"""
# Welcome to the Virtual Therapist Chat Bot!
"""
)
# Usage instructions, shown inside an accordion labelled
# "Click for Instructions:".
with gr.Accordion("Click for Instructions:"):
gr.Markdown(
"""
* Tell the therapist your problems, by recording your query.
* Submit your query, and follow the chat or listen to the Therapists advice.
* When you are ready to respond, clear your last recording and resubmit.
note: Transcribe Audio does not work on iOS
""")
# Chat log shared by the handlers below. Its first entry is the system
# message that instructs the model to act as a therapist and to answer in
# fewer than 5 sentences.
# The log is held in a gr.State object so that each user (and each page
# reload) gets its own copy of the conversation.
messages = gr.State(value=[{"role": "system", "content": "You are a therapist. Respond in less than 5 sentences."}])
def botResponse(user_input, messages):
    """Send the user's message to OpenAI and return the chat log as text.

    Wired as: input from a textbox (typed text or the transcribed audio)
    plus the ``messages`` state; output to the "Chat Transcript" textbox.

    Args:
        user_input: The user's message as a string. Blank or missing input
            is ignored and no request is sent.
        messages: The ongoing chat log, a list of
            ``{"role": ..., "content": ...}`` dicts. It is updated in place
            so that the caller's log keeps the conversation history.

    Returns:
        The chat log as a string: one ``"role: content"`` entry per message,
        each followed by a blank line, with system messages excluded.
    """
    # Blank input (for example the transcript box being cleared) carries
    # nothing to send, so skip the API call and leave the log untouched.
    if user_input and user_input.strip():
        user_message = {"role": "user", "content": user_input}
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo-0301",
            # Send a copy that includes the new message. The log itself is
            # only updated after the call succeeds, so a failed request
            # does not leave an unanswered user message behind.
            messages=messages + [user_message],
        )
        reply = response["choices"][0]["message"]["content"]
        messages.append(user_message)
        messages.append({"role": "assistant", "content": reply})

    # Format the log for display, leaving out the system instructions.
    return "".join(
        f"{message['role']}: {message['content']}\n\n"
        for message in messages
        if message["role"] != "system"
    )
def giveVoice(messages):
    """Convert the latest assistant reply to speech and return the mp3 path.

    Wired as: input from the ``messages`` state, output to the
    "Therapists Advice" Audio component.

    Args:
        messages: The chat log, a list of ``{"role": ..., "content": ...}``
            dicts. Only the last entry is read.

    Returns:
        Path to an mp3 file containing the spoken reply, or None when the
        log is empty, does not end with an assistant message, or the reply
        has no text to speak.
    """
    import tempfile  # local import keeps this block self-contained

    # Only an assistant reply should be spoken; never read the system
    # prompt or the user's own message back to them.
    if not messages or messages[-1]["role"] != "assistant":
        return None

    reply_text = messages[-1]["content"]
    # gTTS cannot synthesize empty text, so there is nothing to produce.
    if not reply_text or not reply_text.strip():
        return None

    # Reserve a unique file for every call. A single fixed "temp.mp3" in
    # the working directory would be overwritten when several users get
    # replies at the same time. The file is intentionally not deleted here
    # because the caller still has to serve it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as mp3_file:
        mp3_path = mp3_file.name

    gTTS(text=reply_text).save(mp3_path)
    return mp3_path
# Creates the Gradio interface objects and wires up the event chain:
#   1. "Transcribe Audio" sends the recording to transcribe(), which fills
#      the "Audio Translation" box.
#   2. A change in that box, or a click on "Submit Text", calls
#      botResponse(), which fills the "Chat Transcript" box.
#   3. A change in the "Chat Transcript" box calls giveVoice(), which
#      fills the "Therapists Advice" audio player.
with gr.Row():
with gr.Column(scale=1):
# Microphone recording, handed to the handler as a file path.
user_audio = gr.Audio(source="microphone", type="filepath", label="Input Phrase")
submit_btn = gr.Button(value="Transcribe Audio")
submit_btn2 = gr.Button(value="Submit Text")
# Player for the spoken reply returned by giveVoice.
gpt_voice = gr.Audio(label="Therapists Advice")
with gr.Column(scale=2):
# Read-only box that receives the transcription of the recording.
user_transcript = gr.Text(label="Audio Translation", interactive=False)
# Typed alternative to recording a message.
user_text = gr.Text(label="Text Input")
# Conversation so far, as formatted by botResponse.
gpt_transcript = gr.Text(label="Chat Transcript")
# Step 1: recording -> transcription.
submit_btn.click(transcribe, user_audio, user_transcript)
# Step 2: typed text or a new transcription -> chat reply.
submit_btn2.click(botResponse, [user_text, messages], gpt_transcript)
user_transcript.change(botResponse, [user_transcript, messages], gpt_transcript)
# Step 3: updated chat transcript -> spoken reply.
gpt_transcript.change(giveVoice, messages, gpt_voice)
# Start the web server for the app, with link sharing disabled.
# NOTE(review): share=True appears to request a temporary public share
# link from Gradio rather than publish a demo on huggingface.co; confirm
# against the Gradio documentation for the version in use.
demo.launch(share=False)