"""Voice-driven "virtual therapist" chat bot.

Pipeline wired together by the Gradio event handlers at the bottom of the
file: microphone recording -> OpenAI speech-to-text -> OpenAI chat
completion -> gTTS text-to-speech.
"""
import os
import tempfile

import gradio as gr
import openai
from gtts import gTTS  # Google Text To Speech

# Load the API key; a missing variable fails fast with a KeyError.
openai.api_key = os.environ["OPEN_AI_KEY"]

# NOTE(review): dated snapshot model name -- confirm it is still served.
CHAT_MODEL = "gpt-3.5-turbo-0301"

# First message of every conversation; instructs the model how to behave.
SYSTEM_PROMPT = "You are a therapist. Respond in less than 5 sentences."


def transcribe(audio):
    """Convert a microphone recording to text with OpenAI speech-to-text.

    Input comes from the microphone Audio component; output goes to the
    "User Transcript" textbox.

    Args:
        audio: Path to the recorded audio file, or None when the button is
            clicked without a recording.

    Returns:
        The transcribed text, or an empty string when there is no recording.
    """
    if not audio:
        # Nothing was recorded; avoid crashing on open(None).
        return ""
    # The context manager closes the handle even if the API call raises.
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript["text"]


def _format_chat_log(messages):
    """Render the chat log as text, leaving out the system messages."""
    return "".join(
        f"{message['role']}: {message['content']}\n\n"
        for message in messages
        if message["role"] != "system"
    )


def botResponse(user_input, messages):
    """Send the user's latest utterance to OpenAI and return the chat log.

    Input comes from the "User Transcript" textbox; output goes to the
    "Chat Transcript" textbox.

    Args:
        user_input: The transcribed user utterance.
        messages: The per-session chat log (list of role/content dicts).
            It is extended in place with the user message and the reply.

    Returns:
        The chat log, without system messages, as a single string.
    """
    if not user_input or not user_input.strip():
        # A cleared/empty transcript is not a query: skip the API call and
        # show the conversation unchanged.
        return _format_chat_log(messages)

    # Add the user input to the ongoing chat log and submit the log.
    messages.append({"role": "user", "content": user_input})
    try:
        response = openai.ChatCompletion.create(
            model=CHAT_MODEL,
            messages=messages,
        )
    except Exception:
        # Keep the log consistent: without this, a retry after a failed
        # request would leave the same user message in the log twice.
        messages.pop()
        raise

    # Parse the response from OpenAI and store it in the chat log.
    reply = response["choices"][0]["message"]["content"]
    messages.append({"role": "assistant", "content": reply})

    return _format_chat_log(messages)


def giveVoice(messages):
    """Convert the latest assistant reply into an mp3 file with gTTS.

    Input is the per-session chat log; output goes to the "Voice Response"
    Audio component.

    Args:
        messages: The per-session chat log (list of role/content dicts).

    Returns:
        Path to the generated mp3 file, or None when the last message is
        not a non-empty assistant reply.
    """
    if not messages:
        return None
    last_message = messages[-1]
    reply_text = last_message["content"]
    if last_message["role"] != "assistant" or not reply_text.strip():
        # Only the bot's replies are voiced, and there must be text to speak.
        return None

    # Reserve a unique file per reply so that concurrent sessions do not
    # overwrite each other's audio. The file is left on disk (delete=False)
    # because it has to outlive this call to be played back.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        mp3_path = tmp.name
    gTTS(text=reply_text).save(mp3_path)
    return mp3_path


# Create a Gradio App using Blocks
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Welcome to the Virtual Therapist Chat Bot!
        """
    )
    with gr.Accordion("Click for Instructions:"):
        gr.Markdown(
            """
            * Tell the therapist your problems, by recording your query.
            * Submit your query, and follow the chat or listen to the Therapists advice.
            * When you are ready to respond, clear your last recording and resubmit.
            """)

    # The State object holds the chat log for the session, seeded with the
    # system prompt as the first message.
    messages = gr.State(value=[{"role": "system", "content": SYSTEM_PROMPT}])

    # Create the Gradio interface objects.
    with gr.Row():
        with gr.Column(scale=1):
            user_audio = gr.Audio(source="microphone", type="filepath", label="Input Phrase")
            submit_btn = gr.Button(value="Transcribe")
            gpt_voice = gr.Audio(label="Voice Response")
        with gr.Column(scale=2):
            user_transcript = gr.Text(label="User Transcript")
            gpt_transcript = gr.Text(label="Chat Transcript")

    # The submit button triggers a cascade of events, each feeding the next:
    # recording -> user transcript -> chat transcript -> spoken reply.
    submit_btn.click(transcribe, user_audio, user_transcript)
    user_transcript.change(botResponse, [user_transcript, messages], gpt_transcript)
    gpt_transcript.change(giveVoice, messages, gpt_voice)

# Creates a local web server; share=True would also create a public link.
demo.launch(share=False)