"""Voice chatbot demo: microphone audio -> Whisper transcript -> chat reply -> speech.

The Gradio UI chains three callbacks: ``transcribe`` turns the recorded audio
into text, ``botResponse`` sends that text to the chat model and renders the
running conversation, and ``giveVoice`` synthesizes an MP3 from the rendered
conversation text.
"""

import os

import gradio as gr
import openai
from gtts import gTTS

# Raises KeyError at import time if OPEN_AI_KEY is not set in the environment.
openai.api_key = os.environ["OPEN_AI_KEY"]

# Conversation history, seeded with the system prompt and passed to
# ``botResponse`` as an event input.
# NOTE(review): this State is created outside the ``gr.Blocks`` context below;
# confirm the installed Gradio version accepts that.
messages = gr.State(
    value=[
        {
            "role": "system",
            "content": "You are a therapist. Respond in less than 5 sentences.",
        }
    ]
)


def transcribe(audio: str) -> str:
    """Return the Whisper transcription of the audio file at path *audio*.

    Args:
        audio: Filesystem path of the recording (the ``gr.Audio`` input is
            configured with ``type="filepath"``).

    Returns:
        The ``"text"`` field of the transcription response.
    """
    # Context manager guarantees the handle is closed even if the API call
    # raises; the original left the file open.
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript["text"]


def botResponse(user_input: str, msg_contents: list) -> str:
    """Send *user_input* to the chat model and return the rendered conversation.

    Args:
        user_input: The user's latest utterance.
        msg_contents: Conversation history as a list of
            ``{"role": ..., "content": ...}`` dicts. It is appended to in
            place with the user message and the assistant reply.
            NOTE(review): the history is not returned as an event output, so
            persistence across turns presumably relies on this in-place
            mutation — confirm against the Gradio State semantics in use.

    Returns:
        Every non-system message formatted as ``"<role>: <content>"``, each
        followed by a blank line.
    """
    msg_contents.append({"role": "user", "content": user_input})

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=msg_contents,
    )
    assistant_reply = response["choices"][0]["message"]["content"]
    msg_contents.append({"role": "assistant", "content": assistant_reply})

    # The system prompt is omitted from the transcript shown to the user.
    return "".join(
        f"{message['role']}: {message['content']}\n\n"
        for message in msg_contents
        if message["role"] != "system"
    )


def giveVoice(bot_message: str) -> str:
    """Synthesize *bot_message* to speech and return the path of the MP3.

    The audio is always written to ``temp.mp3`` in the current working
    directory, overwriting any previous file of that name.

    Args:
        bot_message: Text to speak.

    Returns:
        Path of the saved ``temp.mp3`` joined onto the current working directory.
    """
    speech = gTTS(text=bot_message)
    speech.save("temp.mp3")
    return os.path.join(os.getcwd(), "temp.mp3")


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            user_audio = gr.Audio(
                source="microphone", type="filepath", label="Input Phrase"
            )
            submit_btn = gr.Button(value="Transcribe")
        with gr.Column():
            user_transcript = gr.Text(label="User Transcript")
            gpt_transcript = gr.Text(label="Chat Transcript")
            gpt_voice = gr.Audio(label="Voice Response")

    # Each stage fires when the previous stage's output component changes.
    submit_btn.click(transcribe, inputs=user_audio, outputs=user_transcript)
    user_transcript.change(
        botResponse,
        inputs=[user_transcript, messages],
        outputs=gpt_transcript,
    )
    gpt_transcript.change(giveVoice, inputs=gpt_transcript, outputs=gpt_voice)

demo.launch(share=False)