# This code is based on a YouTube video from
# https://www.youtube.com/@parttimelarry
import os
import gradio as gr
import openai
from gtts import gTTS # Google Text To Speech
# Load the OpenAI API key from the environment
openai.api_key = os.environ["OPEN_AI_KEY"]
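
# Note: OPEN_AI_KEY is read from the environment. On a Hugging Face Space it is
# typically configured as a repository secret; locally it can be exported before
# launching the app (e.g. `export OPEN_AI_KEY=sk-...`).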
# takes an audio file from the microphone
# submits the raw audio to OpenAI for
# Speech to Text Translation
# input from Microphone Component
# output to User Input - Textbox Component
def transcribe(audio):
    # Open the recorded audio file in binary mode
    with open(audio, "rb") as audio_file:
        # Call the transcribe method with the file-like object
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript["text"]
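
# Note: this file uses the legacy openai-python 0.x interface (openai.Audio,
# openai.ChatCompletion). A rough 1.x equivalent of the transcription step
# (a sketch, not part of the original Space) would be:
#
#   from openai import OpenAI
#   client = OpenAI()  # reads OPENAI_API_KEY from the environment
#   with open(audio, "rb") as f:
#       text = client.audio.transcriptions.create(model="whisper-1", file=f).text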
# Create a Gradio App using Blocks
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Welcome to the Virtual Therapist Chat Bot!
        """
    )
    with gr.Accordion("Click for Instructions:"):
        gr.Markdown(
            """
            * Tell the therapist your problems by recording your query.
            * Submit your query, and follow the chat or listen to the Therapist's advice.
            * When you are ready to respond, clear your last recording and resubmit.

            Note: Transcribe Audio does not work on iOS.
            """)
    # First message as instructions to OpenAI
    # Establishes a State object to create a
    # unique state for each user and on reload
    messages = gr.State(value=[{"role": "system", "content": "You are a therapist. Respond in less than 5 sentences."}])
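
    # For reference: every turn is appended to this list in the OpenAI chat
    # format, so after one exchange it might look like (illustrative values):
    #   [{"role": "system", "content": "You are a therapist. ..."},
    #    {"role": "user", "content": "I haven't been sleeping well."},
    #    {"role": "assistant", "content": "That sounds exhausting. ..."}]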
    # Takes the user's transcribed audio as a string
    # Takes the messages list as a reference
    # Sends the ongoing chat log to OpenAI
    # input from User Input - Textbox Component
    # output to Chat Log - Textbox Component
    def botResponse(user_input, messages):
        # Adds the user input to the ongoing chat log
        # and submits the log to OpenAI
        messages.append({"role": "user", "content": user_input})
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo-0301",
            messages=messages
        )

        # Parse the response from OpenAI and store
        # it in the chat log
        system_message = response["choices"][0]["message"]["content"]
        messages.append({"role": "assistant", "content": system_message})

        # Process the messages list to get the
        # chat log into a string. Exclude the
        # system prompt from the string
        chat_transcript = ""
        for message in messages:
            if message["role"] != "system":
                chat_transcript += message["role"] + ": " + message["content"] + "\n\n"

        return chat_transcript
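
    # For reference: the ChatCompletion response is shaped roughly like
    #   {"choices": [{"message": {"role": "assistant", "content": "..."}}], ...}
    # which is why botResponse reads response["choices"][0]["message"]["content"].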
    # Gets the last message in the
    # chat log and uses gTTS to
    # convert the latest response into
    # an audio file. Returns the path
    # to the saved mp3 file
    # input from messages as a reference
    # output to GPT Voice - Audio Component
    def giveVoice(messages):
        # The last entry in the log is the assistant's most recent reply
        bot_message = messages[-1]
        myobj = gTTS(text=bot_message["content"])
        myobj.save("temp.mp3")
        # Return an absolute path so the Audio component can locate the file
        dir = os.getcwd()
        new_path = os.path.join(dir, "temp.mp3")
        return new_path
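
    # Possible refinement (an assumption, not in the original): writing to a
    # unique temporary file avoids simultaneous users overwriting "temp.mp3".
    #
    #   import tempfile
    #   def giveVoice(messages):
    #       bot_message = messages[-1]
    #       tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    #       gTTS(text=bot_message["content"]).save(tmp.name)
    #       return tmp.name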
    # Creates the Gradio interface objects
    # The submit button triggers a cascade of
    # events that each engage a different
    # component as input/output
    with gr.Row():
        with gr.Column(scale=1):
            user_audio = gr.Audio(source="microphone", type="filepath", label="Input Phrase")
            submit_btn = gr.Button(value="Transcribe Audio")
            submit_btn2 = gr.Button(value="Submit Text")
            gpt_voice = gr.Audio(label="Therapist's Advice")
        with gr.Column(scale=2):
            user_transcript = gr.Text(label="Audio Translation", interactive=False)
            user_text = gr.Text(label="Text Input")
            gpt_transcript = gr.Text(label="Chat Transcript")

    submit_btn.click(transcribe, user_audio, user_transcript)
    submit_btn2.click(botResponse, [user_text, messages], gpt_transcript)
    user_transcript.change(botResponse, [user_transcript, messages], gpt_transcript)
    gpt_transcript.change(giveVoice, messages, gpt_voice)
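
    # How the cascade plays out at runtime:
    #   1. "Transcribe Audio" runs transcribe() on the recording and writes the
    #      text into user_transcript.
    #   2. That change event calls botResponse(), which appends the turn to
    #      messages and rewrites gpt_transcript.
    #      ("Submit Text" skips step 1 and feeds user_text straight into botResponse.)
    #   3. The gpt_transcript change event calls giveVoice(), which plays the
    #      latest reply through gpt_voice.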
# creates a local web server
# if share=True, Gradio also creates a temporary public share link
# (not needed here, since the Space on huggingface.co already hosts the demo)
demo.launch(share=False)