talk-to-ultravox-0.5

Running on T4

App Files Files Community

Steveeeeeeen HF staff committed 12 days ago

Commit

2cdcbd0

verified ·

1 Parent(s): 77dbc9a

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -20

app.py CHANGED Viewed

@@ -21,9 +21,7 @@ auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
 if account_sid and auth_token:
     client = Client(account_sid, auth_token)
     token = client.tokens.create()
     rtc_configuration = {
         "iceServers": token.ice_servers,
         "iceTransportPolicy": "relay",
@@ -32,11 +30,7 @@ else:
     rtc_configuration = None
-def transcribe(
-    audio: tuple[int, np.ndarray],
-    transformers_chat: list[dict],
-    conversation: list[dict],
-):
     original_sr = audio[0]
     target_sr = 16000
@@ -46,7 +40,7 @@ def transcribe(
     tf_input = [d for d in transformers_chat]
-    # Generate response from the pipeline using the audio input
     output = pipe(
         {"audio": audio_sr, "turns": tf_input, "sampling_rate": target_sr},
         max_new_tokens=512,
@@ -63,21 +57,15 @@ def transcribe(
     yield AdditionalOutputs(transformers_chat, conversation)
-def respond_text(
-    user_text: str,
-    transformers_chat: list[dict],
-    conversation: list[dict],
-):
     if not user_text.strip():
-        # Do nothing if the textbox is empty
         return transformers_chat, conversation
     # Append the user message from the textbox
     conversation.append({"role": "user", "content": user_text})
     transformers_chat.append({"role": "user", "content": user_text})
-    # Generate a response using the pipeline.
-    # Here we assume the pipeline can also process text input via the "text" key.
     output = pipe({"text": user_text, "turns": transformers_chat}, max_new_tokens=512)
     conversation.append({"role": "assistant", "content": output})
@@ -100,6 +88,7 @@ with gr.Blocks() as demo:
         </p>
         """
     )
     # Shared conversation state
     transformers_chat = gr.State(
         value=[
@@ -110,9 +99,12 @@ with gr.Blocks() as demo:
         ]
     )
     with gr.Row():
         with gr.Column(scale=1):
-            transcript = gr.Chatbot(label="Transcript", type="messages")
             text_input = gr.Textbox(
                 placeholder="Type your message here...", label="Your Message"
             )
@@ -125,7 +117,7 @@ with gr.Blocks() as demo:
                 modality="audio",
             )
-    # Audio stream: when you stop speaking, process the audio input.
     audio.stream(
         ReplyOnPause(transcribe),
         inputs=[audio, transformers_chat, transcript],
@@ -139,13 +131,13 @@ with gr.Blocks() as demo:
         show_progress="hidden",
     )
-    # Text input: when you click "Send", process the typed message.
     send_button.click(
         respond_text,
         inputs=[text_input, transformers_chat, transcript],
         outputs=[transformers_chat, transcript],
     )
-    # Optionally clear the text box after sending:
     send_button.click(lambda: "", inputs=[], outputs=[text_input])
 if __name__ == "__main__":

 if account_sid and auth_token:
     client = Client(account_sid, auth_token)
     token = client.tokens.create()
     rtc_configuration = {
         "iceServers": token.ice_servers,
         "iceTransportPolicy": "relay",
     rtc_configuration = None
+def transcribe(audio: tuple[int, np.ndarray], transformers_chat: list[dict], conversation: list[dict]):
     original_sr = audio[0]
     target_sr = 16000
     tf_input = [d for d in transformers_chat]
+    # Generate a response from the pipeline using the audio input
     output = pipe(
         {"audio": audio_sr, "turns": tf_input, "sampling_rate": target_sr},
         max_new_tokens=512,
     yield AdditionalOutputs(transformers_chat, conversation)
+def respond_text(user_text: str, transformers_chat: list[dict], conversation: list[dict]):
     if not user_text.strip():
         return transformers_chat, conversation
     # Append the user message from the textbox
     conversation.append({"role": "user", "content": user_text})
     transformers_chat.append({"role": "user", "content": user_text})
+    # Generate a response using the pipeline. We assume it can process text input via "text"
     output = pipe({"text": user_text, "turns": transformers_chat}, max_new_tokens=512)
     conversation.append({"role": "assistant", "content": output})
         </p>
         """
     )
     # Shared conversation state
     transformers_chat = gr.State(
         value=[
         ]
     )
+    # Chat transcript on top
+    transcript = gr.Chatbot(label="Transcript", type="messages")
+    # Lower row with text input and audio input side by side
     with gr.Row():
         with gr.Column(scale=1):
             text_input = gr.Textbox(
                 placeholder="Type your message here...", label="Your Message"
             )
                 modality="audio",
             )
+    # Audio stream: process audio when speaking stops.
     audio.stream(
         ReplyOnPause(transcribe),
         inputs=[audio, transformers_chat, transcript],
         show_progress="hidden",
     )
+    # Text input: process the typed message when "Send" is clicked.
     send_button.click(
         respond_text,
         inputs=[text_input, transformers_chat, transcript],
         outputs=[transformers_chat, transcript],
     )
+    # Clear text input after sending.
     send_button.click(lambda: "", inputs=[], outputs=[text_input])
 if __name__ == "__main__":