Spaces:

aar2dee2
/

chatty_vader

Runtime error

App Files Files Community

aar2dee2 committed on Sep 10, 2023

Commit

9e7aeb0

1 Parent(s): 68d9107

pass audio file to transcriber directly

Browse files

Files changed (1) hide show

app.py +45 -30

app.py CHANGED Viewed

@@ -6,13 +6,9 @@ import gradio as gr
 import os
 import logging
-from vocode.helpers import create_turn_based_microphone_input_and_speaker_output
 from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
 from vocode.turn_based.synthesizer import CoquiSynthesizer
 from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
-from vocode.turn_based.turn_based_conversation import TurnBasedConversation
 system_prompt = """
 You are the evil lord Darth Vader. You answer all questions your subjects ask you, but do so with immense contempt and loathing.
 You may use any of the following lines or take inspiration from this conversational style where appropriate in your response.
@@ -69,42 +65,61 @@ logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
-def main():
-    (
-        microphone_input,
-        speaker_output,
-    ) = create_turn_based_microphone_input_and_speaker_output(use_default_devices=True)
-    conversation = TurnBasedConversation(
-        input_device=microphone_input,
-        output_device=speaker_output,
-        transcriber=WhisperTranscriber(api_key=getenv("OPENAI_API_KEY")),
-        agent=ChatGPTAgent(
             system_prompt=system_prompt,
             initial_message="What up",
             api_key=getenv("OPENAI_API_KEY"),
-        ),
-        synthesizer=CoquiSynthesizer(
-            CoquiSynthesizerConfig.from_output_device(
-                speaker_output,
-                tts_kwargs={
-                    "voice_id": os.getenv("COQUI_VOICE_ID"),
-                }
-            ),
             api_key=getenv("COQUI_API_KEY"),
-        ),
-        logger=logger,
-    )
     print("Starting conversation. Press Ctrl+C to exit.")
     while True:
         try:
-            input("Press enter to start recording...")
-            conversation.start_speech()
-            input("Press enter to end recording...")
-            conversation.end_speech_and_respond()
-        except KeyboardInterrupt:
             break
 demo = gr.Interface(fn=main, inputs="audio", outputs="audio")
 demo.launch()

 import os
 import logging
 from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
 from vocode.turn_based.synthesizer import CoquiSynthesizer
 from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
 system_prompt = """
 You are the evil lord Darth Vader. You answer all questions your subjects ask you, but do so with immense contempt and loathing.
 You may use any of the following lines or take inspiration from this conversational style where appropriate in your response.
 logger.setLevel(logging.DEBUG)
def main(input_audio):
    """Run one voice turn: transcribe the input, ask the agent, speak the reply.

    This is the callback handed to ``gr.Interface`` in this file, so it is
    invoked once per submission and must return promptly.

    Args:
        input_audio: The audio supplied by the caller. It is forwarded
            unchanged to ``WhisperTranscriber.transcribe``.

    Returns:
        The value returned by ``CoquiSynthesizer.synthesize`` for the agent's
        reply, or ``None`` when no audio was supplied or when any stage
        fails (the failure is logged with its traceback).
    """
    # Nothing to transcribe: return before creating any API clients.
    if input_audio is None:
        return None

    # The same key is used by both the transcriber and the agent.
    openai_api_key = os.getenv("OPENAI_API_KEY")

    try:
        transcriber = WhisperTranscriber(api_key=openai_api_key)
    except Exception as e:
        logger.exception("Failed to initialize WhisperTranscriber: %s", e)
        return None

    try:
        agent = ChatGPTAgent(
            system_prompt=system_prompt,
            initial_message="What up",
            api_key=openai_api_key,
        )
    except Exception as e:
        logger.exception("Failed to initialize ChatGPTAgent: %s", e)
        return None

    try:
        synthesizer = CoquiSynthesizer(
            voice_id=os.getenv("COQUI_VOICE_ID"),
            api_key=os.getenv("COQUI_API_KEY"),
        )
    except Exception as e:
        logger.exception("Failed to initialize CoquiSynthesizer: %s", e)
        return None

    # Each call handles exactly one turn, so the stages run straight through;
    # the first failing stage ends the turn with None.
    try:
        # NOTE(review): confirm that the value Gradio passes for an "audio"
        # input is the type transcribe() expects.
        transcript = transcriber.transcribe(input_audio)
    except Exception as e:
        logger.exception("Failed to transcribe audio: %s", e)
        return None

    try:
        # NOTE(review): verify that ChatGPTAgent actually exposes
        # generate_response(); check the method name against the vocode
        # turn-based agent API.
        response = agent.generate_response(transcript)
    except Exception as e:
        logger.exception("Failed to generate response: %s", e)
        return None

    try:
        return synthesizer.synthesize(response)
    except Exception as e:
        logger.exception("Failed to synthesize response: %s", e)
        return None
 # Wrap main() in a Gradio Interface with one "audio" input and one "audio"
 # output, then launch it. Both statements run at import time.
 demo = gr.Interface(fn=main, inputs="audio", outputs="audio")
 demo.launch()