aar2dee2 committed on
Commit
9e7aeb0
·
1 Parent(s): 68d9107

pass audio file to transcriber directly

Browse files
Files changed (1) hide show
  1. app.py +45 -30
app.py CHANGED
@@ -6,13 +6,9 @@ import gradio as gr
6
  import os
7
  import logging
8
 
9
- from vocode.helpers import create_turn_based_microphone_input_and_speaker_output
10
-
11
  from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
12
  from vocode.turn_based.synthesizer import CoquiSynthesizer
13
  from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
14
- from vocode.turn_based.turn_based_conversation import TurnBasedConversation
15
-
16
  system_prompt = """
17
  You are the evil lord Darth Vader. You answer all questions your subjects ask you, but do so with immense contempt and loathing.
18
  You may use any of the following lines or take inspiration from this conversational style where appropriate in your response.
@@ -69,42 +65,61 @@ logger = logging.getLogger(__name__)
69
  logger.setLevel(logging.DEBUG)
70
 
71
 
72
- def main():
73
- (
74
- microphone_input,
75
- speaker_output,
76
- ) = create_turn_based_microphone_input_and_speaker_output(use_default_devices=True)
 
 
 
77
 
78
- conversation = TurnBasedConversation(
79
- input_device=microphone_input,
80
- output_device=speaker_output,
81
- transcriber=WhisperTranscriber(api_key=getenv("OPENAI_API_KEY")),
82
- agent=ChatGPTAgent(
83
  system_prompt=system_prompt,
84
  initial_message="What up",
85
  api_key=getenv("OPENAI_API_KEY"),
86
- ),
87
- synthesizer=CoquiSynthesizer(
88
- CoquiSynthesizerConfig.from_output_device(
89
- speaker_output,
90
- tts_kwargs={
91
- "voice_id": os.getenv("COQUI_VOICE_ID"),
92
- }
93
- ),
 
94
  api_key=getenv("COQUI_API_KEY"),
95
- ),
96
- logger=logger,
97
- )
 
 
98
  print("Starting conversation. Press Ctrl+C to exit.")
99
  while True:
100
  try:
101
- input("Press enter to start recording...")
102
- conversation.start_speech()
103
- input("Press enter to end recording...")
104
- conversation.end_speech_and_respond()
105
- except KeyboardInterrupt:
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  break
107
 
 
 
108
 
109
  demo = gr.Interface(fn=main, inputs="audio", outputs="audio")
110
  demo.launch()
 
6
  import os
7
  import logging
8
 
 
 
9
  from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
10
  from vocode.turn_based.synthesizer import CoquiSynthesizer
11
  from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
 
 
12
  system_prompt = """
13
  You are the evil lord Darth Vader. You answer all questions your subjects ask you, but do so with immense contempt and loathing.
14
  You may use any of the following lines or take inspiration from this conversational style where appropriate in your response.
 
65
  logger.setLevel(logging.DEBUG)
66
 
67
 
68
def main(input_audio):
    """Run a single speech-to-speech turn for the Gradio interface.

    The audio is transcribed with ``WhisperTranscriber``, the transcript is
    answered by a ``ChatGPTAgent`` configured with the module-level
    ``system_prompt``, and the reply is rendered to audio with
    ``CoquiSynthesizer``. This function is registered as the Gradio callback
    (``gr.Interface(fn=main, ...)``), so it processes exactly one request per
    call and then returns; it must not loop.

    Args:
        input_audio: Value delivered by the Gradio ``"audio"`` input. It is
            handed to ``transcriber.transcribe`` unchanged.
            NOTE(review): Gradio's audio input may deliver a
            ``(sample_rate, ndarray)`` tuple or a file path depending on its
            ``type`` setting - confirm that this matches what
            ``WhisperTranscriber.transcribe`` accepts.

    Returns:
        Whatever ``synthesizer.synthesize`` produces for the reply, or
        ``None`` if any component fails to initialize or any pipeline step
        raises. Failures are logged via the module-level ``logger``.
    """
    # Credentials are read through ``os.getenv`` everywhere so the function
    # relies only on the ``import os`` at the top of the file.
    try:
        # Initialize WhisperTranscriber
        transcriber = WhisperTranscriber(api_key=os.getenv("OPENAI_API_KEY"))
    except Exception as e:
        logger.error("Failed to initialize WhisperTranscriber: %s", e)
        return None

    try:
        # Initialize ChatGPTAgent
        agent = ChatGPTAgent(
            system_prompt=system_prompt,
            initial_message="What up",
            api_key=os.getenv("OPENAI_API_KEY"),
        )
    except Exception as e:
        logger.error("Failed to initialize ChatGPTAgent: %s", e)
        return None

    try:
        # Initialize CoquiSynthesizer
        synthesizer = CoquiSynthesizer(
            voice_id=os.getenv("COQUI_VOICE_ID"),
            api_key=os.getenv("COQUI_API_KEY"),
        )
    except Exception as e:
        logger.error("Failed to initialize CoquiSynthesizer: %s", e)
        return None

    print("Starting conversation. Press Ctrl+C to exit.")

    # One pass only: the previous ``while True`` wrapper was left over from the
    # interactive microphone loop. Each failing step now returns ``None``
    # explicitly instead of breaking out towards a possibly unbound result.
    try:
        # Transcribe the input_audio using WhisperTranscriber
        transcript = transcriber.transcribe(input_audio)
    except Exception as e:
        logger.error("Failed to transcribe audio: %s", e)
        return None

    try:
        # Generate response using ChatGPTAgent
        # NOTE(review): confirm that the turn-based ChatGPTAgent exposes
        # ``generate_response``; if it does not, this step always fails and
        # the function returns None.
        response = agent.generate_response(transcript)
    except Exception as e:
        logger.error("Failed to generate response: %s", e)
        return None

    try:
        # Synthesize the response into audio using CoquiSynthesizer
        output_audio = synthesizer.synthesize(response)
    except Exception as e:
        logger.error("Failed to synthesize response: %s", e)
        return None

    return output_audio
122
+
123
 
124
  demo = gr.Interface(fn=main, inputs="audio", outputs="audio")
125
  demo.launch()