Prathamesh1420 committed on
Commit
f975d86
•
1 Parent(s): fa2f154

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -61
app.py CHANGED
@@ -3,11 +3,9 @@ import groq
3
  import io
4
  import numpy as np
5
  import soundfile as sf
6
- import pyttsx3 # Text-to-speech conversion
7
-
8
- # Initialize text-to-speech engine
9
- tts_engine = pyttsx3.init()
10
 
 
11
  def transcribe_audio(audio, api_key):
12
  if audio is None:
13
  return ""
@@ -27,10 +25,11 @@ def transcribe_audio(audio, api_key):
27
  file=("audio.wav", buffer),
28
  response_format="text"
29
  )
30
- return completion
31
  except Exception as e:
32
  return f"Error in transcription: {str(e)}"
33
 
 
34
  def generate_response(transcription, api_key):
35
  if not transcription:
36
  return "No transcription available. Please try speaking again."
@@ -41,58 +40,55 @@ def generate_response(transcription, api_key):
41
  # Use Llama 3 70B powered by Groq for text generation
42
  completion = client.chat.completions.create(
43
  model="llama3-70b-8192",
44
- messages=[{"role": "user", "content": transcription}]
 
 
 
45
  )
46
- return completion.choices[0].message.content
47
  except Exception as e:
48
  return f"Error in response generation: {str(e)}"
49
 
50
- def convert_text_to_speech(text):
51
- tts_engine.save_to_file(text, 'response_output.wav')
52
- tts_engine.runAndWait()
53
- with open("response_output.wav", "rb") as f:
54
- audio_bytes = f.read()
55
- return audio_bytes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- def process_audio(audio, api_key):
58
- if not api_key:
59
- return "Please enter your Groq API key.", "API key is required."
 
60
 
61
- transcription = transcribe_audio(audio, api_key)
62
- response = generate_response(transcription, api_key)
63
 
64
- if "Error" in response:
65
- return transcription, response, None # In case of error, return empty audio
66
 
67
- audio_output = convert_text_to_speech(response)
68
- return transcription, response, audio_output
69
 
70
- # Custom CSS
71
- custom_css = """
72
- .gradio-container {
73
- background-color: #f5f5f5;
74
- }
75
- .gr-button-primary {
76
- background-color: #f55036 !important;
77
- border-color: #f55036 !important;
78
- }
79
- .gr-button-secondary {
80
- color: #f55036 !important;
81
- border-color: #f55036 !important;
82
- }
83
- #groq-badge {
84
- position: fixed;
85
- bottom: 20px;
86
- right: 20px;
87
- z-index: 1000;
88
- }
89
- """
90
-
91
- # Gradio Interface
92
  with gr.Blocks(theme=gr.themes.Default()) as demo:
93
- gr.Markdown("# 🎙️ Groq x Gradio Voice-Powered AI Assistant")
94
 
95
  api_key_input = gr.Textbox(type="password", label="Enter your Groq API Key")
 
96
 
97
  with gr.Row():
98
  audio_input = gr.Audio(label="Speak!", type="numpy")
@@ -100,28 +96,15 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
100
  with gr.Row():
101
  transcription_output = gr.Textbox(label="Transcription")
102
  response_output = gr.Textbox(label="AI Assistant Response")
103
- audio_output = gr.Audio(label="Voice Response", type="file")
104
 
105
- submit_button = gr.Button("Process", variant="primary")
106
 
107
- gr.HTML("""
108
- <div id="groq-badge">
109
- <div style="color: #f55036; font-weight: bold;">POWERED BY GROQ</div>
110
- </div>
111
- """)
112
 
113
  submit_button.click(
114
  process_audio,
115
- inputs=[audio_input, api_key_input],
116
  outputs=[transcription_output, response_output, audio_output]
117
  )
118
-
119
- gr.Markdown("""
120
- ## How to use this app:
121
- 1. Enter your [Groq API Key](https://console.groq.com/keys) in the provided field.
122
- 2. Click on the microphone icon and speak your message (or upload an audio file).
123
- 3. Click the "Process" button to transcribe your speech and generate a response from our AI assistant.
124
- 4. The transcription, AI assistant response, and voice response will appear.
125
- """)
126
 
127
  demo.launch()
 
3
  import io
4
  import numpy as np
5
  import soundfile as sf
6
+ import requests
 
 
 
7
 
8
+ # Function to transcribe audio using Groq
9
  def transcribe_audio(audio, api_key):
10
  if audio is None:
11
  return ""
 
25
  file=("audio.wav", buffer),
26
  response_format="text"
27
  )
28
+ return completion.get('text', '') # Extract transcription text from response
29
  except Exception as e:
30
  return f"Error in transcription: {str(e)}"
31
 
32
+ # Function to generate AI response using Groq
33
  def generate_response(transcription, api_key):
34
  if not transcription:
35
  return "No transcription available. Please try speaking again."
 
40
  # Use Llama 3 70B powered by Groq for text generation
41
  completion = client.chat.completions.create(
42
  model="llama3-70b-8192",
43
+ messages=[
44
+ {"role": "system", "content": "You are a helpful assistant."},
45
+ {"role": "user", "content": transcription}
46
+ ],
47
  )
48
+ return completion.choices[0].message['content']
49
  except Exception as e:
50
  return f"Error in response generation: {str(e)}"
51
 
52
+ # VoiceRSS TTS function
53
def text_to_speech(text, tts_api_key):
    """Convert ``text`` to MP3 audio through the VoiceRSS HTTP API.

    Args:
        text: Text to synthesise; sent as the ``src`` query parameter.
        tts_api_key: VoiceRSS API key; sent as the ``key`` query parameter.

    Returns:
        The raw response body as ``bytes`` on success. On any failure a
        ``str`` beginning with ``"Error in TTS conversion:"`` is returned
        instead, so callers should check ``isinstance(result, bytes)``
        before treating the result as audio.
    """
    # Fail fast instead of spending a network round trip on a request that
    # cannot succeed.
    if not text or not tts_api_key:
        return "Error in TTS conversion: text and VoiceRSS API key are required"

    url = "https://api.voicerss.org/"
    params = {
        'key': tts_api_key,
        'src': text,
        'hl': 'en-us',  # Language: English (US)
        'r': '0',  # Speech rate
        'c': 'mp3',  # Audio format (mp3)
        'f': '48khz_16bit_stereo',  # Frequency and bitrate
    }

    try:
        # Without a timeout a stalled connection would block the caller
        # indefinitely.
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as e:
        return f"Error in TTS conversion: {e}"

    if response.status_code != 200:
        return f"Error in TTS conversion: {response.status_code}"

    audio = response.content
    # NOTE(review): VoiceRSS appears to report failures (bad key, quota, ...)
    # as a plain-text body starting with "ERROR" rather than via the HTTP
    # status - confirm against the API docs. Such a body is not audio and
    # must not be handed to an audio player.
    if audio.lstrip().startswith(b"ERROR"):
        detail = audio.decode("utf-8", errors="replace").strip()
        return f"Error in TTS conversion: {detail}"

    return audio  # Return the audio data
72
 
73
+ # Process audio function to handle transcription, response generation, and TTS
74
def process_audio(audio, groq_api_key, tts_api_key):
    """Run the transcribe -> respond -> speak pipeline for one recording.

    Args:
        audio: Recorded audio, passed unchanged to ``transcribe_audio``.
        groq_api_key: Groq API key used for transcription and generation.
        tts_api_key: VoiceRSS API key; when empty the spoken reply is skipped.

    Returns:
        A ``(transcription, response, audio)`` tuple. ``audio`` is the
        ``bytes`` returned by ``text_to_speech`` or ``None`` when no audio
        could (or should) be produced.
    """
    if not groq_api_key:
        return "Please enter your Groq API key.", "API key is required.", None

    transcription = transcribe_audio(audio, groq_api_key)
    # transcribe_audio reports failures as a string with this prefix; do not
    # forward that message to the LLM as if the user had said it.
    if isinstance(transcription, str) and transcription.startswith(
        "Error in transcription"
    ):
        return transcription, "Transcription failed; no response was generated.", None

    response = generate_response(transcription, groq_api_key)

    # Nothing worth speaking: either there was no speech to answer or the
    # generation step returned its own error message.
    generation_failed = isinstance(response, str) and response.startswith(
        "Error in response generation"
    )
    if not transcription or generation_failed or not tts_api_key:
        return transcription, response, None

    # Convert the AI response to speech using VoiceRSS
    audio_response = text_to_speech(response, tts_api_key)
    if not isinstance(audio_response, bytes):
        # text_to_speech signals failure with an error string. Handing that
        # to the audio output would be treated as a file path, so surface
        # the message in the text response and return no audio instead.
        response = f"{response}\n\n[{audio_response}]"
        audio_response = None

    return transcription, response, audio_response
 
85
 
86
+ # Gradio interface with TTS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  with gr.Blocks(theme=gr.themes.Default()) as demo:
88
+ gr.Markdown("# 🎙️ Groq x Gradio Voice-Powered AI Assistant with TTS")
89
 
90
  api_key_input = gr.Textbox(type="password", label="Enter your Groq API Key")
91
+ tts_api_key_input = gr.Textbox(type="password", label="Enter your VoiceRSS API Key")
92
 
93
  with gr.Row():
94
  audio_input = gr.Audio(label="Speak!", type="numpy")
 
96
  with gr.Row():
97
  transcription_output = gr.Textbox(label="Transcription")
98
  response_output = gr.Textbox(label="AI Assistant Response")
 
99
 
100
+ audio_output = gr.Audio(label="AI Response (Audio)", type="auto")
101
 
102
+ submit_button = gr.Button("Process", variant="primary")
 
 
 
 
103
 
104
  submit_button.click(
105
  process_audio,
106
+ inputs=[audio_input, api_key_input, tts_api_key_input],
107
  outputs=[transcription_output, response_output, audio_output]
108
  )
 
 
 
 
 
 
 
 
109
 
110
  demo.launch()