Spaces:

Prathamesh1420
/

Virtual_assistant

Running

App Files Files Community

Virtual_assistant / app.py

Prathamesh1420

Update app.py

f975d86 verified about 2 months ago

raw

history blame

3.8 kB

	import gradio as gr
	import groq
	import io
	import numpy as np
	import soundfile as sf
	import requests

	# Function to transcribe audio using Groq
	def transcribe_audio(audio, api_key):
	    """Transcribe a recorded audio clip with Groq's Distil-Whisper model.

	    Args:
	        audio: ``(sample_rate, samples)`` tuple as delivered by
	            ``gr.Audio(type="numpy")``, or ``None`` when nothing was recorded.
	        api_key: Groq API key used to create the client.

	    Returns:
	        The transcription text, ``""`` when ``audio`` is ``None``, or a
	        string starting with ``"Error in transcription: "`` if the request
	        fails.
	    """
	    if audio is None:
	        return ""

	    client = groq.Client(api_key=api_key)

	    # Encode the raw samples as an in-memory WAV file for upload.
	    sample_rate, samples = audio[0], audio[1]
	    buffer = io.BytesIO()
	    sf.write(buffer, samples, sample_rate, format='wav')
	    buffer.seek(0)

	    try:
	        # Use Distil-Whisper English powered by Groq for transcription
	        completion = client.audio.transcriptions.create(
	            model="distil-whisper-large-v3-en",
	            file=("audio.wav", buffer),
	            response_format="text",
	        )
	    except Exception as e:
	        return f"Error in transcription: {str(e)}"

	    # With response_format="text" the SDK hands back the plain text body;
	    # other formats yield an object exposing `.text`. Neither supports
	    # dict-style `.get`, so handle both shapes explicitly.
	    if isinstance(completion, str):
	        return completion
	    return getattr(completion, "text", "") or ""

	# Function to generate AI response using Groq
	def generate_response(transcription, api_key):
	    """Generate an assistant reply to the transcribed text with Llama 3 70B.

	    Args:
	        transcription: The user's transcribed speech.
	        api_key: Groq API key used to create the client.

	    Returns:
	        The model's reply text, a fixed prompt to retry when
	        ``transcription`` is empty, or a string starting with
	        ``"Error in response generation: "`` if the request fails.
	    """
	    if not transcription:
	        return "No transcription available. Please try speaking again."

	    client = groq.Client(api_key=api_key)

	    try:
	        # Use Llama 3 70B powered by Groq for text generation
	        completion = client.chat.completions.create(
	            model="llama3-70b-8192",
	            messages=[
	                {"role": "system", "content": "You are a helpful assistant."},
	                {"role": "user", "content": transcription},
	            ],
	        )
	        # `message` is an SDK model object, not a dict: subscripting it
	        # raises TypeError, so read the attribute instead.
	        return completion.choices[0].message.content
	    except Exception as e:
	        return f"Error in response generation: {str(e)}"

	# VoiceRSS TTS function
	def text_to_speech(text, tts_api_key):
	    """Convert text to MP3 speech through the VoiceRSS HTTP API.

	    Args:
	        text: The text to synthesize.
	        tts_api_key: VoiceRSS API key.

	    Returns:
	        The MP3 audio as ``bytes`` on success; otherwise a ``str`` starting
	        with ``"Error in TTS conversion: "`` describing the failure.
	    """
	    # Skip the network round trip when the request cannot succeed.
	    if not text:
	        return "Error in TTS conversion: no text to convert"
	    if not tts_api_key:
	        return "Error in TTS conversion: missing VoiceRSS API key"

	    url = "https://api.voicerss.org/"
	    params = {
	        'key': tts_api_key,
	        'src': text,
	        'hl': 'en-us',  # Language: English (US)
	        'r': '0',  # Speech rate
	        'c': 'mp3',  # Audio format (mp3)
	        'f': '48khz_16bit_stereo',  # Frequency and bitrate
	    }

	    try:
	        # A timeout keeps a stalled TTS server from hanging the UI forever.
	        response = requests.get(url, params=params, timeout=30)
	    except Exception as e:
	        return f"Error in TTS conversion: {str(e)}"

	    if response.status_code != 200:
	        return f"Error in TTS conversion: {response.status_code}"

	    payload = response.content
	    # VoiceRSS signals failures (bad key, quota, ...) with a text body
	    # beginning "ERROR" rather than with an HTTP error status.
	    if payload[:5].upper() == b"ERROR":
	        detail = payload.decode("utf-8", errors="replace").strip()
	        return f"Error in TTS conversion: {detail}"
	    return payload  # Return the audio data

	# Process audio function to handle transcription, response generation, and TTS
	def process_audio(audio, groq_api_key, tts_api_key):
	    """Run the full pipeline: transcription, reply generation, then TTS.

	    Args:
	        audio: ``(sample_rate, samples)`` tuple from the audio input, or
	            ``None``.
	        groq_api_key: Groq API key for transcription and chat completion.
	        tts_api_key: VoiceRSS API key for speech synthesis.

	    Returns:
	        A ``(transcription, response, audio_response)`` tuple matching the
	        three output components. ``audio_response`` is MP3 ``bytes`` or
	        ``None`` when no audio could be produced.
	    """
	    if not groq_api_key:
	        return "Please enter your Groq API key.", "API key is required.", None

	    transcription = transcribe_audio(audio, groq_api_key)
	    # transcribe_audio reports failures as text; don't forward that text to
	    # the LLM as if the user had spoken it.
	    if transcription.startswith("Error in transcription:"):
	        return transcription, "Transcription failed; no response generated.", None

	    response = generate_response(transcription, groq_api_key)

	    # Convert the AI response to speech using VoiceRSS
	    audio_response = text_to_speech(response, tts_api_key)
	    # text_to_speech returns a str on failure. The audio component would
	    # interpret a str as a file path, so only pass real audio bytes through.
	    if not isinstance(audio_response, (bytes, bytearray)):
	        audio_response = None

	    return transcription, response, audio_response

	# Gradio interface with TTS
	# Build the UI: two API-key fields, a microphone/upload input, text outputs
	# for the transcription and the reply, and an audio player for the TTS result.
	with gr.Blocks(theme=gr.themes.Default()) as demo:
	    gr.Markdown("# 🎙️ Groq x Gradio Voice-Powered AI Assistant with TTS")

	    api_key_input = gr.Textbox(type="password", label="Enter your Groq API Key")
	    tts_api_key_input = gr.Textbox(type="password", label="Enter your VoiceRSS API Key")

	    with gr.Row():
	        audio_input = gr.Audio(label="Speak!", type="numpy")

	    with gr.Row():
	        transcription_output = gr.Textbox(label="Transcription")
	        response_output = gr.Textbox(label="AI Assistant Response")

	    # `type` only governs how an *input* is handed to the callback, and
	    # "auto" is not an accepted value ("numpy" / "filepath" are), so the
	    # output player is created with the default.
	    audio_output = gr.Audio(label="AI Response (Audio)")

	    submit_button = gr.Button("Process", variant="primary")

	    # Wire the button to the pipeline; outputs map 1:1 to process_audio's
	    # returned tuple.
	    submit_button.click(
	        process_audio,
	        inputs=[audio_input, api_key_input, tts_api_key_input],
	        outputs=[transcription_output, response_output, audio_output],
	    )

	demo.launch()