Spaces:
Runtime error
Runtime error
import openai, os | |
import gradio as gr | |
import time | |
import boto3 | |
import json | |
import numpy as np | |
import wave | |
import io | |
from langchain import OpenAI | |
from langchain.chains import ConversationChain | |
from langchain.memory import ConversationSummaryBufferMemory | |
from langchain.chat_models import ChatOpenAI | |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler | |
from langchain.schema import HumanMessage | |
# The OpenAI key is read from the environment; a missing OPENAI_API_KEY
# raises KeyError as soon as this module is imported.
openai.api_key = os.environ["OPENAI_API_KEY"]

# AWS clients shared by the handlers below. Only Polly pins a region here;
# S3 and Transcribe use whatever region boto3 resolves by default.
polly = boto3.client('polly', region_name='us-east-1')
s3 = boto3.client('s3')
transcribe = boto3.client('transcribe')

# Conversation memory built on a ChatOpenAI model with max_token_limit=2048.
memory = ConversationSummaryBufferMemory(
    llm=ChatOpenAI(),
    max_token_limit=2048,
)

# Single module-level chain: every handler talks to the same conversation.
conversation = ConversationChain(
    llm=OpenAI(
        streaming=True,
        callbacks=[StreamingStdOutCallbackHandler()],
        max_tokens=2048,
        temperature=0.5,
    ),
    memory=memory,
)
def play_mp3(file_path):
    """Initialise pygame's mixer, load ``file_path`` and call ``music.play()``.

    Returns None.

    NOTE(review): ``pygame`` is referenced here but no import of it is visible
    in this file, so calling this helper as-is would raise NameError. Confirm
    whether it is still needed.
    """
    mixer = pygame.mixer
    mixer.init()
    mixer.music.load(file_path)
    mixer.music.play()
def play_mp3_audio(path):
    """Read the file at ``path`` as bytes and construct a ``gr.Audio`` from them.

    The component is neither returned nor stored, so the function returns None.
    """
    with open(path, 'rb') as mp3_file:
        payload = mp3_file.read()
    gr.Audio(payload)
def play_wav_audio(wav_file):
    """Play a WAV file by writing its frames to a PyAudio output stream.

    Frames are read and written in chunks of 1024 until the file is
    exhausted. The wave reader, the stream and the PyAudio instance are all
    released even if playback raises part-way through.

    Args:
        wav_file: Whatever ``wave.open`` accepts in ``'rb'`` mode.

    Returns:
        None.

    NOTE(review): ``pyaudio`` is referenced here but no import of it is
    visible in this file, so calling this helper as-is would raise NameError.
    Confirm whether it is still needed.
    """
    frames_per_chunk = 1024
    # wave readers are context managers; this closes the file on every path.
    with wave.open(wav_file, 'rb') as wf:
        p = pyaudio.PyAudio()
        try:
            stream = p.open(
                format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True,
            )
            try:
                data = wf.readframes(frames_per_chunk)
                while data:
                    stream.write(data)
                    data = wf.readframes(frames_per_chunk)
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()
def download_file(bucket_name, object_key, file_path):
    """Download ``object_key`` from S3 bucket ``bucket_name`` to ``file_path``.

    Best-effort: any exception is caught and reported on stdout instead of
    being propagated, so callers cannot tell success from failure by the
    return value (always None).
    """
    try:
        s3.download_file(bucket_name, object_key, file_path)
        print(f"File downloaded successfully: {file_path}")
    except Exception as err:
        print(f"Error downloading file: {err}")
def play_s3_voice(text):
    """Synthesize ``text`` with Amazon Polly and download the resulting MP3.

    Starts an asynchronous Polly synthesis task that writes into the
    ``lingo-audio-materials`` bucket under the ``answers/`` prefix, polls the
    task once per second until it reports ``completed`` or ``failed``, then
    downloads the produced object to ``/tmp/from-s3.mp3``.

    Args:
        text: Text to speak (voice ``Zhiyu``, language ``cmn-CN``, neural
            engine).

    Returns:
        The local path of the downloaded MP3, or ``None`` when the synthesis
        task failed, so that a file left over from an earlier call is never
        handed back as if it were the new answer.
    """
    response = polly.start_speech_synthesis_task(
        OutputS3BucketName='lingo-audio-materials',  # this bucket is in us-east-1
        OutputS3KeyPrefix='answers/',
        OutputFormat='mp3',
        Text=text,
        VoiceId='Zhiyu',
        LanguageCode='cmn-CN',
        Engine='neural'
    )
    synthesis_task = response['SynthesisTask']
    task_id = synthesis_task['TaskId']
    print('Task ID:', task_id)

    while True:
        task = polly.get_speech_synthesis_task(TaskId=task_id)['SynthesisTask']
        task_status = task['TaskStatus']
        if task_status == 'completed':
            break
        if task_status == 'failed':
            # Nothing was written to S3, so there is nothing to download.
            print('Task failed:', task.get('TaskStatusReason'))
            return None
        print("Polly synthesis task is still in progress...")
        time.sleep(1)

    output_uri = synthesis_task['OutputUri']
    print("polly output_uri:"+output_uri)
    # The URI is treated as host/bucket/key. Splitting at most twice keeps the
    # key whole no matter how many "/" separators the key prefix contains.
    _, bucket_name, key_name = output_uri.replace("https://", "").split("/", 2)
    print("bucket name:"+bucket_name)
    print("key name:"+key_name)

    output_file = "/tmp/from-s3.mp3"
    print("output_file_location: "+output_file)
    download_file(bucket_name, key_name, output_file)
    return output_file
def predict(input, history=None):
    """Run one chat turn and synthesize the reply as audio.

    Args:
        input: The user's message text.
        history: Flat list alternating user message and assistant reply. It is
            mutated in place; a fresh list is created when omitted, so
            separate calls never share state through the default.

    Returns:
        A 3-tuple ``(responses, audio_file, history)`` where ``responses`` is
        the history regrouped into ``(user, assistant)`` pairs, ``audio_file``
        is whatever ``play_s3_voice`` returned for the reply, and ``history``
        is the updated flat list.
    """
    if history is None:
        history = []
    history.append(input)
    response = conversation.predict(input=input)
    print("GPT response: "+response)
    history.append(response)
    audio_file = play_s3_voice(response)
    # Even indices are user turns, odd indices the matching replies.
    responses = list(zip(history[::2], history[1::2]))
    print("all historical responses: "+str(responses))
    return responses, audio_file, history
def predict_text_only(input, history=None):
    """Run one chat turn without synthesizing audio.

    Args:
        input: The user's message text.
        history: Flat list alternating user message and assistant reply. It is
            mutated in place; a fresh list is created when omitted.

    Returns:
        A 3-tuple ``(responses, audio_file, history)`` with the same layout as
        ``predict``: ``responses`` is the history regrouped into
        ``(user, assistant)`` pairs, ``audio_file`` is a fixed placeholder
        path, and ``history`` is the updated flat list.
    """
    if history is None:
        history = []
    history.append(input)
    response = conversation.predict(input=input)
    # Record the reply as well: the history is shared with the audio handler,
    # which pairs entries by position (even = user, odd = assistant).
    history.append(response)
    # The chat display is fed pairs, matching what ``predict`` returns.
    responses = list(zip(history[::2], history[1::2]))
    # NOTE(review): nothing in this file creates /tmp/fake.mp3 -- confirm the
    # audio output component tolerates a path that does not exist.
    audio_file = "/tmp/fake.mp3"
    return responses, audio_file, history
def transcribe_func(audio):
    """Transcribe a recorded WAV file with Amazon Transcribe and return its text.

    The file is uploaded to S3, a transcription job with the fixed name
    ``lingo-demo`` is started (after deleting any existing job of that name),
    the job is polled once per second until it finishes, and the transcript
    JSON written to the output bucket is read back and parsed.

    Args:
        audio: Filesystem path of the WAV recording to transcribe.

    Returns:
        The text of the first transcript in the job's result document.

    Raises:
        RuntimeError: If the transcription job ends in ``FAILED`` status.
    """
    # The path is all that is needed; S3 opens the file itself on upload.
    file_name = audio
    print("audio_file: "+file_name)

    # Job parameters.
    job_name = "lingo-demo"
    text_output_bucket = 'lingo-text-material'  # this bucket is in us-west-1
    text_output_key = 'transcriptions/'+job_name+'.json'
    language_code = 'zh-CN'

    # Upload the recording so Transcribe can read it from S3.
    audio_input_bucket_name = "lingo-audio-material"
    audio_input_s3_key = "questions/tmp-question-from-huggingface.wav"
    s3.upload_file(file_name, audio_input_bucket_name, audio_input_s3_key)
    s3_uri = f"s3://{audio_input_bucket_name}/{audio_input_s3_key}"

    # The job name is fixed, so a job left over from a previous call must be
    # removed first. Filtering on the name server-side keeps the leftover job
    # from being hidden behind the first page of an unfiltered listing.
    existing = transcribe.list_transcription_jobs(JobNameContains=job_name)
    for job in existing['TranscriptionJobSummaries']:
        print(job['TranscriptionJobName'])
        if job['TranscriptionJobName'] == job_name:
            response = transcribe.delete_transcription_job(TranscriptionJobName=job_name)
            print("delete transcribe job response:"+str(response))
            break

    response = transcribe.start_transcription_job(
        TranscriptionJobName=job_name,
        Media={'MediaFileUri': s3_uri},
        MediaFormat='wav',
        LanguageCode=language_code,
        OutputBucketName=text_output_bucket,
        OutputKey=text_output_key
    )
    print("start transcribe job response:"+str(response))

    # Wait for the job to reach a terminal state, keeping the last job
    # description so it does not have to be fetched again afterwards.
    while True:
        job = transcribe.get_transcription_job(TranscriptionJobName=job_name)['TranscriptionJob']
        status = job['TranscriptionJobStatus']
        if status in ('COMPLETED', 'FAILED'):
            break
        print("Transcription job still in progress...")
        time.sleep(1)

    if status == 'FAILED':
        # A failed job produced no transcript; stop here instead of reading
        # whatever may already sit at the output key.
        raise RuntimeError(
            f"Transcription job {job_name} failed: {job.get('FailureReason')}"
        )

    transcript_uri = job['Transcript']['TranscriptFileUri']
    print("transcript uri: " + str(transcript_uri))

    transcript_file_content = s3.get_object(
        Bucket=text_output_bucket, Key=text_output_key
    )['Body'].read().decode('utf-8')
    print(transcript_file_content)
    json_data = json.loads(transcript_file_content)
    return json_data['results']['transcripts'][0]['transcript']
def process_audio(audio, history=None):
    """Transcribe a recording and run the result through ``predict``.

    Args:
        audio: Filesystem path of the recording, or ``None`` when there is no
            recording to process.
        history: Flat list alternating user message and assistant reply; a
            fresh list is created when omitted.

    Returns:
        The ``(responses, audio_file, history)`` tuple from ``predict``. When
        ``audio`` is ``None`` nothing is transcribed: the existing history is
        returned regrouped into pairs, with ``None`` for the audio file.
    """
    if history is None:
        history = []
    if audio is None:
        # No recording: leave the conversation untouched.
        return list(zip(history[::2], history[1::2])), None, history
    text = transcribe_func(audio)
    return predict(text, history)
# UI layout: chat display on top, then a text box, a microphone input and an
# audio player for the synthesized answer, each in its own row.
with gr.Blocks(css="#chatbot{height:350px} .overflow-y-auto{height:500px}") as demo:
    chatbot = gr.Chatbot(elem_id="chatbot")
    # Per-session conversation history, handed to and returned by the handlers.
    state = gr.State([])

    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="Enter text and press enter",
        ).style(container=False)

    with gr.Row():
        audio_input = gr.Audio(
            source="microphone",
            type="filepath",
            label="Audio Input",
        )

    with gr.Row():
        audio_output = gr.Audio(
            type="filepath",
            label="Audio Output",
            elem_id="speaker",
            interactive=False,
        )

    # Both handlers take (input, state) and fill (chatbot, audio_output, state).
    txt.submit(predict_text_only, [txt, state], [chatbot, audio_output, state])
    audio_input.change(process_audio, [audio_input, state], [chatbot, audio_output, state])

demo.launch(debug=True)