EC2 Default User committed on
Commit
e31cd83
1 Parent(s): 17600ff

add video function

Browse files
Files changed (1) hide show
  1. app.py +57 -13
app.py CHANGED
@@ -49,14 +49,14 @@ def wav_to_pcm(input_file, output_file):
49
 
50
 
51
  openai.api_key = os.environ["OPENAI_API_KEY"]
 
 
52
 
53
  polly = boto3.client('polly', region_name='us-east-1')
54
  s3 = boto3.client('s3')
55
  transcribe = boto3.client('transcribe')
56
 
57
 
58
- avatar_url = "https://create-images-results.d-id.com/DefaultPresenters/Magen_f/image.jpeg"
59
-
60
  #memory = ConversationSummaryBufferMemory(llm=ChatOpenAI(), max_token_limit=2048)
61
  memory = ConversationBufferWindowMemory(k=5)
62
  conversation = ConversationChain(
@@ -179,17 +179,61 @@ def play_s3_voice(text):
179
  #tmp_aud_file_url = output_file
180
  #htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3" autoplay></audio>'
181
  #audio_htm = gr.HTML(htm_audio)
182
- return output_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  def predict(input, history=[]):
185
- history.append(input)
186
- response = conversation.predict(input=input)
187
- print("GPT response: "+response)
188
- history.append(response)
189
- audio_file = play_s3_voice(response)
190
- responses = [(u,b) for u,b in zip(history[::2], history[1::2])]
191
- print("all historical responses: "+str(responses))
192
- return responses, audio_file, history
 
 
 
 
 
 
193
 
194
  def transcribe_func_new(audio):
195
  audio_file = open(audio, "rb")
@@ -302,7 +346,7 @@ with gr.Blocks(css="#chatbot{height:350px} .overflow-y-auto{height:500px}") as d
302
  with gr.Row():
303
  video = gr.HTML(f'<img src="{avatar_url}" width="320" height="240" alt="John Carmack">', live=False)
304
 
305
- txt.submit(predict, [txt, state], [chatbot, audio_output, state])
306
- audio_input.change(process_audio, [audio_input, state], [chatbot, audio_output, state])
307
 
308
  demo.launch(debug=True)
 
49
 
50
 
51
  openai.api_key = os.environ["OPENAI_API_KEY"]
52
+ did_api_key = os.environ["DID_API_KEY"]
53
+ avatar_url = "https://create-images-results.d-id.com/DefaultPresenters/Magen_f/image.jpeg"
54
 
55
  polly = boto3.client('polly', region_name='us-east-1')
56
  s3 = boto3.client('s3')
57
  transcribe = boto3.client('transcribe')
58
 
59
 
 
 
60
  #memory = ConversationSummaryBufferMemory(llm=ChatOpenAI(), max_token_limit=2048)
61
  memory = ConversationBufferWindowMemory(k=5)
62
  conversation = ConversationChain(
 
179
  #tmp_aud_file_url = output_file
180
  #htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3" autoplay></audio>'
181
  #audio_htm = gr.HTML(htm_audio)
182
+ return output_file, mp3_pre_signed_url
183
+
184
+ def generate_talk_with_audio(input, avatar_url, api_key = did_api_key):
185
+ url = "https://api.d-id.com/talks"
186
+ payload = {
187
+ "script": {
188
+ "type": "audio",
189
+ "audio_url": input
190
+ },
191
+ "config": {
192
+ "auto_match": "true",
193
+ "result_format": "mp4"
194
+ },
195
+ "source_url": avatar_url
196
+ }
197
+ headers = {
198
+ "accept": "application/json",
199
+ "content-type": "application/json",
200
+ "authorization": "Basic " + api_key
201
+ }
202
+
203
+ response = requests.post(url, json=payload, headers=headers)
204
+ return response.json()
205
+
206
+ def get_mp4_video(input, avatar_url=avatar_url):
207
+ response = generate_talk_with_audio(input=input, avatar_url=avatar_url)
208
+ talk = get_a_talk(response['id'])
209
+ video_url = ""
210
+ index = 0
211
+ while index < 30:
212
+ index += 1
213
+ if 'result_url' in talk:
214
+ video_url = talk['result_url']
215
+ return video_url
216
+ else:
217
+ time.sleep(1)
218
+ talk = get_a_talk(response['id'])
219
+ return video_url
220
+
221
 
222
  def predict(input, history=[]):
223
+ if input is not None:
224
+ history.append(input)
225
+ response = conversation.predict(input=input)
226
+ audio_file, pre_signed_url = play_s3_voice(response)
227
+
228
+ video_url = get_mp4_video(input=pre_signed_url, avatar_url=avatar_url)
229
+ video_html = f"""<video width="320" height="240" controls autoplay><source src="{video_url}" type="video/mp4"></video>"""
230
+ history.append(response)
231
+ responses = [(u,b) for u,b in zip(history[::2], history[1::2])]
232
+ return responses, audio_file, video_html, history
233
+ else:
234
+ video_html = f'<img src="{avatar_url}" width="320" height="240" alt="John Carmack">'
235
+ responses = [(u,b) for u,b in zip(history[::2], history[1::2])]
236
+ return responses, audio_file, video_html, history
237
 
238
  def transcribe_func_new(audio):
239
  audio_file = open(audio, "rb")
 
346
  with gr.Row():
347
  video = gr.HTML(f'<img src="{avatar_url}" width="320" height="240" alt="John Carmack">', live=False)
348
 
349
+ txt.submit(predict, [txt, state], [chatbot, audio_output, video, state])
350
+ audio_input.change(process_audio, [audio_input, state], [chatbot, audio_output, video, state])
351
 
352
  demo.launch(debug=True)