EC2 Default User committed on
Commit
e31cd83
1 Parent(s): 17600ff

add video function

Browse files
Files changed (1) hide show
  1. app.py +57 -13
app.py CHANGED
@@ -49,14 +49,14 @@ def wav_to_pcm(input_file, output_file):
49
 
50
 
51
  openai.api_key = os.environ["OPENAI_API_KEY"]
 
 
52
 
53
  polly = boto3.client('polly', region_name='us-east-1')
54
  s3 = boto3.client('s3')
55
  transcribe = boto3.client('transcribe')
56
 
57
 
58
- avatar_url = "https://create-images-results.d-id.com/DefaultPresenters/Magen_f/image.jpeg"
59
-
60
  #memory = ConversationSummaryBufferMemory(llm=ChatOpenAI(), max_token_limit=2048)
61
  memory = ConversationBufferWindowMemory(k=5)
62
  conversation = ConversationChain(
@@ -179,17 +179,61 @@ def play_s3_voice(text):
179
  #tmp_aud_file_url = output_file
180
  #htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3" autoplay></audio>'
181
  #audio_htm = gr.HTML(htm_audio)
182
- return output_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  def predict(input, history=[]):
185
- history.append(input)
186
- response = conversation.predict(input=input)
187
- print("GPT response: "+response)
188
- history.append(response)
189
- audio_file = play_s3_voice(response)
190
- responses = [(u,b) for u,b in zip(history[::2], history[1::2])]
191
- print("all historical responses: "+str(responses))
192
- return responses, audio_file, history
 
 
 
 
 
 
193
 
194
  def transcribe_func_new(audio):
195
  audio_file = open(audio, "rb")
@@ -302,7 +346,7 @@ with gr.Blocks(css="#chatbot{height:350px} .overflow-y-auto{height:500px}") as d
302
  with gr.Row():
303
  video = gr.HTML(f'<img src="{avatar_url}" width="320" height="240" alt="John Carmack">', live=False)
304
 
305
- txt.submit(predict, [txt, state], [chatbot, audio_output, state])
306
- audio_input.change(process_audio, [audio_input, state], [chatbot, audio_output, state])
307
 
308
  demo.launch(debug=True)
 
49
 
50
 
51
  openai.api_key = os.environ["OPENAI_API_KEY"]
52
+ did_api_key = os.environ["DID_API_KEY"]
53
+ avatar_url = "https://create-images-results.d-id.com/DefaultPresenters/Magen_f/image.jpeg"
54
 
55
  polly = boto3.client('polly', region_name='us-east-1')
56
  s3 = boto3.client('s3')
57
  transcribe = boto3.client('transcribe')
58
 
59
 
 
 
60
  #memory = ConversationSummaryBufferMemory(llm=ChatOpenAI(), max_token_limit=2048)
61
  memory = ConversationBufferWindowMemory(k=5)
62
  conversation = ConversationChain(
 
179
  #tmp_aud_file_url = output_file
180
  #htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3" autoplay></audio>'
181
  #audio_htm = gr.HTML(htm_audio)
182
+ return output_file, mp3_pre_signed_url
183
+
184
+ def generate_talk_with_audio(input, avatar_url, api_key = did_api_key):
185
+ url = "https://api.d-id.com/talks"
186
+ payload = {
187
+ "script": {
188
+ "type": "audio",
189
+ "audio_url": input
190
+ },
191
+ "config": {
192
+ "auto_match": "true",
193
+ "result_format": "mp4"
194
+ },
195
+ "source_url": avatar_url
196
+ }
197
+ headers = {
198
+ "accept": "application/json",
199
+ "content-type": "application/json",
200
+ "authorization": "Basic " + api_key
201
+ }
202
+
203
+ response = requests.post(url, json=payload, headers=headers)
204
+ return response.json()
205
+
206
+ def get_mp4_video(input, avatar_url=avatar_url):
207
+ response = generate_talk_with_audio(input=input, avatar_url=avatar_url)
208
+ talk = get_a_talk(response['id'])
209
+ video_url = ""
210
+ index = 0
211
+ while index < 30:
212
+ index += 1
213
+ if 'result_url' in talk:
214
+ video_url = talk['result_url']
215
+ return video_url
216
+ else:
217
+ time.sleep(1)
218
+ talk = get_a_talk(response['id'])
219
+ return video_url
220
+
221
 
222
  def predict(input, history=[]):
223
+ if input is not None:
224
+ history.append(input)
225
+ response = conversation.predict(input=input)
226
+ audio_file, pre_signed_url = play_s3_voice(response)
227
+
228
+ video_url = get_mp4_video(input=pre_signed_url, avatar_url=avatar_url)
229
+ video_html = f"""<video width="320" height="240" controls autoplay><source src="{video_url}" type="video/mp4"></video>"""
230
+ history.append(response)
231
+ responses = [(u,b) for u,b in zip(history[::2], history[1::2])]
232
+ return responses, audio_file, video_html, history
233
+ else:
234
+ video_html = f'<img src="{avatar_url}" width="320" height="240" alt="John Carmack">'
235
+ responses = [(u,b) for u,b in zip(history[::2], history[1::2])]
236
+ return responses, audio_file, video_html, history
237
 
238
  def transcribe_func_new(audio):
239
  audio_file = open(audio, "rb")
 
346
  with gr.Row():
347
  video = gr.HTML(f'<img src="{avatar_url}" width="320" height="240" alt="John Carmack">', live=False)
348
 
349
+ txt.submit(predict, [txt, state], [chatbot, audio_output, video, state])
350
+ audio_input.change(process_audio, [audio_input, state], [chatbot, audio_output, video, state])
351
 
352
  demo.launch(debug=True)