Commit e31cd83: add video function
EC2 Default User committed
Parent(s): 17600ff

app.py CHANGED
@@ -49,14 +49,14 @@ def wav_to_pcm(input_file, output_file):
 
 
 openai.api_key = os.environ["OPENAI_API_KEY"]
+did_api_key = os.environ["DID_API_KEY"]
+avatar_url = "https://create-images-results.d-id.com/DefaultPresenters/Magen_f/image.jpeg"
 
 polly = boto3.client('polly', region_name='us-east-1')
 s3 = boto3.client('s3')
 transcribe = boto3.client('transcribe')
 
 
-avatar_url = "https://create-images-results.d-id.com/DefaultPresenters/Magen_f/image.jpeg"
-
 #memory = ConversationSummaryBufferMemory(llm=ChatOpenAI(), max_token_limit=2048)
 memory = ConversationBufferWindowMemory(k=5)
 conversation = ConversationChain(
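Both keys are read from the environment at import time, and DID_API_KEY is new in this commit. Below is an optional startup-guard sketch, not part of the commit; only the two variable names are taken from the diff. It fails fast with one clear error when a secret is missing from the Space, instead of a KeyError deep inside a request handler.

import os

# Hypothetical guard, not in the commit: verify the two secrets the app expects
# before any client is constructed.
missing = [name for name in ("OPENAI_API_KEY", "DID_API_KEY") if not os.environ.get(name)]
if missing:
    raise RuntimeError("Missing environment variables: " + ", ".join(missing))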
@@ -179,17 +179,61 @@ def play_s3_voice(text):
     #tmp_aud_file_url = output_file
     #htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3" autoplay></audio>'
     #audio_htm = gr.HTML(htm_audio)
-    return output_file
+    return output_file, mp3_pre_signed_url
+
+def generate_talk_with_audio(input, avatar_url, api_key = did_api_key):
+    url = "https://api.d-id.com/talks"
+    payload = {
+        "script": {
+            "type": "audio",
+            "audio_url": input
+        },
+        "config": {
+            "auto_match": "true",
+            "result_format": "mp4"
+        },
+        "source_url": avatar_url
+    }
+    headers = {
+        "accept": "application/json",
+        "content-type": "application/json",
+        "authorization": "Basic " + api_key
+    }
+
+    response = requests.post(url, json=payload, headers=headers)
+    return response.json()
+
+def get_mp4_video(input, avatar_url=avatar_url):
+    response = generate_talk_with_audio(input=input, avatar_url=avatar_url)
+    talk = get_a_talk(response['id'])
+    video_url = ""
+    index = 0
+    while index < 30:
+        index += 1
+        if 'result_url' in talk:
+            video_url = talk['result_url']
+            return video_url
+        else:
+            time.sleep(1)
+            talk = get_a_talk(response['id'])
+    return video_url
+
 
 def predict(input, history=[]):
-
-
-
-
-
-
-
-
+    if input is not None:
+        history.append(input)
+        response = conversation.predict(input=input)
+        audio_file, pre_signed_url = play_s3_voice(response)
+
+        video_url = get_mp4_video(input=pre_signed_url, avatar_url=avatar_url)
+        video_html = f"""<video width="320" height="240" controls autoplay><source src="{video_url}" type="video/mp4"></video>"""
+        history.append(response)
+        responses = [(u,b) for u,b in zip(history[::2], history[1::2])]
+        return responses, audio_file, video_html, history
+    else:
+        video_html = f'<img src="{avatar_url}" width="320" height="240" alt="John Carmack">'
+        responses = [(u,b) for u,b in zip(history[::2], history[1::2])]
+        return responses, audio_file, video_html, history
 
 def transcribe_func_new(audio):
     audio_file = open(audio, "rb")
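get_mp4_video() polls get_a_talk(), which is defined elsewhere in app.py and not shown in this diff. The sketch below is an assumption of what such a helper could look like: the name, the argument (the id returned by the POST), and the module-level did_api_key come from the call sites above, while the GET https://api.d-id.com/talks/{id} endpoint and header layout are assumptions mirroring generate_talk_with_audio().

import requests

def get_a_talk(talk_id, api_key=did_api_key):
    # Assumed helper: fetch the talk object for a previously created talk.
    # Once D-ID finishes rendering, the response includes "result_url",
    # which is what the polling loop in get_mp4_video() waits for.
    url = f"https://api.d-id.com/talks/{talk_id}"
    headers = {
        "accept": "application/json",
        "authorization": "Basic " + api_key,
    }
    response = requests.get(url, headers=headers)
    return response.json()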
@@ -302,7 +346,7 @@ with gr.Blocks(css="#chatbot{height:350px} .overflow-y-auto{height:500px}") as demo:
     with gr.Row():
         video = gr.HTML(f'<img src="{avatar_url}" width="320" height="240" alt="John Carmack">', live=False)
 
-    txt.submit(predict, [txt, state], [chatbot, audio_output, state])
-    audio_input.change(process_audio, [audio_input, state], [chatbot, audio_output, state])
+    txt.submit(predict, [txt, state], [chatbot, audio_output, video, state])
+    audio_input.change(process_audio, [audio_input, state], [chatbot, audio_output, video, state])
 
 demo.launch(debug=True)
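The wiring change above adds the video component to the output lists of txt.submit and audio_input.change, so whatever HTML string predict() returns replaces the avatar image in place. Below is a self-contained sketch of that pattern in isolation; the component names and callback here are illustrative, not taken from app.py.

import gradio as gr

def show_video(url):
    # Returning an HTML string from the callback replaces the gr.HTML
    # component's current content, the same way predict() swaps the static
    # avatar <img> for a <video> tag pointing at the D-ID result_url.
    return (f'<video width="320" height="240" controls autoplay>'
            f'<source src="{url}" type="video/mp4"></video>')

with gr.Blocks() as demo:
    txt = gr.Textbox(label="Video URL")
    video = gr.HTML('<p>No video yet</p>')
    txt.submit(show_video, [txt], [video])

demo.launch()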