radames committed on
Commit
505b98a
·
1 Parent(s): 43333ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -63
app.py CHANGED
@@ -238,73 +238,78 @@ css = """
238
  #cut_btn, #reset_btn { align-self:stretch; }
239
  #\\31 3 { max-width: 540px; }
240
  .output-markdown {max-width: 65ch !important;}
 
 
 
 
241
  """
242
  with gr.Blocks(css=css) as demo:
243
- transcription_var = gr.State()
244
- timestamps_var = gr.State()
245
- with gr.Row():
246
- with gr.Column():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  gr.Markdown("""
248
- # Edit Video By Editing Text
249
- This project is a quick proof of concept of a simple video editor where the edits
250
- are made by editing the audio transcription.
251
- Using the [Huggingface Automatic Speech Recognition Pipeline](https://huggingface.co/tasks/automatic-speech-recognition)
252
- with a fine tuned [Wav2Vec2 model using Connectionist Temporal Classification (CTC)](https://huggingface.co/facebook/wav2vec2-large-960h-lv60-self)
253
- you can predict not only the text transcription but also the [character or word base timestamps](https://huggingface.co/docs/transformers/v4.19.2/en/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__.return_timestamps)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  """)
255
-
256
- with gr.Row():
257
-
258
- examples.render()
259
-
260
- def load_example(id):
261
- video = SAMPLES[id]['video']
262
- transcription = SAMPLES[id]['transcription'].lower()
263
- timestamps = SAMPLES[id]['timestamps']
264
-
265
- return (video, transcription, transcription, timestamps)
266
-
267
- examples.click(
268
- load_example,
269
- inputs=[examples],
270
- outputs=[video_in, text_in, transcription_var, timestamps_var],
271
- queue=False)
272
- with gr.Row():
273
- with gr.Column():
274
- video_in.render()
275
- transcribe_btn = gr.Button("Transcribe Audio")
276
- transcribe_btn.click(speech_to_text, [video_in], [
277
- text_in, transcription_var, timestamps_var])
278
-
279
- with gr.Row():
280
- gr.Markdown("""
281
- ### Now edit as text
282
- After running the video transcription, you can make cuts to the text below (only cuts, not additions!)""")
283
-
284
- with gr.Row():
285
- with gr.Column():
286
- text_in.render()
287
- with gr.Row():
288
- cut_btn = gr.Button("Cut to video", elem_id="cut_btn")
289
- # send audio path and hidden variables
290
- cut_btn.click(cut_timestamps_to_video, [
291
- video_in, transcription_var, text_in, timestamps_var], [diff_out, video_out])
292
-
293
- reset_transcription = gr.Button(
294
- "Reset to last trascription", elem_id="reset_btn")
295
- reset_transcription.click(
296
- lambda x: x, transcription_var, text_in)
297
- with gr.Column():
298
- video_out.render()
299
- diff_out.render()
300
- with gr.Row():
301
- gr.Markdown("""
302
- #### Video Credits
303
-
304
- 1. [Cooking](https://vimeo.com/573792389)
305
- 1. [Shia LaBeouf "Just Do It"](https://www.youtube.com/watch?v=n2lTxIk_Dr0)
306
- 1. [Mark Zuckerberg & Yuval Noah Harari in Conversation](https://www.youtube.com/watch?v=Boj9eD0Wug8)
307
- """)
308
  demo.queue()
309
  if __name__ == "__main__":
310
  demo.launch(debug=True)
 
238
  #cut_btn, #reset_btn { align-self:stretch; }
239
  #\\31 3 { max-width: 540px; }
240
  .output-markdown {max-width: 65ch !important;}
241
+ #container{
242
+ margin: 0 auto;
243
+ max-width: 40rem;
244
+ }
245
  """
246
  with gr.Blocks(css=css) as demo:
247
+ with gr.Column(elem_id="container"):
248
+ transcription_var = gr.State()
249
+ timestamps_var = gr.State()
250
+ with gr.Row():
251
+ with gr.Column():
252
+ gr.Markdown("""
253
+ # Edit Video By Editing Text
254
+ This project is a quick proof of concept of a simple video editor where the edits
255
+ are made by editing the audio transcription.
256
+ Using the [Huggingface Automatic Speech Recognition Pipeline](https://huggingface.co/tasks/automatic-speech-recognition)
257
+ with a fine tuned [Wav2Vec2 model using Connectionist Temporal Classification (CTC)](https://huggingface.co/facebook/wav2vec2-large-960h-lv60-self)
258
+ you can predict not only the text transcription but also the [character or word base timestamps](https://huggingface.co/docs/transformers/v4.19.2/en/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__.return_timestamps)
259
+ """)
260
+
261
+ with gr.Row():
262
+
263
+ examples.render()
264
+
265
+ def load_example(id):
266
+ video = SAMPLES[id]['video']
267
+ transcription = SAMPLES[id]['transcription'].lower()
268
+ timestamps = SAMPLES[id]['timestamps']
269
+
270
+ return (video, transcription, transcription, timestamps)
271
+
272
+ examples.click(
273
+ load_example,
274
+ inputs=[examples],
275
+ outputs=[video_in, text_in, transcription_var, timestamps_var],
276
+ queue=False)
277
+ with gr.Row():
278
+ with gr.Column():
279
+ video_in.render()
280
+ transcribe_btn = gr.Button("Transcribe Audio")
281
+ transcribe_btn.click(speech_to_text, [video_in], [
282
+ text_in, transcription_var, timestamps_var])
283
+
284
+ with gr.Row():
285
  gr.Markdown("""
286
+ ### Now edit as text
287
+ After running the video transcription, you can make cuts to the text below (only cuts, not additions!)""")
288
+
289
+ with gr.Row():
290
+ with gr.Column():
291
+ text_in.render()
292
+ with gr.Row():
293
+ cut_btn = gr.Button("Cut to video", elem_id="cut_btn")
294
+ # send audio path and hidden variables
295
+ cut_btn.click(cut_timestamps_to_video, [
296
+ video_in, transcription_var, text_in, timestamps_var], [diff_out, video_out])
297
+
298
+ reset_transcription = gr.Button(
299
+ "Reset to last trascription", elem_id="reset_btn")
300
+ reset_transcription.click(
301
+ lambda x: x, transcription_var, text_in)
302
+ with gr.Column():
303
+ video_out.render()
304
+ diff_out.render()
305
+ with gr.Row():
306
+ gr.Markdown("""
307
+ #### Video Credits
308
+
309
+ 1. [Cooking](https://vimeo.com/573792389)
310
+ 1. [Shia LaBeouf "Just Do It"](https://www.youtube.com/watch?v=n2lTxIk_Dr0)
311
+ 1. [Mark Zuckerberg & Yuval Noah Harari in Conversation](https://www.youtube.com/watch?v=Boj9eD0Wug8)
312
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  demo.queue()
314
  if __name__ == "__main__":
315
  demo.launch(debug=True)