update example
Browse files- example_usage.ipynb +262 -36
example_usage.ipynb
CHANGED
@@ -2,31 +2,24 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"id": "5920c653-448e-43b3-93eb-12d7073ad352",
|
7 |
"metadata": {
|
8 |
"tags": []
|
9 |
},
|
10 |
-
"outputs": [
|
11 |
-
{
|
12 |
-
"name": "stderr",
|
13 |
-
"output_type": "stream",
|
14 |
-
"text": [
|
15 |
-
"/opt/espnet/tools/anaconda/envs/espnet/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
16 |
-
" from .autonotebook import tqdm as notebook_tqdm\n"
|
17 |
-
]
|
18 |
-
}
|
19 |
-
],
|
20 |
"source": [
|
21 |
-
"
|
22 |
-
"from espnet2.bin.asr_align import CTCSegmentation\n",
|
23 |
"import soundfile\n",
|
24 |
-
"import pandas as pd"
|
|
|
|
|
|
|
25 |
]
|
26 |
},
|
27 |
{
|
28 |
"cell_type": "code",
|
29 |
-
"execution_count":
|
30 |
"id": "83058587-1a8a-4b01-92ff-e9125fbe55a3",
|
31 |
"metadata": {
|
32 |
"tags": []
|
@@ -47,14 +40,15 @@
|
|
47 |
},
|
48 |
{
|
49 |
"cell_type": "code",
|
50 |
-
"execution_count":
|
51 |
"id": "5e4670d6-0949-48cf-b6b1-d9cc4cf3ad65",
|
52 |
"metadata": {
|
53 |
"tags": []
|
54 |
},
|
55 |
"outputs": [],
|
56 |
"source": [
|
57 |
-
"
|
|
|
58 |
]
|
59 |
},
|
60 |
{
|
@@ -69,7 +63,7 @@
|
|
69 |
},
|
70 |
{
|
71 |
"cell_type": "code",
|
72 |
-
"execution_count":
|
73 |
"id": "e8120e8e-3718-4a1a-ab7a-46ef98a6bc11",
|
74 |
"metadata": {
|
75 |
"tags": []
|
@@ -82,7 +76,7 @@
|
|
82 |
},
|
83 |
{
|
84 |
"cell_type": "code",
|
85 |
-
"execution_count":
|
86 |
"id": "eec8d4b2-c27a-4780-aeed-8aa7538f70e5",
|
87 |
"metadata": {
|
88 |
"tags": []
|
@@ -92,8 +86,8 @@
|
|
92 |
"name": "stdout",
|
93 |
"output_type": "stream",
|
94 |
"text": [
|
95 |
-
"CPU times: user
|
96 |
-
"Wall time:
|
97 |
]
|
98 |
}
|
99 |
],
|
@@ -103,7 +97,7 @@
|
|
103 |
},
|
104 |
{
|
105 |
"cell_type": "code",
|
106 |
-
"execution_count":
|
107 |
"id": "39f41a8b-94c3-42d6-a989-6c7183a6f94d",
|
108 |
"metadata": {
|
109 |
"tags": []
|
@@ -123,7 +117,7 @@
|
|
123 |
},
|
124 |
{
|
125 |
"cell_type": "code",
|
126 |
-
"execution_count":
|
127 |
"id": "812060a6-90de-4134-8d1f-9f3d98853bc2",
|
128 |
"metadata": {
|
129 |
"tags": []
|
@@ -224,7 +218,7 @@
|
|
224 |
},
|
225 |
{
|
226 |
"cell_type": "code",
|
227 |
-
"execution_count":
|
228 |
"id": "ae9f7e3f-b75d-4bcb-98d1-ae2f037fb4af",
|
229 |
"metadata": {
|
230 |
"tags": []
|
@@ -244,7 +238,7 @@
|
|
244 |
},
|
245 |
{
|
246 |
"cell_type": "code",
|
247 |
-
"execution_count":
|
248 |
"id": "0215d312-1896-43f1-9782-c92aced787b7",
|
249 |
"metadata": {
|
250 |
"tags": []
|
@@ -254,8 +248,8 @@
|
|
254 |
"name": "stdout",
|
255 |
"output_type": "stream",
|
256 |
"text": [
|
257 |
-
"CPU times: user
|
258 |
-
"Wall time:
|
259 |
]
|
260 |
}
|
261 |
],
|
@@ -268,7 +262,7 @@
|
|
268 |
},
|
269 |
{
|
270 |
"cell_type": "code",
|
271 |
-
"execution_count":
|
272 |
"id": "d31d6840-3a80-411a-969c-05f4a5e3e9a1",
|
273 |
"metadata": {
|
274 |
"tags": []
|
@@ -506,20 +500,252 @@
|
|
506 |
]
|
507 |
},
|
508 |
{
|
509 |
-
"cell_type": "
|
510 |
-
"
|
511 |
-
"id": "7a4be2b1-5e0f-4558-8097-b37be0b83785",
|
512 |
"metadata": {},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
513 |
"outputs": [],
|
514 |
-
"source": [
|
|
|
|
|
515 |
},
|
516 |
{
|
517 |
"cell_type": "code",
|
518 |
-
"execution_count":
|
519 |
-
"id": "
|
520 |
-
"metadata": {
|
|
|
|
|
521 |
"outputs": [],
|
522 |
-
"source": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
523 |
}
|
524 |
],
|
525 |
"metadata": {
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 16,
|
6 |
"id": "5920c653-448e-43b3-93eb-12d7073ad352",
|
7 |
"metadata": {
|
8 |
"tags": []
|
9 |
},
|
10 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
"source": [
|
12 |
+
"import time\n",
|
|
|
13 |
"import soundfile\n",
|
14 |
+
"import pandas as pd\n",
|
15 |
+
"import matplotlib.pyplot as plt\n",
|
16 |
+
"from espnet2.bin.asr_inference import Speech2Text\n",
|
17 |
+
"from espnet2.bin.asr_align import CTCSegmentation"
|
18 |
]
|
19 |
},
|
20 |
{
|
21 |
"cell_type": "code",
|
22 |
+
"execution_count": 5,
|
23 |
"id": "83058587-1a8a-4b01-92ff-e9125fbe55a3",
|
24 |
"metadata": {
|
25 |
"tags": []
|
|
|
40 |
},
|
41 |
{
|
42 |
"cell_type": "code",
|
43 |
+
"execution_count": 44,
|
44 |
"id": "5e4670d6-0949-48cf-b6b1-d9cc4cf3ad65",
|
45 |
"metadata": {
|
46 |
"tags": []
|
47 |
},
|
48 |
"outputs": [],
|
49 |
"source": [
|
50 |
+
"# A larger beam size takes more time but is more accurate; the default is 20\n",
|
51 |
+
"speech2text = Speech2Text(\"exp/config.yaml\", \"exp/valid.acc.ave_10best.pth\", quantize_asr_model=True, quantize_lm=True, beam_size=10)"
|
52 |
]
|
53 |
},
|
54 |
{
|
|
|
63 |
},
|
64 |
{
|
65 |
"cell_type": "code",
|
66 |
+
"execution_count": 45,
|
67 |
"id": "e8120e8e-3718-4a1a-ab7a-46ef98a6bc11",
|
68 |
"metadata": {
|
69 |
"tags": []
|
|
|
76 |
},
|
77 |
{
|
78 |
"cell_type": "code",
|
79 |
+
"execution_count": 46,
|
80 |
"id": "eec8d4b2-c27a-4780-aeed-8aa7538f70e5",
|
81 |
"metadata": {
|
82 |
"tags": []
|
|
|
86 |
"name": "stdout",
|
87 |
"output_type": "stream",
|
88 |
"text": [
|
89 |
+
"CPU times: user 1.71 s, sys: 9.89 ms, total: 1.72 s\n",
|
90 |
+
"Wall time: 1.75 s\n"
|
91 |
]
|
92 |
}
|
93 |
],
|
|
|
97 |
},
|
98 |
{
|
99 |
"cell_type": "code",
|
100 |
+
"execution_count": 47,
|
101 |
"id": "39f41a8b-94c3-42d6-a989-6c7183a6f94d",
|
102 |
"metadata": {
|
103 |
"tags": []
|
|
|
117 |
},
|
118 |
{
|
119 |
"cell_type": "code",
|
120 |
+
"execution_count": 32,
|
121 |
"id": "812060a6-90de-4134-8d1f-9f3d98853bc2",
|
122 |
"metadata": {
|
123 |
"tags": []
|
|
|
218 |
},
|
219 |
{
|
220 |
"cell_type": "code",
|
221 |
+
"execution_count": 25,
|
222 |
"id": "ae9f7e3f-b75d-4bcb-98d1-ae2f037fb4af",
|
223 |
"metadata": {
|
224 |
"tags": []
|
|
|
238 |
},
|
239 |
{
|
240 |
"cell_type": "code",
|
241 |
+
"execution_count": 26,
|
242 |
"id": "0215d312-1896-43f1-9782-c92aced787b7",
|
243 |
"metadata": {
|
244 |
"tags": []
|
|
|
248 |
"name": "stdout",
|
249 |
"output_type": "stream",
|
250 |
"text": [
|
251 |
+
"CPU times: user 1.68 s, sys: 0 ns, total: 1.68 s\n",
|
252 |
+
"Wall time: 1.68 s\n"
|
253 |
]
|
254 |
}
|
255 |
],
|
|
|
262 |
},
|
263 |
{
|
264 |
"cell_type": "code",
|
265 |
+
"execution_count": 27,
|
266 |
"id": "d31d6840-3a80-411a-969c-05f4a5e3e9a1",
|
267 |
"metadata": {
|
268 |
"tags": []
|
|
|
500 |
]
|
501 |
},
|
502 |
{
|
503 |
+
"cell_type": "markdown",
|
504 |
+
"id": "6288dbee-b84b-4465-829e-978352a9f0e7",
|
|
|
505 |
"metadata": {},
|
506 |
+
"source": [
|
507 |
+
"## Chunk the audio to see how longer audio significantly increases transcription time"
|
508 |
+
]
|
509 |
+
},
|
510 |
+
{
|
511 |
+
"cell_type": "code",
|
512 |
+
"execution_count": 1,
|
513 |
+
"id": "6e7af387-d4bf-486e-a12a-9689242793fe",
|
514 |
+
"metadata": {
|
515 |
+
"tags": []
|
516 |
+
},
|
517 |
"outputs": [],
|
518 |
+
"source": [
|
519 |
+
"from subprocess import Popen, PIPE"
|
520 |
+
]
|
521 |
},
|
522 |
{
|
523 |
"cell_type": "code",
|
524 |
+
"execution_count": 7,
|
525 |
+
"id": "0d51f384-4e1d-435f-993e-351af6bc42ff",
|
526 |
+
"metadata": {
|
527 |
+
"tags": []
|
528 |
+
},
|
529 |
"outputs": [],
|
530 |
+
"source": [
|
531 |
+
"def chunk_audio(src_file, to_file, start, end):\n",
|
532 |
+
" proc = Popen(['sox', src_file, to_file, 'trim', str(start), f'={end}'], stdout=PIPE, stderr=PIPE)\n",
|
533 |
+
" stdout, stderr = proc.communicate()\n",
|
534 |
+
" return stdout, stderr\n",
|
535 |
+
"\n",
|
536 |
+
"from_file='example_audio/oden_kypsis16k.wav'\n",
|
537 |
+
"to_files=[]\n",
|
538 |
+
"for i in range(5, 31):\n",
|
539 |
+
" to_file=f'example_audio/chunks/oden_kypsis16k_chunk_{i}.wav'\n",
|
540 |
+
" chunk_audio(from_file, to_file, 0, i)\n",
|
541 |
+
" to_files.append(to_file)"
|
542 |
+
]
|
543 |
+
},
|
544 |
+
{
|
545 |
+
"cell_type": "code",
|
546 |
+
"execution_count": 38,
|
547 |
+
"id": "9aad1658-bdbc-479c-b1f9-89e52c6c2487",
|
548 |
+
"metadata": {
|
549 |
+
"tags": []
|
550 |
+
},
|
551 |
+
"outputs": [],
|
552 |
+
"source": [
|
553 |
+
"chunk_times=[]\n",
|
554 |
+
"for file in to_files:\n",
|
555 |
+
" speech, rate = soundfile.read(file)\n",
|
556 |
+
" assert rate == 16000\n",
|
557 |
+
" start=time.time()\n",
|
558 |
+
" text, *_ = speech2text(speech)\n",
|
559 |
+
" end=time.time()\n",
|
560 |
+
" duration=end-start\n",
|
561 |
+
" chunk_times.append([file, text[0], duration, len(speech)/16000])\n",
|
562 |
+
"df_chunk_times=pd.DataFrame(chunk_times)"
|
563 |
+
]
|
564 |
+
},
|
565 |
+
{
|
566 |
+
"cell_type": "code",
|
567 |
+
"execution_count": 39,
|
568 |
+
"id": "9d3cd39b-9199-493c-a4d9-4084c92d844a",
|
569 |
+
"metadata": {
|
570 |
+
"tags": []
|
571 |
+
},
|
572 |
+
"outputs": [
|
573 |
+
{
|
574 |
+
"data": {
|
575 |
+
"text/html": [
|
576 |
+
"<div>\n",
|
577 |
+
"<style scoped>\n",
|
578 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
579 |
+
" vertical-align: middle;\n",
|
580 |
+
" }\n",
|
581 |
+
"\n",
|
582 |
+
" .dataframe tbody tr th {\n",
|
583 |
+
" vertical-align: top;\n",
|
584 |
+
" }\n",
|
585 |
+
"\n",
|
586 |
+
" .dataframe thead th {\n",
|
587 |
+
" text-align: right;\n",
|
588 |
+
" }\n",
|
589 |
+
"</style>\n",
|
590 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
591 |
+
" <thead>\n",
|
592 |
+
" <tr style=\"text-align: right;\">\n",
|
593 |
+
" <th></th>\n",
|
594 |
+
" <th>file</th>\n",
|
595 |
+
" <th>hyp</th>\n",
|
596 |
+
" <th>elapsed_time</th>\n",
|
597 |
+
" <th>audio_dur_sec</th>\n",
|
598 |
+
" <th>trans_time_audio_dur_share</th>\n",
|
599 |
+
" </tr>\n",
|
600 |
+
" </thead>\n",
|
601 |
+
" <tbody>\n",
|
602 |
+
" <tr>\n",
|
603 |
+
" <th>0</th>\n",
|
604 |
+
" <td>example_audio/chunks/oden_kypsis16k_chunk_5.wav</td>\n",
|
605 |
+
" <td>enamus ajast nagu klik</td>\n",
|
606 |
+
" <td>0.418611</td>\n",
|
607 |
+
" <td>5.0</td>\n",
|
608 |
+
" <td>0.083722</td>\n",
|
609 |
+
" </tr>\n",
|
610 |
+
" <tr>\n",
|
611 |
+
" <th>1</th>\n",
|
612 |
+
" <td>example_audio/chunks/oden_kypsis16k_chunk_6.wav</td>\n",
|
613 |
+
" <td>enamus ajast nagu klikid neid all</td>\n",
|
614 |
+
" <td>0.481883</td>\n",
|
615 |
+
" <td>6.0</td>\n",
|
616 |
+
" <td>0.080314</td>\n",
|
617 |
+
" </tr>\n",
|
618 |
+
" <tr>\n",
|
619 |
+
" <th>2</th>\n",
|
620 |
+
" <td>example_audio/chunks/oden_kypsis16k_chunk_7.wav</td>\n",
|
621 |
+
" <td>enamus ajast nagu klikid neid allserva tekivad</td>\n",
|
622 |
+
" <td>0.700862</td>\n",
|
623 |
+
" <td>7.0</td>\n",
|
624 |
+
" <td>0.100123</td>\n",
|
625 |
+
" </tr>\n",
|
626 |
+
" <tr>\n",
|
627 |
+
" <th>3</th>\n",
|
628 |
+
" <td>example_audio/chunks/oden_kypsis16k_chunk_8.wav</td>\n",
|
629 |
+
" <td>enamus ajast nagu klikid neid allserva tekivad...</td>\n",
|
630 |
+
" <td>0.839978</td>\n",
|
631 |
+
" <td>8.0</td>\n",
|
632 |
+
" <td>0.104997</td>\n",
|
633 |
+
" </tr>\n",
|
634 |
+
" <tr>\n",
|
635 |
+
" <th>4</th>\n",
|
636 |
+
" <td>example_audio/chunks/oden_kypsis16k_chunk_9.wav</td>\n",
|
637 |
+
" <td>enamus ajast nagu klikid neid allserva tekivad...</td>\n",
|
638 |
+
" <td>1.016149</td>\n",
|
639 |
+
" <td>9.0</td>\n",
|
640 |
+
" <td>0.112905</td>\n",
|
641 |
+
" </tr>\n",
|
642 |
+
" </tbody>\n",
|
643 |
+
"</table>\n",
|
644 |
+
"</div>"
|
645 |
+
],
|
646 |
+
"text/plain": [
|
647 |
+
" file \\\n",
|
648 |
+
"0 example_audio/chunks/oden_kypsis16k_chunk_5.wav \n",
|
649 |
+
"1 example_audio/chunks/oden_kypsis16k_chunk_6.wav \n",
|
650 |
+
"2 example_audio/chunks/oden_kypsis16k_chunk_7.wav \n",
|
651 |
+
"3 example_audio/chunks/oden_kypsis16k_chunk_8.wav \n",
|
652 |
+
"4 example_audio/chunks/oden_kypsis16k_chunk_9.wav \n",
|
653 |
+
"\n",
|
654 |
+
" hyp elapsed_time \\\n",
|
655 |
+
"0 enamus ajast nagu klik 0.418611 \n",
|
656 |
+
"1 enamus ajast nagu klikid neid all 0.481883 \n",
|
657 |
+
"2 enamus ajast nagu klikid neid allserva tekivad 0.700862 \n",
|
658 |
+
"3 enamus ajast nagu klikid neid allserva tekivad... 0.839978 \n",
|
659 |
+
"4 enamus ajast nagu klikid neid allserva tekivad... 1.016149 \n",
|
660 |
+
"\n",
|
661 |
+
" audio_dur_sec trans_time_audio_dur_share \n",
|
662 |
+
"0 5.0 0.083722 \n",
|
663 |
+
"1 6.0 0.080314 \n",
|
664 |
+
"2 7.0 0.100123 \n",
|
665 |
+
"3 8.0 0.104997 \n",
|
666 |
+
"4 9.0 0.112905 "
|
667 |
+
]
|
668 |
+
},
|
669 |
+
"execution_count": 39,
|
670 |
+
"metadata": {},
|
671 |
+
"output_type": "execute_result"
|
672 |
+
}
|
673 |
+
],
|
674 |
+
"source": [
|
675 |
+
"df_chunk_times.columns=['file', 'hyp','elapsed_time', 'audio_dur_sec']\n",
|
676 |
+
"df_chunk_times['trans_time_audio_dur_share']=df_chunk_times.elapsed_time/df_chunk_times.audio_dur_sec\n",
|
677 |
+
"df_chunk_times=df_chunk_times.sort_values('audio_dur_sec')\n",
|
678 |
+
"df_chunk_times=df_chunk_times.reset_index(drop=True)\n",
|
679 |
+
"df_chunk_times.head()"
|
680 |
+
]
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"cell_type": "code",
|
684 |
+
"execution_count": 40,
|
685 |
+
"id": "1d8d9520-1bbd-43f5-ae7a-08643def9285",
|
686 |
+
"metadata": {
|
687 |
+
"tags": []
|
688 |
+
},
|
689 |
+
"outputs": [
|
690 |
+
{
|
691 |
+
"data": {
|
692 |
+
"text/plain": [
|
693 |
+
"<Axes: xlabel='elapsed_time', ylabel='audio_dur_sec'>"
|
694 |
+
]
|
695 |
+
},
|
696 |
+
"execution_count": 40,
|
697 |
+
"metadata": {},
|
698 |
+
"output_type": "execute_result"
|
699 |
+
},
|
700 |
+
{
|
701 |
+
"data": {
|
702 |
+
"image/png": "",
|
703 |
+
"text/plain": [
|
704 |
+
"<Figure size 640x480 with 1 Axes>"
|
705 |
+
]
|
706 |
+
},
|
707 |
+
"metadata": {},
|
708 |
+
"output_type": "display_data"
|
709 |
+
}
|
710 |
+
],
|
711 |
+
"source": [
|
712 |
+
"df_chunk_times.plot.scatter('elapsed_time', 'audio_dur_sec')"
|
713 |
+
]
|
714 |
+
},
|
715 |
+
{
|
716 |
+
"cell_type": "code",
|
717 |
+
"execution_count": 41,
|
718 |
+
"id": "fcd06626-4e6e-4461-bf6b-7495bcc825b5",
|
719 |
+
"metadata": {
|
720 |
+
"tags": []
|
721 |
+
},
|
722 |
+
"outputs": [
|
723 |
+
{
|
724 |
+
"data": {
|
725 |
+
"text/plain": [
|
726 |
+
"Text(0.5, 0, 'audio duration')"
|
727 |
+
]
|
728 |
+
},
|
729 |
+
"execution_count": 41,
|
730 |
+
"metadata": {},
|
731 |
+
"output_type": "execute_result"
|
732 |
+
},
|
733 |
+
{
|
734 |
+
"data": {
|
735 |
+
"image/png": "",
|
736 |
+
"text/plain": [
|
737 |
+
"<Figure size 640x480 with 1 Axes>"
|
738 |
+
]
|
739 |
+
},
|
740 |
+
"metadata": {},
|
741 |
+
"output_type": "display_data"
|
742 |
+
}
|
743 |
+
],
|
744 |
+
"source": [
|
745 |
+
"df_chunk_times['trans_time_audio_dur_share'].plot()\n",
|
746 |
+
"plt.ylabel('transc time/audio duration ratio')\n",
|
747 |
+
"plt.xlabel('audio duration')"
|
748 |
+
]
|
749 |
}
|
750 |
],
|
751 |
"metadata": {
|