cahya
/

wav2vec2-large-xlsr-sundanese

Automatic Speech Recognition

xlsr-fine-tuning-week

Inference Endpoints

Model card Files Files and versions Community

cahya committed on Mar 29, 2021

Commit

1d0d0d2

·

1 Parent(s): 716a685

updated readme

Files changed (1) hide show

README.md +30 -26

README.md CHANGED Viewed

@@ -42,19 +42,21 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 def load_dataset_sundanese():
-    root_dir = Path("/dataset/ASR/sundanese")
-    url_sundanese_female = "https://www.openslr.org/resources/44/su_id_female.zip"
-    url_sundanese_male = "https://www.openslr.org/resources/44/su_id_male.zip"
-    data_dirs = [ root_dir/"su_id_female/wavs", root_dir/"su_id_male/wavs" ]
-    filenames = [ root_dir/"su_id_female/line_index.tsv", root_dir/"su_id_male/line_index.tsv" ]
-    if not (root_dir/"su_id_female").exists():
-        !wget -P {root_dir} {url_sundanese_female}
-        !unzip {root_dir}/su_id_female.zip -d {root_dir}
-    if not (root_dir/"su_id_male").exists():
-        !wget -P {root_dir} {url_sundanese_male}
-        !unzip {root_dir}/su_id_male.zip -d {root_dir}
     dfs = []
     dfs.append(pd.read_csv(filenames[0], sep='\t\t', names=["path", "sentence"]))
@@ -109,19 +111,21 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import re
 def load_dataset_sundanese():
-    root_dir = Path("/dataset/ASR/sundanese")
-    url_sundanese_female = "https://www.openslr.org/resources/44/su_id_female.zip"
-    url_sundanese_male = "https://www.openslr.org/resources/44/su_id_male.zip"
-    data_dirs = [ root_dir/"su_id_female/wavs", root_dir/"su_id_male/wavs" ]
-    filenames = [ root_dir/"su_id_female/line_index.tsv", root_dir/"su_id_male/line_index.tsv" ]
-    if not (root_dir/"su_id_female").exists():
-        !wget -P {root_dir} {url_sundanese_female}
-        !unzip {root_dir}/su_id_female.zip -d {root_dir}
-    if not (root_dir/"su_id_male").exists():
-        !wget -P {root_dir} {url_sundanese_male}
-        !unzip {root_dir}/su_id_male.zip -d {root_dir}
     dfs = []
     dfs.append(pd.read_csv(filenames[0], sep='\t\t', names=["path", "sentence"]))

 def load_dataset_sundanese():
+    urls = [
+        "https://www.openslr.org/resources/44/su_id_female.zip",
+        "https://www.openslr.org/resources/44/su_id_male.zip"
+    ]
+    dm = DownloadManager()
+    download_dirs = dm.download_and_extract(urls)
+    data_dirs = [
+        Path(download_dirs[0])/"su_id_female/wavs",
+        Path(download_dirs[1])/"su_id_male/wavs",
+    ]
+    filenames = [
+        Path(download_dirs[0])/"su_id_female/line_index.tsv",
+        Path(download_dirs[1])/"su_id_male/line_index.tsv",
+    ]
     dfs = []
     dfs.append(pd.read_csv(filenames[0], sep='\t\t', names=["path", "sentence"]))
 import re
 def load_dataset_sundanese():
+    urls = [
+        "https://www.openslr.org/resources/44/su_id_female.zip",
+        "https://www.openslr.org/resources/44/su_id_male.zip"
+    ]
+    dm = DownloadManager()
+    download_dirs = dm.download_and_extract(urls)
+    data_dirs = [
+        Path(download_dirs[0])/"su_id_female/wavs",
+        Path(download_dirs[1])/"su_id_male/wavs",
+    ]
+    filenames = [
+        Path(download_dirs[0])/"su_id_female/line_index.tsv",
+        Path(download_dirs[1])/"su_id_male/line_index.tsv",
+    ]
     dfs = []
     dfs.append(pd.read_csv(filenames[0], sep='\t\t', names=["path", "sentence"]))