cahya committed on
Commit
1d0d0d2
·
1 Parent(s): 716a685

updated readme

Browse files
Files changed (1) hide show
  1. README.md +30 -26
README.md CHANGED
@@ -42,19 +42,21 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
42
 
43
 
44
  def load_dataset_sundanese():
45
- root_dir = Path("/dataset/ASR/sundanese")
46
- url_sundanese_female = "https://www.openslr.org/resources/44/su_id_female.zip"
47
- url_sundanese_male = "https://www.openslr.org/resources/44/su_id_male.zip"
48
- data_dirs = [ root_dir/"su_id_female/wavs", root_dir/"su_id_male/wavs" ]
49
- filenames = [ root_dir/"su_id_female/line_index.tsv", root_dir/"su_id_male/line_index.tsv" ]
50
-
51
- if not (root_dir/"su_id_female").exists():
52
- !wget -P {root_dir} {url_sundanese_female}
53
- !unzip {root_dir}/su_id_female.zip -d {root_dir}
54
- if not (root_dir/"su_id_male").exists():
55
- !wget -P {root_dir} {url_sundanese_male}
56
- !unzip {root_dir}/su_id_male.zip -d {root_dir}
57
-
 
 
58
  dfs = []
59
 
60
  dfs.append(pd.read_csv(filenames[0], sep='\t\t', names=["path", "sentence"]))
@@ -109,19 +111,21 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
109
  import re
110
 
111
  def load_dataset_sundanese():
112
- root_dir = Path("/dataset/ASR/sundanese")
113
- url_sundanese_female = "https://www.openslr.org/resources/44/su_id_female.zip"
114
- url_sundanese_male = "https://www.openslr.org/resources/44/su_id_male.zip"
115
- data_dirs = [ root_dir/"su_id_female/wavs", root_dir/"su_id_male/wavs" ]
116
- filenames = [ root_dir/"su_id_female/line_index.tsv", root_dir/"su_id_male/line_index.tsv" ]
117
-
118
- if not (root_dir/"su_id_female").exists():
119
- !wget -P {root_dir} {url_sundanese_female}
120
- !unzip {root_dir}/su_id_female.zip -d {root_dir}
121
- if not (root_dir/"su_id_male").exists():
122
- !wget -P {root_dir} {url_sundanese_male}
123
- !unzip {root_dir}/su_id_male.zip -d {root_dir}
124
-
 
 
125
  dfs = []
126
 
127
  dfs.append(pd.read_csv(filenames[0], sep='\t\t', names=["path", "sentence"]))
 
42
 
43
 
44
  def load_dataset_sundanese():
45
+ urls = [
46
+ "https://www.openslr.org/resources/44/su_id_female.zip",
47
+ "https://www.openslr.org/resources/44/su_id_male.zip"
48
+ ]
49
+ dm = DownloadManager()
50
+ download_dirs = dm.download_and_extract(urls)
51
+ data_dirs = [
52
+ Path(download_dirs[0])/"su_id_female/wavs",
53
+ Path(download_dirs[1])/"su_id_male/wavs",
54
+ ]
55
+ filenames = [
56
+ Path(download_dirs[0])/"su_id_female/line_index.tsv",
57
+ Path(download_dirs[1])/"su_id_male/line_index.tsv",
58
+ ]
59
+
60
  dfs = []
61
 
62
  dfs.append(pd.read_csv(filenames[0], sep='\t\t', names=["path", "sentence"]))
 
111
  import re
112
 
113
  def load_dataset_sundanese():
114
+ urls = [
115
+ "https://www.openslr.org/resources/44/su_id_female.zip",
116
+ "https://www.openslr.org/resources/44/su_id_male.zip"
117
+ ]
118
+ dm = DownloadManager()
119
+ download_dirs = dm.download_and_extract(urls)
120
+ data_dirs = [
121
+ Path(download_dirs[0])/"su_id_female/wavs",
122
+ Path(download_dirs[1])/"su_id_male/wavs",
123
+ ]
124
+ filenames = [
125
+ Path(download_dirs[0])/"su_id_female/line_index.tsv",
126
+ Path(download_dirs[1])/"su_id_male/line_index.tsv",
127
+ ]
128
+
129
  dfs = []
130
 
131
  dfs.append(pd.read_csv(filenames[0], sep='\t\t', names=["path", "sentence"]))