updated readme
Browse files
README.md
CHANGED
@@ -42,19 +42,21 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
42 |
|
43 |
|
44 |
def load_dataset_sundanese():
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
|
|
58 |
dfs = []
|
59 |
|
60 |
dfs.append(pd.read_csv(filenames[0], sep='\t\t', names=["path", "sentence"]))
|
@@ -109,19 +111,21 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
109 |
import re
|
110 |
|
111 |
def load_dataset_sundanese():
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
|
|
|
|
125 |
dfs = []
|
126 |
|
127 |
dfs.append(pd.read_csv(filenames[0], sep='\t\t', names=["path", "sentence"]))
|
|
|
42 |
|
43 |
|
44 |
def load_dataset_sundanese():
|
45 |
+
urls = [
|
46 |
+
"https://www.openslr.org/resources/44/su_id_female.zip",
|
47 |
+
"https://www.openslr.org/resources/44/su_id_male.zip"
|
48 |
+
]
|
49 |
+
dm = DownloadManager()
|
50 |
+
download_dirs = dm.download_and_extract(urls)
|
51 |
+
data_dirs = [
|
52 |
+
Path(download_dirs[0])/"su_id_female/wavs",
|
53 |
+
Path(download_dirs[1])/"su_id_male/wavs",
|
54 |
+
]
|
55 |
+
filenames = [
|
56 |
+
Path(download_dirs[0])/"su_id_female/line_index.tsv",
|
57 |
+
Path(download_dirs[1])/"su_id_male/line_index.tsv",
|
58 |
+
]
|
59 |
+
|
60 |
dfs = []
|
61 |
|
62 |
dfs.append(pd.read_csv(filenames[0], sep='\t\t', names=["path", "sentence"]))
|
|
|
111 |
import re
|
112 |
|
113 |
def load_dataset_sundanese():
|
114 |
+
urls = [
|
115 |
+
"https://www.openslr.org/resources/44/su_id_female.zip",
|
116 |
+
"https://www.openslr.org/resources/44/su_id_male.zip"
|
117 |
+
]
|
118 |
+
dm = DownloadManager()
|
119 |
+
download_dirs = dm.download_and_extract(urls)
|
120 |
+
data_dirs = [
|
121 |
+
Path(download_dirs[0])/"su_id_female/wavs",
|
122 |
+
Path(download_dirs[1])/"su_id_male/wavs",
|
123 |
+
]
|
124 |
+
filenames = [
|
125 |
+
Path(download_dirs[0])/"su_id_female/line_index.tsv",
|
126 |
+
Path(download_dirs[1])/"su_id_male/line_index.tsv",
|
127 |
+
]
|
128 |
+
|
129 |
dfs = []
|
130 |
|
131 |
dfs.append(pd.read_csv(filenames[0], sep='\t\t', names=["path", "sentence"]))
|