Upload README.md
Browse files
README.md
CHANGED
@@ -8,11 +8,11 @@ tags:
|
|
8 |
- speech
|
9 |
- xlsr-fine-tuning-week
|
10 |
widget:
|
11 |
-
-
|
12 |
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample1.flac
|
13 |
-
-
|
14 |
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample2978.flac
|
15 |
-
-
|
16 |
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample5168.flac
|
17 |
model-index:
|
18 |
- name: XLSR Wav2Vec2 Persian (Farsi) V3 by Mehrdad Farahani
|
@@ -76,7 +76,7 @@ def cleaning(text):
|
|
76 |
|
77 |
data_dir = "/content/cv-corpus-6.1-2020-12-11/fa"
|
78 |
|
79 |
-
test = pd.read_csv(f"{data_dir}/test.tsv", sep="\t")
|
80 |
test["path"] = data_dir + "/clips/" + test["path"]
|
81 |
print(f"Step 0: {len(test)}")
|
82 |
|
@@ -93,7 +93,7 @@ test = test.reset_index(drop=True)
|
|
93 |
print(test.head())
|
94 |
|
95 |
test = test[["path", "sentence"]]
|
96 |
-
test.to_csv("/content/test.csv", sep="\t", encoding="utf-8", index=False)
|
97 |
```
|
98 |
|
99 |
**Prediction**
|
@@ -146,7 +146,7 @@ def predict(batch):
|
|
146 |
return batch
|
147 |
|
148 |
|
149 |
-
dataset = load_dataset("csv", data_files={"test": "/content/test.csv"}, delimiter="\t")["test"]
|
150 |
dataset = dataset.map(speech_file_to_array_fn)
|
151 |
result = dataset.map(predict, batched=True, batch_size=4)
|
152 |
```
|
|
|
8 |
- speech
|
9 |
- xlsr-fine-tuning-week
|
10 |
widget:
|
11 |
+
- example_title: Common Voice sample 1
|
12 |
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample1.flac
|
13 |
+
- example_title: Common Voice sample 2978
|
14 |
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample2978.flac
|
15 |
+
- example_title: Common Voice sample 5168
|
16 |
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample5168.flac
|
17 |
model-index:
|
18 |
- name: XLSR Wav2Vec2 Persian (Farsi) V3 by Mehrdad Farahani
|
|
|
76 |
|
77 |
data_dir = "/content/cv-corpus-6.1-2020-12-11/fa"
|
78 |
|
79 |
+
test = pd.read_csv(f"{data_dir}/test.tsv", sep="\t")
|
80 |
test["path"] = data_dir + "/clips/" + test["path"]
|
81 |
print(f"Step 0: {len(test)}")
|
82 |
|
|
|
93 |
print(test.head())
|
94 |
|
95 |
test = test[["path", "sentence"]]
|
96 |
+
test.to_csv("/content/test.csv", sep="\t", encoding="utf-8", index=False)
|
97 |
```
|
98 |
|
99 |
**Prediction**
|
|
|
146 |
return batch
|
147 |
|
148 |
|
149 |
+
dataset = load_dataset("csv", data_files={"test": "/content/test.csv"}, delimiter="\t")["test"]
|
150 |
dataset = dataset.map(speech_file_to_array_fn)
|
151 |
result = dataset.map(predict, batched=True, batch_size=4)
|
152 |
```
|