mishig HF staff committed on
Commit
f3ceecb
1 Parent(s): e6e49a7

Upload README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -6
README.md CHANGED
@@ -8,11 +8,11 @@ tags:
8
  - speech
9
  - xlsr-fine-tuning-week
10
  widget:
11
- - label: Common Voice sample 1
12
  src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample1.flac
13
- - label: Common Voice sample 2978
14
  src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample2978.flac
15
- - label: Common Voice sample 5168
16
  src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample5168.flac
17
  model-index:
18
  - name: XLSR Wav2Vec2 Persian (Farsi) V3 by Mehrdad Farahani
@@ -76,7 +76,7 @@ def cleaning(text):
76
 
77
  data_dir = "/content/cv-corpus-6.1-2020-12-11/fa"
78
 
79
- test = pd.read_csv(f"{data_dir}/test.tsv", sep="\t")
80
  test["path"] = data_dir + "/clips/" + test["path"]
81
  print(f"Step 0: {len(test)}")
82
 
@@ -93,7 +93,7 @@ test = test.reset_index(drop=True)
93
  print(test.head())
94
 
95
  test = test[["path", "sentence"]]
96
- test.to_csv("/content/test.csv", sep="\t", encoding="utf-8", index=False)
97
  ```
98
 
99
  **Prediction**
@@ -146,7 +146,7 @@ def predict(batch):
146
  return batch
147
 
148
 
149
- dataset = load_dataset("csv", data_files={"test": "/content/test.csv"}, delimiter="\t")["test"]
150
  dataset = dataset.map(speech_file_to_array_fn)
151
  result = dataset.map(predict, batched=True, batch_size=4)
152
  ```
 
8
  - speech
9
  - xlsr-fine-tuning-week
10
  widget:
11
+ - example_title: Common Voice sample 1
12
  src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample1.flac
13
+ - example_title: Common Voice sample 2978
14
  src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample2978.flac
15
+ - example_title: Common Voice sample 5168
16
  src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-persian-v3/resolve/main/sample5168.flac
17
  model-index:
18
  - name: XLSR Wav2Vec2 Persian (Farsi) V3 by Mehrdad Farahani
 
76
 
77
  data_dir = "/content/cv-corpus-6.1-2020-12-11/fa"
78
 
79
+ test = pd.read_csv(f"{data_dir}/test.tsv", sep="\t")
80
  test["path"] = data_dir + "/clips/" + test["path"]
81
  print(f"Step 0: {len(test)}")
82
 
 
93
  print(test.head())
94
 
95
  test = test[["path", "sentence"]]
96
+ test.to_csv("/content/test.csv", sep="\t", encoding="utf-8", index=False)
97
  ```
98
 
99
  **Prediction**
 
146
  return batch
147
 
148
 
149
+ dataset = load_dataset("csv", data_files={"test": "/content/test.csv"}, delimiter="\t")["test"]
150
  dataset = dataset.map(speech_file_to_array_fn)
151
  result = dataset.map(predict, batched=True, batch_size=4)
152
  ```