anuragshas
/

wav2vec2-large-xlsr-53-telugu

Automatic Speech Recognition

xlsr-fine-tuning-week

Inference Endpoints

Model card Files Files and versions Community

anuragshas committed on Mar 24, 2021

Commit

64ae049

•

1 Parent(s): f1ff9aa

Update README.md

Files changed (1) hide show

README.md +7 -5

README.md CHANGED Viewed

@@ -38,7 +38,7 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import pandas as pd
 # Evaluation notebook contains the procedure to download the data
-df = pd.read_csv("/content/te/test.tsv", sep="\t")
 df["path"] = "/content/te/clips/" + df["path"]
 test_dataset = Dataset.from_pandas(df)
@@ -72,7 +72,7 @@ from sklearn.model_selection import train_test_split
 import pandas as pd
 # Evaluation notebook contains the procedure to download the data
-df = pd.read_csv("/content/te/test.tsv", sep="\t")
 df["path"] = "/content/te/clips/" + df["path"]
 test_dataset = Dataset.from_pandas(df)
 wer = load_metric("wer")
@@ -81,12 +81,14 @@ processor = Wav2Vec2Processor.from_pretrained("anuragshas/wav2vec2-large-xlsr-53
 model = Wav2Vec2ForCTC.from_pretrained("anuragshas/wav2vec2-large-xlsr-53-telugu")
 model.to("cuda")
-chars_to_ignore_regex = '[\,\?\.\!\-\_\;\:\"\“\%\‘\”\।\’\'\&]'
 resampler = torchaudio.transforms.Resample(48_000, 16_000)
 def normalizer(text):
     # Use your custom normalizer
-    text = text.replace("\\n","\n")
     text = ' '.join(text.split())
     text = re.sub(r'''([a-z]+)''','',text,flags=re.IGNORECASE)
     text = re.sub(r'''%'''," శాతం ", text)
@@ -117,7 +119,7 @@ print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"],
 **Test Result**: 44.98%
 ## Training
-70% of the OpenSLR Marathi dataset was used for training.
 Train Split of annotations is [here](https://www.dropbox.com/s/xqc0wtour7f9h4c/train.tsv)

 import pandas as pd
 # Evaluation notebook contains the procedure to download the data
+df = pd.read_csv("/content/te/test.tsv", sep="\\t")
 df["path"] = "/content/te/clips/" + df["path"]
 test_dataset = Dataset.from_pandas(df)
 import pandas as pd
 # Evaluation notebook contains the procedure to download the data
+df = pd.read_csv("/content/te/test.tsv", sep="\\t")
 df["path"] = "/content/te/clips/" + df["path"]
 test_dataset = Dataset.from_pandas(df)
 wer = load_metric("wer")
 model = Wav2Vec2ForCTC.from_pretrained("anuragshas/wav2vec2-large-xlsr-53-telugu")
 model.to("cuda")
+chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\_\\;\\:\\"\\“\\%\\‘\\”\\।\\’\\'\\&]'
 resampler = torchaudio.transforms.Resample(48_000, 16_000)
 def normalizer(text):
     # Use your custom normalizer
+    text = text.replace("\\\\n","\\n")
     text = ' '.join(text.split())
     text = re.sub(r'''([a-z]+)''','',text,flags=re.IGNORECASE)
     text = re.sub(r'''%'''," శాతం ", text)
 **Test Result**: 44.98%
 ## Training
+70% of the OpenSLR Telugu dataset was used for training.
 Train Split of annotations is [here](https://www.dropbox.com/s/xqc0wtour7f9h4c/train.tsv)