anuragshas committed on
Commit
032c0ae
•
1 Parent(s): 64ae049

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -5
README.md CHANGED
@@ -38,7 +38,7 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
38
  import pandas as pd
39
 
40
  # Evaluation notebook contains the procedure to download the data
41
- df = pd.read_csv("/content/te/test.tsv", sep="\\t")
42
  df["path"] = "/content/te/clips/" + df["path"]
43
  test_dataset = Dataset.from_pandas(df)
44
 
@@ -72,7 +72,7 @@ from sklearn.model_selection import train_test_split
72
  import pandas as pd
73
 
74
  # Evaluation notebook contains the procedure to download the data
75
- df = pd.read_csv("/content/te/test.tsv", sep="\\t")
76
  df["path"] = "/content/te/clips/" + df["path"]
77
  test_dataset = Dataset.from_pandas(df)
78
  wer = load_metric("wer")
@@ -81,13 +81,13 @@ processor = Wav2Vec2Processor.from_pretrained("anuragshas/wav2vec2-large-xlsr-53
81
  model = Wav2Vec2ForCTC.from_pretrained("anuragshas/wav2vec2-large-xlsr-53-telugu")
82
  model.to("cuda")
83
 
84
- chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\_\\;\\:\\"\\“\\%\\‘\\”\\।\\’\\'\\&]'
85
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
86
 
87
  def normalizer(text):
88
  # Use your custom normalizer
89
- text = text.replace("\\\
90
- ","\
91
  ")
92
  text = ' '.join(text.split())
93
  text = re.sub(r'''([a-z]+)''','',text,flags=re.IGNORECASE)
 
38
  import pandas as pd
39
 
40
  # Evaluation notebook contains the procedure to download the data
41
+ df = pd.read_csv("/content/te/test.tsv", sep="\\\\t")
42
  df["path"] = "/content/te/clips/" + df["path"]
43
  test_dataset = Dataset.from_pandas(df)
44
 
 
72
  import pandas as pd
73
 
74
  # Evaluation notebook contains the procedure to download the data
75
+ df = pd.read_csv("/content/te/test.tsv", sep="\\\\t")
76
  df["path"] = "/content/te/clips/" + df["path"]
77
  test_dataset = Dataset.from_pandas(df)
78
  wer = load_metric("wer")
 
81
  model = Wav2Vec2ForCTC.from_pretrained("anuragshas/wav2vec2-large-xlsr-53-telugu")
82
  model.to("cuda")
83
 
84
+ chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\_\\;\\:\\"\\“\\%\\‘\\”\\।\\’\'\\&]'
85
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
86
 
87
  def normalizer(text):
88
  # Use your custom normalizer
89
+ text = text.replace("\\\\\\
90
+ ","\\
91
  ")
92
  text = ' '.join(text.split())
93
  text = re.sub(r'''([a-z]+)''','',text,flags=re.IGNORECASE)