lewistape committed
Commit 6a7ab5f · verified · 1 Parent(s): 2b90fdf

Update asr.py

Files changed (1):
  1. asr.py +21 -10
asr.py CHANGED
@@ -11,6 +11,13 @@ MODEL_ID = "facebook/mms-1b-all"
 processor = AutoProcessor.from_pretrained(MODEL_ID)
 model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID).to("cuda")
 
+# Load supported languages from the TSV file
+ASR_LANGUAGES = {}
+with open("data/asr/all_langs.tsv") as f:
+    for line in f:
+        iso, name = line.split(" ", 1)
+        ASR_LANGUAGES[iso.strip()] = name.strip()
+
 def transcribe(audio_data=None, lang="eng (English)"):
     if audio_data is None or (isinstance(audio_data, np.ndarray) and audio_data.size == 0):
         return "<<ERROR: Empty Audio Input>>"
@@ -32,10 +39,21 @@ def transcribe(audio_data=None, lang="eng (English)"):
     else:
         return f"<<ERROR: Invalid Audio Input Instance: {type(audio_data)}>>"
 
+    # Extract language code (e.g., "eng" from "eng (English)")
     lang_code = lang.split()[0]
-    processor.tokenizer.set_target_lang(lang_code)
-    model.load_adapter(lang_code)
 
+    # Validate if the language code is supported
+    if lang_code not in ASR_LANGUAGES:
+        return f"<<ERROR: Unsupported Language Code: {lang_code}>>"
+
+    try:
+        # Set target language and load adapter
+        processor.tokenizer.set_target_lang(lang_code)
+        model.load_adapter(lang_code)
+    except Exception as e:
+        return f"<<ERROR: Language Adaptation Failed: {str(e)}>>"
+
+    # Process audio and perform transcription
     inputs = processor(audio_samples, sampling_rate=ASR_SAMPLING_RATE, return_tensors="pt").to("cuda")
 
     with torch.no_grad(), autocast():
@@ -44,11 +62,4 @@ def transcribe(audio_data=None, lang="eng (English)"):
     ids = torch.argmax(outputs, dim=-1)[0]
     transcription = processor.decode(ids)
 
-    return transcription
-
-ASR_LANGUAGES = {
-    "eng": "English",
-    "spa": "Spanish",
-    "fra": "French",
-    # Add more languages as needed
-}
+    return transcription
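
For reference, a minimal sketch of how the TSV-driven language table and the new validation path behave, run against an in-memory sample rather than data/asr/all_langs.tsv (whose contents are not part of this diff). The two sample rows and the check_label helper are illustrative assumptions, not code from the commit.

# Sketch of the ASR_LANGUAGES loading and language-code check added in this commit.
# Sample rows and check_label() are assumptions for illustration; the real table
# is read from data/asr/all_langs.tsv at import time.
import io

sample = io.StringIO("eng English\nspa Spanish\n")

ASR_LANGUAGES = {}
for line in sample:
    # Same parsing as the commit: split on the first space, ISO code first, name after.
    iso, name = line.split(" ", 1)
    ASR_LANGUAGES[iso.strip()] = name.strip()

print(ASR_LANGUAGES)  # {'eng': 'English', 'spa': 'Spanish'}

def check_label(label):
    # Mirrors the new validation in transcribe(): "eng (English)" -> "eng".
    code = label.split()[0]
    return code if code in ASR_LANGUAGES else None

print(check_label("eng (English)"))  # 'eng'
print(check_label("zzz (Unknown)"))  # None; transcribe() would return
                                     # "<<ERROR: Unsupported Language Code: zzz>>"

Note that the loader splits each row on the first space; if the rows of all_langs.tsv are tab-delimited, that split would not separate code from name, so the delimiter is worth double-checking against the file.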