Siddhant
Update demo
b9a6dd9
from typing import Tuple
import numpy as np
from espnet2.sds.utils.utils import int2float
def handle_espnet_ASR_WER(
ASR_audio_output: Tuple[int, np.ndarray], ASR_transcript: str
) -> str:
"""
Compute and return Word Error Rate (WER) and Character Error Rate (CER) metrics
for multiple judge ASR systems (ESPnet, OWSM, Whisper) using the Versa library.
This function performs the following:
1. Imports necessary metrics and setup functions from Versa.
2. Prepares configuration arguments for each ASR system (ESPnet, OWSM, Whisper).
3. Runs the Levenshtein-based WER/CER calculations.
4. Returns a formatted string summarizing WER and CER
results for reference produced by each ASR system.
Args:
ASR_audio_output (tuple):
A tuple where:
- The first element is the frame rate.
- The second element is the audio signal (NumPy array).
ASR_transcript (str):
The transcript produced by the ASR model in the cascaded
conversational AI pipeline.
Returns:
str:
A formatted string showing the WER and CER percentages
for ESPnet, OWSM, and Whisper. Example output:
"ESPnet WER: 10.50
ESPnet CER: 7.20
OWSM WER: 11.30
OWSM CER: 8.00
Whisper WER: 9.25
Whisper CER: 6.50"
Raises:
ImportError:
If Versa is not installed or cannot be imported.
Example:
>>> asr_audio_output = (16000, audio_array)
>>> asr_transcript = "This is the ASR transcript."
>>> result = handle_espnet_ASR_WER(asr_audio_output, asr_transcript)
>>> print(result)
"ESPnet WER: 10.50
ESPnet CER: 7.20
OWSM WER: 11.30
OWSM CER: 8.00
Whisper WER: 9.25
Whisper CER: 6.50"
"""
try:
from versa import (
espnet_levenshtein_metric,
espnet_wer_setup,
owsm_levenshtein_metric,
owsm_wer_setup,
whisper_levenshtein_metric,
whisper_wer_setup,
)
except Exception as e:
print("Error: Versa is not properly installed.")
raise e
score_modules_espnet = {
"module": espnet_levenshtein_metric,
"args": espnet_wer_setup(
model_tag="default",
beam_size=1,
text_cleaner="whisper_en",
use_gpu=True,
),
}
dict1 = score_modules_espnet["module"](
score_modules_espnet["args"],
int2float(ASR_audio_output[1]),
ASR_transcript,
ASR_audio_output[0],
)
espnet_wer = (
dict1["espnet_wer_delete"]
+ dict1["espnet_wer_insert"]
+ dict1["espnet_wer_replace"]
) / (
dict1["espnet_wer_insert"]
+ dict1["espnet_wer_replace"]
+ dict1["espnet_wer_equal"]
)
espnet_cer = (
dict1["espnet_cer_delete"]
+ dict1["espnet_cer_insert"]
+ dict1["espnet_cer_replace"]
) / (
dict1["espnet_cer_insert"]
+ dict1["espnet_cer_replace"]
+ dict1["espnet_cer_equal"]
)
score_modules_owsm = {
"module": owsm_levenshtein_metric,
"args": owsm_wer_setup(
model_tag="default",
beam_size=1,
text_cleaner="whisper_en",
use_gpu=True,
),
}
dict1 = score_modules_owsm["module"](
score_modules_owsm["args"],
int2float(ASR_audio_output[1]),
ASR_transcript,
ASR_audio_output[0],
)
owsm_wer = (
dict1["owsm_wer_delete"] + dict1["owsm_wer_insert"] + dict1["owsm_wer_replace"]
) / (dict1["owsm_wer_insert"] + dict1["owsm_wer_replace"] + dict1["owsm_wer_equal"])
owsm_cer = (
dict1["owsm_cer_delete"] + dict1["owsm_cer_insert"] + dict1["owsm_cer_replace"]
) / (dict1["owsm_cer_insert"] + dict1["owsm_cer_replace"] + dict1["owsm_cer_equal"])
score_modules_whisper = {
"module": whisper_levenshtein_metric,
"args": whisper_wer_setup(
model_tag="default",
beam_size=1,
text_cleaner="whisper_en",
use_gpu=True,
),
}
dict1 = score_modules_whisper["module"](
score_modules_whisper["args"],
int2float(ASR_audio_output[1]),
ASR_transcript,
ASR_audio_output[0],
)
whisper_wer = (
dict1["whisper_wer_delete"]
+ dict1["whisper_wer_insert"]
+ dict1["whisper_wer_replace"]
) / (
dict1["whisper_wer_insert"]
+ dict1["whisper_wer_replace"]
+ dict1["whisper_wer_equal"]
)
whisper_cer = (
dict1["whisper_cer_delete"]
+ dict1["whisper_cer_insert"]
+ dict1["whisper_cer_replace"]
) / (
dict1["whisper_cer_insert"]
+ dict1["whisper_cer_replace"]
+ dict1["whisper_cer_equal"]
)
return (
f"ESPnet WER: {espnet_wer*100:.2f}\n"
f"ESPnet CER: {espnet_cer*100:.2f}\n"
f"OWSM WER: {owsm_wer*100:.2f}\n"
f"OWSM CER: {owsm_cer*100:.2f}\n"
f"Whisper WER: {whisper_wer*100:.2f}\n"
f"Whisper CER: {whisper_cer*100:.2f}"
)