zhuohan-7 committed
Commit 5792938 · verified · 1 Parent(s): 2c6e496

Upload folder using huggingface_hub

Files changed (3):
  1. app/content.py +48 -34
  2. app/draw_diagram.py +16 -12
  3. app/pages.py +30 -3
app/content.py CHANGED
@@ -7,8 +7,15 @@ asr_datsets = {'LibriSpeech-Test-Clean': 'A clean, high-quality testset of the L
     'Earnings22-Test' : 'Similar to Earnings21, but covering earnings calls from 2022.',
     'Tedlium3-Test' : 'A test set derived from TED talks, covering diverse speakers and topics.',
     'Tedlium3-Long-form-Test': 'A longer version of the TED-LIUM dataset, containing extended audio samples. This poses challenges to existing fusion methods in handling long audios. However, it provides benchmark for future development.',
+    }
+
+singlish_asr_datasets = {
     'IMDA-Part1-ASR-Test' : 'Speech recognition test data from the IMDA NSC project, Part 1.',
-    'IMDA-Part2-ASR-Test' : 'Speech recognition test data from the IMDA NSC project, Part 1.'
+    'IMDA-Part2-ASR-Test' : 'Speech recognition test data from the IMDA NSC project, Part 2.',
+    'IMDA-Part3-30s-ASR-Test': 'Speech recognition test data from the IMDA NSC project, Part 3.',
+    'IMDA-Part4-30s-ASR-Test': 'Speech recognition test data from the IMDA NSC project, Part 4.',
+    'IMDA-Part5-30s-ASR-Test': 'Speech recognition test data from the IMDA NSC project, Part 5.',
+    'IMDA-Part6-30s-ASR-Test': 'Speech recognition test data from the IMDA NSC project, Part 6.'
     }
 
 sqa_datasets = {'CN-College-Listen-MCQ-Test': 'Chinese College English Listening Test, with multiple-choice questions.',
@@ -78,39 +85,46 @@ metrics_info = {
     }
 
 dataname_column_rename_in_table = {
-    'librispeech_test_clean' : 'LibriSpeech-Clean',
-    'librispeech_test_other' : 'LibriSpeech-Other',
-    'common_lvoice_15_en_test': 'CommonVoice-15-EN',
-    'peoples_speech_test' : 'Peoples-Speech',
-    'gigaspeech_test' : 'GigaSpeech-1',
-    'earnings21_test' : 'Earnings-21',
-    'earnings22_test' : 'Earnings-22',
-    'tedlium3_test' : 'TED-LIUM-3',
-    'tedlium3_long_form_test': 'TED-LIUM-3-Long',
-    'aishell_asr_zh_test' : 'Aishell-ASR-ZH',
-    'covost2_en_id_test' : 'Covost2-EN-ID',
-    'covost2_en_zh_test' : 'Covost2-EN-ZH',
-    'covost2_en_ta_test' : 'Covost2-EN-TA',
-    'covost2_id_en_test' : 'Covost2-ID-EN',
-    'covost2_zh_en_test' : 'Covost2-ZH-EN',
-    'covost2_ta_en_test' : 'Covost2-TA-EN',
+    'librispeech_test_clean' : 'LibriSpeech-Clean',
+    'librispeech_test_other' : 'LibriSpeech-Other',
+    'common_lvoice_15_en_test' : 'CommonVoice-15-EN',
+    'peoples_speech_test' : 'Peoples-Speech',
+    'gigaspeech_test' : 'GigaSpeech-1',
+    'earnings21_test' : 'Earnings-21',
+    'earnings22_test' : 'Earnings-22',
+    'tedlium3_test' : 'TED-LIUM-3',
+    'tedlium3_long_form_test' : 'TED-LIUM-3-Long',
+    'aishell_asr_zh_test' : 'Aishell-ASR-ZH',
+    'covost2_en_id_test' : 'Covost2-EN-ID',
+    'covost2_en_zh_test' : 'Covost2-EN-ZH',
+    'covost2_en_ta_test' : 'Covost2-EN-TA',
+    'covost2_id_en_test' : 'Covost2-ID-EN',
+    'covost2_zh_en_test' : 'Covost2-ZH-EN',
+    'covost2_ta_en_test' : 'Covost2-TA-EN',
     'cn_college_listen_mcq_test': 'CN-College-Listen-MCQ',
-    'dream_tts_mcq_test' : 'DREAM-TTS-MCQ',
-    'slue_p2_sqa5_test' : 'SLUE-P2-SQA5',
-    'public_sg_speech_qa_test': 'Public-SG-Speech-QA',
-    'spoken_squad_test' : 'Spoken-SQuAD',
-    'openhermes_audio_test' : 'OpenHermes-Audio',
-    'alpaca_audio_test' : 'ALPACA-Audio',
-    'wavcaps_test' : 'WavCaps',
-    'audiocaps_test' : 'AudioCaps',
-    'clotho_aqa_test' : 'Clotho-AQA',
-    'wavcaps_qa_test' : 'WavCaps-QA',
-    'audiocaps_qa_test' : 'AudioCaps-QA',
-    'voxceleb_accent_test' : 'VoxCeleb-Accent',
-    'voxceleb_gender_test' : 'VoxCeleb-Gender',
-    'iemocap_gender_test': 'IEMOCAP-Gender',
-    'iemocap_emotion_test': 'IEMOCAP-Emotion',
-    'meld_sentiment_test': 'MELD-Sentiment',
-    'meld_emotion_test': 'MELD-Emotion',
+    'dream_tts_mcq_test' : 'DREAM-TTS-MCQ',
+    'slue_p2_sqa5_test' : 'SLUE-P2-SQA5',
+    'public_sg_speech_qa_test' : 'Public-SG-Speech-QA',
+    'spoken_squad_test' : 'Spoken-SQuAD',
+    'openhermes_audio_test' : 'OpenHermes-Audio',
+    'alpaca_audio_test' : 'ALPACA-Audio',
+    'wavcaps_test' : 'WavCaps',
+    'audiocaps_test' : 'AudioCaps',
+    'clotho_aqa_test' : 'Clotho-AQA',
+    'wavcaps_qa_test' : 'WavCaps-QA',
+    'audiocaps_qa_test' : 'AudioCaps-QA',
+    'voxceleb_accent_test' : 'VoxCeleb-Accent',
+    'voxceleb_gender_test' : 'VoxCeleb-Gender',
+    'iemocap_gender_test' : 'IEMOCAP-Gender',
+    'iemocap_emotion_test' : 'IEMOCAP-Emotion',
+    'meld_sentiment_test' : 'MELD-Sentiment',
+    'meld_emotion_test' : 'MELD-Emotion',
+    'imda_part1_asr_test' : 'IMDA-Part1-ASR',
+    'imda_part2_asr_test' : 'IMDA-Part2-ASR',
+    'imda_part3_30s_asr_test' : 'IMDA-Part3-30s-ASR',
+    'imda_part4_30s_asr_test' : 'IMDA-Part4-30s-ASR',
+    'imda_part5_30s_asr_test' : 'IMDA-Part5-30s-ASR',
+    'imda_part6_30s_asr_test' : 'IMDA-Part6-30s-ASR',
+
 
     }
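
Note: the six IMDA NSC test sets now live in their own singlish_asr_datasets dictionary, and dataname_column_rename_in_table gains their display names. The place where this mapping is applied is not part of this diff; the following is a minimal sketch, with a made-up model name and WER values, of how such a key-to-display-name map is typically applied with pandas:

# Minimal sketch, not from this repo: applying a key-to-display-name
# mapping like dataname_column_rename_in_table to a results table.
# The model name and WER values below are made up for illustration.
import pandas as pd

rename_map = {
    'imda_part1_asr_test': 'IMDA-Part1-ASR',
    'imda_part2_asr_test': 'IMDA-Part2-ASR',
}

results = pd.DataFrame(
    {'imda_part1_asr_test': [9.7], 'imda_part2_asr_test': [14.2]},
    index=['some-model'],
)

# rename() silently skips mapping keys that are absent from the frame,
# so the map can be a superset of the columns actually present.
results = results.rename(columns=rename_map)
print(list(results.columns))  # ['IMDA-Part1-ASR', 'IMDA-Part2-ASR']
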
app/draw_diagram.py CHANGED
@@ -90,18 +90,22 @@ def draw(folder_name, category_name, dataset_name, metrics, cus_sort=True):
         return df_style
 
     if cur_dataset_name in [
-        'librispeech_test_clean',
-        'librispeech_test_other',
-        'common_voice_15_en_test',
-        'peoples_speech_test',
-        'gigaspeech_test',
-        'earnings21_test',
-        'earnings22_test',
-        'tedlium3_test',
-        'tedlium3_long_form_test',
-        'imda_part1_asr_test',
-        'imda_part2_asr_test',
-        'aishell_asr_zh_test',
+        'LibriSpeech-Clean',
+        'LibriSpeech-Other',
+        'CommonVoice-15-EN',
+        'Peoples-Speech',
+        'GigaSpeech-1',
+        'Earnings-21',
+        'Earnings-22',
+        'TED-LIUM-3',
+        'TED-LIUM-3-Long',
+        'Aishell-ASR-ZH',
+        'IMDA-Part1-ASR',
+        'IMDA-Part2-ASR',
+        'IMDA-Part3-30s-ASR',
+        'IMDA-Part4-30s-ASR',
+        'IMDA-Part5-30s-ASR',
+        'IMDA-Part6-30s-ASR',
         ]:
 
         chart_data_table = chart_data_table.sort_values(
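
With the rename above, draw() now matches on display names (e.g. 'IMDA-Part1-ASR') instead of raw dataset keys, and the list covers all six IMDA parts. The sort_values() call is truncated in this hunk; since these are WER-scored datasets (lower is better), an ascending sort along the following lines would be expected. This is a sketch with hypothetical data, not the repo's actual call:

# Sketch with hypothetical data: ranking models by WER, best first.
# The real sort_values() arguments are cut off in the diff above.
import pandas as pd

chart_data_table = pd.DataFrame({
    'Model': ['model-a', 'model-b', 'model-c'],  # made-up model names
    'WER': [12.4, 7.9, 10.1],                    # made-up WER scores
})

chart_data_table = chart_data_table.sort_values(
    by='WER',        # assumed sort key
    ascending=True,  # WER: lower is better
).reset_index(drop=True)

print(chart_data_table)  # model-b ranks first with the lowest WER
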
app/pages.py CHANGED
@@ -40,8 +40,8 @@ def dashboard():
     audio_url = "https://arxiv.org/abs/2406.16020"
 
     st.markdown("#### News")
-    st.markdown("**Dec, 2024**: Update layout and support comparison between models with similar model sizes. Layout reorganized for better user experience. Add performance summary for each task.")
-    st.markdown("**Sep, 2024**: Initial leaderboard online.")
+    st.markdown("**Dec 11, 2024**: Update layout and support comparison between models with similar model sizes. Layout reorganized for better user experience. Add performance summary for each task.")
+    st.markdown("**Aug, 2024**: Initial leaderboard online.")
 
     st.divider()
 
@@ -56,7 +56,6 @@ def dashboard():
     with center_co:
         st.image("./style/audio_overview.png",
                  caption="Overview of the datasets in AudioBench.",
-                 # use_container_width = True
                  )
 
     st.markdown('''
@@ -116,6 +115,34 @@ def asr():
         draw('su', 'ASR', filter_1, 'wer', cus_sort=True)
 
 
+def singlish_asr():
+    st.title("Task: Automatic Speech Recognition - Singlish")
+
+    sum = ['Overall']
+    dataset_lists = [
+        'IMDA-Part1-ASR-Test',
+        'IMDA-Part2-ASR-Test',
+        'IMDA-Part3-30s-ASR-Test',
+        'IMDA-Part4-30s-ASR-Test',
+        'IMDA-Part5-30s-ASR-Test',
+        'IMDA-Part6-30s-ASR-Test',
+    ]
+
+    filters_levelone = sum + dataset_lists
+
+    left, center, _, middle, right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
+
+    with left:
+        filter_1 = st.selectbox('Dataset', filters_levelone)
+
+    if filter_1:
+        if filter_1 in sum:
+            sum_table_mulit_metrix('singlish_asr', ['wer'])
+        else:
+            dataset_contents(singlish_asr_datasets[filter_1], metrics['wer'])
+            draw('su', 'singlish_asr', filter_1, 'wer')
+
+
 def cnasr():
     st.title("Task: Automatic Speech Recognition - Mandarin")
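
The new singlish_asr() page follows the same pattern as the existing task pages: an 'Overall' summary via sum_table_mulit_metrix plus a per-dataset view via dataset_contents and draw. Hooking the page into the app's navigation is not shown in this diff; the sketch below is a hypothetical illustration of such wiring in Streamlit, with stand-in page functions and labels that are assumptions, not the repo's code:

# Hypothetical wiring, not from this repo: routing a sidebar selection
# to page functions. Labels and stubs below are illustrative only.
import streamlit as st

def asr():           # stand-in for the existing English ASR page
    st.title("Task: Automatic Speech Recognition - English")

def singlish_asr():  # stand-in for the page added in this commit
    st.title("Task: Automatic Speech Recognition - Singlish")

PAGES = {
    'ASR (English)': asr,
    'ASR (Singlish)': singlish_asr,
}

choice = st.sidebar.selectbox('Task', list(PAGES))
PAGES[choice]()  # render whichever page the user picked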