Spaces:
Running
Running
Update pages.py
Browse files
pages.py
CHANGED
@@ -101,105 +101,34 @@ def audio_llm():
|
|
101 |
|
102 |
with col3:
|
103 |
audio_samples_w_instruct = {
|
104 |
-
'1_ASR_IMDA_PART1_ASR_v2_141' : "
|
105 |
-
'2_ASR_IMDA_PART1_ASR_v2_2258': "
|
106 |
-
'3_ASR_IMDA_PART1_ASR_v2_2265': "
|
107 |
|
108 |
-
'4_ASR_IMDA_PART2_ASR_v2_999' : "
|
109 |
-
'5_ASR_IMDA_PART2_ASR_v2_2241': "
|
110 |
-
'6_ASR_IMDA_PART2_ASR_v2_3409': "
|
111 |
|
112 |
-
'7_ASR_IMDA_PART3_30_ASR_v2_2269': "
|
113 |
-
'8_ASR_IMDA_PART3_30_ASR_v2_1698': "
|
114 |
-
'9_ASR_IMDA_PART3_30_ASR_v2_2474': "
|
115 |
|
116 |
-
'10_ASR_IMDA_PART4_30_ASR_v2_1527': "
|
117 |
-
'11_ASR_IMDA_PART4_30_ASR_v2_3771': "
|
118 |
-
'12_ASR_IMDA_PART4_30_ASR_v2_103' : "
|
119 |
|
120 |
-
'13_ASR_IMDA_PART5_30_ASR_v2_1446': "
|
121 |
-
'14_ASR_IMDA_PART5_30_ASR_v2_2281': "
|
122 |
-
'15_ASR_IMDA_PART5_30_ASR_v2_4388': "
|
123 |
|
124 |
-
'16_ASR_IMDA_PART6_30_ASR_v2_576': "
|
125 |
-
'17_ASR_IMDA_PART6_30_ASR_v2_1413': "
|
126 |
-
'18_ASR_IMDA_PART6_30_ASR_v2_2834': "
|
127 |
|
128 |
-
'19_ASR_AIShell_zh_ASR_v2_5044': "
|
129 |
|
130 |
-
'20_ASR_LIBRISPEECH_CLEAN_ASR_V2_833': "
|
131 |
-
|
132 |
-
'21_ASR_LIBRISPEECH_OTHER_ASR_V2_656': "Example Instruction: \n\n- Can you make this audio into text?",
|
133 |
-
|
134 |
-
'22_ASR_MEDIACORP_ASR_V2_35': "Example Instruction: \n\n- Transform the audio speech into a written transcript.",
|
135 |
-
|
136 |
-
'23_ASR_MEDIACORP_ASR_V2_6': "Example Instruction: \n\n- Transform the audio speech into a written transcript.",
|
137 |
-
|
138 |
-
'24_ASR_PEOPLES_SPEECH_ASR_V2_21376': "Example Instruction: \n\n- Need this audio turned into a written piece.",
|
139 |
-
|
140 |
-
'25_ST_COVOST2_ZH-CN_EN_ST_V2_4567': "Example Instruction: \n\n- Please translate the given speech to English.",
|
141 |
-
|
142 |
-
'26_ST_COVOST2_EN_ZH-CN_ST_V2_5422': "Example Instruction: \n\n- Please translate the given speech to Chinese.",
|
143 |
-
|
144 |
-
'27_ST_COVOST2_EN_ZH-CN_ST_V2_6697': "Example Instruction: \n\n- Please translate the given speech to Chinese.",
|
145 |
-
|
146 |
-
'28_SI_ALPACA-GPT4-AUDIO_SI_V2_299': "Example Instruction: \n\n- Please follow the instruction in the speech.",
|
147 |
-
|
148 |
-
'29_SI_ALPACA-GPT4-AUDIO_SI_V2_750': "Example Instruction: \n\n- Please follow the instruction in the speech.",
|
149 |
-
|
150 |
-
'30_SI_ALPACA-GPT4-AUDIO_SI_V2_1454': "Example Instruction: \n\n- Please follow the instruction in the speech.",
|
151 |
-
|
152 |
-
'31_SI_OPENHERMES-AUDIO_SI_V2_673': "Example Instruction: \n\n- Please follow the instruction in the speech.",
|
153 |
-
|
154 |
-
'32_SQA_CN_COLLEDGE_ENTRANCE_ENGLISH_TEST_SQA_V2_572': "Example Instruction: \n\n- What does the man think the woman should do at 4:00?",
|
155 |
-
|
156 |
-
'33_SQA_IMDA_PART3_30_SQA_V2_2310': "Example Instruction: \n\n- Does Speaker2's wife cook for Speaker2 when they are at home?",
|
157 |
-
|
158 |
-
'34_SQA_IMDA_PART3_30_SQA_V2_3621': "Example Instruction: \n\n- Does the phrase \"#gai-gai#\" have a meaning in Chinese or Hokkien language?",
|
159 |
-
|
160 |
-
'35_SQA_IMDA_PART3_30_SQA_V2_4062': "Example Instruction: \n\n- What is the color of the vase mentioned in the dialogue?",
|
161 |
-
|
162 |
-
'36_DS_IMDA_PART4_30_DS_V2_849': "Example Instruction: \n\n- Condense the dialogue into a concise summary highlighting major topics and conclusions.",
|
163 |
-
|
164 |
-
'39_Paralingual_IEMOCAP_ER_V2_91': "Example Instruction: \n\n- Based on the speaker's speech patterns, what do you think they are feeling?",
|
165 |
-
|
166 |
-
'40_Paralingual_IEMOCAP_ER_V2_567': "Example Instruction: \n\n- Based on the speaker's speech patterns, what do you think they are feeling?",
|
167 |
-
|
168 |
-
'41_Paralingual_IEMOCAP_ER_V2_468': "Example Instruction: \n\n- Based on the speaker's speech patterns, what do you think they are feeling?",
|
169 |
-
|
170 |
-
'42_Paralingual_IEMOCAP_GR_V2_320': "Example Instruction: \n\n- Is it possible for you to identify whether the speaker in this recording is male or female?",
|
171 |
-
|
172 |
-
'43_Paralingual_IEMOCAP_GR_V2_129': "Example Instruction: \n\n- Is it possible for you to identify whether the speaker in this recording is male or female?",
|
173 |
-
|
174 |
-
'44_Paralingual_IEMOCAP_GR_V2_213': "Example Instruction: \n\n- Is it possible for you to identify whether the speaker in this recording is male or female?",
|
175 |
-
|
176 |
-
'45_Paralingual_IMDA_PART3_30_GR_V2_12312': "Example Instruction: \n\n- So, who's speaking in the second part of the clip? \n\n- So, who's speaking in the first part of the clip?",
|
177 |
-
|
178 |
-
'46_Paralingual_IMDA_PART3_30_GR_V2_1442': "Example Instruction: \n\n- Who starts the conversation in the dialogue?",
|
179 |
-
|
180 |
-
'47_Paralingual_IMDA_PART3_30_NR_V2_10479': "Example Instruction: \n\n- Can you guess which ethnic group this person is from based on their accent?",
|
181 |
-
|
182 |
-
'48_Paralingual_IMDA_PART3_30_NR_V2_15735': "Example Instruction: \n\n- In an analysis of the audio recording, determine the ethnic backgrounds of the speakers based on the accents used.",
|
183 |
-
|
184 |
-
'49_Paralingual_MELD_ER_V2_676': "Example Instruction: \n\n- What emotions do you think the speaker is expressing?",
|
185 |
-
|
186 |
-
'50_Paralingual_MELD_ER_V2_692': "Example Instruction: \n\n- Based on the speaker's speech patterns, what do you think they are feeling?",
|
187 |
-
|
188 |
-
'51_Paralingual_VOXCELEB1_GR_V2_2148': "Example Instruction: \n\n- May I know the gender of the speaker?",
|
189 |
-
|
190 |
-
'52_Paralingual_VOXCELEB1_GR_V2_3282': "Example Instruction: \n\n- I'd appreciate knowing the gender of the speaker, if possible.",
|
191 |
-
|
192 |
-
'53_Paralingual_VOXCELEB1_NR_V2_2286': "Example Instruction: \n\n- What's the nationality identity of the speaker?",
|
193 |
-
|
194 |
-
'54_Paralingual_VOXCELEB1_NR_V2_2742': "Example Instruction: \n\n- I'm intrigued by the speaker's nationality, could you enlighten me?",
|
195 |
-
|
196 |
-
'55_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_2': "Example Instruction: \n\n- What impact would the growth of the healthcare sector have on the country's economy in terms of employment and growth?",
|
197 |
-
|
198 |
-
'56_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_415': "Example Instruction: \n\n- Based on the statement, can you summarize the speaker's position on the recent controversial issues in Singapore?",
|
199 |
-
|
200 |
-
'57_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_460': "Example Instruction: \n\n- How does the author respond to parents' worries about masks in schools?"
|
201 |
-
|
202 |
-
}
|
203 |
|
204 |
audio_sample_names = [audio_sample_name for audio_sample_name in audio_samples_w_instruct.keys()]
|
205 |
|
@@ -220,7 +149,6 @@ def audio_llm():
|
|
220 |
st.session_state.audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
|
221 |
st.session_state.audio_array = bytes_to_array(audio_bytes)
|
222 |
|
223 |
-
st.write(st.session_state.default_instruction)
|
224 |
st.audio(st.session_state.audio_array, format="audio/wav", sample_rate=16000)
|
225 |
st.session_state.update(on_upload=False, on_record=False, on_select=False)
|
226 |
|
@@ -233,6 +161,9 @@ def audio_llm():
|
|
233 |
with col5:
|
234 |
st.slider(label='Top P', min_value=0.0, max_value=1.0, value=1.0, key='top_p')
|
235 |
|
|
|
|
|
|
|
236 |
st.markdown(
|
237 |
"""
|
238 |
<style>
|
|
|
101 |
|
102 |
with col3:
|
103 |
audio_samples_w_instruct = {
|
104 |
+
'1_ASR_IMDA_PART1_ASR_v2_141' : "- Turn the spoken language into a text format.\n\n- Please translate the content into Chinese.",
|
105 |
+
'2_ASR_IMDA_PART1_ASR_v2_2258': "- Turn the spoken language into a text format.\n\n- Please translate the content into Chinese.",
|
106 |
+
'3_ASR_IMDA_PART1_ASR_v2_2265': "- Turn the spoken language into a text format.",
|
107 |
|
108 |
+
'4_ASR_IMDA_PART2_ASR_v2_999' : "- Translate the spoken words into text format.",
|
109 |
+
'5_ASR_IMDA_PART2_ASR_v2_2241': "- Translate the spoken words into text format.",
|
110 |
+
'6_ASR_IMDA_PART2_ASR_v2_3409': "- Translate the spoken words into text format.",
|
111 |
|
112 |
+
'7_ASR_IMDA_PART3_30_ASR_v2_2269': "- Need this talk written down, please.",
|
113 |
+
'8_ASR_IMDA_PART3_30_ASR_v2_1698': "- Need this talk written down, please.",
|
114 |
+
'9_ASR_IMDA_PART3_30_ASR_v2_2474': "- Need this talk written down, please.",
|
115 |
|
116 |
+
'10_ASR_IMDA_PART4_30_ASR_v2_1527': "- Write out the dialogue as text.",
|
117 |
+
'11_ASR_IMDA_PART4_30_ASR_v2_3771': "- Write out the dialogue as text.",
|
118 |
+
'12_ASR_IMDA_PART4_30_ASR_v2_103' : "- Write out the dialogue as text.",
|
119 |
|
120 |
+
'13_ASR_IMDA_PART5_30_ASR_v2_1446': "- Translate this vocal recording into a textual format.",
|
121 |
+
'14_ASR_IMDA_PART5_30_ASR_v2_2281': "- Translate this vocal recording into a textual format.",
|
122 |
+
'15_ASR_IMDA_PART5_30_ASR_v2_4388': "- Translate this vocal recording into a textual format.",
|
123 |
|
124 |
+
'16_ASR_IMDA_PART6_30_ASR_v2_576': "- Record the spoken word in text form.",
|
125 |
+
'17_ASR_IMDA_PART6_30_ASR_v2_1413': "- Record the spoken word in text form.",
|
126 |
+
'18_ASR_IMDA_PART6_30_ASR_v2_2834': "- Record the spoken word in text form.",
|
127 |
|
128 |
+
'19_ASR_AIShell_zh_ASR_v2_5044': "- Transform the oral presentation into a text document.",
|
129 |
|
130 |
+
'20_ASR_LIBRISPEECH_CLEAN_ASR_V2_833': "- Please provide a written transcription of the speech."
|
131 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
|
133 |
audio_sample_names = [audio_sample_name for audio_sample_name in audio_samples_w_instruct.keys()]
|
134 |
|
|
|
149 |
st.session_state.audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
|
150 |
st.session_state.audio_array = bytes_to_array(audio_bytes)
|
151 |
|
|
|
152 |
st.audio(st.session_state.audio_array, format="audio/wav", sample_rate=16000)
|
153 |
st.session_state.update(on_upload=False, on_record=False, on_select=False)
|
154 |
|
|
|
161 |
with col5:
|
162 |
st.slider(label='Top P', min_value=0.0, max_value=1.0, value=1.0, key='top_p')
|
163 |
|
164 |
+
st.markdown("**Example Instruction:**")
|
165 |
+
st.write(st.session_state.default_instruction)
|
166 |
+
|
167 |
st.markdown(
|
168 |
"""
|
169 |
<style>
|