YingxuHe committed
Commit 67da2ee · verified · 1 Parent(s): af54a08

Update pages.py

Files changed (1)
  1. pages.py +24 -93
pages.py CHANGED
@@ -101,105 +101,34 @@ def audio_llm():
 
  with col3:
  audio_samples_w_instruct = {
- '1_ASR_IMDA_PART1_ASR_v2_141' : "Example Instruction:\n\n- Turn the spoken language into a text format.\n\n- Please translate the content into Chinese.",
- '2_ASR_IMDA_PART1_ASR_v2_2258': "Example Instruction:\n\n- Turn the spoken language into a text format.\n\n- Please translate the content into Chinese.",
- '3_ASR_IMDA_PART1_ASR_v2_2265': "Example Instruction:\n\n- Turn the spoken language into a text format.",
+ '1_ASR_IMDA_PART1_ASR_v2_141' : "- Turn the spoken language into a text format.\n\n- Please translate the content into Chinese.",
+ '2_ASR_IMDA_PART1_ASR_v2_2258': "- Turn the spoken language into a text format.\n\n- Please translate the content into Chinese.",
+ '3_ASR_IMDA_PART1_ASR_v2_2265': "- Turn the spoken language into a text format.",
 
- '4_ASR_IMDA_PART2_ASR_v2_999' : "Example Instruction:\n\n- Translate the spoken words into text format.",
- '5_ASR_IMDA_PART2_ASR_v2_2241': "Example Instruction: \n\n- Translate the spoken words into text format.",
- '6_ASR_IMDA_PART2_ASR_v2_3409': "Example Instruction: \n\n- Translate the spoken words into text format.",
+ '4_ASR_IMDA_PART2_ASR_v2_999' : "- Translate the spoken words into text format.",
+ '5_ASR_IMDA_PART2_ASR_v2_2241': "- Translate the spoken words into text format.",
+ '6_ASR_IMDA_PART2_ASR_v2_3409': "- Translate the spoken words into text format.",
 
- '7_ASR_IMDA_PART3_30_ASR_v2_2269': "Example Instruction:\n\n- Need this talk written down, please.",
- '8_ASR_IMDA_PART3_30_ASR_v2_1698': "Example Instruction: \n\n- Need this talk written down, please.",
- '9_ASR_IMDA_PART3_30_ASR_v2_2474': "Example Instruction: \n\n- Need this talk written down, please.",
+ '7_ASR_IMDA_PART3_30_ASR_v2_2269': "- Need this talk written down, please.",
+ '8_ASR_IMDA_PART3_30_ASR_v2_1698': "- Need this talk written down, please.",
+ '9_ASR_IMDA_PART3_30_ASR_v2_2474': "- Need this talk written down, please.",
 
- '10_ASR_IMDA_PART4_30_ASR_v2_1527': "Example Instruction:\n\n- Write out the dialogue as text.",
- '11_ASR_IMDA_PART4_30_ASR_v2_3771': "Example Instruction: \n\n- Write out the dialogue as text.",
- '12_ASR_IMDA_PART4_30_ASR_v2_103' : "Example Instruction: \n\n- Write out the dialogue as text.",
+ '10_ASR_IMDA_PART4_30_ASR_v2_1527': "- Write out the dialogue as text.",
+ '11_ASR_IMDA_PART4_30_ASR_v2_3771': "- Write out the dialogue as text.",
+ '12_ASR_IMDA_PART4_30_ASR_v2_103' : "- Write out the dialogue as text.",
 
- '13_ASR_IMDA_PART5_30_ASR_v2_1446': "Example Instruction: \n\n- Translate this vocal recording into a textual format.",
- '14_ASR_IMDA_PART5_30_ASR_v2_2281': "Example Instruction: \n\n- Translate this vocal recording into a textual format.",
- '15_ASR_IMDA_PART5_30_ASR_v2_4388': "Example Instruction: \n\n- Translate this vocal recording into a textual format.",
+ '13_ASR_IMDA_PART5_30_ASR_v2_1446': "- Translate this vocal recording into a textual format.",
+ '14_ASR_IMDA_PART5_30_ASR_v2_2281': "- Translate this vocal recording into a textual format.",
+ '15_ASR_IMDA_PART5_30_ASR_v2_4388': "- Translate this vocal recording into a textual format.",
 
- '16_ASR_IMDA_PART6_30_ASR_v2_576': "Example Instruction: \n\n- Record the spoken word in text form.",
- '17_ASR_IMDA_PART6_30_ASR_v2_1413': "Example Instruction: \n\n- Record the spoken word in text form.",
- '18_ASR_IMDA_PART6_30_ASR_v2_2834': "Example Instruction: \n\n- Record the spoken word in text form.",
+ '16_ASR_IMDA_PART6_30_ASR_v2_576': "- Record the spoken word in text form.",
+ '17_ASR_IMDA_PART6_30_ASR_v2_1413': "- Record the spoken word in text form.",
+ '18_ASR_IMDA_PART6_30_ASR_v2_2834': "- Record the spoken word in text form.",
 
- '19_ASR_AIShell_zh_ASR_v2_5044': "Example Instruction: \n\n- Transform the oral presentation into a text document.",
+ '19_ASR_AIShell_zh_ASR_v2_5044': "- Transform the oral presentation into a text document.",
 
- '20_ASR_LIBRISPEECH_CLEAN_ASR_V2_833': "Example Instruction: \n\n- Please provide a written transcription of the speech.",
-
- '21_ASR_LIBRISPEECH_OTHER_ASR_V2_656': "Example Instruction: \n\n- Can you make this audio into text?",
-
- '22_ASR_MEDIACORP_ASR_V2_35': "Example Instruction: \n\n- Transform the audio speech into a written transcript.",
-
- '23_ASR_MEDIACORP_ASR_V2_6': "Example Instruction: \n\n- Transform the audio speech into a written transcript.",
-
- '24_ASR_PEOPLES_SPEECH_ASR_V2_21376': "Example Instruction: \n\n- Need this audio turned into a written piece.",
-
- '25_ST_COVOST2_ZH-CN_EN_ST_V2_4567': "Example Instruction: \n\n- Please translate the given speech to English.",
-
- '26_ST_COVOST2_EN_ZH-CN_ST_V2_5422': "Example Instruction: \n\n- Please translate the given speech to Chinese.",
-
- '27_ST_COVOST2_EN_ZH-CN_ST_V2_6697': "Example Instruction: \n\n- Please translate the given speech to Chinese.",
-
- '28_SI_ALPACA-GPT4-AUDIO_SI_V2_299': "Example Instruction: \n\n- Please follow the instruction in the speech.",
-
- '29_SI_ALPACA-GPT4-AUDIO_SI_V2_750': "Example Instruction: \n\n- Please follow the instruction in the speech.",
-
- '30_SI_ALPACA-GPT4-AUDIO_SI_V2_1454': "Example Instruction: \n\n- Please follow the instruction in the speech.",
-
- '31_SI_OPENHERMES-AUDIO_SI_V2_673': "Example Instruction: \n\n- Please follow the instruction in the speech.",
-
- '32_SQA_CN_COLLEDGE_ENTRANCE_ENGLISH_TEST_SQA_V2_572': "Example Instruction: \n\n- What does the man think the woman should do at 4:00?",
-
- '33_SQA_IMDA_PART3_30_SQA_V2_2310': "Example Instruction: \n\n- Does Speaker2's wife cook for Speaker2 when they are at home?",
-
- '34_SQA_IMDA_PART3_30_SQA_V2_3621': "Example Instruction: \n\n- Does the phrase \"#gai-gai#\" have a meaning in Chinese or Hokkien language?",
-
- '35_SQA_IMDA_PART3_30_SQA_V2_4062': "Example Instruction: \n\n- What is the color of the vase mentioned in the dialogue?",
-
- '36_DS_IMDA_PART4_30_DS_V2_849': "Example Instruction: \n\n- Condense the dialogue into a concise summary highlighting major topics and conclusions.",
-
- '39_Paralingual_IEMOCAP_ER_V2_91': "Example Instruction: \n\n- Based on the speaker's speech patterns, what do you think they are feeling?",
-
- '40_Paralingual_IEMOCAP_ER_V2_567': "Example Instruction: \n\n- Based on the speaker's speech patterns, what do you think they are feeling?",
-
- '41_Paralingual_IEMOCAP_ER_V2_468': "Example Instruction: \n\n- Based on the speaker's speech patterns, what do you think they are feeling?",
-
- '42_Paralingual_IEMOCAP_GR_V2_320': "Example Instruction: \n\n- Is it possible for you to identify whether the speaker in this recording is male or female?",
-
- '43_Paralingual_IEMOCAP_GR_V2_129': "Example Instruction: \n\n- Is it possible for you to identify whether the speaker in this recording is male or female?",
-
- '44_Paralingual_IEMOCAP_GR_V2_213': "Example Instruction: \n\n- Is it possible for you to identify whether the speaker in this recording is male or female?",
-
- '45_Paralingual_IMDA_PART3_30_GR_V2_12312': "Example Instruction: \n\n- So, who's speaking in the second part of the clip? \n\n- So, who's speaking in the first part of the clip?",
-
- '46_Paralingual_IMDA_PART3_30_GR_V2_1442': "Example Instruction: \n\n- Who starts the conversation in the dialogue?",
-
- '47_Paralingual_IMDA_PART3_30_NR_V2_10479': "Example Instruction: \n\n- Can you guess which ethnic group this person is from based on their accent?",
-
- '48_Paralingual_IMDA_PART3_30_NR_V2_15735': "Example Instruction: \n\n- In an analysis of the audio recording, determine the ethnic backgrounds of the speakers based on the accents used.",
-
- '49_Paralingual_MELD_ER_V2_676': "Example Instruction: \n\n- What emotions do you think the speaker is expressing?",
-
- '50_Paralingual_MELD_ER_V2_692': "Example Instruction: \n\n- Based on the speaker's speech patterns, what do you think they are feeling?",
-
- '51_Paralingual_VOXCELEB1_GR_V2_2148': "Example Instruction: \n\n- May I know the gender of the speaker?",
-
- '52_Paralingual_VOXCELEB1_GR_V2_3282': "Example Instruction: \n\n- I'd appreciate knowing the gender of the speaker, if possible.",
-
- '53_Paralingual_VOXCELEB1_NR_V2_2286': "Example Instruction: \n\n- What's the nationality identity of the speaker?",
-
- '54_Paralingual_VOXCELEB1_NR_V2_2742': "Example Instruction: \n\n- I'm intrigued by the speaker's nationality, could you enlighten me?",
-
- '55_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_2': "Example Instruction: \n\n- What impact would the growth of the healthcare sector have on the country's economy in terms of employment and growth?",
-
- '56_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_415': "Example Instruction: \n\n- Based on the statement, can you summarize the speaker's position on the recent controversial issues in Singapore?",
-
- '57_SQA_PUBLIC_SPEECH_SG_TEST_SQA_V2_460': "Example Instruction: \n\n- How does the author respond to parents' worries about masks in schools?"
-
- }
+ '20_ASR_LIBRISPEECH_CLEAN_ASR_V2_833': "- Please provide a written transcription of the speech."
+ }
 
  audio_sample_names = [audio_sample_name for audio_sample_name in audio_samples_w_instruct.keys()]
 
@@ -220,7 +149,6 @@ def audio_llm():
  st.session_state.audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
  st.session_state.audio_array = bytes_to_array(audio_bytes)
 
- st.write(st.session_state.default_instruction)
  st.audio(st.session_state.audio_array, format="audio/wav", sample_rate=16000)
  st.session_state.update(on_upload=False, on_record=False, on_select=False)
 
@@ -233,6 +161,9 @@ def audio_llm():
  with col5:
  st.slider(label='Top P', min_value=0.0, max_value=1.0, value=1.0, key='top_p')
 
+ st.markdown("**Example Instruction:**")
+ st.write(st.session_state.default_instruction)
+
  st.markdown(
  """
  <style>
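Taken together, the hunks move the "Example Instruction:" heading out of every dictionary value and render it once in the UI, next to the generation controls. Below is a minimal, self-contained sketch of that pattern, assuming a simple `st.selectbox` picker and a direct session-state assignment for illustration; the actual selector and session-state wiring in pages.py are not shown in this diff.

```python
import streamlit as st

# Instruction text per sample, without the old "Example Instruction:" prefix
# (two entries copied from the diff; the real dictionary has many more).
audio_samples_w_instruct = {
    '1_ASR_IMDA_PART1_ASR_v2_141': "- Turn the spoken language into a text format.\n\n- Please translate the content into Chinese.",
    '20_ASR_LIBRISPEECH_CLEAN_ASR_V2_833': "- Please provide a written transcription of the speech.",
}

# Hypothetical selector: store the chosen sample's instruction in session state
# (pages.py handles selection and upload/record state elsewhere).
sample_name = st.selectbox("Audio sample", list(audio_samples_w_instruct.keys()))
st.session_state.default_instruction = audio_samples_w_instruct[sample_name]

# The heading is rendered once here, instead of being baked into every value.
st.markdown("**Example Instruction:**")
st.write(st.session_state.default_instruction)
```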