# frontend.py
# Gradio front end: sends a text instruction plus an audio clip to the backend
# HTTP API and shows the model's reply.
import os

import gradio as gr
import httpx
# Showcase rows offered in the UI. Each row is [text instruction, audio path];
# the trailing tag on a row is the original author's task label.
examples = [
    [
        "Can you turn my English into German?",
        "./show_case/common_voice_en_19664034.mp3",
    ],  # En-De
    [
        "Can you identify the initial word that connects to "
        "'currency_name' in this audio clip?",
        "./show_case/audio-1434542201-headset.wav",
    ],  # ER
    [
        "What do you think the speaker's message is intended to be in this audio?",
        "./show_case/audio-1434542201-headset.wav",
    ],  # IC
    [
        "What does the person say?",
        "./show_case/p225_002.wav",
    ],  # DFake
    # Disabled row (DFake):
    # ["Assess whether this speech's pronunciation is Real or Fake.", "./show_case/Real.wav"],
    [
        "Assess whether this speech's pronunciation is Real or Fake.",
        "./show_case/Fake.wav",
    ],  # DFake
    [
        "What emotional weight does the speaker's tone carry?\n"
        "Pick one answer from A, B, C, and D.\n"
        "A: fear\n"
        "B: sadness\n"
        "C: joy\n"
        "D: neutral",
        "./show_case/SER(emotion)_example.wav",
    ],  # SER(emotion)
    # Disabled row (SVD):
    # ["Assess whether this speech's pronunciation is Real or Fake.", "./show_case/SVD_14154_file31512.mp3.wav_16k.wav_norm.wav_mono.wav_silence.wav"],
    [
        "Choose the most suitable answer from options A, B, C, and D to "
        "respond the question in next line, you may only choose A or B or C "
        "or D.\n"
        "The number of speakers delivering this speech is what?\n"
        "A. 4\n"
        "B. 2\n"
        "C.1\n"
        "D. 3",
        "./show_case/SNV_example.wav",
    ],  # SNV
    [
        "Identify the language of the conversation you just heard.",
        "./show_case/SLR_example.wav",
    ],  # SLR
    [
        "tell the gender of the speaker in this audio.",
        "./show_case/SGR_018.wav",
    ],  # SGR
    [
        "What's the sound we're hearing in this audio from?",
        "./show_case/Sound_Vocal_example.wav",
    ],  # Sound_vocal
    [
        "What is your best guess at the setting of this sound clip?",
        "./show_case/Scene_example.wav",
    ],  # Sound_cochl
    [
        "Choose the most suitable answer from options A, B, C, and D to "
        "respond the question in next line, Please think step by step and "
        "you may only choose A or B or C or D.\n"
        "Recognize the segment where 'project' is spoken by the speaker.\n"
        "A. [5.28, 5.39]\n"
        "B. [0.92, 1.39]\n"
        "C. [4.75, 5.28]\n"
        "D. [3.86, 4.23]",
        "./show_case/SG_audio_1.wav",
    ],  # SG
    [
        "What type of business does the first person's son have?",
        "./show_case/SFT_Fisher_example.wav",
    ],  # SFT_Fisher
]
# Backend endpoint; it receives a multipart POST with a "text" form field and
# an "audio_file" upload.
_API_URL = "http://36.151.70.8:30113/process/"

# Seconds to wait on the backend. httpx's default timeout is 5 s, which a
# model-inference request can plausibly exceed, so a longer limit is set here.
_REQUEST_TIMEOUT_S = 120.0


async def call_api(text: str, audio_path: str):
    """Send a text instruction and an audio file to the backend API.

    Args:
        text: Instruction forwarded as the ``text`` form field.
        audio_path: Path of the audio file uploaded as ``audio_file``.

    Returns:
        The value stored under ``"result"`` in the backend's JSON response.

    Raises:
        ValueError: If ``audio_path`` is empty or ``None`` (no audio supplied).
        OSError: If the audio file cannot be opened or read.
        httpx.HTTPStatusError: If the backend replies with a 4xx/5xx status.
    """
    # The audio input hands over None when nothing was uploaded; fail with a
    # readable message instead of letting open() raise a TypeError.
    if not audio_path:
        raise ValueError("An audio file is required.")

    # Read the audio file into memory.
    with open(audio_path, "rb") as audio_file:
        audio_bytes = audio_file.read()

    # Send only the base name as the upload filename (as curl's -F "@file"
    # does) rather than exposing the local filesystem path to the backend.
    files = {"audio_file": (os.path.basename(audio_path), audio_bytes)}
    data = {"text": text}

    # Post to the backend API.
    async with httpx.AsyncClient(timeout=_REQUEST_TIMEOUT_S) as client:
        response = await client.post(_API_URL, files=files, data=data)
        # Surface HTTP errors explicitly rather than as a JSON/KeyError later.
        response.raise_for_status()
        return response.json()["result"]
# Input widgets, pre-filled with the same prompt and clip as the
# "What does the person say?" showcase row.
_instruction_input = gr.Textbox(
    value="What does the person say?",
    label="Enter text instruction",
)
_audio_input = gr.Audio(
    value="./show_case/p225_002.wav",
    label="Upload Audio",
    type="filepath",  # call_api receives a path on disk, not raw samples
)
_model_output = gr.Textbox(label="Model output")

# Wire the widgets to call_api; flagging is turned off.
iface = gr.Interface(
    fn=call_api,
    inputs=[_instruction_input, _audio_input],
    outputs=_model_output,
    examples=examples,
    allow_flagging="never",
)

# NOTE(review): launch() runs whenever this module is executed or imported; it
# is intentionally left outside the __main__ guard below to keep that behaviour.
iface.launch()

if __name__ == "__main__":
    # Manual check of the backend without the UI:
    #   curl -X POST -F "text=What does the person say?" -F "audio_file=@./test_audio.wav" http://36.151.70.8:30113/process/
    pass