diff --git a/.gitattributes b/.gitattributes index a9259ff0147e4ff3cc1677a41c974d0babde90f3..ea72b65b2004bbf03fdfc511e3568eb09e6a64d3 100644 --- a/.gitattributes +++ b/.gitattributes @@ -60,3 +60,21 @@ examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav filter=lfs diff=lfs merge=l examples/SQA/Spoken-Squad-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text examples/SQA/Spoken-Squad-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text examples/SQA/Spoken-Squad-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Earnings21-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Earnings21-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Earnings21-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Earnings22-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Earnings22-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Earnings22-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Tedlium3-Long-form-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Tedlium3-Long-form-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Tedlium3-Long-form-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/CN-College-Listen-MCQ-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/Public-SG-Speech-QA-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/SLUE-P2-SQA5-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/SLUE-P2-SQA5-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/SLUE-P2-SQA5-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/Spoken-Squad-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/Spoken-Squad-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/Spoken-Squad-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text diff --git a/examples/2AC/AudioCaps-Test/data-00000-of-00001.arrow b/examples/2AC/AudioCaps-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..58406c24c9292ee43c93960c4478e4d05fb95f80 --- /dev/null +++ b/examples/2AC/AudioCaps-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70213ecc31962d6d8bbc0e4d7ae2dd302c851a4af00f12b07735311f5f128288 +size 966216 diff --git a/examples/2AC/AudioCaps-Test/dataset_info.json b/examples/2AC/AudioCaps-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..82148686a795bb258e6676260855fb8cf9ef19e4 --- /dev/null +++ b/examples/2AC/AudioCaps-Test/dataset_info.json @@ -0,0 +1,160 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audiocap_id": { + "dtype": "string", + "_type": "Value" + }, + "start_time": { + "dtype": "string", + "_type": "Value" + }, + "youtube_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2AC/AudioCaps-Test/sample_0.wav b/examples/2AC/AudioCaps-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..4b2f8047fa38f9ba3acef7485b26ea02f4ada359 Binary files /dev/null and b/examples/2AC/AudioCaps-Test/sample_0.wav differ diff --git a/examples/2AC/AudioCaps-Test/sample_1.wav b/examples/2AC/AudioCaps-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..8724df3f01ccd22778f84a7a851871f1d73434fe Binary files /dev/null and b/examples/2AC/AudioCaps-Test/sample_1.wav differ diff --git a/examples/2AC/AudioCaps-Test/sample_2.wav b/examples/2AC/AudioCaps-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..48d08b19be5f0904ca976c35fbe4ae4d6c19435f Binary files /dev/null and b/examples/2AC/AudioCaps-Test/sample_2.wav differ diff --git a/examples/2AC/AudioCaps-Test/state.json b/examples/2AC/AudioCaps-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..fcc438d7c2db870c1b636d8299a9d843d607fbc6 --- /dev/null +++ b/examples/2AC/AudioCaps-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e736bf1821a473f3", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2AC/WavCaps-Test/data-00000-of-00001.arrow b/examples/2AC/WavCaps-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..ad78b874ff3abccb165ea8ef522d91d8c0f06b48 --- /dev/null +++ b/examples/2AC/WavCaps-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9636af636286b1aedad840ccef31ca7d879e824ddc6814fcb7411b7fbdf411aa +size 812352 diff --git a/examples/2AC/WavCaps-Test/dataset_info.json b/examples/2AC/WavCaps-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..acf11db0c4cc1869f7763270ddadbfe4c30f73d4 --- /dev/null +++ b/examples/2AC/WavCaps-Test/dataset_info.json @@ -0,0 +1,156 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audio_path": { + "dtype": "string", + "_type": "Value" + }, + "duration": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2AC/WavCaps-Test/sample_0.wav b/examples/2AC/WavCaps-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..ad8d45455c35860d7309e0554c6610ba6ddccb68 Binary files /dev/null and b/examples/2AC/WavCaps-Test/sample_0.wav differ diff --git a/examples/2AC/WavCaps-Test/sample_1.wav b/examples/2AC/WavCaps-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..ea10461620e829d47fb78bf4d827b95322791340 Binary files /dev/null and b/examples/2AC/WavCaps-Test/sample_1.wav differ diff --git a/examples/2AC/WavCaps-Test/sample_2.wav b/examples/2AC/WavCaps-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..b7fef91fbedf60a5d58f4f9fb93d95c1e205bf67 Binary files /dev/null and b/examples/2AC/WavCaps-Test/sample_2.wav differ diff --git a/examples/2AC/WavCaps-Test/state.json b/examples/2AC/WavCaps-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..0d52b23c8e5b93506a8af809adf9680c9cc7bf86 --- /dev/null +++ b/examples/2AC/WavCaps-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "742ab313af054565", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2AQA/AudioCaps-QA-Test/data-00000-of-00001.arrow b/examples/2AQA/AudioCaps-QA-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..91a95941f9251c2e3abe654fa45035c8015d364b --- /dev/null +++ b/examples/2AQA/AudioCaps-QA-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92b0b2fe81ee0e3a2690a444bb9b68994d89ca53db6ce174f5802293549256d1 +size 953616 diff --git a/examples/2AQA/AudioCaps-QA-Test/dataset_info.json b/examples/2AQA/AudioCaps-QA-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..c6d61c8e72325cf36fabc952fbec1ca42e49e5e8 --- /dev/null +++ b/examples/2AQA/AudioCaps-QA-Test/dataset_info.json @@ -0,0 +1,164 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audiocap_id": { + "dtype": "string", + "_type": "Value" + }, + "caption": { + "dtype": "string", + "_type": "Value" + }, + "start_time": { + "dtype": "string", + "_type": "Value" + }, + "youtube_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2AQA/AudioCaps-QA-Test/sample_0.wav b/examples/2AQA/AudioCaps-QA-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..2eec3251fe8dc9acf17f43f66f187a277cf6c6b0 Binary files /dev/null and b/examples/2AQA/AudioCaps-QA-Test/sample_0.wav differ diff --git a/examples/2AQA/AudioCaps-QA-Test/sample_1.wav b/examples/2AQA/AudioCaps-QA-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..f7e101c5918451111738962b722e47041dd59227 Binary files /dev/null and b/examples/2AQA/AudioCaps-QA-Test/sample_1.wav differ diff --git a/examples/2AQA/AudioCaps-QA-Test/sample_2.wav b/examples/2AQA/AudioCaps-QA-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..cb15b2ffff83c6ec5541c8b54a8205d58292a2d3 Binary files /dev/null and b/examples/2AQA/AudioCaps-QA-Test/sample_2.wav differ diff --git a/examples/2AQA/AudioCaps-QA-Test/state.json b/examples/2AQA/AudioCaps-QA-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..267d6cbee3f52f7b8f77f20b959ca9ce159aed16 --- /dev/null +++ b/examples/2AQA/AudioCaps-QA-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "52bc1dfcaf2a0f4b", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2AQA/Clotho-AQA-Test/data-00000-of-00001.arrow b/examples/2AQA/Clotho-AQA-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..8b76eb63bc6a3adcfad98e20600a07bc5cf84e1b --- /dev/null +++ b/examples/2AQA/Clotho-AQA-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08b4de88bf163bbfd2097769e6104ba8514380a7a097741d38e1ccc41d5b0f86 +size 2035832 diff --git a/examples/2AQA/Clotho-AQA-Test/dataset_info.json b/examples/2AQA/Clotho-AQA-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1b05abb4dce6b496c0a3c6043f27e4ca1f225320 --- /dev/null +++ b/examples/2AQA/Clotho-AQA-Test/dataset_info.json @@ -0,0 +1,147 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": {}, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2AQA/Clotho-AQA-Test/sample_0.wav b/examples/2AQA/Clotho-AQA-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..d6a07e6172778d85080c04531658efc7443ae03d Binary files /dev/null and b/examples/2AQA/Clotho-AQA-Test/sample_0.wav differ diff --git a/examples/2AQA/Clotho-AQA-Test/sample_1.wav b/examples/2AQA/Clotho-AQA-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..139bd3226ca457718b3cdab6d1e7a99dd5e4bd01 Binary files /dev/null and b/examples/2AQA/Clotho-AQA-Test/sample_1.wav differ diff --git a/examples/2AQA/Clotho-AQA-Test/sample_2.wav b/examples/2AQA/Clotho-AQA-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..b62a8feb71883e7540f521d41ac6e3eefe3862a3 Binary files /dev/null and b/examples/2AQA/Clotho-AQA-Test/sample_2.wav differ diff --git a/examples/2AQA/Clotho-AQA-Test/state.json b/examples/2AQA/Clotho-AQA-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..c58228cce70f0b257254856751c37d68dd8cd64f --- /dev/null +++ b/examples/2AQA/Clotho-AQA-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e2e76326f448d7c4", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2AQA/WavCaps-QA-Test/data-00000-of-00001.arrow b/examples/2AQA/WavCaps-QA-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..5ce3c3745ad16319f0acfb100c443202a55485af --- /dev/null +++ b/examples/2AQA/WavCaps-QA-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1736d5bcc9ca0d8d4847d7d000e6c6e63c73f6262177ea0391d180c40649da39 +size 837920 diff --git a/examples/2AQA/WavCaps-QA-Test/dataset_info.json b/examples/2AQA/WavCaps-QA-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..72ceb742ffcaf0f6ff67811fa628b1e1c7a1167e --- /dev/null +++ b/examples/2AQA/WavCaps-QA-Test/dataset_info.json @@ -0,0 +1,160 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audio_path": { + "dtype": "string", + "_type": "Value" + }, + "caption": { + "dtype": "string", + "_type": "Value" + }, + "duration": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2AQA/WavCaps-QA-Test/sample_0.wav b/examples/2AQA/WavCaps-QA-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..7639cdca2866a648ce90b4f5e385e3e6dc56c04a Binary files /dev/null and b/examples/2AQA/WavCaps-QA-Test/sample_0.wav differ diff --git a/examples/2AQA/WavCaps-QA-Test/sample_1.wav b/examples/2AQA/WavCaps-QA-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..d2cc1a6def6014328e02ea5ea25019414f8960b4 Binary files /dev/null and b/examples/2AQA/WavCaps-QA-Test/sample_1.wav differ diff --git a/examples/2AQA/WavCaps-QA-Test/sample_2.wav b/examples/2AQA/WavCaps-QA-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..9629f69dd6d6b8a713b9122b03ee04ec4aae8857 Binary files /dev/null and b/examples/2AQA/WavCaps-QA-Test/sample_2.wav differ diff --git a/examples/2AQA/WavCaps-QA-Test/state.json b/examples/2AQA/WavCaps-QA-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..a85966493650dfae84811c91b9f42d7c2811ad9c --- /dev/null +++ b/examples/2AQA/WavCaps-QA-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "40995a6cc1fe3dc7", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2AR/VoxCeleb-Accent-Test/data-00000-of-00001.arrow b/examples/2AR/VoxCeleb-Accent-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..4d554c3929f3b9461fca53182975f730f3e8566d --- /dev/null +++ b/examples/2AR/VoxCeleb-Accent-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5307150e5b08f9bfd7b81d56ca7ffbee1d731d002417d20fc3ec0713bc764533 +size 730864 diff --git a/examples/2AR/VoxCeleb-Accent-Test/dataset_info.json b/examples/2AR/VoxCeleb-Accent-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..913e85b36737f9004f81286043e7493d6f61b737 --- /dev/null +++ b/examples/2AR/VoxCeleb-Accent-Test/dataset_info.json @@ -0,0 +1,168 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Gender": { + "dtype": "string", + "_type": "Value" + }, + "Nationality": { + "dtype": "string", + "_type": "Value" + }, + "VGGFace1 ID": { + "dtype": "string", + "_type": "Value" + }, + "VoxCeleb1 ID": { + "dtype": "string", + "_type": "Value" + }, + "index": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2AR/VoxCeleb-Accent-Test/sample_0.wav b/examples/2AR/VoxCeleb-Accent-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..ae8061522976216887910263ba9414a7e60685eb Binary files /dev/null and b/examples/2AR/VoxCeleb-Accent-Test/sample_0.wav differ diff --git a/examples/2AR/VoxCeleb-Accent-Test/sample_1.wav b/examples/2AR/VoxCeleb-Accent-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..209cf78dcde2791b463e32f9a7245514655790aa Binary files /dev/null and b/examples/2AR/VoxCeleb-Accent-Test/sample_1.wav differ diff --git a/examples/2AR/VoxCeleb-Accent-Test/sample_2.wav b/examples/2AR/VoxCeleb-Accent-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..0d44634575f30bfe9c2fa2c2765ae34c192c9c90 Binary files /dev/null and b/examples/2AR/VoxCeleb-Accent-Test/sample_2.wav differ diff --git a/examples/2AR/VoxCeleb-Accent-Test/state.json b/examples/2AR/VoxCeleb-Accent-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..5354f99a49cf31f0949fd2359846d46859efd5ff --- /dev/null +++ b/examples/2AR/VoxCeleb-Accent-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "fa91a59f90c22c3c", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow b/examples/2ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..4560d6338d0bda974170bff980b354867e681e90 --- /dev/null +++ b/examples/2ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9a561ce9aed8ba4c02f86c90883029e1bb566e2b66986b17874f3bb5884d67d +size 489552 diff --git a/examples/2ASR/Common-Voice-15-En-Test/dataset_info.json b/examples/2ASR/Common-Voice-15-En-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..08f8bd6abcb7df02ab18d592990cc082baa8bfa3 --- /dev/null +++ b/examples/2ASR/Common-Voice-15-En-Test/dataset_info.json @@ -0,0 +1,188 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "accents": { + "dtype": "null", + "_type": "Value" + }, + "age": { + "dtype": "null", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "down_votes": { + "dtype": "int64", + "_type": "Value" + }, + "gender": { + "dtype": "null", + "_type": "Value" + }, + "language": { + "dtype": "string", + "_type": "Value" + }, + "locale": { + "dtype": "string", + "_type": "Value" + }, + "segment": { + "dtype": "null", + "_type": "Value" + }, + "up_votes": { + "dtype": "int64", + "_type": "Value" + }, + "variant": { + "dtype": "null", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/Common-Voice-15-En-Test/sample_0.wav b/examples/2ASR/Common-Voice-15-En-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..42753b756f05c733803356b486de2df1b1224de4 Binary files /dev/null and b/examples/2ASR/Common-Voice-15-En-Test/sample_0.wav differ diff --git a/examples/2ASR/Common-Voice-15-En-Test/sample_1.wav b/examples/2ASR/Common-Voice-15-En-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..643e1d9e9e461c2465856a18fbf89bb27f577a18 Binary files /dev/null and b/examples/2ASR/Common-Voice-15-En-Test/sample_1.wav differ diff --git a/examples/2ASR/Common-Voice-15-En-Test/sample_2.wav b/examples/2ASR/Common-Voice-15-En-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..eb0894ce127ebe7c2fadb5b11feea3e5b0ace14f Binary files /dev/null and b/examples/2ASR/Common-Voice-15-En-Test/sample_2.wav differ diff --git a/examples/2ASR/Common-Voice-15-En-Test/state.json b/examples/2ASR/Common-Voice-15-En-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..1ff74abf94d1cbf9804c3911eac7edf199fb36a2 --- /dev/null +++ b/examples/2ASR/Common-Voice-15-En-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "468db91ad949e4d4", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/Earnings21-Test/data-00000-of-00001.arrow b/examples/2ASR/Earnings21-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..543eb95281d82eea0d8930770f2a47b8bd52340c --- /dev/null +++ b/examples/2ASR/Earnings21-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fbca261ae8ac8ccc96993ea11f17836bdcfef1070835784f159b79990a5a298 +size 429108160 diff --git a/examples/2ASR/Earnings21-Test/dataset_info.json b/examples/2ASR/Earnings21-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1bf27e3aaa89f2fa43812252ac2377fab8ae1708 --- /dev/null +++ b/examples/2ASR/Earnings21-Test/dataset_info.json @@ -0,0 +1,152 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/Earnings21-Test/sample_0.wav b/examples/2ASR/Earnings21-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..877a0f85e77ae3ccb28f14601c2e8765e945c6d8 --- /dev/null +++ b/examples/2ASR/Earnings21-Test/sample_0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8fb994964e1b0df9f4675ceaa73d55da55a096f5b94d002d9f7b07c997fc83e +size 97593644 diff --git a/examples/2ASR/Earnings21-Test/sample_1.wav b/examples/2ASR/Earnings21-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..e20907a027c9da25fc6544ec87f07185b23c3567 --- /dev/null +++ b/examples/2ASR/Earnings21-Test/sample_1.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd6ba77731011a6dc02e5854a600a2036713be4c2d71abf63fd6a89b86083c4f +size 178791280 diff --git a/examples/2ASR/Earnings21-Test/sample_2.wav b/examples/2ASR/Earnings21-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..0924db55e5560d30768fc9fa8a6e5931f475a642 --- /dev/null +++ b/examples/2ASR/Earnings21-Test/sample_2.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a1d15425069b003730e79f0df467103f4ac7670f87a0539a97c82973a02943e +size 150700076 diff --git a/examples/2ASR/Earnings21-Test/state.json b/examples/2ASR/Earnings21-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..ce1987be7235db123fcdddfe4a75272abf7513d4 --- /dev/null +++ b/examples/2ASR/Earnings21-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "8cc0ad99446f1aba", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/Earnings22-Test/data-00000-of-00001.arrow b/examples/2ASR/Earnings22-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..803b349472ac02701fad19e91bdc02942939196d --- /dev/null +++ b/examples/2ASR/Earnings22-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:497dd6d287df9a8be5194b7875ae88f275127986d3fc538601382e80244bbb7b +size 332277848 diff --git a/examples/2ASR/Earnings22-Test/dataset_info.json b/examples/2ASR/Earnings22-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1bf27e3aaa89f2fa43812252ac2377fab8ae1708 --- /dev/null +++ b/examples/2ASR/Earnings22-Test/dataset_info.json @@ -0,0 +1,152 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/Earnings22-Test/sample_0.wav b/examples/2ASR/Earnings22-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..c31c73e0e078edb570b30c9ddcd8679e40ac62dd --- /dev/null +++ b/examples/2ASR/Earnings22-Test/sample_0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6530f950b07b4747ca26fa70586ec563cf20c222cab6e53fc2e94e28f7a3d867 +size 167237960 diff --git a/examples/2ASR/Earnings22-Test/sample_1.wav b/examples/2ASR/Earnings22-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..e7fc4350e24534cdda9135085b77b08356adf39b --- /dev/null +++ b/examples/2ASR/Earnings22-Test/sample_1.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c4034503f1974ecfdbd050df7e75d97cbae4933ea62af829eefe6e17295c6f4 +size 38863630 diff --git a/examples/2ASR/Earnings22-Test/sample_2.wav b/examples/2ASR/Earnings22-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..12c42f881e73d8b6f1905855b844c1acc816553f --- /dev/null +++ b/examples/2ASR/Earnings22-Test/sample_2.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba099990e90f3db2901b585ce601b71960ab54ef76f5bf143a74d816ea615f07 +size 124561076 diff --git a/examples/2ASR/Earnings22-Test/state.json b/examples/2ASR/Earnings22-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..ac26bdba683e345445fdeb81908f03626f2ced73 --- /dev/null +++ b/examples/2ASR/Earnings22-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "331c061bce6e651c", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/GigaSpeech-Test2/data-00000-of-00001.arrow b/examples/2ASR/GigaSpeech-Test2/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..9f584469430bb4f4aecca47ac52ea723be454e83 --- /dev/null +++ b/examples/2ASR/GigaSpeech-Test2/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e46b3463f6df894fa055f08fa9ea1f44b4cea43a1be43af2a2bd8c5e8fee3ec5 +size 573048 diff --git a/examples/2ASR/GigaSpeech-Test2/dataset_info.json b/examples/2ASR/GigaSpeech-Test2/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1dd0025578e934e74b979da9e81789eedd9a2f29 --- /dev/null +++ b/examples/2ASR/GigaSpeech-Test2/dataset_info.json @@ -0,0 +1,180 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audio_id": { + "dtype": "string", + "_type": "Value" + }, + "begin_time": { + "dtype": "float64", + "_type": "Value" + }, + "category": { + "dtype": "int64", + "_type": "Value" + }, + "end_time": { + "dtype": "float64", + "_type": "Value" + }, + "segment_id": { + "dtype": "string", + "_type": "Value" + }, + "source": { + "dtype": "int64", + "_type": "Value" + }, + "speaker": { + "dtype": "string", + "_type": "Value" + }, + "url": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/GigaSpeech-Test2/sample_0.wav b/examples/2ASR/GigaSpeech-Test2/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..0d232b1996ee07bc47f24d06fe8b860ee1b63b11 Binary files /dev/null and b/examples/2ASR/GigaSpeech-Test2/sample_0.wav differ diff --git a/examples/2ASR/GigaSpeech-Test2/sample_1.wav b/examples/2ASR/GigaSpeech-Test2/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..ee5fced84fd0691c2b4b288be9a5ad73ac67bc6e Binary files /dev/null and b/examples/2ASR/GigaSpeech-Test2/sample_1.wav differ diff --git a/examples/2ASR/GigaSpeech-Test2/sample_2.wav b/examples/2ASR/GigaSpeech-Test2/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..2a9edb2ea3e8a96c7ab70b232249205e765f02d0 Binary files /dev/null and b/examples/2ASR/GigaSpeech-Test2/sample_2.wav differ diff --git a/examples/2ASR/GigaSpeech-Test2/state.json b/examples/2ASR/GigaSpeech-Test2/state.json new file mode 100644 index 0000000000000000000000000000000000000000..8bd5fd3d45201fc6807cb2364c48d5ba722bb4bf --- /dev/null +++ b/examples/2ASR/GigaSpeech-Test2/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "68d371cc267ff1d2", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/IMDA-Part1-ASR-Test/data-00000-of-00001.arrow b/examples/2ASR/IMDA-Part1-ASR-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..247e84a3ceefb4298605f6f8348e8ce97064e8b2 --- /dev/null +++ b/examples/2ASR/IMDA-Part1-ASR-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ae1f6a0a5d17026374d88e7219dd33b884793985b371f3a6463755e4421f44e +size 427360 diff --git a/examples/2ASR/IMDA-Part1-ASR-Test/dataset_info.json b/examples/2ASR/IMDA-Part1-ASR-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ede02d74b595679e2a6f3b2256ab4c69e535f09e --- /dev/null +++ b/examples/2ASR/IMDA-Part1-ASR-Test/dataset_info.json @@ -0,0 +1,200 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "conversation_id": { + "dtype": "string", + "_type": "Value" + }, + "partition": { + "dtype": "string", + "_type": "Value" + }, + "settings": { + "channel": { + "dtype": "string", + "_type": "Value" + }, + "session": { + "dtype": "string", + "_type": "Value" + } + }, + "speaker": { + "device_c0": { + "dtype": "string", + "_type": "Value" + }, + "device_c1": { + "dtype": "string", + "_type": "Value" + }, + "device_c2": { + "dtype": "string", + "_type": "Value" + }, + "ethnic_group": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "string", + "_type": "Value" + }, + "part1_id": { + "dtype": "string", + "_type": "Value" + }, + "part2_id": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/IMDA-Part1-ASR-Test/sample_0.wav b/examples/2ASR/IMDA-Part1-ASR-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..b100f0ace853e729d509b43b5b9e00601dbbc4e8 Binary files /dev/null and b/examples/2ASR/IMDA-Part1-ASR-Test/sample_0.wav differ diff --git a/examples/2ASR/IMDA-Part1-ASR-Test/sample_1.wav b/examples/2ASR/IMDA-Part1-ASR-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..adbeb1917d8397ebb3ab9b6b216c607bfe62e881 Binary files /dev/null and b/examples/2ASR/IMDA-Part1-ASR-Test/sample_1.wav differ diff --git a/examples/2ASR/IMDA-Part1-ASR-Test/sample_2.wav b/examples/2ASR/IMDA-Part1-ASR-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..4f4800df16ae7a179504dcfe0c30468936287ae4 Binary files /dev/null and b/examples/2ASR/IMDA-Part1-ASR-Test/sample_2.wav differ diff --git a/examples/2ASR/IMDA-Part1-ASR-Test/state.json b/examples/2ASR/IMDA-Part1-ASR-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..03121b2bd71d513ba2edeec803b648f34087ad6e --- /dev/null +++ b/examples/2ASR/IMDA-Part1-ASR-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "ddfb47abed13c356", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/IMDA-Part2-ASR-Test2/data-00000-of-00001.arrow b/examples/2ASR/IMDA-Part2-ASR-Test2/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..896c4fc75e607933d5155b06f9d4c67812c71847 --- /dev/null +++ b/examples/2ASR/IMDA-Part2-ASR-Test2/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6203852fc63ee5b00dd73f6857d2a22a3f8a8dc3b87917bb99d1cef8448150a5 +size 438120 diff --git a/examples/2ASR/IMDA-Part2-ASR-Test2/dataset_info.json b/examples/2ASR/IMDA-Part2-ASR-Test2/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..af0260da818c604e190e3b198aebef1a391419ec --- /dev/null +++ b/examples/2ASR/IMDA-Part2-ASR-Test2/dataset_info.json @@ -0,0 +1,92 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "conversation_id": { + "dtype": "string", + "_type": "Value" + }, + "partition": { + "dtype": "string", + "_type": "Value" + }, + "settings": { + "channel": { + "dtype": "string", + "_type": "Value" + }, + "session": { + "dtype": "string", + "_type": "Value" + } + }, + "speaker": { + "device_c0": { + "dtype": "string", + "_type": "Value" + }, + "device_c1": { + "dtype": "string", + "_type": "Value" + }, + "device_c2": { + "dtype": "string", + "_type": "Value" + }, + "ethnic_group": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "string", + "_type": "Value" + }, + "part1_id": { + "dtype": "string", + "_type": "Value" + }, + "part2_id": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/IMDA-Part2-ASR-Test2/sample_0.wav b/examples/2ASR/IMDA-Part2-ASR-Test2/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..d79fde19cec7b066b9f3f546d6ff6366dfa4daef Binary files /dev/null and b/examples/2ASR/IMDA-Part2-ASR-Test2/sample_0.wav differ diff --git a/examples/2ASR/IMDA-Part2-ASR-Test2/sample_1.wav b/examples/2ASR/IMDA-Part2-ASR-Test2/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..9fac54a02b8b9ad1ee6502ef41ef1d50a7213de5 Binary files /dev/null and b/examples/2ASR/IMDA-Part2-ASR-Test2/sample_1.wav differ diff --git a/examples/2ASR/IMDA-Part2-ASR-Test2/sample_2.wav b/examples/2ASR/IMDA-Part2-ASR-Test2/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..d866ad7a1ddfc3541ce3adbecf00f688489cdd3f Binary files /dev/null and b/examples/2ASR/IMDA-Part2-ASR-Test2/sample_2.wav differ diff --git a/examples/2ASR/IMDA-Part2-ASR-Test2/state.json b/examples/2ASR/IMDA-Part2-ASR-Test2/state.json new file mode 100644 index 0000000000000000000000000000000000000000..3c6b051944e6842913bce863ca9c239da3f8e0be --- /dev/null +++ b/examples/2ASR/IMDA-Part2-ASR-Test2/state.json @@ -0,0 +1,18 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "9f3d440792a605d2", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/LibriSpeech-Test-Clean/data-00000-of-00001.arrow b/examples/2ASR/LibriSpeech-Test-Clean/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..ba9c5364de7c43d55140acfff92587f682f7c260 --- /dev/null +++ b/examples/2ASR/LibriSpeech-Test-Clean/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e050bcd936107ea58a083a64fc36c85afc55b1f570d8285e5eb93b4244ca3702 +size 491728 diff --git a/examples/2ASR/LibriSpeech-Test-Clean/dataset_info.json b/examples/2ASR/LibriSpeech-Test-Clean/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..5a91f71cfb2044e6060c8f395ee4b798384d32d8 --- /dev/null +++ b/examples/2ASR/LibriSpeech-Test-Clean/dataset_info.json @@ -0,0 +1,164 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/LibriSpeech-Test-Clean/sample_0.wav b/examples/2ASR/LibriSpeech-Test-Clean/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..051a14c05f7c270da3d842024c5936075cb5c2e6 Binary files /dev/null and b/examples/2ASR/LibriSpeech-Test-Clean/sample_0.wav differ diff --git a/examples/2ASR/LibriSpeech-Test-Clean/sample_1.wav b/examples/2ASR/LibriSpeech-Test-Clean/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..30816d1c205dd136109c6abfca19abf249813c68 Binary files /dev/null and b/examples/2ASR/LibriSpeech-Test-Clean/sample_1.wav differ diff --git a/examples/2ASR/LibriSpeech-Test-Clean/sample_2.wav b/examples/2ASR/LibriSpeech-Test-Clean/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..a228ce116181b6b19b741cd9ffc2e1853704adc5 Binary files /dev/null and b/examples/2ASR/LibriSpeech-Test-Clean/sample_2.wav differ diff --git a/examples/2ASR/LibriSpeech-Test-Clean/state.json b/examples/2ASR/LibriSpeech-Test-Clean/state.json new file mode 100644 index 0000000000000000000000000000000000000000..dda0f24f40bffbb5dbb1f236bc44f6e715655e67 --- /dev/null +++ b/examples/2ASR/LibriSpeech-Test-Clean/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "d9f5d173c305ae96", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/LibriSpeech-Test-Other/data-00000-of-00001.arrow b/examples/2ASR/LibriSpeech-Test-Other/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..f962e9f689dbb890c217156e0c6cbaddb2780490 --- /dev/null +++ b/examples/2ASR/LibriSpeech-Test-Other/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2375da90470a7502211721ccf088ed76808ef698133028c7a74e27606d6f3949 +size 1025488 diff --git a/examples/2ASR/LibriSpeech-Test-Other/dataset_info.json b/examples/2ASR/LibriSpeech-Test-Other/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..5a91f71cfb2044e6060c8f395ee4b798384d32d8 --- /dev/null +++ b/examples/2ASR/LibriSpeech-Test-Other/dataset_info.json @@ -0,0 +1,164 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/LibriSpeech-Test-Other/sample_0.wav b/examples/2ASR/LibriSpeech-Test-Other/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..c274d02bd3392db09ea1a95bb050a248627b91cc Binary files /dev/null and b/examples/2ASR/LibriSpeech-Test-Other/sample_0.wav differ diff --git a/examples/2ASR/LibriSpeech-Test-Other/sample_1.wav b/examples/2ASR/LibriSpeech-Test-Other/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..eea20cd08457ea3ae2d55e91c7240602ae30436f Binary files /dev/null and b/examples/2ASR/LibriSpeech-Test-Other/sample_1.wav differ diff --git a/examples/2ASR/LibriSpeech-Test-Other/sample_2.wav b/examples/2ASR/LibriSpeech-Test-Other/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..b7cc7a7ab0cb2cb02a5253dcecede16c688acf86 Binary files /dev/null and b/examples/2ASR/LibriSpeech-Test-Other/sample_2.wav differ diff --git a/examples/2ASR/LibriSpeech-Test-Other/state.json b/examples/2ASR/LibriSpeech-Test-Other/state.json new file mode 100644 index 0000000000000000000000000000000000000000..952838bee518f052b1de767f99c58c1282b17596 --- /dev/null +++ b/examples/2ASR/LibriSpeech-Test-Other/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "8bd0648dc412be04", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/Peoples-Speech-Test/data-00000-of-00001.arrow b/examples/2ASR/Peoples-Speech-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..900558873c787e1b6e8cecf5934a8b61fa5237e4 --- /dev/null +++ b/examples/2ASR/Peoples-Speech-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4299dbcf7bfc7b223f4a653fda3176a8d3e24c09581a41a806b86fc360527bb4 +size 439504 diff --git a/examples/2ASR/Peoples-Speech-Test/dataset_info.json b/examples/2ASR/Peoples-Speech-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..a8d496de62c861269b2a5c8bf9826fedc8abf807 --- /dev/null +++ b/examples/2ASR/Peoples-Speech-Test/dataset_info.json @@ -0,0 +1,156 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "duration_ms": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/Peoples-Speech-Test/sample_0.wav b/examples/2ASR/Peoples-Speech-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..ff25cea2d23ec61f9199873fc67227221c1bebca Binary files /dev/null and b/examples/2ASR/Peoples-Speech-Test/sample_0.wav differ diff --git a/examples/2ASR/Peoples-Speech-Test/sample_1.wav b/examples/2ASR/Peoples-Speech-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..4e7ddfc1977129dfb02d6c4424b362bcd60c1a15 Binary files /dev/null and b/examples/2ASR/Peoples-Speech-Test/sample_1.wav differ diff --git a/examples/2ASR/Peoples-Speech-Test/sample_2.wav b/examples/2ASR/Peoples-Speech-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..9f590a810351e28a60b5da87f6821f06f8916790 Binary files /dev/null and b/examples/2ASR/Peoples-Speech-Test/sample_2.wav differ diff --git a/examples/2ASR/Peoples-Speech-Test/state.json b/examples/2ASR/Peoples-Speech-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..60db649d02fbab6497da719d1c536be91f9bda39 --- /dev/null +++ b/examples/2ASR/Peoples-Speech-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "de704174c1b2e1ea", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/Tedlium3-Long-form-Test/data-00000-of-00001.arrow b/examples/2ASR/Tedlium3-Long-form-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..9ab6d2f53dcf630417c03a739060cdb58f7afdb4 --- /dev/null +++ b/examples/2ASR/Tedlium3-Long-form-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c292f812d20458d9589f8a14933e7b031b1b35e5a496449f4913c3af6d066bc8 +size 98923056 diff --git a/examples/2ASR/Tedlium3-Long-form-Test/dataset_info.json b/examples/2ASR/Tedlium3-Long-form-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..55009f0312ac6d6605288017abbf50e0bafefdc3 --- /dev/null +++ b/examples/2ASR/Tedlium3-Long-form-Test/dataset_info.json @@ -0,0 +1,56 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "file": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/Tedlium3-Long-form-Test/sample_0.wav b/examples/2ASR/Tedlium3-Long-form-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..41dbac609aefb991db732192675b67164a3106d5 --- /dev/null +++ b/examples/2ASR/Tedlium3-Long-form-Test/sample_0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac85b08c75fad06d968942b517e53495961ff861c6e794b576ecce3b406bcbf8 +size 51095404 diff --git a/examples/2ASR/Tedlium3-Long-form-Test/sample_1.wav b/examples/2ASR/Tedlium3-Long-form-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..17eccb9b542ec47bf29066074f7435f6b06f42cd --- /dev/null +++ b/examples/2ASR/Tedlium3-Long-form-Test/sample_1.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0361b7dc4a9437a698a1d28fb3e73718810a6d685f7932f2a04997ad0475b182 +size 36730348 diff --git a/examples/2ASR/Tedlium3-Long-form-Test/sample_2.wav b/examples/2ASR/Tedlium3-Long-form-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..7aaa0adc7dcdd7bc19c70c6295b943e8be3931f1 --- /dev/null +++ b/examples/2ASR/Tedlium3-Long-form-Test/sample_2.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8cb8f81de8eecb106d072fb26bd87e749fd9fcd28e5cc06949ddb2c542eff3c +size 11046766 diff --git a/examples/2ASR/Tedlium3-Long-form-Test/state.json b/examples/2ASR/Tedlium3-Long-form-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..3a56b9cee5a756794d32d1b5f03c01a501c41606 --- /dev/null +++ b/examples/2ASR/Tedlium3-Long-form-Test/state.json @@ -0,0 +1,18 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f95b9bf4e3dea7c1", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/Tedlium3-Test/data-00000-of-00001.arrow b/examples/2ASR/Tedlium3-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..043a6aa5f5647a465c24325fac5bd8ab7056a64c --- /dev/null +++ b/examples/2ASR/Tedlium3-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:335b82cbd0d97571df4e6b52cb0d3b81f1410f019212964143d4a6c20333f3cc +size 907976 diff --git a/examples/2ASR/Tedlium3-Test/dataset_info.json b/examples/2ASR/Tedlium3-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..55009f0312ac6d6605288017abbf50e0bafefdc3 --- /dev/null +++ b/examples/2ASR/Tedlium3-Test/dataset_info.json @@ -0,0 +1,56 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "file": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/Tedlium3-Test/sample_0.wav b/examples/2ASR/Tedlium3-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..a07fc005b1f77a01b066c0ef962b04e634f4c356 Binary files /dev/null and b/examples/2ASR/Tedlium3-Test/sample_0.wav differ diff --git a/examples/2ASR/Tedlium3-Test/sample_1.wav b/examples/2ASR/Tedlium3-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..f864baa998ef015b529fc69d8bccca6f284233f1 Binary files /dev/null and b/examples/2ASR/Tedlium3-Test/sample_1.wav differ diff --git a/examples/2ASR/Tedlium3-Test/sample_2.wav b/examples/2ASR/Tedlium3-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..66c6482678614fbd0c658553c4c727a50d80c57a Binary files /dev/null and b/examples/2ASR/Tedlium3-Test/sample_2.wav differ diff --git a/examples/2ASR/Tedlium3-Test/state.json b/examples/2ASR/Tedlium3-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..ffb37e795661eaf0f656a4272372d0919a492fe0 --- /dev/null +++ b/examples/2ASR/Tedlium3-Test/state.json @@ -0,0 +1,18 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "fb20b90d5641df89", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2CNASR/Aishell-ASR-ZH-Test/data-00000-of-00001.arrow b/examples/2CNASR/Aishell-ASR-ZH-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..e3c04ad74452482acd4a29e1a7407a5fc23cbf57 --- /dev/null +++ b/examples/2CNASR/Aishell-ASR-ZH-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66386a59d99f85838ad5d64c7f7b3108c15a22782f61783ec2debb208d7a2f8d +size 569936 diff --git a/examples/2CNASR/Aishell-ASR-ZH-Test/dataset_info.json b/examples/2CNASR/Aishell-ASR-ZH-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..051243e1e6b3046a83599b80eb901679ff2608d8 --- /dev/null +++ b/examples/2CNASR/Aishell-ASR-ZH-Test/dataset_info.json @@ -0,0 +1,144 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "id": { + "dtype": "string", + "_type": "Value" + }, + "speaker": { + "gender": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2CNASR/Aishell-ASR-ZH-Test/sample_0.wav b/examples/2CNASR/Aishell-ASR-ZH-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..a69d64c8284caa8ca7ef3f5ecaf6ebc0519020ef Binary files /dev/null and b/examples/2CNASR/Aishell-ASR-ZH-Test/sample_0.wav differ diff --git a/examples/2CNASR/Aishell-ASR-ZH-Test/sample_1.wav b/examples/2CNASR/Aishell-ASR-ZH-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..a149a1ab68e19b0029225518d217608c573d37e2 Binary files /dev/null and b/examples/2CNASR/Aishell-ASR-ZH-Test/sample_1.wav differ diff --git a/examples/2CNASR/Aishell-ASR-ZH-Test/sample_2.wav b/examples/2CNASR/Aishell-ASR-ZH-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..b2816f1cb90418b611227d1e6d044e886712b426 Binary files /dev/null and b/examples/2CNASR/Aishell-ASR-ZH-Test/sample_2.wav differ diff --git a/examples/2CNASR/Aishell-ASR-ZH-Test/state.json b/examples/2CNASR/Aishell-ASR-ZH-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..4495f512dec2e1549747a3cd9e31137e0afb8081 --- /dev/null +++ b/examples/2CNASR/Aishell-ASR-ZH-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f9833c929864587b", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ER/IEMOCAP-Emotion-Test/data-00000-of-00001.arrow b/examples/2ER/IEMOCAP-Emotion-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..c2bcf1e6ce6f0a0047dd85cce0bbf11e0184b823 --- /dev/null +++ b/examples/2ER/IEMOCAP-Emotion-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2d440f3076e388bbeeec29027bd4549d005f3a8bbb342bdc9a77802c79f01cf +size 838160 diff --git a/examples/2ER/IEMOCAP-Emotion-Test/dataset_info.json b/examples/2ER/IEMOCAP-Emotion-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ae1585a301a57eef40e3c39259d56a7e70e2be43 --- /dev/null +++ b/examples/2ER/IEMOCAP-Emotion-Test/dataset_info.json @@ -0,0 +1,168 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Emotion": { + "dtype": "int64", + "_type": "Value" + }, + "End Time": { + "dtype": "float64", + "_type": "Value" + }, + "Gender": { + "dtype": "int64", + "_type": "Value" + }, + "Start Time": { + "dtype": "float64", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ER/IEMOCAP-Emotion-Test/sample_0.wav b/examples/2ER/IEMOCAP-Emotion-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..3aea288a199cf828777f07051ce17bb65dd122b9 Binary files /dev/null and b/examples/2ER/IEMOCAP-Emotion-Test/sample_0.wav differ diff --git a/examples/2ER/IEMOCAP-Emotion-Test/sample_1.wav b/examples/2ER/IEMOCAP-Emotion-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..f20cf5efc4a86d62d733d80fc2cde556ea107245 Binary files /dev/null and b/examples/2ER/IEMOCAP-Emotion-Test/sample_1.wav differ diff --git a/examples/2ER/IEMOCAP-Emotion-Test/sample_2.wav b/examples/2ER/IEMOCAP-Emotion-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..6085d420403bb54190cde8d1cffef75b35f2fa88 Binary files /dev/null and b/examples/2ER/IEMOCAP-Emotion-Test/sample_2.wav differ diff --git a/examples/2ER/IEMOCAP-Emotion-Test/state.json b/examples/2ER/IEMOCAP-Emotion-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..f9e210ccb738232f4a7ce004649cc0811b5622e7 --- /dev/null +++ b/examples/2ER/IEMOCAP-Emotion-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "78bf80b897adbddb", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ER/MELD-Emotion-Test/data-00000-of-00001.arrow b/examples/2ER/MELD-Emotion-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..5c264b825c805731d035e34124efed546055316f --- /dev/null +++ b/examples/2ER/MELD-Emotion-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fe36d4394fb74d9d92909224e84e0de896cabb5fc94c878b23c3380594a65bb +size 352408 diff --git a/examples/2ER/MELD-Emotion-Test/dataset_info.json b/examples/2ER/MELD-Emotion-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..506cf34c0d7158134159a3234f9f98b8e6b74f28 --- /dev/null +++ b/examples/2ER/MELD-Emotion-Test/dataset_info.json @@ -0,0 +1,184 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Emotion": { + "dtype": "int64", + "_type": "Value" + }, + "EndTime": { + "dtype": "string", + "_type": "Value" + }, + "Episode": { + "dtype": "int64", + "_type": "Value" + }, + "Gender": { + "dtype": "null", + "_type": "Value" + }, + "Season": { + "dtype": "int64", + "_type": "Value" + }, + "Sentiment": { + "dtype": "int64", + "_type": "Value" + }, + "Speaker": { + "dtype": "string", + "_type": "Value" + }, + "StartTime": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ER/MELD-Emotion-Test/sample_0.wav b/examples/2ER/MELD-Emotion-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..819fccfa77653af1d839db36a4d89d6c5073676d Binary files /dev/null and b/examples/2ER/MELD-Emotion-Test/sample_0.wav differ diff --git a/examples/2ER/MELD-Emotion-Test/sample_1.wav b/examples/2ER/MELD-Emotion-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..a21acc5a4a0831f75c28e76a93e0339f98a5dab9 Binary files /dev/null and b/examples/2ER/MELD-Emotion-Test/sample_1.wav differ diff --git a/examples/2ER/MELD-Emotion-Test/sample_2.wav b/examples/2ER/MELD-Emotion-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..98c1a4ec43768374663eb5cae1305480110d87e8 Binary files /dev/null and b/examples/2ER/MELD-Emotion-Test/sample_2.wav differ diff --git a/examples/2ER/MELD-Emotion-Test/state.json b/examples/2ER/MELD-Emotion-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..9c219f8e85ce34e78d9b81244ed1bd84435ebb4c --- /dev/null +++ b/examples/2ER/MELD-Emotion-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a5a596edab97a213", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ER/MELD-Sentiment-Test/data-00000-of-00001.arrow b/examples/2ER/MELD-Sentiment-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..0e0726e555e52b2e486741a8cf021542e7c2dd11 --- /dev/null +++ b/examples/2ER/MELD-Sentiment-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2388eb71f8718170fe81707866ea2fb68a654153cb6c8a4f503de2e4ef13804d +size 232392 diff --git a/examples/2ER/MELD-Sentiment-Test/dataset_info.json b/examples/2ER/MELD-Sentiment-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..506cf34c0d7158134159a3234f9f98b8e6b74f28 --- /dev/null +++ b/examples/2ER/MELD-Sentiment-Test/dataset_info.json @@ -0,0 +1,184 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Emotion": { + "dtype": "int64", + "_type": "Value" + }, + "EndTime": { + "dtype": "string", + "_type": "Value" + }, + "Episode": { + "dtype": "int64", + "_type": "Value" + }, + "Gender": { + "dtype": "null", + "_type": "Value" + }, + "Season": { + "dtype": "int64", + "_type": "Value" + }, + "Sentiment": { + "dtype": "int64", + "_type": "Value" + }, + "Speaker": { + "dtype": "string", + "_type": "Value" + }, + "StartTime": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ER/MELD-Sentiment-Test/sample_0.wav b/examples/2ER/MELD-Sentiment-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..025684f18597120ad16569fb77cc1700b855f6c9 Binary files /dev/null and b/examples/2ER/MELD-Sentiment-Test/sample_0.wav differ diff --git a/examples/2ER/MELD-Sentiment-Test/sample_1.wav b/examples/2ER/MELD-Sentiment-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..8d64fff1bab98e316eec6c61842e48f7b8f333c4 Binary files /dev/null and b/examples/2ER/MELD-Sentiment-Test/sample_1.wav differ diff --git a/examples/2ER/MELD-Sentiment-Test/sample_2.wav b/examples/2ER/MELD-Sentiment-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..f639c22e1b1e24a5d6db354da2e8cf424cdb45be Binary files /dev/null and b/examples/2ER/MELD-Sentiment-Test/sample_2.wav differ diff --git a/examples/2ER/MELD-Sentiment-Test/state.json b/examples/2ER/MELD-Sentiment-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..917ccc849fc69d74df055b821dd46d31b29e2e8d --- /dev/null +++ b/examples/2ER/MELD-Sentiment-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a052e830551840d2", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2GR/IEMOCAP-Gender-Test/data-00000-of-00001.arrow b/examples/2GR/IEMOCAP-Gender-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..55669f769aac298829eac258c09015ed16c75897 --- /dev/null +++ b/examples/2GR/IEMOCAP-Gender-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89d1237586d95f75b99aaea0bd47680ad91deaaa3a4c20d94571e76bbc1e951f +size 411576 diff --git a/examples/2GR/IEMOCAP-Gender-Test/dataset_info.json b/examples/2GR/IEMOCAP-Gender-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ae1585a301a57eef40e3c39259d56a7e70e2be43 --- /dev/null +++ b/examples/2GR/IEMOCAP-Gender-Test/dataset_info.json @@ -0,0 +1,168 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Emotion": { + "dtype": "int64", + "_type": "Value" + }, + "End Time": { + "dtype": "float64", + "_type": "Value" + }, + "Gender": { + "dtype": "int64", + "_type": "Value" + }, + "Start Time": { + "dtype": "float64", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2GR/IEMOCAP-Gender-Test/sample_0.wav b/examples/2GR/IEMOCAP-Gender-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..0f29a85f3b63eb74da8cd026aab5aa13498e0125 Binary files /dev/null and b/examples/2GR/IEMOCAP-Gender-Test/sample_0.wav differ diff --git a/examples/2GR/IEMOCAP-Gender-Test/sample_1.wav b/examples/2GR/IEMOCAP-Gender-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..56cec7eeb6836d092e76201787aa22b9436c13f2 Binary files /dev/null and b/examples/2GR/IEMOCAP-Gender-Test/sample_1.wav differ diff --git a/examples/2GR/IEMOCAP-Gender-Test/sample_2.wav b/examples/2GR/IEMOCAP-Gender-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..5c68747f7620a99d23ad13f8d2fd7386ed49332c Binary files /dev/null and b/examples/2GR/IEMOCAP-Gender-Test/sample_2.wav differ diff --git a/examples/2GR/IEMOCAP-Gender-Test/state.json b/examples/2GR/IEMOCAP-Gender-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..909b8a4a39ef78e0e286d8b51244d38d68e3aa31 --- /dev/null +++ b/examples/2GR/IEMOCAP-Gender-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "7a4eb80e3f03a3f4", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2GR/VoxCeleb-Gender-Test/data-00000-of-00001.arrow b/examples/2GR/VoxCeleb-Gender-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..c522d812d46f2f810770b551d21077850da7c6a2 --- /dev/null +++ b/examples/2GR/VoxCeleb-Gender-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7b5fb1ac8505719dd9952b830c5d064d837a406bf932641a9bdc5de448d9c6c +size 609480 diff --git a/examples/2GR/VoxCeleb-Gender-Test/dataset_info.json b/examples/2GR/VoxCeleb-Gender-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..913e85b36737f9004f81286043e7493d6f61b737 --- /dev/null +++ b/examples/2GR/VoxCeleb-Gender-Test/dataset_info.json @@ -0,0 +1,168 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Gender": { + "dtype": "string", + "_type": "Value" + }, + "Nationality": { + "dtype": "string", + "_type": "Value" + }, + "VGGFace1 ID": { + "dtype": "string", + "_type": "Value" + }, + "VoxCeleb1 ID": { + "dtype": "string", + "_type": "Value" + }, + "index": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2GR/VoxCeleb-Gender-Test/sample_0.wav b/examples/2GR/VoxCeleb-Gender-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..2f6e333859e59c14c99e08c0ed07ec34da06b7cc Binary files /dev/null and b/examples/2GR/VoxCeleb-Gender-Test/sample_0.wav differ diff --git a/examples/2GR/VoxCeleb-Gender-Test/sample_1.wav b/examples/2GR/VoxCeleb-Gender-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..cdc77dfd8a54374fbf21b32128129fde94d44c2c Binary files /dev/null and b/examples/2GR/VoxCeleb-Gender-Test/sample_1.wav differ diff --git a/examples/2GR/VoxCeleb-Gender-Test/sample_2.wav b/examples/2GR/VoxCeleb-Gender-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..8b3634c13fc5cefe14475fc673f9458aa7c79815 Binary files /dev/null and b/examples/2GR/VoxCeleb-Gender-Test/sample_2.wav differ diff --git a/examples/2GR/VoxCeleb-Gender-Test/state.json b/examples/2GR/VoxCeleb-Gender-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..d0591ec49e94a4c129e72d00c70af869214dafd8 --- /dev/null +++ b/examples/2GR/VoxCeleb-Gender-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "29964e6c779e5e22", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2SI/ALPACA-Audio-Test/data-00000-of-00001.arrow b/examples/2SI/ALPACA-Audio-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..0e9f779ec899f578e1d5291ff7b261efe4f4d32c --- /dev/null +++ b/examples/2SI/ALPACA-Audio-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba310d4adba762052395b3be53afa08451034a8845e8f9f244fa92c84f7358c6 +size 540072 diff --git a/examples/2SI/ALPACA-Audio-Test/dataset_info.json b/examples/2SI/ALPACA-Audio-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..882789dac9ac95ae77467581f55b4f296746e7a9 --- /dev/null +++ b/examples/2SI/ALPACA-Audio-Test/dataset_info.json @@ -0,0 +1,176 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "index": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2SI/ALPACA-Audio-Test/sample_0.wav b/examples/2SI/ALPACA-Audio-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..70248c9fd2ff3b5792099536311d05bed53fba47 Binary files /dev/null and b/examples/2SI/ALPACA-Audio-Test/sample_0.wav differ diff --git a/examples/2SI/ALPACA-Audio-Test/sample_1.wav b/examples/2SI/ALPACA-Audio-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..0fa85b0e999023c1118dd960236ae84689438330 Binary files /dev/null and b/examples/2SI/ALPACA-Audio-Test/sample_1.wav differ diff --git a/examples/2SI/ALPACA-Audio-Test/sample_2.wav b/examples/2SI/ALPACA-Audio-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..b5f187db3678a198b23df8b683f8492ae5ff60ac Binary files /dev/null and b/examples/2SI/ALPACA-Audio-Test/sample_2.wav differ diff --git a/examples/2SI/ALPACA-Audio-Test/state.json b/examples/2SI/ALPACA-Audio-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..6ba2848fa1bbfd0cf127c67ce6315770b72d590d --- /dev/null +++ b/examples/2SI/ALPACA-Audio-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f46fe3d489641513", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2SI/OpenHermes-Audio-Test/data-00000-of-00001.arrow b/examples/2SI/OpenHermes-Audio-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..f78a87653eaed972a87688964c3465cafe9d1cdd --- /dev/null +++ b/examples/2SI/OpenHermes-Audio-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13ec7bd9b5a3ee4bd5a05803c327343b6e461e8ec08413484fd77c43cd6b26bc +size 435240 diff --git a/examples/2SI/OpenHermes-Audio-Test/dataset_info.json b/examples/2SI/OpenHermes-Audio-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ac515de4ff190d0b7bd0ce417dfbe2b34d6c0bb8 --- /dev/null +++ b/examples/2SI/OpenHermes-Audio-Test/dataset_info.json @@ -0,0 +1,188 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "index": { + "dtype": "string", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2SI/OpenHermes-Audio-Test/sample_0.wav b/examples/2SI/OpenHermes-Audio-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..add65880600b56384cc5350e64c30be65d0ec19d Binary files /dev/null and b/examples/2SI/OpenHermes-Audio-Test/sample_0.wav differ diff --git a/examples/2SI/OpenHermes-Audio-Test/sample_1.wav b/examples/2SI/OpenHermes-Audio-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..8fa3cc5ffd5e8304b28e2c00f15edfda78c293b2 Binary files /dev/null and b/examples/2SI/OpenHermes-Audio-Test/sample_1.wav differ diff --git a/examples/2SI/OpenHermes-Audio-Test/sample_2.wav b/examples/2SI/OpenHermes-Audio-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..47dc6714c50f2650cb528c81edf0f18a8f3148e9 Binary files /dev/null and b/examples/2SI/OpenHermes-Audio-Test/sample_2.wav differ diff --git a/examples/2SI/OpenHermes-Audio-Test/state.json b/examples/2SI/OpenHermes-Audio-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..9a19e3c159f16d99ee14394ad0aab31b7594a7eb --- /dev/null +++ b/examples/2SI/OpenHermes-Audio-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "bec0fd435c621121", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2SQA/CN-College-Listen-MCQ-Test/data-00000-of-00001.arrow b/examples/2SQA/CN-College-Listen-MCQ-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..5b514d7eea4a68baf4f63ab12cce2774d09fdc8e --- /dev/null +++ b/examples/2SQA/CN-College-Listen-MCQ-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e3578b2ed58ee0fa7527ef31e35664a28c27505b0f9ff79754680b1d7330c70 +size 3518760 diff --git a/examples/2SQA/CN-College-Listen-MCQ-Test/dataset_info.json b/examples/2SQA/CN-College-Listen-MCQ-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..623b82f8d7d18ff8870b56c298783f1b5f32dc57 --- /dev/null +++ b/examples/2SQA/CN-College-Listen-MCQ-Test/dataset_info.json @@ -0,0 +1,160 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audio_name": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "dtype": "string", + "_type": "Value" + }, + "mc_answer": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2SQA/CN-College-Listen-MCQ-Test/sample_0.wav b/examples/2SQA/CN-College-Listen-MCQ-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..cf00703fe4fb0275a04b27b4ab0abf44a8e39cae --- /dev/null +++ b/examples/2SQA/CN-College-Listen-MCQ-Test/sample_0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a49fd4f0b22817155860bcd528696eac0ad50bc1c9b239ae328e46936b53d1e +size 2115952 diff --git a/examples/2SQA/CN-College-Listen-MCQ-Test/sample_1.wav b/examples/2SQA/CN-College-Listen-MCQ-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..8aea2bff27874d78fbee581ad8849d0ab0ac9fc8 Binary files /dev/null and b/examples/2SQA/CN-College-Listen-MCQ-Test/sample_1.wav differ diff --git a/examples/2SQA/CN-College-Listen-MCQ-Test/sample_2.wav b/examples/2SQA/CN-College-Listen-MCQ-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..750f1b8414060167651ac33128408b510b1545ca Binary files /dev/null and b/examples/2SQA/CN-College-Listen-MCQ-Test/sample_2.wav differ diff --git a/examples/2SQA/CN-College-Listen-MCQ-Test/state.json b/examples/2SQA/CN-College-Listen-MCQ-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..1afd2605e58a9e5304f9f324f51af5e7d535d636 --- /dev/null +++ b/examples/2SQA/CN-College-Listen-MCQ-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b4fb19374756e22d", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2SQA/DREAM-TTS-MCQ-Test/data-00000-of-00001.arrow b/examples/2SQA/DREAM-TTS-MCQ-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..236abb453583027bafdc652c39704a6870afaec8 --- /dev/null +++ b/examples/2SQA/DREAM-TTS-MCQ-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:683f5edfae8193f93c22fc1bf2514edee75cc54f565b0e997d4addbfc3376a4d +size 1272504 diff --git a/examples/2SQA/DREAM-TTS-MCQ-Test/dataset_info.json b/examples/2SQA/DREAM-TTS-MCQ-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..b12863f3596549cd265ea212a5964a91c271a7cf --- /dev/null +++ b/examples/2SQA/DREAM-TTS-MCQ-Test/dataset_info.json @@ -0,0 +1,156 @@ +{ + "citation": "", + "description": "", + "features": { + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "dialogue": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "dialogue_id": { + "dtype": "string", + "_type": "Value" + }, + "id": { + "dtype": "int64", + "_type": "Value" + }, + "mc_answer": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2SQA/DREAM-TTS-MCQ-Test/sample_0.wav b/examples/2SQA/DREAM-TTS-MCQ-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..ea5e271157879441098607537ffd8481426eca1f Binary files /dev/null and b/examples/2SQA/DREAM-TTS-MCQ-Test/sample_0.wav differ diff --git a/examples/2SQA/DREAM-TTS-MCQ-Test/sample_1.wav b/examples/2SQA/DREAM-TTS-MCQ-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..15f7e2c92aa5c8e199277474dfbb81d27c9ef002 Binary files /dev/null and b/examples/2SQA/DREAM-TTS-MCQ-Test/sample_1.wav differ diff --git a/examples/2SQA/DREAM-TTS-MCQ-Test/sample_2.wav b/examples/2SQA/DREAM-TTS-MCQ-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..55ad57e609bfdbffb7be3006b19f1b5594996eb5 Binary files /dev/null and b/examples/2SQA/DREAM-TTS-MCQ-Test/sample_2.wav differ diff --git a/examples/2SQA/DREAM-TTS-MCQ-Test/state.json b/examples/2SQA/DREAM-TTS-MCQ-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..9429aff4cfdd4360de95ee6eaf6f11a40d6ceefa --- /dev/null +++ b/examples/2SQA/DREAM-TTS-MCQ-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "4ae1a389c9652fd2", + "_format_columns": [ + "answer", + "context", + "instruction", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2SQA/Public-SG-Speech-QA-Test/data-00000-of-00001.arrow b/examples/2SQA/Public-SG-Speech-QA-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..d1c7960aac65d29f4df237c5245a2aa6c89d0055 --- /dev/null +++ b/examples/2SQA/Public-SG-Speech-QA-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41ebee1aeb1c9a28136ca99e86afcdf9f583aba6c003a0a99d94477419dc9f6f +size 3555008 diff --git a/examples/2SQA/Public-SG-Speech-QA-Test/dataset_info.json b/examples/2SQA/Public-SG-Speech-QA-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..5b865ccbdc90f460864fa658307b1fd6e12b5ac6 --- /dev/null +++ b/examples/2SQA/Public-SG-Speech-QA-Test/dataset_info.json @@ -0,0 +1,150 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Answer Score": { + "dtype": "float64", + "_type": "Value" + }, + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Gender": { + "dtype": "int64", + "_type": "Value" + }, + "Question Score": { + "dtype": "float64", + "_type": "Value" + }, + "Speaker": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2SQA/Public-SG-Speech-QA-Test/sample_0.wav b/examples/2SQA/Public-SG-Speech-QA-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..298380997737d4c67d07f69122cc44a28437254d --- /dev/null +++ b/examples/2SQA/Public-SG-Speech-QA-Test/sample_0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b62a055ccc0592fb34892fc14c743745bd3ee5ff04475f8231ca064cf5ab292c +size 1599216 diff --git a/examples/2SQA/Public-SG-Speech-QA-Test/sample_1.wav b/examples/2SQA/Public-SG-Speech-QA-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..8b87c520e6acc02af8e3ed71f1961be81e93f4ce Binary files /dev/null and b/examples/2SQA/Public-SG-Speech-QA-Test/sample_1.wav differ diff --git a/examples/2SQA/Public-SG-Speech-QA-Test/sample_2.wav b/examples/2SQA/Public-SG-Speech-QA-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..2b168de69ee8289ea666ea3d3a82849f085256d3 Binary files /dev/null and b/examples/2SQA/Public-SG-Speech-QA-Test/sample_2.wav differ diff --git a/examples/2SQA/Public-SG-Speech-QA-Test/state.json b/examples/2SQA/Public-SG-Speech-QA-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..0f2668023e6a567ada41c9ea082f99773f2e8f6e --- /dev/null +++ b/examples/2SQA/Public-SG-Speech-QA-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "ede505a635b66631", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2SQA/SLUE-P2-SQA5-Test/data-00000-of-00001.arrow b/examples/2SQA/SLUE-P2-SQA5-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..fdc22e998f5d035a4a21db496fc26bb1eb9e7b47 --- /dev/null +++ b/examples/2SQA/SLUE-P2-SQA5-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91fad195c759891c783f8448a7cb42362b53f091aacad15195a7d262da356464 +size 5469256 diff --git a/examples/2SQA/SLUE-P2-SQA5-Test/dataset_info.json b/examples/2SQA/SLUE-P2-SQA5-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..6c681f17c89b019564bbd2d0509865a05ce4db3a --- /dev/null +++ b/examples/2SQA/SLUE-P2-SQA5-Test/dataset_info.json @@ -0,0 +1,197 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "array": { + "feature": { + "dtype": "float64", + "_type": "Value" + }, + "_type": "Sequence" + }, + "path": { + "dtype": "null", + "_type": "Value" + }, + "sampling_rate": { + "dtype": "int64", + "_type": "Value" + } + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "document_id": { + "dtype": "string", + "_type": "Value" + }, + "document_speaker_id": { + "dtype": "string", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "normalized_document_text": { + "dtype": "string", + "_type": "Value" + }, + "normalized_question_text": { + "dtype": "string", + "_type": "Value" + }, + "question_speaker_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2SQA/SLUE-P2-SQA5-Test/sample_0.wav b/examples/2SQA/SLUE-P2-SQA5-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..7236fa9aac8e0e10818025ab656ddc4ad2bbf812 --- /dev/null +++ b/examples/2SQA/SLUE-P2-SQA5-Test/sample_0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:398b25e597d18e0e027bb7fda9cc025cff93b716bc683d88a553d34c338285eb +size 1280044 diff --git a/examples/2SQA/SLUE-P2-SQA5-Test/sample_1.wav b/examples/2SQA/SLUE-P2-SQA5-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..7a80d4c3399ae9256db0ccded497c8fe90aecb5c --- /dev/null +++ b/examples/2SQA/SLUE-P2-SQA5-Test/sample_1.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36769a3e93db8fc6ef00b05552a3c36b08c35c578f93d142a60415d2909f9e7e +size 1280044 diff --git a/examples/2SQA/SLUE-P2-SQA5-Test/sample_2.wav b/examples/2SQA/SLUE-P2-SQA5-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..92f61a3d6c5de05f4b6070eb892bf22139beeff5 --- /dev/null +++ b/examples/2SQA/SLUE-P2-SQA5-Test/sample_2.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43e31031a1e0e8cde85470353830b7c4513d4d9f9e5ca394127b80b4938d970a +size 1280044 diff --git a/examples/2SQA/SLUE-P2-SQA5-Test/state.json b/examples/2SQA/SLUE-P2-SQA5-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..64fcf1074f14e6cc130ef25406cd0718f61798a7 --- /dev/null +++ b/examples/2SQA/SLUE-P2-SQA5-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "400b504ce3034854", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2SQA/Spoken-Squad-Test/data-00000-of-00001.arrow b/examples/2SQA/Spoken-Squad-Test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..01c3308e636c1ca7b513cc8e863074d83085e4d3 --- /dev/null +++ b/examples/2SQA/Spoken-Squad-Test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8c870f847afa404da100216dd6192919e7e00218c046beecf577d752e958e14 +size 6854040 diff --git a/examples/2SQA/Spoken-Squad-Test/dataset_info.json b/examples/2SQA/Spoken-Squad-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..5b50e517bf0760f04194c6b7c720d733151674aa --- /dev/null +++ b/examples/2SQA/Spoken-Squad-Test/dataset_info.json @@ -0,0 +1,172 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "paragraph_id": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + }, + "topic_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2SQA/Spoken-Squad-Test/sample_0.wav b/examples/2SQA/Spoken-Squad-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..2b928fafe992bf5c3ecf4b03389d186cff11421b --- /dev/null +++ b/examples/2SQA/Spoken-Squad-Test/sample_0.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:304b38f43745d431d607788b839a14409b8e8e2032a093f408302d79287efc3b +size 2143532 diff --git a/examples/2SQA/Spoken-Squad-Test/sample_1.wav b/examples/2SQA/Spoken-Squad-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..10820a48e3ffd1a5ecb2198457b3891122d161a1 --- /dev/null +++ b/examples/2SQA/Spoken-Squad-Test/sample_1.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97c29b943c6244c1a6dee8bbd871055408ed9735349c3fd57f2d285cd3628041 +size 1789484 diff --git a/examples/2SQA/Spoken-Squad-Test/sample_2.wav b/examples/2SQA/Spoken-Squad-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..82953be741c78b78f28232b6b13d2392ff533c2e --- /dev/null +++ b/examples/2SQA/Spoken-Squad-Test/sample_2.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26c3bc8688fbd99e8dd6eff774a7994ae34ec6fddfb1242f3bbae500a12b7746 +size 2903852 diff --git a/examples/2SQA/Spoken-Squad-Test/state.json b/examples/2SQA/Spoken-Squad-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..bad03ce21b0c5b24ac005094f5a25236804a54a7 --- /dev/null +++ b/examples/2SQA/Spoken-Squad-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "416952584cf805a4", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ST/Covost2-EN-ID-test/data-00000-of-00001.arrow b/examples/2ST/Covost2-EN-ID-test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..35d28390925a96fd109f6c96b0109cbe4c18a228 --- /dev/null +++ b/examples/2ST/Covost2-EN-ID-test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e114114e362e4b40cbffbfe8f9857f64de0cac09471062c6f6c4f6e73b235de7 +size 549296 diff --git a/examples/2ST/Covost2-EN-ID-test/dataset_info.json b/examples/2ST/Covost2-EN-ID-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..75447219133e63a2e07347f4f15add29dc2f358f --- /dev/null +++ b/examples/2ST/Covost2-EN-ID-test/dataset_info.json @@ -0,0 +1,154 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ST/Covost2-EN-ID-test/sample_0.wav b/examples/2ST/Covost2-EN-ID-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..5e70047fc638745caac9c90c2539cc9f18168f8a Binary files /dev/null and b/examples/2ST/Covost2-EN-ID-test/sample_0.wav differ diff --git a/examples/2ST/Covost2-EN-ID-test/sample_1.wav b/examples/2ST/Covost2-EN-ID-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..832716522492f3cbeb066ed58c45eab2e9036a2f Binary files /dev/null and b/examples/2ST/Covost2-EN-ID-test/sample_1.wav differ diff --git a/examples/2ST/Covost2-EN-ID-test/sample_2.wav b/examples/2ST/Covost2-EN-ID-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..2ccb333e9339d1fda08f7c84fe4fe2cd0b22508c Binary files /dev/null and b/examples/2ST/Covost2-EN-ID-test/sample_2.wav differ diff --git a/examples/2ST/Covost2-EN-ID-test/state.json b/examples/2ST/Covost2-EN-ID-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..d460d81ba05ec305339ae4a50c65c2654e706950 --- /dev/null +++ b/examples/2ST/Covost2-EN-ID-test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "17a5c97a84a7f33c", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ST/Covost2-EN-TA-test/data-00000-of-00001.arrow b/examples/2ST/Covost2-EN-TA-test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..50796f046accb5692f9772e9beeacf58537e9a1d --- /dev/null +++ b/examples/2ST/Covost2-EN-TA-test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a1667eba0de4088fbe20e9f6c9beaacf5beb556a1d817c92af178757da3bc78 +size 561528 diff --git a/examples/2ST/Covost2-EN-TA-test/dataset_info.json b/examples/2ST/Covost2-EN-TA-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..75447219133e63a2e07347f4f15add29dc2f358f --- /dev/null +++ b/examples/2ST/Covost2-EN-TA-test/dataset_info.json @@ -0,0 +1,154 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ST/Covost2-EN-TA-test/sample_0.wav b/examples/2ST/Covost2-EN-TA-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..ad0f73bf6dbfe42e17635b063161d6154cfcf28a Binary files /dev/null and b/examples/2ST/Covost2-EN-TA-test/sample_0.wav differ diff --git a/examples/2ST/Covost2-EN-TA-test/sample_1.wav b/examples/2ST/Covost2-EN-TA-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..e5d11995c4bd1e3484f0c15d828c678300d899b2 Binary files /dev/null and b/examples/2ST/Covost2-EN-TA-test/sample_1.wav differ diff --git a/examples/2ST/Covost2-EN-TA-test/sample_2.wav b/examples/2ST/Covost2-EN-TA-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..8910c6f24aebdc2aab0e2517f304448129282655 Binary files /dev/null and b/examples/2ST/Covost2-EN-TA-test/sample_2.wav differ diff --git a/examples/2ST/Covost2-EN-TA-test/state.json b/examples/2ST/Covost2-EN-TA-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..de82636ce42828a45d6c8c4ca1edde9874d87cc3 --- /dev/null +++ b/examples/2ST/Covost2-EN-TA-test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e9d273226522711f", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ST/Covost2-EN-ZH-test/data-00000-of-00001.arrow b/examples/2ST/Covost2-EN-ZH-test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..02b18146d4be540926d80ef57b9bd914ef51aec2 --- /dev/null +++ b/examples/2ST/Covost2-EN-ZH-test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4e2a6c95fe3b8adbc4c2f55bd53fdb56b064d436c49ee20ac2e4333114ceac7 +size 600520 diff --git a/examples/2ST/Covost2-EN-ZH-test/dataset_info.json b/examples/2ST/Covost2-EN-ZH-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..75447219133e63a2e07347f4f15add29dc2f358f --- /dev/null +++ b/examples/2ST/Covost2-EN-ZH-test/dataset_info.json @@ -0,0 +1,154 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ST/Covost2-EN-ZH-test/sample_0.wav b/examples/2ST/Covost2-EN-ZH-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..f428967146cecccb88b318b8388e3b897cd14fca Binary files /dev/null and b/examples/2ST/Covost2-EN-ZH-test/sample_0.wav differ diff --git a/examples/2ST/Covost2-EN-ZH-test/sample_1.wav b/examples/2ST/Covost2-EN-ZH-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..e91b4565eb72ab11f760c9b276daf4f6998f9e5f Binary files /dev/null and b/examples/2ST/Covost2-EN-ZH-test/sample_1.wav differ diff --git a/examples/2ST/Covost2-EN-ZH-test/sample_2.wav b/examples/2ST/Covost2-EN-ZH-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..38f60afc485334971149ebeb3985573093dd9a96 Binary files /dev/null and b/examples/2ST/Covost2-EN-ZH-test/sample_2.wav differ diff --git a/examples/2ST/Covost2-EN-ZH-test/state.json b/examples/2ST/Covost2-EN-ZH-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..7dafba85d0dbd927e3c9aae50f0d1a3cf3e2c71c --- /dev/null +++ b/examples/2ST/Covost2-EN-ZH-test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "86243bad639f0cb6", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ST/Covost2-ID-EN-test/data-00000-of-00001.arrow b/examples/2ST/Covost2-ID-EN-test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..037f35bf9787de62b458d9a89969d0abcd8b02eb --- /dev/null +++ b/examples/2ST/Covost2-ID-EN-test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1785e6d316adfd52a99ce27e0b14fd68fa410a93f5082fab34e8f35a89563d8c +size 529872 diff --git a/examples/2ST/Covost2-ID-EN-test/dataset_info.json b/examples/2ST/Covost2-ID-EN-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..75447219133e63a2e07347f4f15add29dc2f358f --- /dev/null +++ b/examples/2ST/Covost2-ID-EN-test/dataset_info.json @@ -0,0 +1,154 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ST/Covost2-ID-EN-test/sample_0.wav b/examples/2ST/Covost2-ID-EN-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..60cf3a26a30e046d87c72a4e4d4f15cf54732039 Binary files /dev/null and b/examples/2ST/Covost2-ID-EN-test/sample_0.wav differ diff --git a/examples/2ST/Covost2-ID-EN-test/sample_1.wav b/examples/2ST/Covost2-ID-EN-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..0b37d62b0b132a59e5ce6d2c4551c1d701143efb Binary files /dev/null and b/examples/2ST/Covost2-ID-EN-test/sample_1.wav differ diff --git a/examples/2ST/Covost2-ID-EN-test/sample_2.wav b/examples/2ST/Covost2-ID-EN-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..5eda0ef756def2b097ee2bef2a92b8b9e99ff5c3 Binary files /dev/null and b/examples/2ST/Covost2-ID-EN-test/sample_2.wav differ diff --git a/examples/2ST/Covost2-ID-EN-test/state.json b/examples/2ST/Covost2-ID-EN-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..592b67b79c5171bdccfcf07d28855f756e7f4f23 --- /dev/null +++ b/examples/2ST/Covost2-ID-EN-test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "69b492f6dd79179e", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ST/Covost2-TA-EN-test/data-00000-of-00001.arrow b/examples/2ST/Covost2-TA-EN-test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..d4966d98c3fec664dfe65f4a2094d48ef716ac78 --- /dev/null +++ b/examples/2ST/Covost2-TA-EN-test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f3b8c1a000b5085ab5af45333fd8c4fc37508c74595f1dba8668f7da20b4d21 +size 477584 diff --git a/examples/2ST/Covost2-TA-EN-test/dataset_info.json b/examples/2ST/Covost2-TA-EN-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..0a34b7bc1ac66816e090968b4de959503e2c4168 --- /dev/null +++ b/examples/2ST/Covost2-TA-EN-test/dataset_info.json @@ -0,0 +1,154 @@ +{ + "citation": "", + "description": "", + "features": { + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ST/Covost2-TA-EN-test/sample_0.wav b/examples/2ST/Covost2-TA-EN-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..642a3386437533748a4df9cbf7c45ab6f5bd0b6f Binary files /dev/null and b/examples/2ST/Covost2-TA-EN-test/sample_0.wav differ diff --git a/examples/2ST/Covost2-TA-EN-test/sample_1.wav b/examples/2ST/Covost2-TA-EN-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..a9c003707ecdee7e8918014b2f77a69adbe71d93 Binary files /dev/null and b/examples/2ST/Covost2-TA-EN-test/sample_1.wav differ diff --git a/examples/2ST/Covost2-TA-EN-test/sample_2.wav b/examples/2ST/Covost2-TA-EN-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..cd0ce42ab1336cc1591caa5cd56acf6db9afbc29 Binary files /dev/null and b/examples/2ST/Covost2-TA-EN-test/sample_2.wav differ diff --git a/examples/2ST/Covost2-TA-EN-test/state.json b/examples/2ST/Covost2-TA-EN-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..c7aef0119e13ade230e9d5c50ca9b66df7193400 --- /dev/null +++ b/examples/2ST/Covost2-TA-EN-test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "6f095ca26fe268ab", + "_format_columns": [ + "answer", + "context", + "instruction", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ST/Covost2-ZH-EN-test/data-00000-of-00001.arrow b/examples/2ST/Covost2-ZH-EN-test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..dc342fcd295e1e1f058737f14d28154c8718cab1 --- /dev/null +++ b/examples/2ST/Covost2-ZH-EN-test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acab5090424e3e32cac285d3a346f48308a64629aa5fba171addc37fbf4f5337 +size 554256 diff --git a/examples/2ST/Covost2-ZH-EN-test/dataset_info.json b/examples/2ST/Covost2-ZH-EN-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..75447219133e63a2e07347f4f15add29dc2f358f --- /dev/null +++ b/examples/2ST/Covost2-ZH-EN-test/dataset_info.json @@ -0,0 +1,154 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ST/Covost2-ZH-EN-test/sample_0.wav b/examples/2ST/Covost2-ZH-EN-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..a0add517f30ee8b82cef1be3aba2d471645bd648 Binary files /dev/null and b/examples/2ST/Covost2-ZH-EN-test/sample_0.wav differ diff --git a/examples/2ST/Covost2-ZH-EN-test/sample_1.wav b/examples/2ST/Covost2-ZH-EN-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..6fad51ce295d62bb0b68826f1d1f3c3f4a2756e9 Binary files /dev/null and b/examples/2ST/Covost2-ZH-EN-test/sample_1.wav differ diff --git a/examples/2ST/Covost2-ZH-EN-test/sample_2.wav b/examples/2ST/Covost2-ZH-EN-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..e7bebe13c8efed3bb9b8c9b9a46831c8152240b5 Binary files /dev/null and b/examples/2ST/Covost2-ZH-EN-test/sample_2.wav differ diff --git a/examples/2ST/Covost2-ZH-EN-test/state.json b/examples/2ST/Covost2-ZH-EN-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..4199c3b1fc42c18396a4f54e001d032ad37e6d97 --- /dev/null +++ b/examples/2ST/Covost2-ZH-EN-test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "977bd2807131826b", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/AC/AudioCaps-Test/data-00000-of-00001.arrow b/examples/AC/AudioCaps-Test/data-00000-of-00001.arrow index 58406c24c9292ee43c93960c4478e4d05fb95f80..7aec04f73d9fe156417abc9a21aa1d6c732b16fc 100644 --- a/examples/AC/AudioCaps-Test/data-00000-of-00001.arrow +++ b/examples/AC/AudioCaps-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70213ecc31962d6d8bbc0e4d7ae2dd302c851a4af00f12b07735311f5f128288 -size 966216 +oid sha256:f374a52ffbc12ad01d0573db305765d7b447f2d4a45590b87d9bb20aaa443d80 +size 972840 diff --git a/examples/AC/AudioCaps-Test/dataset_info.json b/examples/AC/AudioCaps-Test/dataset_info.json index 82148686a795bb258e6676260855fb8cf9ef19e4..c5138402a850f4b4605862059b4c7ab8debdb92b 100644 --- a/examples/AC/AudioCaps-Test/dataset_info.json +++ b/examples/AC/AudioCaps-Test/dataset_info.json @@ -100,6 +100,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/AC/AudioCaps-Test/sample_0.wav b/examples/AC/AudioCaps-Test/sample_0.wav index 4b2f8047fa38f9ba3acef7485b26ea02f4ada359..4d69d901b51460ad829bd5c3b96bd16b4a62909e 100644 Binary files a/examples/AC/AudioCaps-Test/sample_0.wav and b/examples/AC/AudioCaps-Test/sample_0.wav differ diff --git a/examples/AC/AudioCaps-Test/sample_1.wav b/examples/AC/AudioCaps-Test/sample_1.wav index 8724df3f01ccd22778f84a7a851871f1d73434fe..fb2163d74f884d02d085d1680e467f5fcfdb91d3 100644 Binary files a/examples/AC/AudioCaps-Test/sample_1.wav and b/examples/AC/AudioCaps-Test/sample_1.wav differ diff --git a/examples/AC/AudioCaps-Test/sample_2.wav b/examples/AC/AudioCaps-Test/sample_2.wav index 48d08b19be5f0904ca976c35fbe4ae4d6c19435f..27962998a41716585567178d244d99ad6f8684e7 100644 Binary files a/examples/AC/AudioCaps-Test/sample_2.wav and b/examples/AC/AudioCaps-Test/sample_2.wav differ diff --git a/examples/AC/AudioCaps-Test/state.json b/examples/AC/AudioCaps-Test/state.json index fcc438d7c2db870c1b636d8299a9d843d607fbc6..014ade4c15956eca02b3b36e38274d20e16d0618 100644 --- a/examples/AC/AudioCaps-Test/state.json +++ b/examples/AC/AudioCaps-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "e736bf1821a473f3", + "_fingerprint": "7dd956b95601f713", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/AC/WavCaps-Test/data-00000-of-00001.arrow b/examples/AC/WavCaps-Test/data-00000-of-00001.arrow index ad78b874ff3abccb165ea8ef522d91d8c0f06b48..e8bc45db1c473357c16ab9c1effc971d920f3d54 100644 --- a/examples/AC/WavCaps-Test/data-00000-of-00001.arrow +++ b/examples/AC/WavCaps-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9636af636286b1aedad840ccef31ca7d879e824ddc6814fcb7411b7fbdf411aa -size 812352 +oid sha256:150d89e21ac2457a6906cc5066b9ada8ea777a8242277803fb946cbdce5489e9 +size 1221920 diff --git a/examples/AC/WavCaps-Test/dataset_info.json b/examples/AC/WavCaps-Test/dataset_info.json index acf11db0c4cc1869f7763270ddadbfe4c30f73d4..8ac6e1be5f6fbdee81efb2e5d107213ff13d5377 100644 --- a/examples/AC/WavCaps-Test/dataset_info.json +++ b/examples/AC/WavCaps-Test/dataset_info.json @@ -96,6 +96,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/AC/WavCaps-Test/sample_0.wav b/examples/AC/WavCaps-Test/sample_0.wav index ad8d45455c35860d7309e0554c6610ba6ddccb68..3ae6c0b454d470c5565fc6770051d08ca2bf693f 100644 Binary files a/examples/AC/WavCaps-Test/sample_0.wav and b/examples/AC/WavCaps-Test/sample_0.wav differ diff --git a/examples/AC/WavCaps-Test/sample_1.wav b/examples/AC/WavCaps-Test/sample_1.wav index ea10461620e829d47fb78bf4d827b95322791340..0579abdb9ca1dc82c841c8024cdbd4fc5dbd0f9e 100644 Binary files a/examples/AC/WavCaps-Test/sample_1.wav and b/examples/AC/WavCaps-Test/sample_1.wav differ diff --git a/examples/AC/WavCaps-Test/sample_2.wav b/examples/AC/WavCaps-Test/sample_2.wav index b7fef91fbedf60a5d58f4f9fb93d95c1e205bf67..4c647b74ede7e40740775fd68323fb57229d1383 100644 Binary files a/examples/AC/WavCaps-Test/sample_2.wav and b/examples/AC/WavCaps-Test/sample_2.wav differ diff --git a/examples/AC/WavCaps-Test/state.json b/examples/AC/WavCaps-Test/state.json index 0d52b23c8e5b93506a8af809adf9680c9cc7bf86..77521aca2021217bce1aaaeb9ffca5a48285c78d 100644 --- a/examples/AC/WavCaps-Test/state.json +++ b/examples/AC/WavCaps-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "742ab313af054565", + "_fingerprint": "22a6dfe54867e49c", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/AQA/AudioCaps-QA-Test/data-00000-of-00001.arrow b/examples/AQA/AudioCaps-QA-Test/data-00000-of-00001.arrow index 91a95941f9251c2e3abe654fa45035c8015d364b..717dcdea3195bf8df2c15be325436ad84575d624 100644 --- a/examples/AQA/AudioCaps-QA-Test/data-00000-of-00001.arrow +++ b/examples/AQA/AudioCaps-QA-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:92b0b2fe81ee0e3a2690a444bb9b68994d89ca53db6ce174f5802293549256d1 -size 953616 +oid sha256:98001fd22061906f2d3c6713c94f33ef08e48a8fa4dfdae81abdfa7de80658f6 +size 977096 diff --git a/examples/AQA/AudioCaps-QA-Test/dataset_info.json b/examples/AQA/AudioCaps-QA-Test/dataset_info.json index c6d61c8e72325cf36fabc952fbec1ca42e49e5e8..a271f8f69652bbad9be548c5545f57ef7d351f0e 100644 --- a/examples/AQA/AudioCaps-QA-Test/dataset_info.json +++ b/examples/AQA/AudioCaps-QA-Test/dataset_info.json @@ -104,6 +104,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/AQA/AudioCaps-QA-Test/sample_0.wav b/examples/AQA/AudioCaps-QA-Test/sample_0.wav index 2eec3251fe8dc9acf17f43f66f187a277cf6c6b0..21e1b511fd264d1a7659a9e513407961bf087cdb 100644 Binary files a/examples/AQA/AudioCaps-QA-Test/sample_0.wav and b/examples/AQA/AudioCaps-QA-Test/sample_0.wav differ diff --git a/examples/AQA/AudioCaps-QA-Test/sample_1.wav b/examples/AQA/AudioCaps-QA-Test/sample_1.wav index f7e101c5918451111738962b722e47041dd59227..4c39f41dc29ac5cb08966dfdd8a73f904ddb1823 100644 Binary files a/examples/AQA/AudioCaps-QA-Test/sample_1.wav and b/examples/AQA/AudioCaps-QA-Test/sample_1.wav differ diff --git a/examples/AQA/AudioCaps-QA-Test/sample_2.wav b/examples/AQA/AudioCaps-QA-Test/sample_2.wav index cb15b2ffff83c6ec5541c8b54a8205d58292a2d3..35e1e28a00d6c890d2a92f65fd54f6dc6b071e97 100644 Binary files a/examples/AQA/AudioCaps-QA-Test/sample_2.wav and b/examples/AQA/AudioCaps-QA-Test/sample_2.wav differ diff --git a/examples/AQA/AudioCaps-QA-Test/state.json b/examples/AQA/AudioCaps-QA-Test/state.json index 267d6cbee3f52f7b8f77f20b959ca9ce159aed16..d8870c294493bc158c42378391238a63e792d3b6 100644 --- a/examples/AQA/AudioCaps-QA-Test/state.json +++ b/examples/AQA/AudioCaps-QA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "52bc1dfcaf2a0f4b", + "_fingerprint": "60b01046f3ad5343", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/AQA/Clotho-AQA-Test/data-00000-of-00001.arrow b/examples/AQA/Clotho-AQA-Test/data-00000-of-00001.arrow index 8b76eb63bc6a3adcfad98e20600a07bc5cf84e1b..8ece8019ead9d050e03de2b21ac40ba33ef6c018 100644 --- a/examples/AQA/Clotho-AQA-Test/data-00000-of-00001.arrow +++ b/examples/AQA/Clotho-AQA-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:08b4de88bf163bbfd2097769e6104ba8514380a7a097741d38e1ccc41d5b0f86 -size 2035832 +oid sha256:3dc4022c4dff1427b545e9890aa4fdcbfa3017b1b101ca4c366f52715ca57e7d +size 1735400 diff --git a/examples/AQA/Clotho-AQA-Test/dataset_info.json b/examples/AQA/Clotho-AQA-Test/dataset_info.json index 1b05abb4dce6b496c0a3c6043f27e4ca1f225320..6c56b97992cd0d890362752b2624919a9de2a1ee 100644 --- a/examples/AQA/Clotho-AQA-Test/dataset_info.json +++ b/examples/AQA/Clotho-AQA-Test/dataset_info.json @@ -87,6 +87,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/AQA/Clotho-AQA-Test/sample_0.wav b/examples/AQA/Clotho-AQA-Test/sample_0.wav index d6a07e6172778d85080c04531658efc7443ae03d..f5f0a048306163ed4345c00aadeae8426b9d797a 100644 Binary files a/examples/AQA/Clotho-AQA-Test/sample_0.wav and b/examples/AQA/Clotho-AQA-Test/sample_0.wav differ diff --git a/examples/AQA/Clotho-AQA-Test/sample_1.wav b/examples/AQA/Clotho-AQA-Test/sample_1.wav index 139bd3226ca457718b3cdab6d1e7a99dd5e4bd01..e1d918453e30499c4e72944444dcbc7b91919383 100644 Binary files a/examples/AQA/Clotho-AQA-Test/sample_1.wav and b/examples/AQA/Clotho-AQA-Test/sample_1.wav differ diff --git a/examples/AQA/Clotho-AQA-Test/sample_2.wav b/examples/AQA/Clotho-AQA-Test/sample_2.wav index b62a8feb71883e7540f521d41ac6e3eefe3862a3..a62fd4ca863efed677bdde085e94389031fcdbf4 100644 Binary files a/examples/AQA/Clotho-AQA-Test/sample_2.wav and b/examples/AQA/Clotho-AQA-Test/sample_2.wav differ diff --git a/examples/AQA/Clotho-AQA-Test/state.json b/examples/AQA/Clotho-AQA-Test/state.json index c58228cce70f0b257254856751c37d68dd8cd64f..4283473fd35325b09ea63487bc79660b9bfd8083 100644 --- a/examples/AQA/Clotho-AQA-Test/state.json +++ b/examples/AQA/Clotho-AQA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "e2e76326f448d7c4", + "_fingerprint": "9728812a68aca05b", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/AQA/WavCaps-QA-Test/data-00000-of-00001.arrow b/examples/AQA/WavCaps-QA-Test/data-00000-of-00001.arrow index 5ce3c3745ad16319f0acfb100c443202a55485af..126d54db4d30af711a2e8d1f91ac2e86fb839faa 100644 --- a/examples/AQA/WavCaps-QA-Test/data-00000-of-00001.arrow +++ b/examples/AQA/WavCaps-QA-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1736d5bcc9ca0d8d4847d7d000e6c6e63c73f6262177ea0391d180c40649da39 -size 837920 +oid sha256:7c724e98a1d3d280ee37873a4580f4784796e523172424c0a5fa9db5551ed60f +size 954704 diff --git a/examples/AQA/WavCaps-QA-Test/dataset_info.json b/examples/AQA/WavCaps-QA-Test/dataset_info.json index 72ceb742ffcaf0f6ff67811fa628b1e1c7a1167e..aa59daad62b54e79ee3ad8bb220dcef211f82c84 100644 --- a/examples/AQA/WavCaps-QA-Test/dataset_info.json +++ b/examples/AQA/WavCaps-QA-Test/dataset_info.json @@ -100,6 +100,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/AQA/WavCaps-QA-Test/sample_0.wav b/examples/AQA/WavCaps-QA-Test/sample_0.wav index 7639cdca2866a648ce90b4f5e385e3e6dc56c04a..ed90d471475217726db780c39603232512bf3785 100644 Binary files a/examples/AQA/WavCaps-QA-Test/sample_0.wav and b/examples/AQA/WavCaps-QA-Test/sample_0.wav differ diff --git a/examples/AQA/WavCaps-QA-Test/sample_1.wav b/examples/AQA/WavCaps-QA-Test/sample_1.wav index d2cc1a6def6014328e02ea5ea25019414f8960b4..9882a2cca41a0736468480868d4a2a41de7ededb 100644 Binary files a/examples/AQA/WavCaps-QA-Test/sample_1.wav and b/examples/AQA/WavCaps-QA-Test/sample_1.wav differ diff --git a/examples/AQA/WavCaps-QA-Test/sample_2.wav b/examples/AQA/WavCaps-QA-Test/sample_2.wav index 9629f69dd6d6b8a713b9122b03ee04ec4aae8857..36983956624f0737ac2cb3da85677cfd5d530e54 100644 Binary files a/examples/AQA/WavCaps-QA-Test/sample_2.wav and b/examples/AQA/WavCaps-QA-Test/sample_2.wav differ diff --git a/examples/AQA/WavCaps-QA-Test/state.json b/examples/AQA/WavCaps-QA-Test/state.json index a85966493650dfae84811c91b9f42d7c2811ad9c..caa4e1e8d47964acfec8a4601e6cc62cfa0cd9e0 100644 --- a/examples/AQA/WavCaps-QA-Test/state.json +++ b/examples/AQA/WavCaps-QA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "40995a6cc1fe3dc7", + "_fingerprint": "2b00ba42b5d66bed", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/AR/VoxCeleb-Accent-Test/data-00000-of-00001.arrow b/examples/AR/VoxCeleb-Accent-Test/data-00000-of-00001.arrow index 4d554c3929f3b9461fca53182975f730f3e8566d..521f220a8ad758da13b15cf1207ca1d56019c410 100644 --- a/examples/AR/VoxCeleb-Accent-Test/data-00000-of-00001.arrow +++ b/examples/AR/VoxCeleb-Accent-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5307150e5b08f9bfd7b81d56ca7ffbee1d731d002417d20fc3ec0713bc764533 -size 730864 +oid sha256:378d8cf14e6cf16ca63841ef7833b021b65c81f95dc093abac169ecb030ebb0a +size 1213568 diff --git a/examples/AR/VoxCeleb-Accent-Test/dataset_info.json b/examples/AR/VoxCeleb-Accent-Test/dataset_info.json index 913e85b36737f9004f81286043e7493d6f61b737..91b217e4844573d2e3a2d0072b2276b37723047c 100644 --- a/examples/AR/VoxCeleb-Accent-Test/dataset_info.json +++ b/examples/AR/VoxCeleb-Accent-Test/dataset_info.json @@ -108,6 +108,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_0.wav b/examples/AR/VoxCeleb-Accent-Test/sample_0.wav index ae8061522976216887910263ba9414a7e60685eb..430d0d31ed6af0e3d219b84402902587aacfd716 100644 Binary files a/examples/AR/VoxCeleb-Accent-Test/sample_0.wav and b/examples/AR/VoxCeleb-Accent-Test/sample_0.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_1.wav b/examples/AR/VoxCeleb-Accent-Test/sample_1.wav index 209cf78dcde2791b463e32f9a7245514655790aa..c13b7c8d4d9189f4a6ce7b106e9c5419f0e06352 100644 Binary files a/examples/AR/VoxCeleb-Accent-Test/sample_1.wav and b/examples/AR/VoxCeleb-Accent-Test/sample_1.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_2.wav b/examples/AR/VoxCeleb-Accent-Test/sample_2.wav index 0d44634575f30bfe9c2fa2c2765ae34c192c9c90..cb33b1aabb60d6150a60e2f3c296c184bba786ac 100644 Binary files a/examples/AR/VoxCeleb-Accent-Test/sample_2.wav and b/examples/AR/VoxCeleb-Accent-Test/sample_2.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/state.json b/examples/AR/VoxCeleb-Accent-Test/state.json index 5354f99a49cf31f0949fd2359846d46859efd5ff..1a799ec15938c1058eb0d2806c8d2125c6d64cd6 100644 --- a/examples/AR/VoxCeleb-Accent-Test/state.json +++ b/examples/AR/VoxCeleb-Accent-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "fa91a59f90c22c3c", + "_fingerprint": "8e8e0515e988a016", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow b/examples/ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow index 4560d6338d0bda974170bff980b354867e681e90..3c64ebed9243ab13ed4112031125c9ca02ace6a2 100644 --- a/examples/ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow +++ b/examples/ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f9a561ce9aed8ba4c02f86c90883029e1bb566e2b66986b17874f3bb5884d67d -size 489552 +oid sha256:63619af0016e2250bd7196654e0e3ce15a5b4d4d524829e2b0eb23f5de67fce2 +size 734592 diff --git a/examples/ASR/Common-Voice-15-En-Test/dataset_info.json b/examples/ASR/Common-Voice-15-En-Test/dataset_info.json index 08f8bd6abcb7df02ab18d592990cc082baa8bfa3..532307de6238db4f6c7e9d7084dc690a975d8920 100644 --- a/examples/ASR/Common-Voice-15-En-Test/dataset_info.json +++ b/examples/ASR/Common-Voice-15-En-Test/dataset_info.json @@ -128,6 +128,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_0.wav b/examples/ASR/Common-Voice-15-En-Test/sample_0.wav index 42753b756f05c733803356b486de2df1b1224de4..d1259db1843cdd79bc9a3bb4778067fd209a65f3 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_0.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_0.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_1.wav b/examples/ASR/Common-Voice-15-En-Test/sample_1.wav index 643e1d9e9e461c2465856a18fbf89bb27f577a18..2b2a7d92a7ec2749ba9ef870edc34c5b5fc99ed0 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_1.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_1.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_2.wav b/examples/ASR/Common-Voice-15-En-Test/sample_2.wav index eb0894ce127ebe7c2fadb5b11feea3e5b0ace14f..7f5ceed8701b8ba50e59431c5e9f7b95ed7c1727 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_2.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_2.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/state.json b/examples/ASR/Common-Voice-15-En-Test/state.json index 1ff74abf94d1cbf9804c3911eac7edf199fb36a2..22446de8f160a7d55b4ac6835a39fd0c0fffb62c 100644 --- a/examples/ASR/Common-Voice-15-En-Test/state.json +++ b/examples/ASR/Common-Voice-15-En-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "468db91ad949e4d4", + "_fingerprint": "23bec5037b5ce6a4", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ASR/Earnings21-Test/data-00000-of-00001.arrow b/examples/ASR/Earnings21-Test/data-00000-of-00001.arrow index 543eb95281d82eea0d8930770f2a47b8bd52340c..9c10b6cdcd155433a916ee1ad952e6a4e5a16bb9 100644 --- a/examples/ASR/Earnings21-Test/data-00000-of-00001.arrow +++ b/examples/ASR/Earnings21-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5fbca261ae8ac8ccc96993ea11f17836bdcfef1070835784f159b79990a5a298 -size 429108160 +oid sha256:bc0f66d55bb0faeadc7193c43aa71ea56431335340f30303e2cb25c9b2a79744 +size 367003808 diff --git a/examples/ASR/Earnings21-Test/dataset_info.json b/examples/ASR/Earnings21-Test/dataset_info.json index 1bf27e3aaa89f2fa43812252ac2377fab8ae1708..83b8d9f868f8dda4f2f055e065c0e23ebbdc4734 100644 --- a/examples/ASR/Earnings21-Test/dataset_info.json +++ b/examples/ASR/Earnings21-Test/dataset_info.json @@ -92,6 +92,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ASR/Earnings21-Test/sample_0.wav b/examples/ASR/Earnings21-Test/sample_0.wav index 877a0f85e77ae3ccb28f14601c2e8765e945c6d8..39ae668314a511d89d654992908b84760b40efd2 100644 --- a/examples/ASR/Earnings21-Test/sample_0.wav +++ b/examples/ASR/Earnings21-Test/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8fb994964e1b0df9f4675ceaa73d55da55a096f5b94d002d9f7b07c997fc83e -size 97593644 +oid sha256:8276aa4b4bd55749772152b8c7adf6fe558ad8e4f26624504612d72c3760fffa +size 81919596 diff --git a/examples/ASR/Earnings21-Test/sample_1.wav b/examples/ASR/Earnings21-Test/sample_1.wav index e20907a027c9da25fc6544ec87f07185b23c3567..c8ca1696c117ec01d3a742ca6ea5a48042288b55 100644 --- a/examples/ASR/Earnings21-Test/sample_1.wav +++ b/examples/ASR/Earnings21-Test/sample_1.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd6ba77731011a6dc02e5854a600a2036713be4c2d71abf63fd6a89b86083c4f -size 178791280 +oid sha256:8a57a2bcbf2a7df4da219a0233b66326e1e5225bc6719f596775a01fe2a87ba8 +size 138517676 diff --git a/examples/ASR/Earnings21-Test/sample_2.wav b/examples/ASR/Earnings21-Test/sample_2.wav index 0924db55e5560d30768fc9fa8a6e5931f475a642..61397a47ae54eb428c7702d53649005438bae9bd 100644 --- a/examples/ASR/Earnings21-Test/sample_2.wav +++ b/examples/ASR/Earnings21-Test/sample_2.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1a1d15425069b003730e79f0df467103f4ac7670f87a0539a97c82973a02943e -size 150700076 +oid sha256:c1b4a765dcbb96ca99ddbc01224a4f36d66bad376beb7e04f7a8a4964c02a46a +size 144361004 diff --git a/examples/ASR/Earnings21-Test/state.json b/examples/ASR/Earnings21-Test/state.json index ce1987be7235db123fcdddfe4a75272abf7513d4..cf8c9f552b4bb777dc628a2cf84b39eb0a86a964 100644 --- a/examples/ASR/Earnings21-Test/state.json +++ b/examples/ASR/Earnings21-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "8cc0ad99446f1aba", + "_fingerprint": "6d8e11ac5a63a2d2", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ASR/Earnings22-Test/data-00000-of-00001.arrow b/examples/ASR/Earnings22-Test/data-00000-of-00001.arrow index 803b349472ac02701fad19e91bdc02942939196d..50e2b79aa719907dcf6d81b810c16503f59bd2c0 100644 --- a/examples/ASR/Earnings22-Test/data-00000-of-00001.arrow +++ b/examples/ASR/Earnings22-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:497dd6d287df9a8be5194b7875ae88f275127986d3fc538601382e80244bbb7b -size 332277848 +oid sha256:46d31942482d6db16028f7690fc0bfe00321da1b00fe6357040bf392d9ecd2f1 +size 333814624 diff --git a/examples/ASR/Earnings22-Test/sample_0.wav b/examples/ASR/Earnings22-Test/sample_0.wav index c31c73e0e078edb570b30c9ddcd8679e40ac62dd..e630bf409bf71fe86b572d2681802566a0c26d39 100644 --- a/examples/ASR/Earnings22-Test/sample_0.wav +++ b/examples/ASR/Earnings22-Test/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6530f950b07b4747ca26fa70586ec563cf20c222cab6e53fc2e94e28f7a3d867 -size 167237960 +oid sha256:d17808f5da0d8a7ecefeaa914e5be18186dd1735523c9a4a6e1738bf3cfc77e5 +size 152305196 diff --git a/examples/ASR/Earnings22-Test/sample_1.wav b/examples/ASR/Earnings22-Test/sample_1.wav index e7fc4350e24534cdda9135085b77b08356adf39b..5a74e84d2d502a40842368c1b8120c1b64126eac 100644 --- a/examples/ASR/Earnings22-Test/sample_1.wav +++ b/examples/ASR/Earnings22-Test/sample_1.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7c4034503f1974ecfdbd050df7e75d97cbae4933ea62af829eefe6e17295c6f4 -size 38863630 +oid sha256:ec1b0aada93420d121572445d6e300df9037249dba2df85a81f16533759b29eb +size 85850534 diff --git a/examples/ASR/Earnings22-Test/sample_2.wav b/examples/ASR/Earnings22-Test/sample_2.wav index 12c42f881e73d8b6f1905855b844c1acc816553f..721687e4d2ac3d2062818facf1256ee9acdcb280 100644 --- a/examples/ASR/Earnings22-Test/sample_2.wav +++ b/examples/ASR/Earnings22-Test/sample_2.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba099990e90f3db2901b585ce601b71960ab54ef76f5bf143a74d816ea615f07 -size 124561076 +oid sha256:e5022646a7ea99298c8b02f6589c9d3943cf3649e337ad4a1579e40a7dcd3b33 +size 94064684 diff --git a/examples/ASR/Earnings22-Test/state.json b/examples/ASR/Earnings22-Test/state.json index ac26bdba683e345445fdeb81908f03626f2ced73..ae076d2547c5302d7bfe3408a69af25738b0730e 100644 --- a/examples/ASR/Earnings22-Test/state.json +++ b/examples/ASR/Earnings22-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "331c061bce6e651c", + "_fingerprint": "f71b90ac0caefff8", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/GigaSpeech-Test/data-00000-of-00001.arrow b/examples/ASR/GigaSpeech-Test/data-00000-of-00001.arrow index 9f584469430bb4f4aecca47ac52ea723be454e83..0ed825009d02f0dc8e0ba110a51fb0038a27ffc8 100644 --- a/examples/ASR/GigaSpeech-Test/data-00000-of-00001.arrow +++ b/examples/ASR/GigaSpeech-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e46b3463f6df894fa055f08fa9ea1f44b4cea43a1be43af2a2bd8c5e8fee3ec5 -size 573048 +oid sha256:d24f90d8303622d11e227e0149a726a9a06dcad84ff5deb89cb20ff2289482b0 +size 670440 diff --git a/examples/ASR/GigaSpeech-Test/dataset_info.json b/examples/ASR/GigaSpeech-Test/dataset_info.json index 1dd0025578e934e74b979da9e81789eedd9a2f29..4a71f4f719892bbe1f5c71332420787fe5dc92fe 100644 --- a/examples/ASR/GigaSpeech-Test/dataset_info.json +++ b/examples/ASR/GigaSpeech-Test/dataset_info.json @@ -120,6 +120,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ASR/GigaSpeech-Test/sample_0.wav b/examples/ASR/GigaSpeech-Test/sample_0.wav index 0d232b1996ee07bc47f24d06fe8b860ee1b63b11..f08bbdf0a0af40f77b8c73c44976c63a78d2fc39 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_0.wav and b/examples/ASR/GigaSpeech-Test/sample_0.wav differ diff --git a/examples/ASR/GigaSpeech-Test/sample_1.wav b/examples/ASR/GigaSpeech-Test/sample_1.wav index ee5fced84fd0691c2b4b288be9a5ad73ac67bc6e..62d149d28202bbf67bc52cf39c3c87da9e934a4c 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_1.wav and b/examples/ASR/GigaSpeech-Test/sample_1.wav differ diff --git a/examples/ASR/GigaSpeech-Test/sample_2.wav b/examples/ASR/GigaSpeech-Test/sample_2.wav index 2a9edb2ea3e8a96c7ab70b232249205e765f02d0..3ce3d6a5694f07d04ab17fde1e29f308312e5517 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_2.wav and b/examples/ASR/GigaSpeech-Test/sample_2.wav differ diff --git a/examples/ASR/GigaSpeech-Test/state.json b/examples/ASR/GigaSpeech-Test/state.json index 8bd5fd3d45201fc6807cb2364c48d5ba722bb4bf..8fca1d869d122159c4afa7b0214a240abe2f382d 100644 --- a/examples/ASR/GigaSpeech-Test/state.json +++ b/examples/ASR/GigaSpeech-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "68d371cc267ff1d2", + "_fingerprint": "67d1ab1b99556a9f", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ASR/IMDA-Part1-ASR-Test/data-00000-of-00001.arrow b/examples/ASR/IMDA-Part1-ASR-Test/data-00000-of-00001.arrow index d290d6e2f73409b2f0ba18473a22149cf17268a6..343ae9ac8e6b6002920c3e4731c14c2d7a22ab77 100644 --- a/examples/ASR/IMDA-Part1-ASR-Test/data-00000-of-00001.arrow +++ b/examples/ASR/IMDA-Part1-ASR-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd9b1d6729d5ef43f4d811def9a4796bfb383083727e3c4a0c79303089afb6ed -size 423624 +oid sha256:9b70a131a1075b0729e4e29e03bc3ce0d3406a84cc1e7b3f1329891149687cfb +size 445576 diff --git a/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json b/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json index af0260da818c604e190e3b198aebef1a391419ec..ede02d74b595679e2a6f3b2256ab4c69e535f09e 100644 --- a/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json +++ b/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json @@ -85,6 +85,114 @@ "_type": "Value" } } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } } }, "homepage": "", diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav index 2d696049b68e6f64944055f3bf5b7a01c8bcdad6..37142cc04ab5d70e5c1a3f00c48c5555c1b44b31 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav index 6f926f6a97132a8df675f05d2f14f1dec232a704..d66fd5602e0d455844807ef1f2a176aaaee1610b 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav index f03f65739284f1757c34e88313998578f6dd67d5..dc6701f3abb29f0fc5957697385ecc4a7d6c740b 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/state.json b/examples/ASR/IMDA-Part1-ASR-Test/state.json index f1cdd75238904ec8645cf1ef5a1a7e20981862a2..7c287183cce740341697c42ff3ca2cb4344caddf 100644 --- a/examples/ASR/IMDA-Part1-ASR-Test/state.json +++ b/examples/ASR/IMDA-Part1-ASR-Test/state.json @@ -4,12 +4,18 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "9c67b4f2f347692a", + "_fingerprint": "6de71e0f4c76af43", "_format_columns": [ "context", "instruction", "answer", - "other_attributes" + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/ASR/IMDA-Part2-ASR-Test/data-00000-of-00001.arrow b/examples/ASR/IMDA-Part2-ASR-Test/data-00000-of-00001.arrow index 896c4fc75e607933d5155b06f9d4c67812c71847..8b1ccb41fde7b391d58735d4e315bf9a51d3835a 100644 --- a/examples/ASR/IMDA-Part2-ASR-Test/data-00000-of-00001.arrow +++ b/examples/ASR/IMDA-Part2-ASR-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6203852fc63ee5b00dd73f6857d2a22a3f8a8dc3b87917bb99d1cef8448150a5 -size 438120 +oid sha256:ff2dab78b2f99f296e7164af2595d76e3d055c156549b4e56d4de3dd24e56c90 +size 316456 diff --git a/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json b/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json index af0260da818c604e190e3b198aebef1a391419ec..ede02d74b595679e2a6f3b2256ab4c69e535f09e 100644 --- a/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json +++ b/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json @@ -85,6 +85,114 @@ "_type": "Value" } } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } } }, "homepage": "", diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav index d79fde19cec7b066b9f3f546d6ff6366dfa4daef..e49eb3e417896071b646badc40adc4b92f6b99da 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav index 9fac54a02b8b9ad1ee6502ef41ef1d50a7213de5..56cd23b4ba9c56fd053ae80c292ea8aa93e92293 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav index d866ad7a1ddfc3541ce3adbecf00f688489cdd3f..e1d594ef2f027e10f0bc02ffb197a5129fc3fbe7 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/state.json b/examples/ASR/IMDA-Part2-ASR-Test/state.json index 3c6b051944e6842913bce863ca9c239da3f8e0be..9e63a4d63533a0a5fd894966479ccc22f2528377 100644 --- a/examples/ASR/IMDA-Part2-ASR-Test/state.json +++ b/examples/ASR/IMDA-Part2-ASR-Test/state.json @@ -4,12 +4,18 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "9f3d440792a605d2", + "_fingerprint": "58564e4bc21961b9", "_format_columns": [ "context", "instruction", "answer", - "other_attributes" + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/ASR/LibriSpeech-Test-Clean/data-00000-of-00001.arrow b/examples/ASR/LibriSpeech-Test-Clean/data-00000-of-00001.arrow index ba9c5364de7c43d55140acfff92587f682f7c260..eaf91b2e98a654896139a7f3429aa7d63a6052ad 100644 --- a/examples/ASR/LibriSpeech-Test-Clean/data-00000-of-00001.arrow +++ b/examples/ASR/LibriSpeech-Test-Clean/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e050bcd936107ea58a083a64fc36c85afc55b1f570d8285e5eb93b4244ca3702 -size 491728 +oid sha256:6d8400ec1502f226dd9f82fac53b5f3d8360fffaf859b3b56fb854157ac75580 +size 638008 diff --git a/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json b/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json index 5a91f71cfb2044e6060c8f395ee4b798384d32d8..43a7d2a02a5e9b58fc92641d1fa33f66bbb3ffb8 100644 --- a/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json +++ b/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json @@ -104,6 +104,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav index 051a14c05f7c270da3d842024c5936075cb5c2e6..07a99e97bf2b54b1c4028cf9280ec6cae995adb8 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav index 30816d1c205dd136109c6abfca19abf249813c68..9bc645b1dd32cc7e4be13999d3bb3190d6559376 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav index a228ce116181b6b19b741cd9ffc2e1853704adc5..b6d879dd4e7dc07638f71ca0808f7c9395efb420 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/state.json b/examples/ASR/LibriSpeech-Test-Clean/state.json index dda0f24f40bffbb5dbb1f236bc44f6e715655e67..3f21062dca2ac77c037dd729833f9e181bcffd92 100644 --- a/examples/ASR/LibriSpeech-Test-Clean/state.json +++ b/examples/ASR/LibriSpeech-Test-Clean/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "d9f5d173c305ae96", + "_fingerprint": "5f41ed9e62814ad1", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ASR/LibriSpeech-Test-Other/data-00000-of-00001.arrow b/examples/ASR/LibriSpeech-Test-Other/data-00000-of-00001.arrow index f962e9f689dbb890c217156e0c6cbaddb2780490..41472f0ea104954c43203897429ad80fdcccc688 100644 --- a/examples/ASR/LibriSpeech-Test-Other/data-00000-of-00001.arrow +++ b/examples/ASR/LibriSpeech-Test-Other/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2375da90470a7502211721ccf088ed76808ef698133028c7a74e27606d6f3949 -size 1025488 +oid sha256:3c5fede20b21cb95627071cce4e51da3efd9c695fbf605db431da96b220ed081 +size 490816 diff --git a/examples/ASR/LibriSpeech-Test-Other/dataset_info.json b/examples/ASR/LibriSpeech-Test-Other/dataset_info.json index 5a91f71cfb2044e6060c8f395ee4b798384d32d8..43a7d2a02a5e9b58fc92641d1fa33f66bbb3ffb8 100644 --- a/examples/ASR/LibriSpeech-Test-Other/dataset_info.json +++ b/examples/ASR/LibriSpeech-Test-Other/dataset_info.json @@ -104,6 +104,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_0.wav b/examples/ASR/LibriSpeech-Test-Other/sample_0.wav index c274d02bd3392db09ea1a95bb050a248627b91cc..f0d20ff28c7910013946cc22d27bc14b642a397c 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_0.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_0.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_1.wav b/examples/ASR/LibriSpeech-Test-Other/sample_1.wav index eea20cd08457ea3ae2d55e91c7240602ae30436f..6faf2b05a1da557f1039edf9b67f714ab51c4bed 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_1.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_1.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_2.wav b/examples/ASR/LibriSpeech-Test-Other/sample_2.wav index b7cc7a7ab0cb2cb02a5253dcecede16c688acf86..20fb0bcad6556cfca16b1bf5466d1755061c631a 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_2.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_2.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/state.json b/examples/ASR/LibriSpeech-Test-Other/state.json index 952838bee518f052b1de767f99c58c1282b17596..bd657759ba4398444807dfcc3134ccd9d1bb79f8 100644 --- a/examples/ASR/LibriSpeech-Test-Other/state.json +++ b/examples/ASR/LibriSpeech-Test-Other/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "8bd0648dc412be04", + "_fingerprint": "cdae4114b2fdba28", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ASR/Peoples-Speech-Test/data-00000-of-00001.arrow b/examples/ASR/Peoples-Speech-Test/data-00000-of-00001.arrow index 900558873c787e1b6e8cecf5934a8b61fa5237e4..a15c777f5e033628e063edf6af6ccf6c171869bc 100644 --- a/examples/ASR/Peoples-Speech-Test/data-00000-of-00001.arrow +++ b/examples/ASR/Peoples-Speech-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4299dbcf7bfc7b223f4a653fda3176a8d3e24c09581a41a806b86fc360527bb4 -size 439504 +oid sha256:7380189d307b0bfd90a24bc66224cb1d7fef1a2561670b907e78004dd59e9675 +size 946072 diff --git a/examples/ASR/Peoples-Speech-Test/dataset_info.json b/examples/ASR/Peoples-Speech-Test/dataset_info.json index a8d496de62c861269b2a5c8bf9826fedc8abf807..726c69526174b236cc38180a6b27905c9b182714 100644 --- a/examples/ASR/Peoples-Speech-Test/dataset_info.json +++ b/examples/ASR/Peoples-Speech-Test/dataset_info.json @@ -96,6 +96,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ASR/Peoples-Speech-Test/sample_0.wav b/examples/ASR/Peoples-Speech-Test/sample_0.wav index ff25cea2d23ec61f9199873fc67227221c1bebca..272fccbc0970764c68d7927c25d39ce307b484c1 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_0.wav and b/examples/ASR/Peoples-Speech-Test/sample_0.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/sample_1.wav b/examples/ASR/Peoples-Speech-Test/sample_1.wav index 4e7ddfc1977129dfb02d6c4424b362bcd60c1a15..99f1c35821539e7cf65c0ece46d29ac8f137fbd7 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_1.wav and b/examples/ASR/Peoples-Speech-Test/sample_1.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/sample_2.wav b/examples/ASR/Peoples-Speech-Test/sample_2.wav index 9f590a810351e28a60b5da87f6821f06f8916790..d9572909c9ffdc4d725b589d351ade4b0aa2998b 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_2.wav and b/examples/ASR/Peoples-Speech-Test/sample_2.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/state.json b/examples/ASR/Peoples-Speech-Test/state.json index 60db649d02fbab6497da719d1c536be91f9bda39..61144ef80bc11b38566cc904feca7995f1ab162c 100644 --- a/examples/ASR/Peoples-Speech-Test/state.json +++ b/examples/ASR/Peoples-Speech-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "de704174c1b2e1ea", + "_fingerprint": "a31e8115b04802d9", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ASR/Tedlium3-Long-form-Test/data-00000-of-00001.arrow b/examples/ASR/Tedlium3-Long-form-Test/data-00000-of-00001.arrow index 9ab6d2f53dcf630417c03a739060cdb58f7afdb4..663557dcadf2e292e0aef28df0003fd1c4aa6430 100644 --- a/examples/ASR/Tedlium3-Long-form-Test/data-00000-of-00001.arrow +++ b/examples/ASR/Tedlium3-Long-form-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c292f812d20458d9589f8a14933e7b031b1b35e5a496449f4913c3af6d066bc8 -size 98923056 +oid sha256:7fae0d1b2077588d2333f81d62d8b4aff7ddbdd84c2770d068c4274b4932b1bf +size 94387448 diff --git a/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json b/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json index 55009f0312ac6d6605288017abbf50e0bafefdc3..10047f79d2f1a1b8a3f0185fa89ecb0170a4d82c 100644 --- a/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json +++ b/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json @@ -49,6 +49,132 @@ "dtype": "string", "_type": "Value" } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } } }, "homepage": "", diff --git a/examples/ASR/Tedlium3-Long-form-Test/sample_0.wav b/examples/ASR/Tedlium3-Long-form-Test/sample_0.wav index 41dbac609aefb991db732192675b67164a3106d5..2eff19f1f7ce5c124d20e874f244975410f7113c 100644 --- a/examples/ASR/Tedlium3-Long-form-Test/sample_0.wav +++ b/examples/ASR/Tedlium3-Long-form-Test/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ac85b08c75fad06d968942b517e53495961ff861c6e794b576ecce3b406bcbf8 -size 51095404 +oid sha256:32c1ab4c6363f3db0f5fca8bdc5f2eb780522a08da3cd00bf6a4b1ea7e3cc0f8 +size 37362604 diff --git a/examples/ASR/Tedlium3-Long-form-Test/sample_1.wav b/examples/ASR/Tedlium3-Long-form-Test/sample_1.wav index 17eccb9b542ec47bf29066074f7435f6b06f42cd..a218d274caefb6e72dcdf1ef14639eca3d92c54c 100644 --- a/examples/ASR/Tedlium3-Long-form-Test/sample_1.wav +++ b/examples/ASR/Tedlium3-Long-form-Test/sample_1.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0361b7dc4a9437a698a1d28fb3e73718810a6d685f7932f2a04997ad0475b182 -size 36730348 +oid sha256:bb2d7db63f67644b5a8883727929c9f2e96631c45900321734aa020589ade4c5 +size 29474026 diff --git a/examples/ASR/Tedlium3-Long-form-Test/sample_2.wav b/examples/ASR/Tedlium3-Long-form-Test/sample_2.wav index 7aaa0adc7dcdd7bc19c70c6295b943e8be3931f1..8132b0bb9c72b9c26882c19cdb542c31580e3271 100644 --- a/examples/ASR/Tedlium3-Long-form-Test/sample_2.wav +++ b/examples/ASR/Tedlium3-Long-form-Test/sample_2.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b8cb8f81de8eecb106d072fb26bd87e749fd9fcd28e5cc06949ddb2c542eff3c -size 11046766 +oid sha256:2e4219fea24ab3ffdc256b7f6c5f0d0ce8dcd9eb0f80cf15a9d1b3afa6b6e279 +size 26867124 diff --git a/examples/ASR/Tedlium3-Long-form-Test/state.json b/examples/ASR/Tedlium3-Long-form-Test/state.json index 3a56b9cee5a756794d32d1b5f03c01a501c41606..802648bca0c3237cf834eb90ae139e55941d4e34 100644 --- a/examples/ASR/Tedlium3-Long-form-Test/state.json +++ b/examples/ASR/Tedlium3-Long-form-Test/state.json @@ -4,12 +4,19 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "f95b9bf4e3dea7c1", + "_fingerprint": "58eff5b352a6c4af", "_format_columns": [ "context", "instruction", "answer", - "other_attributes" + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/ASR/Tedlium3-Test/data-00000-of-00001.arrow b/examples/ASR/Tedlium3-Test/data-00000-of-00001.arrow index 043a6aa5f5647a465c24325fac5bd8ab7056a64c..37faafbdb7e866e84dc5ab62fc6e79e51ee85ae1 100644 --- a/examples/ASR/Tedlium3-Test/data-00000-of-00001.arrow +++ b/examples/ASR/Tedlium3-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:335b82cbd0d97571df4e6b52cb0d3b81f1410f019212964143d4a6c20333f3cc -size 907976 +oid sha256:85722f6b2cedad0e78d58b44bae464bad6828555dd239e13758b4002dbc47277 +size 779144 diff --git a/examples/ASR/Tedlium3-Test/dataset_info.json b/examples/ASR/Tedlium3-Test/dataset_info.json index 55009f0312ac6d6605288017abbf50e0bafefdc3..10047f79d2f1a1b8a3f0185fa89ecb0170a4d82c 100644 --- a/examples/ASR/Tedlium3-Test/dataset_info.json +++ b/examples/ASR/Tedlium3-Test/dataset_info.json @@ -49,6 +49,132 @@ "dtype": "string", "_type": "Value" } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } } }, "homepage": "", diff --git a/examples/ASR/Tedlium3-Test/sample_0.wav b/examples/ASR/Tedlium3-Test/sample_0.wav index a07fc005b1f77a01b066c0ef962b04e634f4c356..b8aea73a6c3619a9b9044110a1f8a6d98613724a 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_0.wav and b/examples/ASR/Tedlium3-Test/sample_0.wav differ diff --git a/examples/ASR/Tedlium3-Test/sample_1.wav b/examples/ASR/Tedlium3-Test/sample_1.wav index f864baa998ef015b529fc69d8bccca6f284233f1..5d0764a8f20943f4bb99690206c17dfff7985307 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_1.wav and b/examples/ASR/Tedlium3-Test/sample_1.wav differ diff --git a/examples/ASR/Tedlium3-Test/sample_2.wav b/examples/ASR/Tedlium3-Test/sample_2.wav index 66c6482678614fbd0c658553c4c727a50d80c57a..1a4418fb82d58a63b8d6658a096b619913bb614d 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_2.wav and b/examples/ASR/Tedlium3-Test/sample_2.wav differ diff --git a/examples/ASR/Tedlium3-Test/state.json b/examples/ASR/Tedlium3-Test/state.json index ffb37e795661eaf0f656a4272372d0919a492fe0..05687cd786f8e6e1ceb43737d38165b6602b7dc9 100644 --- a/examples/ASR/Tedlium3-Test/state.json +++ b/examples/ASR/Tedlium3-Test/state.json @@ -4,12 +4,19 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "fb20b90d5641df89", + "_fingerprint": "564760102352a6d3", "_format_columns": [ "context", "instruction", "answer", - "other_attributes" + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/data-00000-of-00001.arrow b/examples/CNASR/Aishell-ASR-ZH-Test/data-00000-of-00001.arrow index e3c04ad74452482acd4a29e1a7407a5fc23cbf57..759ddab5ac1222d92dda592d8dc030cd78b47d73 100644 --- a/examples/CNASR/Aishell-ASR-ZH-Test/data-00000-of-00001.arrow +++ b/examples/CNASR/Aishell-ASR-ZH-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:66386a59d99f85838ad5d64c7f7b3108c15a22782f61783ec2debb208d7a2f8d -size 569936 +oid sha256:4da460a2c54c584f866c17ff3c2922180da4674e31cd0ff2381b1416ccea8672 +size 458328 diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json b/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json index 051243e1e6b3046a83599b80eb901679ff2608d8..c42d75c27c529687386dbb50124a3b199bd3b176 100644 --- a/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json +++ b/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json @@ -102,6 +102,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "mowe_audio": { "answer": { "dtype": "string", diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav index a69d64c8284caa8ca7ef3f5ecaf6ebc0519020ef..0631f9745cbc39d30899c534252b902cb0c33ba0 100644 Binary files a/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav index a149a1ab68e19b0029225518d217608c573d37e2..23da00fdc805d70ec90066b602512d110280554d 100644 Binary files a/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav index b2816f1cb90418b611227d1e6d044e886712b426..7d4f68158ad3c22144160b44d2b94dd2489a4f4d 100644 Binary files a/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/state.json b/examples/CNASR/Aishell-ASR-ZH-Test/state.json index 4495f512dec2e1549747a3cd9e31137e0afb8081..72520f06a2dfd1a4c01b59451d2c6ebb5cbf7868 100644 --- a/examples/CNASR/Aishell-ASR-ZH-Test/state.json +++ b/examples/CNASR/Aishell-ASR-ZH-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "f9833c929864587b", + "_fingerprint": "c55bbfbc80134880", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "mowe_audio", "qwen_audio_chat" ], diff --git a/examples/ER/IEMOCAP-Emotion-Test/data-00000-of-00001.arrow b/examples/ER/IEMOCAP-Emotion-Test/data-00000-of-00001.arrow index c2bcf1e6ce6f0a0047dd85cce0bbf11e0184b823..1830ee319657fddd5b9dc6afb1cb56ca5aed08a7 100644 --- a/examples/ER/IEMOCAP-Emotion-Test/data-00000-of-00001.arrow +++ b/examples/ER/IEMOCAP-Emotion-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2d440f3076e388bbeeec29027bd4549d005f3a8bbb342bdc9a77802c79f01cf -size 838160 +oid sha256:f1c10a21b55a9a758bd1ff39ac8cf3833285016c6ff6c171296ac2a981bde2dd +size 549984 diff --git a/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json b/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json index ae1585a301a57eef40e3c39259d56a7e70e2be43..ec9982ce23f02e4a30ed69b54c9da27522206ad7 100644 --- a/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json +++ b/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json @@ -108,6 +108,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav index 3aea288a199cf828777f07051ce17bb65dd122b9..69f31212a1bf5d0220e889032a34835cc4f8414b 100644 Binary files a/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav and b/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav index f20cf5efc4a86d62d733d80fc2cde556ea107245..f8c1d3734b3687c8a2205aad61368bffa54cadf2 100644 Binary files a/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav and b/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav index 6085d420403bb54190cde8d1cffef75b35f2fa88..12e93e96005e4c422ed6f789bf1ac0273e6fc483 100644 Binary files a/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav and b/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/state.json b/examples/ER/IEMOCAP-Emotion-Test/state.json index f9e210ccb738232f4a7ce004649cc0811b5622e7..47aac6246491fd600c3ee9f77d9ece88074be1e1 100644 --- a/examples/ER/IEMOCAP-Emotion-Test/state.json +++ b/examples/ER/IEMOCAP-Emotion-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "78bf80b897adbddb", + "_fingerprint": "e76359f2b84e8913", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ER/MELD-Emotion-Test/data-00000-of-00001.arrow b/examples/ER/MELD-Emotion-Test/data-00000-of-00001.arrow index 5c264b825c805731d035e34124efed546055316f..696725cbba34fa35029967897648879fd58c05b5 100644 --- a/examples/ER/MELD-Emotion-Test/data-00000-of-00001.arrow +++ b/examples/ER/MELD-Emotion-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0fe36d4394fb74d9d92909224e84e0de896cabb5fc94c878b23c3380594a65bb -size 352408 +oid sha256:f90c18d98e82805348d2c55e1c957e9ec94cfe977f0193322cf04b1eb090a818 +size 435464 diff --git a/examples/ER/MELD-Emotion-Test/dataset_info.json b/examples/ER/MELD-Emotion-Test/dataset_info.json index 506cf34c0d7158134159a3234f9f98b8e6b74f28..4452de7db0b48a7e233a40373a253c8d4cca6984 100644 --- a/examples/ER/MELD-Emotion-Test/dataset_info.json +++ b/examples/ER/MELD-Emotion-Test/dataset_info.json @@ -124,6 +124,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ER/MELD-Emotion-Test/sample_0.wav b/examples/ER/MELD-Emotion-Test/sample_0.wav index 819fccfa77653af1d839db36a4d89d6c5073676d..ae16f804b90a7eeabb027d788c1b7e291a50405f 100644 Binary files a/examples/ER/MELD-Emotion-Test/sample_0.wav and b/examples/ER/MELD-Emotion-Test/sample_0.wav differ diff --git a/examples/ER/MELD-Emotion-Test/sample_1.wav b/examples/ER/MELD-Emotion-Test/sample_1.wav index a21acc5a4a0831f75c28e76a93e0339f98a5dab9..621748fea89f2ae2cb00ccf4c5bc60722757966c 100644 Binary files a/examples/ER/MELD-Emotion-Test/sample_1.wav and b/examples/ER/MELD-Emotion-Test/sample_1.wav differ diff --git a/examples/ER/MELD-Emotion-Test/sample_2.wav b/examples/ER/MELD-Emotion-Test/sample_2.wav index 98c1a4ec43768374663eb5cae1305480110d87e8..389dfb97dbb804a09de0bf8f007f59be149eb2c8 100644 Binary files a/examples/ER/MELD-Emotion-Test/sample_2.wav and b/examples/ER/MELD-Emotion-Test/sample_2.wav differ diff --git a/examples/ER/MELD-Emotion-Test/state.json b/examples/ER/MELD-Emotion-Test/state.json index 9c219f8e85ce34e78d9b81244ed1bd84435ebb4c..176649fbdd530cb7834be5416a7c4810f251679a 100644 --- a/examples/ER/MELD-Emotion-Test/state.json +++ b/examples/ER/MELD-Emotion-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "a5a596edab97a213", + "_fingerprint": "af1e1756291ebf0e", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ER/MELD-Sentiment-Test/data-00000-of-00001.arrow b/examples/ER/MELD-Sentiment-Test/data-00000-of-00001.arrow index 0e0726e555e52b2e486741a8cf021542e7c2dd11..d61f1e0e2139e636ce349dba968fb539be883d1e 100644 --- a/examples/ER/MELD-Sentiment-Test/data-00000-of-00001.arrow +++ b/examples/ER/MELD-Sentiment-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2388eb71f8718170fe81707866ea2fb68a654153cb6c8a4f503de2e4ef13804d -size 232392 +oid sha256:92fb7fe8450dd0779a51b115355d40fc9e827e843be92f791e95a189c956f988 +size 205056 diff --git a/examples/ER/MELD-Sentiment-Test/dataset_info.json b/examples/ER/MELD-Sentiment-Test/dataset_info.json index 506cf34c0d7158134159a3234f9f98b8e6b74f28..4452de7db0b48a7e233a40373a253c8d4cca6984 100644 --- a/examples/ER/MELD-Sentiment-Test/dataset_info.json +++ b/examples/ER/MELD-Sentiment-Test/dataset_info.json @@ -124,6 +124,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ER/MELD-Sentiment-Test/sample_0.wav b/examples/ER/MELD-Sentiment-Test/sample_0.wav index 025684f18597120ad16569fb77cc1700b855f6c9..34c3f4a45a3e8ed05f716b36d0770ec367449f4d 100644 Binary files a/examples/ER/MELD-Sentiment-Test/sample_0.wav and b/examples/ER/MELD-Sentiment-Test/sample_0.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/sample_1.wav b/examples/ER/MELD-Sentiment-Test/sample_1.wav index 8d64fff1bab98e316eec6c61842e48f7b8f333c4..aa2cd17de168f8dc8c16081cfb5cf2567c7a8701 100644 Binary files a/examples/ER/MELD-Sentiment-Test/sample_1.wav and b/examples/ER/MELD-Sentiment-Test/sample_1.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/sample_2.wav b/examples/ER/MELD-Sentiment-Test/sample_2.wav index f639c22e1b1e24a5d6db354da2e8cf424cdb45be..99a6b96db3614af9fc6b3fc25800ceed3df78bb3 100644 Binary files a/examples/ER/MELD-Sentiment-Test/sample_2.wav and b/examples/ER/MELD-Sentiment-Test/sample_2.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/state.json b/examples/ER/MELD-Sentiment-Test/state.json index 917ccc849fc69d74df055b821dd46d31b29e2e8d..803fbcc33947edac767d6061118da9cb6318c792 100644 --- a/examples/ER/MELD-Sentiment-Test/state.json +++ b/examples/ER/MELD-Sentiment-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "a052e830551840d2", + "_fingerprint": "7785c7413a306461", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/GR/IEMOCAP-Gender-Test/data-00000-of-00001.arrow b/examples/GR/IEMOCAP-Gender-Test/data-00000-of-00001.arrow index 55669f769aac298829eac258c09015ed16c75897..9565d6bf740c6b2cf101ed6305208a8384a91b14 100644 --- a/examples/GR/IEMOCAP-Gender-Test/data-00000-of-00001.arrow +++ b/examples/GR/IEMOCAP-Gender-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89d1237586d95f75b99aaea0bd47680ad91deaaa3a4c20d94571e76bbc1e951f -size 411576 +oid sha256:c8a9d624ae7d40b7e7a024e67478150837531b368f9f9f34b4e396d2881af059 +size 431064 diff --git a/examples/GR/IEMOCAP-Gender-Test/dataset_info.json b/examples/GR/IEMOCAP-Gender-Test/dataset_info.json index ae1585a301a57eef40e3c39259d56a7e70e2be43..ec9982ce23f02e4a30ed69b54c9da27522206ad7 100644 --- a/examples/GR/IEMOCAP-Gender-Test/dataset_info.json +++ b/examples/GR/IEMOCAP-Gender-Test/dataset_info.json @@ -108,6 +108,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_0.wav b/examples/GR/IEMOCAP-Gender-Test/sample_0.wav index 0f29a85f3b63eb74da8cd026aab5aa13498e0125..f052a2e401793a6a2cad7fd12ec9e7dc305ea5b2 100644 Binary files a/examples/GR/IEMOCAP-Gender-Test/sample_0.wav and b/examples/GR/IEMOCAP-Gender-Test/sample_0.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_1.wav b/examples/GR/IEMOCAP-Gender-Test/sample_1.wav index 56cec7eeb6836d092e76201787aa22b9436c13f2..fa42b5bdbf1f708fcb039e99845040df41d84da4 100644 Binary files a/examples/GR/IEMOCAP-Gender-Test/sample_1.wav and b/examples/GR/IEMOCAP-Gender-Test/sample_1.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_2.wav b/examples/GR/IEMOCAP-Gender-Test/sample_2.wav index 5c68747f7620a99d23ad13f8d2fd7386ed49332c..5f81af300bee9be5ef0de8ab0c7e120a4d0b917b 100644 Binary files a/examples/GR/IEMOCAP-Gender-Test/sample_2.wav and b/examples/GR/IEMOCAP-Gender-Test/sample_2.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/state.json b/examples/GR/IEMOCAP-Gender-Test/state.json index 909b8a4a39ef78e0e286d8b51244d38d68e3aa31..ed84f0cd8521cebcc00f6456b76bc9d78d767f38 100644 --- a/examples/GR/IEMOCAP-Gender-Test/state.json +++ b/examples/GR/IEMOCAP-Gender-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "7a4eb80e3f03a3f4", + "_fingerprint": "339f506943f7e884", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/GR/VoxCeleb-Gender-Test/data-00000-of-00001.arrow b/examples/GR/VoxCeleb-Gender-Test/data-00000-of-00001.arrow index c522d812d46f2f810770b551d21077850da7c6a2..cd2247677be4407d4033f79be586c6763f82a2fa 100644 --- a/examples/GR/VoxCeleb-Gender-Test/data-00000-of-00001.arrow +++ b/examples/GR/VoxCeleb-Gender-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f7b5fb1ac8505719dd9952b830c5d064d837a406bf932641a9bdc5de448d9c6c -size 609480 +oid sha256:850b8dff1601e9d93b3c6f7d02d35b0865f26da47d953dfb912cc2d62eca4d55 +size 517400 diff --git a/examples/GR/VoxCeleb-Gender-Test/dataset_info.json b/examples/GR/VoxCeleb-Gender-Test/dataset_info.json index 913e85b36737f9004f81286043e7493d6f61b737..91b217e4844573d2e3a2d0072b2276b37723047c 100644 --- a/examples/GR/VoxCeleb-Gender-Test/dataset_info.json +++ b/examples/GR/VoxCeleb-Gender-Test/dataset_info.json @@ -108,6 +108,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_0.wav b/examples/GR/VoxCeleb-Gender-Test/sample_0.wav index 2f6e333859e59c14c99e08c0ed07ec34da06b7cc..1108051bf59741d780dd1895721cef626655ad4d 100644 Binary files a/examples/GR/VoxCeleb-Gender-Test/sample_0.wav and b/examples/GR/VoxCeleb-Gender-Test/sample_0.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_1.wav b/examples/GR/VoxCeleb-Gender-Test/sample_1.wav index cdc77dfd8a54374fbf21b32128129fde94d44c2c..170de49b786e11c8d6f9ea105f30cef9eceb5862 100644 Binary files a/examples/GR/VoxCeleb-Gender-Test/sample_1.wav and b/examples/GR/VoxCeleb-Gender-Test/sample_1.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_2.wav b/examples/GR/VoxCeleb-Gender-Test/sample_2.wav index 8b3634c13fc5cefe14475fc673f9458aa7c79815..b520f722b1880dc2c15fb2090a174016982e9eff 100644 Binary files a/examples/GR/VoxCeleb-Gender-Test/sample_2.wav and b/examples/GR/VoxCeleb-Gender-Test/sample_2.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/state.json b/examples/GR/VoxCeleb-Gender-Test/state.json index d0591ec49e94a4c129e72d00c70af869214dafd8..59877853a608c9f52a3b29cf3f9db4524b1addce 100644 --- a/examples/GR/VoxCeleb-Gender-Test/state.json +++ b/examples/GR/VoxCeleb-Gender-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "29964e6c779e5e22", + "_fingerprint": "d16ef5cdce6dd4c2", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/SI/ALPACA-Audio-Test/data-00000-of-00001.arrow b/examples/SI/ALPACA-Audio-Test/data-00000-of-00001.arrow index 0e9f779ec899f578e1d5291ff7b261efe4f4d32c..1accfadab38a5c4af77efd77d5ff5336afc76126 100644 --- a/examples/SI/ALPACA-Audio-Test/data-00000-of-00001.arrow +++ b/examples/SI/ALPACA-Audio-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ba310d4adba762052395b3be53afa08451034a8845e8f9f244fa92c84f7358c6 -size 540072 +oid sha256:0ed7346f8dc284a9900db47ddaaae252e0145aa23dfa3ab5336fbf7c9f2d8346 +size 400680 diff --git a/examples/SI/ALPACA-Audio-Test/dataset_info.json b/examples/SI/ALPACA-Audio-Test/dataset_info.json index 882789dac9ac95ae77467581f55b4f296746e7a9..408aaad5f7e22888fd0fa1ac9c64f759cf6d77bd 100644 --- a/examples/SI/ALPACA-Audio-Test/dataset_info.json +++ b/examples/SI/ALPACA-Audio-Test/dataset_info.json @@ -104,6 +104,28 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/SI/ALPACA-Audio-Test/sample_0.wav b/examples/SI/ALPACA-Audio-Test/sample_0.wav index 70248c9fd2ff3b5792099536311d05bed53fba47..39211d1c195a9613197680da766e2f46e1b7deb5 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_0.wav and b/examples/SI/ALPACA-Audio-Test/sample_0.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/sample_1.wav b/examples/SI/ALPACA-Audio-Test/sample_1.wav index 0fa85b0e999023c1118dd960236ae84689438330..e22fc1b4899053c55b2c93f61a17ae04290603dd 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_1.wav and b/examples/SI/ALPACA-Audio-Test/sample_1.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/sample_2.wav b/examples/SI/ALPACA-Audio-Test/sample_2.wav index b5f187db3678a198b23df8b683f8492ae5ff60ac..f61f6006c97c647388826d1ecc9dde7f2e8a8c2f 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_2.wav and b/examples/SI/ALPACA-Audio-Test/sample_2.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/state.json b/examples/SI/ALPACA-Audio-Test/state.json index 6ba2848fa1bbfd0cf127c67ce6315770b72d590d..3bd9c038b001c84bbd46e26df7c121a7ea8e1315 100644 --- a/examples/SI/ALPACA-Audio-Test/state.json +++ b/examples/SI/ALPACA-Audio-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "f46fe3d489641513", + "_fingerprint": "41ebac7aa5808e92", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/SI/OpenHermes-Audio-Test/data-00000-of-00001.arrow b/examples/SI/OpenHermes-Audio-Test/data-00000-of-00001.arrow index f78a87653eaed972a87688964c3465cafe9d1cdd..dfcf87a518efbd9ab701363db34a83336cce8922 100644 --- a/examples/SI/OpenHermes-Audio-Test/data-00000-of-00001.arrow +++ b/examples/SI/OpenHermes-Audio-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:13ec7bd9b5a3ee4bd5a05803c327343b6e461e8ec08413484fd77c43cd6b26bc -size 435240 +oid sha256:cf78046f1beb5c172249e9692bbec2e4ce37ecfd96d6e7984aa2f8516316b061 +size 358552 diff --git a/examples/SI/OpenHermes-Audio-Test/dataset_info.json b/examples/SI/OpenHermes-Audio-Test/dataset_info.json index ac515de4ff190d0b7bd0ce417dfbe2b34d6c0bb8..c047f4a40521a5ca6dd80262091713a20934eee1 100644 --- a/examples/SI/OpenHermes-Audio-Test/dataset_info.json +++ b/examples/SI/OpenHermes-Audio-Test/dataset_info.json @@ -116,6 +116,28 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/SI/OpenHermes-Audio-Test/sample_0.wav b/examples/SI/OpenHermes-Audio-Test/sample_0.wav index add65880600b56384cc5350e64c30be65d0ec19d..fc6251969fc752f60c52056c83739e8b206578ea 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_0.wav and b/examples/SI/OpenHermes-Audio-Test/sample_0.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/sample_1.wav b/examples/SI/OpenHermes-Audio-Test/sample_1.wav index 8fa3cc5ffd5e8304b28e2c00f15edfda78c293b2..c64f49698924ca6c035902b254cabfd6015cf6f7 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_1.wav and b/examples/SI/OpenHermes-Audio-Test/sample_1.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/sample_2.wav b/examples/SI/OpenHermes-Audio-Test/sample_2.wav index 47dc6714c50f2650cb528c81edf0f18a8f3148e9..8086c0e0a6b16c63cc8fa165089a3cc45e65e6e3 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_2.wav and b/examples/SI/OpenHermes-Audio-Test/sample_2.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/state.json b/examples/SI/OpenHermes-Audio-Test/state.json index 9a19e3c159f16d99ee14394ad0aab31b7594a7eb..fe7dccc553f7fd6116624ea9dd30fc50e026c50a 100644 --- a/examples/SI/OpenHermes-Audio-Test/state.json +++ b/examples/SI/OpenHermes-Audio-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "bec0fd435c621121", + "_fingerprint": "ec1d0f482d452867", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/data-00000-of-00001.arrow b/examples/SQA/CN-College-Listen-MCQ-Test/data-00000-of-00001.arrow index 5b514d7eea4a68baf4f63ab12cce2774d09fdc8e..fe2a76df0110f305cac0f247824a2f80fe1b9921 100644 --- a/examples/SQA/CN-College-Listen-MCQ-Test/data-00000-of-00001.arrow +++ b/examples/SQA/CN-College-Listen-MCQ-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e3578b2ed58ee0fa7527ef31e35664a28c27505b0f9ff79754680b1d7330c70 -size 3518760 +oid sha256:105fa5bf62671eaceea2e79ecb95d0d2f911bb0b6e034d7fbb94c2e6af0f4fc7 +size 1315864 diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json b/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json index 623b82f8d7d18ff8870b56c298783f1b5f32dc57..4771b4afa178372d926d8585373b25efe7c24093 100644 --- a/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json +++ b/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json @@ -100,6 +100,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav index cf00703fe4fb0275a04b27b4ab0abf44a8e39cae..78afc269eab7bbaedde6c651ba9b0a80117893e6 100644 --- a/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav +++ b/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a49fd4f0b22817155860bcd528696eac0ad50bc1c9b239ae328e46936b53d1e -size 2115952 +oid sha256:8b8dff3a6dfd3f7841ad4ab275fabc97879ebda7530bdfb07c335d3f3b785efb +size 369138 diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav index 8aea2bff27874d78fbee581ad8849d0ab0ac9fc8..2950fb2efc80385a38eafaf8e5323e235dc6ef5a 100644 Binary files a/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav index 750f1b8414060167651ac33128408b510b1545ca..8e38a92819daf71488f83afff75446fafa16653f 100644 Binary files a/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/state.json b/examples/SQA/CN-College-Listen-MCQ-Test/state.json index 1afd2605e58a9e5304f9f324f51af5e7d535d636..209d66d20de4ced39cf612551d829563dd12715c 100644 --- a/examples/SQA/CN-College-Listen-MCQ-Test/state.json +++ b/examples/SQA/CN-College-Listen-MCQ-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "b4fb19374756e22d", + "_fingerprint": "d6665c93899c985b", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/data-00000-of-00001.arrow b/examples/SQA/DREAM-TTS-MCQ-Test/data-00000-of-00001.arrow index 236abb453583027bafdc652c39704a6870afaec8..e56610034a86dde6caecb02baf61a5c8e9679099 100644 --- a/examples/SQA/DREAM-TTS-MCQ-Test/data-00000-of-00001.arrow +++ b/examples/SQA/DREAM-TTS-MCQ-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:683f5edfae8193f93c22fc1bf2514edee75cc54f565b0e997d4addbfc3376a4d -size 1272504 +oid sha256:bf928d4caee9289ce3f57f0c15aa654a86ffb341a5e95c56182fb4a7f88cf330 +size 1982432 diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json b/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json index b12863f3596549cd265ea212a5964a91c271a7cf..1e091c2cd9b8341f9d611e06e1ae03d1ce1f1524 100644 --- a/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json +++ b/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json @@ -114,6 +114,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/sample_0.wav b/examples/SQA/DREAM-TTS-MCQ-Test/sample_0.wav index a7133d2c60825df4c133cf3578b84a0252c4a091..1b02057e2dcce381d2c274bff02eefc3641b5277 100644 --- a/examples/SQA/DREAM-TTS-MCQ-Test/sample_0.wav +++ b/examples/SQA/DREAM-TTS-MCQ-Test/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:140e86d457811ef438fbbb13e457224fcd9f98a45451965b7183c335042b2003 -size 220930 +oid sha256:922ec67bb0b3e8afab2222a1060101cd7103700863127a77d9f67aa09d9ae162 +size 403266 diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav b/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav index 15f7e2c92aa5c8e199277474dfbb81d27c9ef002..8ffc084d780c4f8ed4b5401dd1c373a29fa21834 100644 Binary files a/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav and b/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav differ diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/sample_2.wav b/examples/SQA/DREAM-TTS-MCQ-Test/sample_2.wav index c30502c27386e06b63b6cf6e6297088178a63535..0c392067385b635cd401aca11b0ab1c40206dcaf 100644 --- a/examples/SQA/DREAM-TTS-MCQ-Test/sample_2.wav +++ b/examples/SQA/DREAM-TTS-MCQ-Test/sample_2.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a339d7324c19ba4228bb9f1de7b947e52cf4591b856a48f08e2c0d2d54a79cd -size 142424 +oid sha256:a9f56e4d56f5384e968cd7be064056fb3039f6c5fea2cc9cb24155b1914385ab +size 225708 diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/state.json b/examples/SQA/DREAM-TTS-MCQ-Test/state.json index 9429aff4cfdd4360de95ee6eaf6f11a40d6ceefa..93713a14d71ecaaf7a461735519e8172057aaa74 100644 --- a/examples/SQA/DREAM-TTS-MCQ-Test/state.json +++ b/examples/SQA/DREAM-TTS-MCQ-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "4ae1a389c9652fd2", + "_fingerprint": "aa689dcb170b0cb8", "_format_columns": [ "answer", "context", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "qwen_audio_chat" ], diff --git a/examples/SQA/Public-SG-Speech-QA-Test/data-00000-of-00001.arrow b/examples/SQA/Public-SG-Speech-QA-Test/data-00000-of-00001.arrow index d1c7960aac65d29f4df237c5245a2aa6c89d0055..eaf658cce0723c6d74acdf89e74dab71c772c87d 100644 --- a/examples/SQA/Public-SG-Speech-QA-Test/data-00000-of-00001.arrow +++ b/examples/SQA/Public-SG-Speech-QA-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:41ebee1aeb1c9a28136ca99e86afcdf9f583aba6c003a0a99d94477419dc9f6f -size 3555008 +oid sha256:0f873a0cc4f7e5c1cfb6d39f055b240d3db2dc6de0e16d5132327fd9c9fb684f +size 4337968 diff --git a/examples/SQA/Public-SG-Speech-QA-Test/dataset_info.json b/examples/SQA/Public-SG-Speech-QA-Test/dataset_info.json index 5b865ccbdc90f460864fa658307b1fd6e12b5ac6..203817254af5fa6bd621c9a43dfaaf6d61ad4922 100644 --- a/examples/SQA/Public-SG-Speech-QA-Test/dataset_info.json +++ b/examples/SQA/Public-SG-Speech-QA-Test/dataset_info.json @@ -108,6 +108,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/SQA/Public-SG-Speech-QA-Test/sample_0.wav b/examples/SQA/Public-SG-Speech-QA-Test/sample_0.wav index 298380997737d4c67d07f69122cc44a28437254d..a7f47bc0a68166d38f958a1cc884dcd2342c98f6 100644 --- a/examples/SQA/Public-SG-Speech-QA-Test/sample_0.wav +++ b/examples/SQA/Public-SG-Speech-QA-Test/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b62a055ccc0592fb34892fc14c743745bd3ee5ff04475f8231ca064cf5ab292c -size 1599216 +oid sha256:9d1d958bf731fbddd5078588014e3bb8c7cd1fe8744267aef62c6b5220cb1b93 +size 1120356 diff --git a/examples/SQA/Public-SG-Speech-QA-Test/sample_1.wav b/examples/SQA/Public-SG-Speech-QA-Test/sample_1.wav index 90ec1058956b790eaba74d366ad8aeb3ebffcfeb..e03c1449caeea8580f557d651d28eb079ded9884 100644 --- a/examples/SQA/Public-SG-Speech-QA-Test/sample_1.wav +++ b/examples/SQA/Public-SG-Speech-QA-Test/sample_1.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f125d54b780e025d73756918a11f7bbe9badf35feef0a220bdde91c275d154e2 -size 950336 +oid sha256:fca54dcb4dcd6a5285211d2f772b24543612d03bcf199e783140d1439f49b2f1 +size 1515128 diff --git a/examples/SQA/Public-SG-Speech-QA-Test/sample_2.wav b/examples/SQA/Public-SG-Speech-QA-Test/sample_2.wav index 35e65e20490020e270e104069f6937c3909dc0a3..d99371b71f61db6a6cf7ab4f134eee04bd6b979f 100644 --- a/examples/SQA/Public-SG-Speech-QA-Test/sample_2.wav +++ b/examples/SQA/Public-SG-Speech-QA-Test/sample_2.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e682e04d2afa9162c4b0545f05177e076e5a6fc6a09314ced5097c49bf55082e -size 984132 +oid sha256:318f83b136dc48cc2094bbf974094dbde6c3872a5f0c0bce99b206644bd9822d +size 1681722 diff --git a/examples/SQA/Public-SG-Speech-QA-Test/state.json b/examples/SQA/Public-SG-Speech-QA-Test/state.json index 0f2668023e6a567ada41c9ea082f99773f2e8f6e..3a16cade1f2154b599ff0ee007e06680bcb75c72 100644 --- a/examples/SQA/Public-SG-Speech-QA-Test/state.json +++ b/examples/SQA/Public-SG-Speech-QA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "ede505a635b66631", + "_fingerprint": "caccddb9c34b3f21", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "qwen_audio_chat" ], diff --git a/examples/SQA/SLUE-P2-SQA5-Test/data-00000-of-00001.arrow b/examples/SQA/SLUE-P2-SQA5-Test/data-00000-of-00001.arrow index fdc22e998f5d035a4a21db496fc26bb1eb9e7b47..0302dfcdbbcc17118c9a209ffb3c8018c110b852 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/data-00000-of-00001.arrow +++ b/examples/SQA/SLUE-P2-SQA5-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:91fad195c759891c783f8448a7cb42362b53f091aacad15195a7d262da356464 -size 5469256 +oid sha256:8f4a692d5fcaee04885bcd8277fda55578e6135813ed8ae2cdf1fade8757109e +size 5222928 diff --git a/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json b/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json index 6c681f17c89b019564bbd2d0509865a05ce4db3a..e1666dfd561139c081bd34bc111a1ef188c97c19 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json +++ b/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json @@ -137,6 +137,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/SQA/SLUE-P2-SQA5-Test/sample_0.wav b/examples/SQA/SLUE-P2-SQA5-Test/sample_0.wav index 7236fa9aac8e0e10818025ab656ddc4ad2bbf812..38fa05167e518b91abbbe3892e2ab3488c3fc66d 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/sample_0.wav +++ b/examples/SQA/SLUE-P2-SQA5-Test/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:398b25e597d18e0e027bb7fda9cc025cff93b716bc683d88a553d34c338285eb +oid sha256:2ea083853548cf7fb7e9659ee3e29f290c22eeef99b5316251f7e80023cbe882 size 1280044 diff --git a/examples/SQA/SLUE-P2-SQA5-Test/sample_1.wav b/examples/SQA/SLUE-P2-SQA5-Test/sample_1.wav index 7a80d4c3399ae9256db0ccded497c8fe90aecb5c..ee433bdb91f66bd32b140fb05fd9f6a0d51be495 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/sample_1.wav +++ b/examples/SQA/SLUE-P2-SQA5-Test/sample_1.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:36769a3e93db8fc6ef00b05552a3c36b08c35c578f93d142a60415d2909f9e7e +oid sha256:9f67f52bd5eb7a6402b642dc3edc201ef924651fb52dc7fe2d4b2197bbe6bbd6 size 1280044 diff --git a/examples/SQA/SLUE-P2-SQA5-Test/sample_2.wav b/examples/SQA/SLUE-P2-SQA5-Test/sample_2.wav index 92f61a3d6c5de05f4b6070eb892bf22139beeff5..559d54546827e6e9117535bbc50f915adfa61b5a 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/sample_2.wav +++ b/examples/SQA/SLUE-P2-SQA5-Test/sample_2.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:43e31031a1e0e8cde85470353830b7c4513d4d9f9e5ca394127b80b4938d970a +oid sha256:85c1b5d9896f4db27484d593385d62cd3b3887e1d590b87373d2d8fd97f7da05 size 1280044 diff --git a/examples/SQA/SLUE-P2-SQA5-Test/state.json b/examples/SQA/SLUE-P2-SQA5-Test/state.json index 64fcf1074f14e6cc130ef25406cd0718f61798a7..9269d6748a4b83722969b0997a02f1a9ebf3807c 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/state.json +++ b/examples/SQA/SLUE-P2-SQA5-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "400b504ce3034854", + "_fingerprint": "e3c5a96704e595fe", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/SQA/Spoken-Squad-Test/data-00000-of-00001.arrow b/examples/SQA/Spoken-Squad-Test/data-00000-of-00001.arrow index 01c3308e636c1ca7b513cc8e863074d83085e4d3..e30bb1b71a5945f5dd699df02203b7fb96d2bfb1 100644 --- a/examples/SQA/Spoken-Squad-Test/data-00000-of-00001.arrow +++ b/examples/SQA/Spoken-Squad-Test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8c870f847afa404da100216dd6192919e7e00218c046beecf577d752e958e14 -size 6854040 +oid sha256:17b3ee4f250469b7e6dd37590ccbb6b452f7f19b04bcaf5b062d2651aa6e7aec +size 7216392 diff --git a/examples/SQA/Spoken-Squad-Test/dataset_info.json b/examples/SQA/Spoken-Squad-Test/dataset_info.json index 5b50e517bf0760f04194c6b7c720d733151674aa..59461f89e6a39255aabddc04d03a28a8b503d28f 100644 --- a/examples/SQA/Spoken-Squad-Test/dataset_info.json +++ b/examples/SQA/Spoken-Squad-Test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/SQA/Spoken-Squad-Test/sample_0.wav b/examples/SQA/Spoken-Squad-Test/sample_0.wav index 2b928fafe992bf5c3ecf4b03389d186cff11421b..91a45dc9ee12ffac6bfb8838dc863b04567ccb07 100644 --- a/examples/SQA/Spoken-Squad-Test/sample_0.wav +++ b/examples/SQA/Spoken-Squad-Test/sample_0.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:304b38f43745d431d607788b839a14409b8e8e2032a093f408302d79287efc3b -size 2143532 +oid sha256:deaac19a31e7bef8a82d98e4b4199d7424ebd7af772fc2bb1fd239af8978b818 +size 1389356 diff --git a/examples/SQA/Spoken-Squad-Test/sample_1.wav b/examples/SQA/Spoken-Squad-Test/sample_1.wav index 10820a48e3ffd1a5ecb2198457b3891122d161a1..51bb3dfb86abbc4f666bff1a00a0c57afdfdcf19 100644 --- a/examples/SQA/Spoken-Squad-Test/sample_1.wav +++ b/examples/SQA/Spoken-Squad-Test/sample_1.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:97c29b943c6244c1a6dee8bbd871055408ed9735349c3fd57f2d285cd3628041 -size 1789484 +oid sha256:23bed7b72f1631b75672d718af122340d2ed3c38ce86534f772df5c02cb8d91d +size 2665772 diff --git a/examples/SQA/Spoken-Squad-Test/sample_2.wav b/examples/SQA/Spoken-Squad-Test/sample_2.wav index 82953be741c78b78f28232b6b13d2392ff533c2e..1680d55210201e5299940d1b035fde5d75c75e55 100644 --- a/examples/SQA/Spoken-Squad-Test/sample_2.wav +++ b/examples/SQA/Spoken-Squad-Test/sample_2.wav @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:26c3bc8688fbd99e8dd6eff774a7994ae34ec6fddfb1242f3bbae500a12b7746 -size 2903852 +oid sha256:c244d0b3cb482c34b19c4fc4d4d15bcb8d49d9b0bf210165d498f745859e2357 +size 3141932 diff --git a/examples/SQA/Spoken-Squad-Test/state.json b/examples/SQA/Spoken-Squad-Test/state.json index bad03ce21b0c5b24ac005094f5a25236804a54a7..3657f1896e75f83413b7bb4253530da9c1b1c624 100644 --- a/examples/SQA/Spoken-Squad-Test/state.json +++ b/examples/SQA/Spoken-Squad-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "416952584cf805a4", + "_fingerprint": "8482b9acafa077ac", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ST/Covost2-EN-ID-test/data-00000-of-00001.arrow b/examples/ST/Covost2-EN-ID-test/data-00000-of-00001.arrow index 35d28390925a96fd109f6c96b0109cbe4c18a228..5dd8e3e20f2ad085150346711011cd79047ff75f 100644 --- a/examples/ST/Covost2-EN-ID-test/data-00000-of-00001.arrow +++ b/examples/ST/Covost2-EN-ID-test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e114114e362e4b40cbffbfe8f9857f64de0cac09471062c6f6c4f6e73b235de7 -size 549296 +oid sha256:a0390e30c7fd6f5c18b37f622f8784052c6b23149f141b4540cb59f6cd7b9c69 +size 523560 diff --git a/examples/ST/Covost2-EN-ID-test/dataset_info.json b/examples/ST/Covost2-EN-ID-test/dataset_info.json index 75447219133e63a2e07347f4f15add29dc2f358f..b25db6989f0e56b64fa01d76c2fb75832a886b5e 100644 --- a/examples/ST/Covost2-EN-ID-test/dataset_info.json +++ b/examples/ST/Covost2-EN-ID-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "mowe_audio": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-EN-ID-test/sample_0.wav b/examples/ST/Covost2-EN-ID-test/sample_0.wav index 5e70047fc638745caac9c90c2539cc9f18168f8a..25c2b5a12d15e235eb9aed64a3c872d32edba496 100644 Binary files a/examples/ST/Covost2-EN-ID-test/sample_0.wav and b/examples/ST/Covost2-EN-ID-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/sample_1.wav b/examples/ST/Covost2-EN-ID-test/sample_1.wav index 832716522492f3cbeb066ed58c45eab2e9036a2f..2b89ba1a9ffe0b5d8c0659c9410edc725f6b2dd7 100644 Binary files a/examples/ST/Covost2-EN-ID-test/sample_1.wav and b/examples/ST/Covost2-EN-ID-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/sample_2.wav b/examples/ST/Covost2-EN-ID-test/sample_2.wav index 2ccb333e9339d1fda08f7c84fe4fe2cd0b22508c..f3431117fb2b3a42a63c3138a0605ca0b0046ca5 100644 Binary files a/examples/ST/Covost2-EN-ID-test/sample_2.wav and b/examples/ST/Covost2-EN-ID-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/state.json b/examples/ST/Covost2-EN-ID-test/state.json index d460d81ba05ec305339ae4a50c65c2654e706950..9b4dc3f7b4960563f749f82c2c02bcc2666ea115 100644 --- a/examples/ST/Covost2-EN-ID-test/state.json +++ b/examples/ST/Covost2-EN-ID-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "17a5c97a84a7f33c", + "_fingerprint": "45aac62476189dab", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "mowe_audio", "qwen_audio_chat" ], diff --git a/examples/ST/Covost2-EN-TA-test/data-00000-of-00001.arrow b/examples/ST/Covost2-EN-TA-test/data-00000-of-00001.arrow index 50796f046accb5692f9772e9beeacf58537e9a1d..822e8ea669c238d7db916fece41251b84f1a1471 100644 --- a/examples/ST/Covost2-EN-TA-test/data-00000-of-00001.arrow +++ b/examples/ST/Covost2-EN-TA-test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a1667eba0de4088fbe20e9f6c9beaacf5beb556a1d817c92af178757da3bc78 -size 561528 +oid sha256:1dc2cff5380f28f45b6188f0eaf096a15207c693026a5854c04dc8d57887faba +size 576424 diff --git a/examples/ST/Covost2-EN-TA-test/dataset_info.json b/examples/ST/Covost2-EN-TA-test/dataset_info.json index 75447219133e63a2e07347f4f15add29dc2f358f..b25db6989f0e56b64fa01d76c2fb75832a886b5e 100644 --- a/examples/ST/Covost2-EN-TA-test/dataset_info.json +++ b/examples/ST/Covost2-EN-TA-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "mowe_audio": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-EN-TA-test/sample_0.wav b/examples/ST/Covost2-EN-TA-test/sample_0.wav index ad0f73bf6dbfe42e17635b063161d6154cfcf28a..f5cc0d779f3accaa6dff1dcc3f76c6c9b7370446 100644 Binary files a/examples/ST/Covost2-EN-TA-test/sample_0.wav and b/examples/ST/Covost2-EN-TA-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/sample_1.wav b/examples/ST/Covost2-EN-TA-test/sample_1.wav index e5d11995c4bd1e3484f0c15d828c678300d899b2..3aff66d4d7e2772192d6b37dc0e0142fc57adb0f 100644 Binary files a/examples/ST/Covost2-EN-TA-test/sample_1.wav and b/examples/ST/Covost2-EN-TA-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/sample_2.wav b/examples/ST/Covost2-EN-TA-test/sample_2.wav index 8910c6f24aebdc2aab0e2517f304448129282655..188a2b20fa8c2612762b09e774ace9f070b8aff0 100644 Binary files a/examples/ST/Covost2-EN-TA-test/sample_2.wav and b/examples/ST/Covost2-EN-TA-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/state.json b/examples/ST/Covost2-EN-TA-test/state.json index de82636ce42828a45d6c8c4ca1edde9874d87cc3..5e91574d7cd30ff6ea2a8f7fb95ce8d2f0890a83 100644 --- a/examples/ST/Covost2-EN-TA-test/state.json +++ b/examples/ST/Covost2-EN-TA-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "e9d273226522711f", + "_fingerprint": "e30a809aaa184c6f", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "mowe_audio", "qwen_audio_chat" ], diff --git a/examples/ST/Covost2-EN-ZH-test/data-00000-of-00001.arrow b/examples/ST/Covost2-EN-ZH-test/data-00000-of-00001.arrow index 02b18146d4be540926d80ef57b9bd914ef51aec2..b24a1fc60c363fa42e0df37710f1165c1908d10e 100644 --- a/examples/ST/Covost2-EN-ZH-test/data-00000-of-00001.arrow +++ b/examples/ST/Covost2-EN-ZH-test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f4e2a6c95fe3b8adbc4c2f55bd53fdb56b064d436c49ee20ac2e4333114ceac7 -size 600520 +oid sha256:df30bbb05d12ef63de76b4596fed8637846c580e01e8d1d7a8ebda41bac4d37f +size 398616 diff --git a/examples/ST/Covost2-EN-ZH-test/dataset_info.json b/examples/ST/Covost2-EN-ZH-test/dataset_info.json index 75447219133e63a2e07347f4f15add29dc2f358f..b25db6989f0e56b64fa01d76c2fb75832a886b5e 100644 --- a/examples/ST/Covost2-EN-ZH-test/dataset_info.json +++ b/examples/ST/Covost2-EN-ZH-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "mowe_audio": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-EN-ZH-test/sample_0.wav b/examples/ST/Covost2-EN-ZH-test/sample_0.wav index f428967146cecccb88b318b8388e3b897cd14fca..ebe2b681a9e476cf91aa9f2a2541a57988454221 100644 Binary files a/examples/ST/Covost2-EN-ZH-test/sample_0.wav and b/examples/ST/Covost2-EN-ZH-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/sample_1.wav b/examples/ST/Covost2-EN-ZH-test/sample_1.wav index e91b4565eb72ab11f760c9b276daf4f6998f9e5f..dfac27d0e29080bd490e51ed86a09a4804eb5ff1 100644 Binary files a/examples/ST/Covost2-EN-ZH-test/sample_1.wav and b/examples/ST/Covost2-EN-ZH-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/sample_2.wav b/examples/ST/Covost2-EN-ZH-test/sample_2.wav index 38f60afc485334971149ebeb3985573093dd9a96..1b3be50610325f271c28947bd1fa44c6586d6fe2 100644 Binary files a/examples/ST/Covost2-EN-ZH-test/sample_2.wav and b/examples/ST/Covost2-EN-ZH-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/state.json b/examples/ST/Covost2-EN-ZH-test/state.json index 7dafba85d0dbd927e3c9aae50f0d1a3cf3e2c71c..531e57e55ef2ebf6d0e2336fb125c4db42b0f9c8 100644 --- a/examples/ST/Covost2-EN-ZH-test/state.json +++ b/examples/ST/Covost2-EN-ZH-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "86243bad639f0cb6", + "_fingerprint": "8089a8574e5ffd7a", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "mowe_audio", "qwen_audio_chat" ], diff --git a/examples/ST/Covost2-ID-EN-test/data-00000-of-00001.arrow b/examples/ST/Covost2-ID-EN-test/data-00000-of-00001.arrow index 037f35bf9787de62b458d9a89969d0abcd8b02eb..370186e346c6087b6ed4e54ebf63a026cabd6ef2 100644 --- a/examples/ST/Covost2-ID-EN-test/data-00000-of-00001.arrow +++ b/examples/ST/Covost2-ID-EN-test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1785e6d316adfd52a99ce27e0b14fd68fa410a93f5082fab34e8f35a89563d8c -size 529872 +oid sha256:d80d8028a29587ce439219ba6729b6fdfee1e29154221045944f295541f1a146 +size 568024 diff --git a/examples/ST/Covost2-ID-EN-test/dataset_info.json b/examples/ST/Covost2-ID-EN-test/dataset_info.json index 75447219133e63a2e07347f4f15add29dc2f358f..b25db6989f0e56b64fa01d76c2fb75832a886b5e 100644 --- a/examples/ST/Covost2-ID-EN-test/dataset_info.json +++ b/examples/ST/Covost2-ID-EN-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "mowe_audio": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-ID-EN-test/sample_0.wav b/examples/ST/Covost2-ID-EN-test/sample_0.wav index 60cf3a26a30e046d87c72a4e4d4f15cf54732039..953755836980bd4b6cd2968d39e255ed0f199bfe 100644 Binary files a/examples/ST/Covost2-ID-EN-test/sample_0.wav and b/examples/ST/Covost2-ID-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/sample_1.wav b/examples/ST/Covost2-ID-EN-test/sample_1.wav index 0b37d62b0b132a59e5ce6d2c4551c1d701143efb..5302d7d0ff51cdce307b5fbb5ec7c3cc61bd8ee8 100644 Binary files a/examples/ST/Covost2-ID-EN-test/sample_1.wav and b/examples/ST/Covost2-ID-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/sample_2.wav b/examples/ST/Covost2-ID-EN-test/sample_2.wav index 5eda0ef756def2b097ee2bef2a92b8b9e99ff5c3..d01013320d93fa16a15ee38383b01dce205c6989 100644 Binary files a/examples/ST/Covost2-ID-EN-test/sample_2.wav and b/examples/ST/Covost2-ID-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/state.json b/examples/ST/Covost2-ID-EN-test/state.json index 592b67b79c5171bdccfcf07d28855f756e7f4f23..ccf75843fd30ff7f8653f7a9820c53ddece757b4 100644 --- a/examples/ST/Covost2-ID-EN-test/state.json +++ b/examples/ST/Covost2-ID-EN-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "69b492f6dd79179e", + "_fingerprint": "86eef937bbaf81f4", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "mowe_audio", "qwen_audio_chat" ], diff --git a/examples/ST/Covost2-TA-EN-test/data-00000-of-00001.arrow b/examples/ST/Covost2-TA-EN-test/data-00000-of-00001.arrow index d4966d98c3fec664dfe65f4a2094d48ef716ac78..229b5f65f3ff5ac118f64f82c1117a42e18dabf1 100644 --- a/examples/ST/Covost2-TA-EN-test/data-00000-of-00001.arrow +++ b/examples/ST/Covost2-TA-EN-test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f3b8c1a000b5085ab5af45333fd8c4fc37508c74595f1dba8668f7da20b4d21 -size 477584 +oid sha256:8bb4fe085f89698275aa7972d0023750e3efce2c0adef0af7a0b8e1a2351c985 +size 393168 diff --git a/examples/ST/Covost2-TA-EN-test/dataset_info.json b/examples/ST/Covost2-TA-EN-test/dataset_info.json index 0a34b7bc1ac66816e090968b4de959503e2c4168..63cdba3ce5662d2c70078e2343b090c3f42aa100 100644 --- a/examples/ST/Covost2-TA-EN-test/dataset_info.json +++ b/examples/ST/Covost2-TA-EN-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "mowe_audio": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-TA-EN-test/sample_0.wav b/examples/ST/Covost2-TA-EN-test/sample_0.wav index 642a3386437533748a4df9cbf7c45ab6f5bd0b6f..a930e9d6ecdf71d1a5c482512d317fc0d774d231 100644 Binary files a/examples/ST/Covost2-TA-EN-test/sample_0.wav and b/examples/ST/Covost2-TA-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/sample_1.wav b/examples/ST/Covost2-TA-EN-test/sample_1.wav index a9c003707ecdee7e8918014b2f77a69adbe71d93..20c17bae309b829001b09cf6376675f9210ff751 100644 Binary files a/examples/ST/Covost2-TA-EN-test/sample_1.wav and b/examples/ST/Covost2-TA-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/sample_2.wav b/examples/ST/Covost2-TA-EN-test/sample_2.wav index cd0ce42ab1336cc1591caa5cd56acf6db9afbc29..eaa8c8c0cfa770f7ebc137f70c07bceea40046d9 100644 Binary files a/examples/ST/Covost2-TA-EN-test/sample_2.wav and b/examples/ST/Covost2-TA-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/state.json b/examples/ST/Covost2-TA-EN-test/state.json index c7aef0119e13ade230e9d5c50ca9b66df7193400..1fa35e273c3e8ee0d8bb7c397deadf9509f8b56f 100644 --- a/examples/ST/Covost2-TA-EN-test/state.json +++ b/examples/ST/Covost2-TA-EN-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "6f095ca26fe268ab", + "_fingerprint": "93608e86f8b7524b", "_format_columns": [ "answer", "context", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "mowe_audio", "qwen_audio_chat" ], diff --git a/examples/ST/Covost2-ZH-EN-test/data-00000-of-00001.arrow b/examples/ST/Covost2-ZH-EN-test/data-00000-of-00001.arrow index dc342fcd295e1e1f058737f14d28154c8718cab1..d17e47b203a0bbde458ca1bccdf5027d205aa831 100644 --- a/examples/ST/Covost2-ZH-EN-test/data-00000-of-00001.arrow +++ b/examples/ST/Covost2-ZH-EN-test/data-00000-of-00001.arrow @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:acab5090424e3e32cac285d3a346f48308a64629aa5fba171addc37fbf4f5337 -size 554256 +oid sha256:ddb93d2266e005d13a6a9ba5f3ea7a6ee0192657c55d435fdde3fc27fac715ef +size 623552 diff --git a/examples/ST/Covost2-ZH-EN-test/dataset_info.json b/examples/ST/Covost2-ZH-EN-test/dataset_info.json index 75447219133e63a2e07347f4f15add29dc2f358f..b25db6989f0e56b64fa01d76c2fb75832a886b5e 100644 --- a/examples/ST/Covost2-ZH-EN-test/dataset_info.json +++ b/examples/ST/Covost2-ZH-EN-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "mowe_audio": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-ZH-EN-test/sample_0.wav b/examples/ST/Covost2-ZH-EN-test/sample_0.wav index a0add517f30ee8b82cef1be3aba2d471645bd648..b287e87b0a79fc0c8b8c25cbbe54a201e0201592 100644 Binary files a/examples/ST/Covost2-ZH-EN-test/sample_0.wav and b/examples/ST/Covost2-ZH-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/sample_1.wav b/examples/ST/Covost2-ZH-EN-test/sample_1.wav index 6fad51ce295d62bb0b68826f1d1f3c3f4a2756e9..25cec91737fb23243bb76c92b6077da6d3acc357 100644 Binary files a/examples/ST/Covost2-ZH-EN-test/sample_1.wav and b/examples/ST/Covost2-ZH-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/sample_2.wav b/examples/ST/Covost2-ZH-EN-test/sample_2.wav index e7bebe13c8efed3bb9b8c9b9a46831c8152240b5..78b1a729ea55854f60d3d1c91b40704ed23bd42e 100644 Binary files a/examples/ST/Covost2-ZH-EN-test/sample_2.wav and b/examples/ST/Covost2-ZH-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/state.json b/examples/ST/Covost2-ZH-EN-test/state.json index 4199c3b1fc42c18396a4f54e001d032ad37e6d97..3c449d398d13af1bb2ffddab3dd7f8a8fd373a41 100644 --- a/examples/ST/Covost2-ZH-EN-test/state.json +++ b/examples/ST/Covost2-ZH-EN-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "977bd2807131826b", + "_fingerprint": "98d00264fe4b6901", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "mowe_audio", "qwen_audio_chat" ],