{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "qjubHCEzYtG8" }, "source": [ "### Step 1. Download InfoRE dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "zCBJzJi6BE_o" }, "outputs": [], "source": [ "%%capture\n", "%%bash\n", "mkdir -p /content/data\n", "cd /content/data\n", "wget https://huggingface.co/datasets/ntt123/infore/resolve/main/infore_16k.zip\n", "# unzip -P BroughtToYouByInfoRe 25hours.zip\n", "unzip infore_16k.zip" ] }, { "cell_type": "markdown", "metadata": { "id": "6C47hb9nYzmB" }, "source": [ "### Step 2. Normalize audio clip" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Hp9TK8PbBcQM" }, "outputs": [], "source": [ "%%capture\n", "!sudo apt install -y sox\n", "!pip install soundfile librosa\n", "!pip install onnxruntime==1.11.1" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FW45D8xM9Mcc", "outputId": "8d7ea7a9-ea5a-48ca-88fe-37dd4ed55d9b" }, "outputs": [], "source": [ "!mkdir -p /content/infore_16k\n", "from pathlib import Path\n", "import os\n", "from tqdm.cli import tqdm\n", "\n", "wavs = sorted(Path(\"/content/data/InfoRe\").glob(\"*.wav\"))\n", "for path in tqdm(wavs):\n", " out = Path(\"/content/infore_16k\") / path.name\n", " cmd = f\"sox {path} -c 1 -e signed-integer -b 16 -r 16k --norm=-3 {out}\"\n", " os.system(cmd)" ] }, { "cell_type": "markdown", "metadata": { "id": "kooiBrsQY5sQ" }, "source": [ "### Step 3. Denoise using DNS-Challenge's baseline" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "hsXeNkZ3Xacj" }, "outputs": [], "source": [ "!git clone https://github.com/microsoft/DNS-Challenge\n", "%cd DNS-Challenge/NSNet2-baseline/\n", "!git checkout -f 8b87a33b2892f147b5c7ad39ea978453730db269\n", "!python run_nsnet2.py -i /content/infore_16k/ -o /content/infore_16k_denoised -m ./nsnet2-20ms-baseline.onnx" ] }, { "cell_type": "markdown", "metadata": { "id": "T5JtZwKgZI4r" }, "source": [ "### Step 4. Zip the denoised dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "eeFggV0uYop_" }, "outputs": [], "source": [ "%cd /content\n", "!cp /content/data/InfoRe/*.txt ./infore_16k_denoised\n", "!cd ./infore_16k_denoised; zip -r ../infore_16k_denoised.zip ." ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "prepare_infore_dataset.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }