{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Align text and audio using Montreal Forced Aligner (MFA)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "IPkicKwU8IWj" }, "outputs": [], "source": [ "%%capture\n", "!apt update -y\n", "!pip install -U pip" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "G6Z-aDd08hfk" }, "outputs": [], "source": [ "%%capture\n", "%%bash\n", "data_root=\"./infore_16k_denoised\"\n", "mkdir -p $data_root\n", "cd $data_root\n", "wget https://huggingface.co/datasets/ntt123/infore/resolve/main/infore_16k_denoised.zip -O infore.zip\n", "unzip infore.zip " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "VWwgAePDXy4m" }, "outputs": [], "source": [ "from pathlib import Path\n", "\n", "txt_files = sorted(Path(\"./infore_16k_denoised\").glob(\"*.txt\"))\n", "f = open(\"/content/words.txt\", \"w\", encoding=\"utf-8\")\n", "for txt_file in txt_files:\n", " wav_file = txt_file.with_suffix(\".wav\")\n", " if not wav_file.exists():\n", " continue\n", " line = open(txt_file, \"r\", encoding=\"utf-8\").read()\n", " for word in line.strip().lower().split():\n", " f.write(word)\n", " f.write(\"\\n\")\n", "f.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "FktjNXbDkBLh" }, "outputs": [], "source": [ "black_list = (\n", " []\n", " + [\"q\", \"adn\", \"h\", \"stress\", \"b\", \"k\", \"mark\", \"gas\", \"cs\", \"test\", \"l\", \"hiv\"]\n", " + [\"v\", \"d\", \"c\", \"p\", \"martin\", \"visa\", \"euro\", \"laser\", \"x\", \"real\", \"shop\"]\n", " + [\"studio\", \"kelvin\", \"đt\", \"pop\", \"rock\", \"gara\", \"karaoke\", \"đicr\", \"đigiúp\"]\n", " + [\"khmer\", \"ii\", \"s\", \"tr\", \"xhcn\", \"casino\", \"guitar\", \"sex\", \"oxi\", \"radio\"]\n", " + [\"qúy\", \"asean\", \"hlv\" \"ts\", \"video\", \"virus\", \"usd\", \"robot\", \"ph\", \"album\"]\n", " + [\"s\", \"kg\", \"km\", \"g\", \"tr\", \"đ\", \"ak\", \"d\", \"m\", \"n\"]\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "b3nMwfzK_g0B" }, "outputs": [], "source": [ "ws = open(\"/content/words.txt\").readlines()\n", "f = open(\"/content/lexicon.txt\", \"w\")\n", "for w in sorted(set(ws)):\n", " w = w.strip()\n", "\n", " # this is a hack to match phoneme set in the vietTTS repo\n", " p = list(w)\n", " p = \" \".join(p)\n", " if w in black_list:\n", " continue\n", " else:\n", " f.write(f\"{w}\\t{p}\\n\")\n", "f.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "WuWZKTNRt1eM" }, "outputs": [], "source": [ "%%writefile install_mfa.sh\n", "#!/bin/bash\n", "\n", "## a script to install Montreal Forced Aligner (MFA)\n", "\n", "root_dir=${1:-/tmp/mfa}\n", "mkdir -p $root_dir\n", "cd $root_dir\n", "\n", "# download miniconda3\n", "wget -q --show-progress https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh\n", "bash Miniconda3-latest-Linux-x86_64.sh -b -p $root_dir/miniconda3 -f\n", "\n", "#install MFA\n", "$root_dir/miniconda3/bin/conda create -n aligner -c conda-forge montreal-forced-aligner=2.0.0rc7 -y\n", "\n", "echo -e \"\\n======== DONE ==========\"\n", "echo -e \"\\nTo activate MFA, run: source $root_dir/miniconda3/bin/activate aligner\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "osR7KJCNXJYq" }, "outputs": [], "source": [ "# download and install mfa\n", "INSTALL_DIR = \"/tmp/mfa\" # path to install directory\n", "!bash ./install_mfa.sh {INSTALL_DIR}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "hxbXwJZlXLPz", "outputId": "d3e40ec5-68a7-40ec-d070-137736d7a956" }, "outputs": [], "source": [ "!source {INSTALL_DIR}/miniconda3/bin/activate aligner; \\\n", "mfa train --clean -t ./temp -o ./infore_mfa.zip ./infore_16k_denoised lexicon.txt ./infore_textgrid" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "8Z65_BtXagn1" }, "outputs": [], "source": [ "# copy to train directory\n", "!mkdir -p train_data\n", "!cp ./infore_16k_denoised/*.wav ./train_data\n", "!cp ./infore_textgrid/*.TextGrid ./train_data" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "align-text-audio | InfoRe using MFA v2rc7.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }