diff --git "a/VitsModelSplit/FeaturesCollectionDataset_notebook.ipynb" "b/VitsModelSplit/FeaturesCollectionDataset_notebook.ipynb" new file mode 100644--- /dev/null +++ "b/VitsModelSplit/FeaturesCollectionDataset_notebook.ipynb" @@ -0,0 +1,8871 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "KX0IBmbjeydD", + "outputId": "da61afd9-8a4e-478a-fcbc-4c7d0466ced9" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m12.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ], + "source": [ + "!pip --q install datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "PbGUt9dbnriq" + }, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "from IPython.display import Audio\n", + "import torch\n", + "import numpy as np\n", + "import pandas as pd\n", + "import soundfile as sf\n", + "import matplotlib.pyplot as plt\n", + "plt.style.use(\"seaborn-whitegrid\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Zl13ArjoGBq9", + "outputId": "eb811f29-0434-4e0e-a043-017b25d42c3d" + }, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "Mounted at /content/drive\n" + ] + } + ], + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wUSLY8BmnrfA", + "outputId": "04ce4306-3c03-4c36-e9c5-4f01ffe9d908" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading cleansada-version-01.zip to /content\n", + " 98% 1.01G/1.03G [00:11<00:00, 109MB/s]\n", + "100% 1.03G/1.03G [00:11<00:00, 96.0MB/s]\n" + ] + } + ], + "source": [ + "def DownloadDataset(username,key):\n", + " import json\n", + " keys={\"username\":username,\"key\":key}\n", + " ! mkdir ~/.kaggle\n", + " json_object = json.dumps(keys, indent=4)\n", + " with open(r\"/root/.kaggle/kaggle.json\", \"w\") as outfile:\n", + " outfile.write(json_object)\n", + " ! chmod 600 ~/.kaggle/kaggle.json\n", + " ! kaggle datasets download --unzip engmahmoodanaam/cleansada-version-01\n", + "\n", + "#.............................................\n", + "\n", + "DownloadDataset( username = \"engmahmoodanaam\",\n", + " key = \"4a457b4fc3516f9d9b913d770ea64884\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "id": "l74W1-6ZnrbX" + }, + "outputs": [], + "source": [ + "def GetDataset(path_csv,path_audio):\n", + " df = pd.read_csv(path_csv)\n", + " audios_data = []\n", + " audios_samplerate = []\n", + " for idx, row in df.iterrows():\n", + " filename = f\"{path_audio}/{row['SegmentID']}.wav\"\n", + " audiodata, samplerate = sf.read(filename)\n", + " audios_data.append(audiodata)\n", + " audios_samplerate.append(samplerate)\n", + "\n", + " df['audio'] = audios_data\n", + " df['samplerate'] = audios_samplerate\n", + " df['text'] = df['ProcessedText']\n", + " df = df[['text','audio','samplerate']]\n", + "\n", + "\n", + " return df\n", + "\n", + 
"#.............................................\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "mfMp94fvnrXw", + "outputId": "26e03dbe-2713-4651-ad7a-6973ec6ce9cd" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"df\",\n \"rows\": 70,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 69,\n \"samples\": [\n \"\\u0648\\u0634 \\u0633\\u0648\\u064a\\u062a \\u062d\\u062a\\u0649 \\u062a\\u0635\\u064a\\u0631\\u0648\\u0646 \\u0628\\u0647\\u0627\\u0644\\u062c\\u062d\\u0648\\u062f \\u0627\\u064a\\u0647\",\n \"\\u064a\\u0627 \\u0633\\u0644\\u0627\\u0645 \\u0627\\u0646\\u062a\\u0647\\u0649 \\u0627\\u0644\\u0645\\u0648\\u0636\\u0648\\u0639\",\n \"\\u0645\\u0627 \\u0631\\u0627\\u062d \\u0645\\u0631\\u0631 \\u0644\\u0647 \\u0628\\u0633\\u0647\\u0648\\u0644\\u0629\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"audio\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"samplerate\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 16000,\n \"max\": 16000,\n \"num_unique_values\": 1,\n \"samples\": [\n 16000\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe", + "variable_name": "df" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
textaudiosamplerate
0يا سلام انتهى الموضوع[3.0517578125e-05, 0.000823974609375, 0.001464...16000
1يعني يا ابو مسامح[-0.0538330078125, -0.0531005859375, -0.046112...16000
2حصة موافقة[0.001007080078125, -0.004058837890625, -0.003...16000
3والله يا هي فكرة[-0.02789306640625, -0.03045654296875, -0.0335...16000
4فكرة تبي تخليك تطير من الفرح[0.05316162109375, 0.031829833984375, 0.021728...16000
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " text \\\n", + "0 يا سلام انتهى الموضوع \n", + "1 يعني يا ابو مسامح \n", + "2 حصة موافقة \n", + "3 والله يا هي فكرة \n", + "4 فكرة تبي تخليك تطير من الفرح \n", + "\n", + " audio samplerate \n", + "0 [3.0517578125e-05, 0.000823974609375, 0.001464... 16000 \n", + "1 [-0.0538330078125, -0.0531005859375, -0.046112... 16000 \n", + "2 [0.001007080078125, -0.004058837890625, -0.003... 16000 \n", + "3 [-0.02789306640625, -0.03045654296875, -0.0335... 16000 \n", + "4 [0.05316162109375, 0.031829833984375, 0.021728... 16000 " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = GetDataset(\n", + " path_csv = r'/content/cleanSada-01/data.csv',\n", + " path_audio = r'/content/cleanSada-01/Audios'\n", + " ).iloc[:70]\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dlgCJUn1n4SX", + "outputId": "b773b0f0-6c12-480a-c05e-25317fadfabc" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df shape: (70, 3)\n" + ] + } + ], + "source": [ + "print('df shape: ',df.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T1pF6RY1PKrM" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZFG8CCvdM4hW", + "outputId": "a7e8f7d1-29c4-47d1-ab77-bf2b36b7b393" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "train_df shape: (56, 3)\n", + "eval_df shape: (14, 3)\n", + "full_generation_df shape: (5, 3)\n" + ] + } + ], + "source": [ + "full_generation_index = 0\n", + "full_generation_df = df.iloc[full_generation_index:full_generation_index+5].reset_index(drop=True)\n", + "\n", + "\n", + "train_df = df.sample(frac=0.8,random_state=42)\n", + "eval_df = 
df.drop(train_df.index)\n", + "\n", + "\n", + "\n", + "print('train_df shape: ',train_df.shape)\n", + "print('eval_df shape: ',eval_df.shape)\n", + "print('full_generation_df shape: ',full_generation_df.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 93 + }, + "id": "gbGcXR1qOIcG", + "outputId": "8b57ba29-f453-4d5d-e09a-c580b0f6f82a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "فكرة تبي تخليك تطير من الفرح\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "index = 4\n", + "print(full_generation_df['text'][index])\n", + "Audio(full_generation_df['audio'][index], rate=full_generation_df['samplerate'][index])" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hYYJxFCYn5Hv", + "outputId": "fdac8ead-eacc-4b86-f0d7-bb5691973dcb" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatasetDict({\n", + " train: Dataset({\n", + " features: ['text', 'audio', 'samplerate', '__index_level_0__'],\n", + " num_rows: 56\n", + " })\n", + " eval: Dataset({\n", + " features: ['text', 'audio', 'samplerate', '__index_level_0__'],\n", + " num_rows: 14\n", + " })\n", + " full_generation: Dataset({\n", + " features: ['text', 'audio', 'samplerate'],\n", + " num_rows: 5\n", + " })\n", + "})" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datasets import DatasetDict,Dataset\n", + "\n", + "ds = {\n", + " \"train\": Dataset.from_pandas(train_df),\n", + " \"eval\": Dataset.from_pandas(eval_df),\n", + " \"full_generation\": Dataset.from_pandas(full_generation_df),\n", + " }\n", + "\n", + "dataset = DatasetDict(ds)\n", + "\n", + 
"dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 113, + "referenced_widgets": [ + "998749ebfcae4b8a87904df802acf7a9", + "2e5c554ee6114f229c5690cad6bf7060", + "ce22b3e69ecf4e21bfe5e22e16341195", + "bcefb41a71474150b6d52ba641bbf5c7", + "5560fbcf95184ec6a45756562eacf4a0", + "0c658108d0414932afd140a1430d2a0d", + "3b70e66d78a74386bdc8d854f237e0ad", + "152a67c06b674f17995767e44f64a679", + "3de5299f28754fbb906272165c95ea57", + "3acb7316c01b4e0fb986bdf739c8b025", + "b6fa7dd5ebba4e6788b0c242530659fb", + "f24acb6723dc47fe9b1f114a95c7ac40", + "0f54060ed4814eb2ad5cd35a92b6c3d4", + "c286b7a0030c4fd1aa570461c099970a", + "65e0bf2886ba420fa28f2511f4c7d824", + "e4aca46b0cb74622a52c42c12de0b6c2", + "a192ba8570464b8bbc6dda1345a2a84f", + "9b0aae7e4ede47459cdaae19571c6c57", + "35479653780e4d1591e4ce1b72b88229", + "a51be96ada234a8b8b46c1de7e90ed92", + "bc77557196504559a8964f37ef89dd9f", + "9c47eb81217f4c248d9984748669a322", + "efa47ac3f3314000b04b3bd2c3ea704a", + "5f81921b0d114db5b860aedcc8074844", + "7851afc2f61a463e858bbe8bf74109b0", + "14b6310fc034427da62a7a7a0614242e", + "a1927a9e8ec1418e9767fa934e4c2023", + "6a52c70f0c744e7a85528f1b3deec4d9", + "56987c5fdd284298b3c66672575a4482", + "64a6d77a5e994c83948a45e491de3c42", + "393232064a2a4af8a1866c6296a610ed", + "7d8f7f5854224704846c343b7d06548a", + "bfa7f944c9d447408803db3ab01692bd" + ] + }, + "id": "JKlxoOjtn5EO", + "outputId": "db5291b1-9727-4e68-bf47-fe550e8a0926" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "998749ebfcae4b8a87904df802acf7a9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Saving the dataset (0/1 shards): 0%| | 0/56 [00:00\u001b[0m \u001b[31m[54 lines of output]\u001b[0m\n", + " \u001b[31m \u001b[0m Running from numpy source directory.\n", + " \u001b[31m \u001b[0m :461: UserWarning: Unrecognized setuptools command, proceeding with generating 
Cython sources and expanding templates\n", + " \u001b[31m \u001b[0m Cythonizing sources\n", + " \u001b[31m \u001b[0m Processing numpy/random/_bounded_integers.pxd.in\n", + " \u001b[31m \u001b[0m Processing numpy/random/_mt19937.pyx\n", + " \u001b[31m \u001b[0m /tmp/pip-install-occ1zsm1/numpy_2a2efe8a0c3644c9b9b2c5f8237d8328/tools/cythonize.py:75: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", + " \u001b[31m \u001b[0m required_version = LooseVersion('0.29.14')\n", + " \u001b[31m \u001b[0m /tmp/pip-install-occ1zsm1/numpy_2a2efe8a0c3644c9b9b2c5f8237d8328/tools/cythonize.py:77: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.\n", + " \u001b[31m \u001b[0m if LooseVersion(cython_version) < required_version:\n", + " \u001b[31m \u001b[0m \n", + " \u001b[31m \u001b[0m Error compiling Cython file:\n", + " \u001b[31m \u001b[0m ------------------------------------------------------------\n", + " \u001b[31m \u001b[0m ...\n", + " \u001b[31m \u001b[0m for i in range(1, RK_STATE_LEN):\n", + " \u001b[31m \u001b[0m self.rng_state.key[i] = val[i]\n", + " \u001b[31m \u001b[0m self.rng_state.pos = i\n", + " \u001b[31m \u001b[0m \n", + " \u001b[31m \u001b[0m self._bitgen.state = &self.rng_state\n", + " \u001b[31m \u001b[0m self._bitgen.next_uint64 = &mt19937_uint64\n", + " \u001b[31m \u001b[0m ^\n", + " \u001b[31m \u001b[0m ------------------------------------------------------------\n", + " \u001b[31m \u001b[0m \n", + " \u001b[31m \u001b[0m _mt19937.pyx:138:35: Cannot assign type 'uint64_t (*)(void *) except? -1 nogil' to 'uint64_t (*)(void *) noexcept nogil'. Exception values are incompatible. 
Suggest adding 'noexcept' to the type of the value being assigned.\n", + " \u001b[31m \u001b[0m Traceback (most recent call last):\n", + " \u001b[31m \u001b[0m File \"/tmp/pip-install-occ1zsm1/numpy_2a2efe8a0c3644c9b9b2c5f8237d8328/tools/cythonize.py\", line 238, in \n", + " \u001b[31m \u001b[0m main()\n", + " \u001b[31m \u001b[0m File \"/tmp/pip-install-occ1zsm1/numpy_2a2efe8a0c3644c9b9b2c5f8237d8328/tools/cythonize.py\", line 234, in main\n", + " \u001b[31m \u001b[0m find_process_files(root_dir)\n", + " \u001b[31m \u001b[0m File \"/tmp/pip-install-occ1zsm1/numpy_2a2efe8a0c3644c9b9b2c5f8237d8328/tools/cythonize.py\", line 225, in find_process_files\n", + " \u001b[31m \u001b[0m process(root_dir, fromfile, tofile, function, hash_db)\n", + " \u001b[31m \u001b[0m File \"/tmp/pip-install-occ1zsm1/numpy_2a2efe8a0c3644c9b9b2c5f8237d8328/tools/cythonize.py\", line 191, in process\n", + " \u001b[31m \u001b[0m processor_function(fromfile, tofile)\n", + " \u001b[31m \u001b[0m File \"/tmp/pip-install-occ1zsm1/numpy_2a2efe8a0c3644c9b9b2c5f8237d8328/tools/cythonize.py\", line 80, in process_pyx\n", + " \u001b[31m \u001b[0m subprocess.check_call(\n", + " \u001b[31m \u001b[0m File \"/usr/local/lib/python3.10/subprocess.py\", line 369, in check_call\n", + " \u001b[31m \u001b[0m raise CalledProcessError(retcode, cmd)\n", + " \u001b[31m \u001b[0m subprocess.CalledProcessError: Command '['/usr/local/bin/python', '-m', 'cython', '-3', '--fast-fail', '-o', '_mt19937.c', '_mt19937.pyx']' returned non-zero exit status 1.\n", + " \u001b[31m \u001b[0m Traceback (most recent call last):\n", + " \u001b[31m \u001b[0m File \"/usr/local/lib/python3.10/site-packages/pip/_vendor/pep517/in_process/_in_process.py\", line 351, in \n", + " \u001b[31m \u001b[0m main()\n", + " \u001b[31m \u001b[0m File \"/usr/local/lib/python3.10/site-packages/pip/_vendor/pep517/in_process/_in_process.py\", line 333, in main\n", + " \u001b[31m \u001b[0m json_out['return_val'] = hook(**hook_input['kwargs'])\n", + " 
\u001b[31m \u001b[0m File \"/usr/local/lib/python3.10/site-packages/pip/_vendor/pep517/in_process/_in_process.py\", line 152, in prepare_metadata_for_build_wheel\n", + " \u001b[31m \u001b[0m return hook(metadata_directory, config_settings)\n", + " \u001b[31m \u001b[0m File \"/tmp/pip-build-env-ljsx66qd/overlay/lib/python3.10/site-packages/setuptools/build_meta.py\", line 373, in prepare_metadata_for_build_wheel\n", + " \u001b[31m \u001b[0m self.run_setup()\n", + " \u001b[31m \u001b[0m File \"/tmp/pip-build-env-ljsx66qd/overlay/lib/python3.10/site-packages/setuptools/build_meta.py\", line 502, in run_setup\n", + " \u001b[31m \u001b[0m super().run_setup(setup_script=setup_script)\n", + " \u001b[31m \u001b[0m File \"/tmp/pip-build-env-ljsx66qd/overlay/lib/python3.10/site-packages/setuptools/build_meta.py\", line 318, in run_setup\n", + " \u001b[31m \u001b[0m exec(code, locals())\n", + " \u001b[31m \u001b[0m File \"\", line 488, in \n", + " \u001b[31m \u001b[0m File \"\", line 469, in setup_package\n", + " \u001b[31m \u001b[0m File \"\", line 275, in generate_cython\n", + " \u001b[31m \u001b[0m RuntimeError: Running cythonize failed!\n", + " \u001b[31m \u001b[0m \u001b[31m[end of output]\u001b[0m\n", + " \n", + " \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n", + "\u001b[?25h\u001b[1;31merror\u001b[0m: \u001b[1mmetadata-generation-failed\u001b[0m\n", + "\n", + "\u001b[31m×\u001b[0m Encountered error while generating package metadata.\n", + "\u001b[31m╰─>\u001b[0m See above for output.\n", + "\n", + "\u001b[1;35mnote\u001b[0m: This is an issue with the package mentioned above, not pip.\n", + "\u001b[1;36mhint\u001b[0m: See above for details.\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n", + 
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "[Errno 2] No such file or directory: 'vits # If you restart runtime'\n", + "/home/user/app/VitsModelSplit/vits\n", + "/home/user/app/VitsModelSplit/vits/monotonic_align\n", + "mkdir: cannot create directory ‘monotonic_align’: File exists\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library.\n", + " self.shell.db['dhist'] = compress_dhist(dhist)[-100:]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "running build_ext\n", + "building 'monotonic_align.core' extension\n", + "gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -fPIC -I/usr/local/lib/python3.10/site-packages/numpy/core/include -I/usr/local/include/python3.10 -c core.c -o build/temp.linux-x86_64-cpython-310/core.o\n", + "\u001b[01m\u001b[Kcore.c:\u001b[m\u001b[K In function ‘\u001b[01m\u001b[K__Pyx_InitGlobals\u001b[m\u001b[K’:\n", + "\u001b[01m\u001b[Kcore.c:16766:1:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[K‘\u001b[01m\u001b[KPyEval_InitThreads\u001b[m\u001b[K’ is deprecated [\u001b[01;35m\u001b[K\u001b]8;;https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wdeprecated-declarations\u0007-Wdeprecated-declarations\u001b]8;;\u0007\u001b[m\u001b[K]\n", + "16766 | \u001b[01;35m\u001b[KPyEval_InitThreads\u001b[m\u001b[K();\n", + " | \u001b[01;35m\u001b[K^~~~~~~~~~~~~~~~~~\u001b[m\u001b[K\n", + "In file included from \u001b[01m\u001b[K/usr/local/include/python3.10/Python.h:130\u001b[m\u001b[K,\n", + " from \u001b[01m\u001b[Kcore.c:16\u001b[m\u001b[K:\n", + "\u001b[01m\u001b[K/usr/local/include/python3.10/ceval.h:122:37:\u001b[m\u001b[K 
\u001b[01;36m\u001b[Knote: \u001b[m\u001b[Kdeclared here\n", + " 122 | Py_DEPRECATED(3.9) PyAPI_FUNC(void) \u001b[01;36m\u001b[KPyEval_InitThreads\u001b[m\u001b[K(void);\n", + " | \u001b[01;36m\u001b[K^~~~~~~~~~~~~~~~~~\u001b[m\u001b[K\n", + "gcc -shared build/temp.linux-x86_64-cpython-310/core.o -L/usr/local/lib -o build/lib.linux-x86_64-cpython-310/monotonic_align/core.cpython-310-x86_64-linux-gnu.so\n", + "copying build/lib.linux-x86_64-cpython-310/monotonic_align/core.cpython-310-x86_64-linux-gnu.so -> monotonic_align\n", + "/home/user/app/VitsModelSplit/vits\n" + ] + } + ], + "source": [ + "%cd vits\n", + "!pip install -r requirements.txt\n", + "\n", + "# After a runtime restart, run `%cd vits` again first (a trailing comment on a %cd line is read as part of the path)\n", + "%cd monotonic_align\n", + "!mkdir -p monotonic_align\n", + "!python setup.py build_ext --inplace\n", + "%cd .." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/user/app/VitsModelSplit/vits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library.\n", + " self.shell.db['dhist'] = compress_dhist(dhist)[-100:]\n" + ] + } + ], + "source": [ + "%cd VitsModelSplit/vits" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "running build_ext\n", + "copying build/lib.linux-x86_64-cpython-310/VitsModelSplit/monotonic_align/core.cpython-310-x86_64-linux-gnu.so -> VitsModelSplit/monotonic_align\n", + "error: could not create 'VitsModelSplit/monotonic_align/core.cpython-310-x86_64-linux-gnu.so': No such file or directory\n" + ] + } + ], + "source": [ + "\n", + "!python setup.py build_ext --inplace\n" + ] + }, + { + "cell_type": "code", +
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cd" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "import vits.monotonic_align as monotonic_align" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "monotonic_align.maximum_path" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vits.monotonic_align" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import torch" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/site-packages/torch/cuda/__init__.py:619: UserWarning: Can't initialize NVML\n", + " warnings.warn(\"Can't initialize NVML\")\n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "torch.cuda.is_available()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 284, + "referenced_widgets": [ + "57c834af4a294010b7caed36de6b7d79", + "135b012f933d48b4a867cbf525227081", + "3dcd356e8e954cf38f67a2bff568e3f5", + "b8796459e1c74bb983353ddd127cd56d", + "c13279147dad4105a639d58f8c2d2fcf", + "8459abce62c44e79a1583a389f864afc", + "855b9032317c4f768bd898ae1b6af3e9", + "efa12fb1ad4648f0b4bcf3b78a1302fe", + "81d38112f6e446fcacf0dec1798b4f5d", + "2550b3ce363e4b82a4a5bcde6c145c98", + 
"9e802ae6031d4ac19ce817011cc28d06", + "61d7ca93814042c6aadf56b15caf5997", + "b360db76baf4418ca4aeed29fd81b720", + "eab93aa2a75946b08da2215f0cbdb288", + "35f5fee12eed4173900f2927ef5c7afd", + "d477521b301d4664bf8ab43702cd0a5a", + "9e698b2e5d134a9191ffdbad58ca6ad1", + "8a1e0ee9f0314de982638e52595f8a53", + "a8d82aff9c2b49e49c9de456da8fe511", + "fd62c14630f04d458df5893f09c0dcca", + "5c905b7209854add9c77393a0849409f", + "c9f04136846c44f7a648e507c86c3c0b", + "79ca4d4fd27e4c1db631f48e2baf3aaa", + "f2cdce487c734cb1893175e1e4bce082", + "52c1bf0ebfaf487bbc3dbfcadf1faa41", + "5ebaf35c0f09443ca9ca1c5b645b8947", + "84f7b3d0681946dc8973b02b6b895cae", + "430deedff8c2485c9555d17a09f1423e", + "a48d9f97129047c28b050be572e74362", + "c0fe61fa22f640b89551ffcf4b4760b2", + "dfd9f39b7361440bba04466a7347b2ea", + "abb4c4940c664c80bd84f71099f0ac6f", + "d3ab433c06684e129be478bfe557cd97", + "7f715538e29b47e0a11e2b19a2001466", + "d8a608d149b14a1887eaf59390477371", + "8285ecf950364d5db6ab30da5e4bf0f2", + "cb1c3fd26b78419bbae1879ab1a13917", + "5657da9a99d6443a83bd8e43419b6f4f", + "447331f21a89479491620f0ca29bef24", + "f9e2a30b7838405b967dcea4adeaca17", + "4095f81dbe3242cba316f3dcd9bf7e0a", + "d7c8934bf00a430fa4abd49807d8ba02", + "64403b30855e46878cedc5039f5b8c2e", + "b8cac84385674af485ab72bb3af43e0f", + "dbdef225b00746c290947de559043d6e", + "6f80be9ccbd04c6aaa4ba6294bd419d7", + "c7c5935ced304a44b971fc6b1120b6c7", + "cf89a91dc6644e518b43799abfc0aa91", + "0a0751554fb4455d88c2f561b0f0a24a", + "9255782df8d74156aacaea6b4202fbd6", + "020c2499c1664282aa97cc630bc851a3", + "d06d2b0f0887462583f6ba08ffdcc1bd", + "c3ff5b9880fb47ecb710754a1b4fbe42", + "8422dfc2296c47fc904c2f01973a595e", + "56da9e4fa333494e89a9b62b0fd87295" + ] + }, + "id": "gjQ-uoS1xSKt", + "outputId": "eea81d60-c380-40ca-dc50-2ed0399cd646" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of the model checkpoint at facebook/mms-tts-ara were not used when initializing VitsModel: 
['flow.flows.0.wavenet.in_layers.0.weight_g', 'flow.flows.0.wavenet.in_layers.0.weight_v', 'flow.flows.0.wavenet.in_layers.1.weight_g', 'flow.flows.0.wavenet.in_layers.1.weight_v', 'flow.flows.0.wavenet.in_layers.2.weight_g', 'flow.flows.0.wavenet.in_layers.2.weight_v', 'flow.flows.0.wavenet.in_layers.3.weight_g', 'flow.flows.0.wavenet.in_layers.3.weight_v', 'flow.flows.0.wavenet.res_skip_layers.0.weight_g', 'flow.flows.0.wavenet.res_skip_layers.0.weight_v', 'flow.flows.0.wavenet.res_skip_layers.1.weight_g', 'flow.flows.0.wavenet.res_skip_layers.1.weight_v', 'flow.flows.0.wavenet.res_skip_layers.2.weight_g', 'flow.flows.0.wavenet.res_skip_layers.2.weight_v', 'flow.flows.0.wavenet.res_skip_layers.3.weight_g', 'flow.flows.0.wavenet.res_skip_layers.3.weight_v', 'flow.flows.1.wavenet.in_layers.0.weight_g', 'flow.flows.1.wavenet.in_layers.0.weight_v', 'flow.flows.1.wavenet.in_layers.1.weight_g', 'flow.flows.1.wavenet.in_layers.1.weight_v', 'flow.flows.1.wavenet.in_layers.2.weight_g', 'flow.flows.1.wavenet.in_layers.2.weight_v', 'flow.flows.1.wavenet.in_layers.3.weight_g', 'flow.flows.1.wavenet.in_layers.3.weight_v', 'flow.flows.1.wavenet.res_skip_layers.0.weight_g', 'flow.flows.1.wavenet.res_skip_layers.0.weight_v', 'flow.flows.1.wavenet.res_skip_layers.1.weight_g', 'flow.flows.1.wavenet.res_skip_layers.1.weight_v', 'flow.flows.1.wavenet.res_skip_layers.2.weight_g', 'flow.flows.1.wavenet.res_skip_layers.2.weight_v', 'flow.flows.1.wavenet.res_skip_layers.3.weight_g', 'flow.flows.1.wavenet.res_skip_layers.3.weight_v', 'flow.flows.2.wavenet.in_layers.0.weight_g', 'flow.flows.2.wavenet.in_layers.0.weight_v', 'flow.flows.2.wavenet.in_layers.1.weight_g', 'flow.flows.2.wavenet.in_layers.1.weight_v', 'flow.flows.2.wavenet.in_layers.2.weight_g', 'flow.flows.2.wavenet.in_layers.2.weight_v', 'flow.flows.2.wavenet.in_layers.3.weight_g', 'flow.flows.2.wavenet.in_layers.3.weight_v', 'flow.flows.2.wavenet.res_skip_layers.0.weight_g', 'flow.flows.2.wavenet.res_skip_layers.0.weight_v', 
'flow.flows.2.wavenet.res_skip_layers.1.weight_g', 'flow.flows.2.wavenet.res_skip_layers.1.weight_v', 'flow.flows.2.wavenet.res_skip_layers.2.weight_g', 'flow.flows.2.wavenet.res_skip_layers.2.weight_v', 'flow.flows.2.wavenet.res_skip_layers.3.weight_g', 'flow.flows.2.wavenet.res_skip_layers.3.weight_v', 'flow.flows.3.wavenet.in_layers.0.weight_g', 'flow.flows.3.wavenet.in_layers.0.weight_v', 'flow.flows.3.wavenet.in_layers.1.weight_g', 'flow.flows.3.wavenet.in_layers.1.weight_v', 'flow.flows.3.wavenet.in_layers.2.weight_g', 'flow.flows.3.wavenet.in_layers.2.weight_v', 'flow.flows.3.wavenet.in_layers.3.weight_g', 'flow.flows.3.wavenet.in_layers.3.weight_v', 'flow.flows.3.wavenet.res_skip_layers.0.weight_g', 'flow.flows.3.wavenet.res_skip_layers.0.weight_v', 'flow.flows.3.wavenet.res_skip_layers.1.weight_g', 'flow.flows.3.wavenet.res_skip_layers.1.weight_v', 'flow.flows.3.wavenet.res_skip_layers.2.weight_g', 'flow.flows.3.wavenet.res_skip_layers.2.weight_v', 'flow.flows.3.wavenet.res_skip_layers.3.weight_g', 'flow.flows.3.wavenet.res_skip_layers.3.weight_v', 'posterior_encoder.wavenet.in_layers.0.weight_g', 'posterior_encoder.wavenet.in_layers.0.weight_v', 'posterior_encoder.wavenet.in_layers.1.weight_g', 'posterior_encoder.wavenet.in_layers.1.weight_v', 'posterior_encoder.wavenet.in_layers.10.weight_g', 'posterior_encoder.wavenet.in_layers.10.weight_v', 'posterior_encoder.wavenet.in_layers.11.weight_g', 'posterior_encoder.wavenet.in_layers.11.weight_v', 'posterior_encoder.wavenet.in_layers.12.weight_g', 'posterior_encoder.wavenet.in_layers.12.weight_v', 'posterior_encoder.wavenet.in_layers.13.weight_g', 'posterior_encoder.wavenet.in_layers.13.weight_v', 'posterior_encoder.wavenet.in_layers.14.weight_g', 'posterior_encoder.wavenet.in_layers.14.weight_v', 'posterior_encoder.wavenet.in_layers.15.weight_g', 'posterior_encoder.wavenet.in_layers.15.weight_v', 'posterior_encoder.wavenet.in_layers.2.weight_g', 'posterior_encoder.wavenet.in_layers.2.weight_v', 
'posterior_encoder.wavenet.in_layers.3.weight_g', 'posterior_encoder.wavenet.in_layers.3.weight_v', 'posterior_encoder.wavenet.in_layers.4.weight_g', 'posterior_encoder.wavenet.in_layers.4.weight_v', 'posterior_encoder.wavenet.in_layers.5.weight_g', 'posterior_encoder.wavenet.in_layers.5.weight_v', 'posterior_encoder.wavenet.in_layers.6.weight_g', 'posterior_encoder.wavenet.in_layers.6.weight_v', 'posterior_encoder.wavenet.in_layers.7.weight_g', 'posterior_encoder.wavenet.in_layers.7.weight_v', 'posterior_encoder.wavenet.in_layers.8.weight_g', 'posterior_encoder.wavenet.in_layers.8.weight_v', 'posterior_encoder.wavenet.in_layers.9.weight_g', 'posterior_encoder.wavenet.in_layers.9.weight_v', 'posterior_encoder.wavenet.res_skip_layers.0.weight_g', 'posterior_encoder.wavenet.res_skip_layers.0.weight_v', 'posterior_encoder.wavenet.res_skip_layers.1.weight_g', 'posterior_encoder.wavenet.res_skip_layers.1.weight_v', 'posterior_encoder.wavenet.res_skip_layers.10.weight_g', 'posterior_encoder.wavenet.res_skip_layers.10.weight_v', 'posterior_encoder.wavenet.res_skip_layers.11.weight_g', 'posterior_encoder.wavenet.res_skip_layers.11.weight_v', 'posterior_encoder.wavenet.res_skip_layers.12.weight_g', 'posterior_encoder.wavenet.res_skip_layers.12.weight_v', 'posterior_encoder.wavenet.res_skip_layers.13.weight_g', 'posterior_encoder.wavenet.res_skip_layers.13.weight_v', 'posterior_encoder.wavenet.res_skip_layers.14.weight_g', 'posterior_encoder.wavenet.res_skip_layers.14.weight_v', 'posterior_encoder.wavenet.res_skip_layers.15.weight_g', 'posterior_encoder.wavenet.res_skip_layers.15.weight_v', 'posterior_encoder.wavenet.res_skip_layers.2.weight_g', 'posterior_encoder.wavenet.res_skip_layers.2.weight_v', 'posterior_encoder.wavenet.res_skip_layers.3.weight_g', 'posterior_encoder.wavenet.res_skip_layers.3.weight_v', 'posterior_encoder.wavenet.res_skip_layers.4.weight_g', 'posterior_encoder.wavenet.res_skip_layers.4.weight_v', 'posterior_encoder.wavenet.res_skip_layers.5.weight_g', 
'posterior_encoder.wavenet.res_skip_layers.5.weight_v', 'posterior_encoder.wavenet.res_skip_layers.6.weight_g', 'posterior_encoder.wavenet.res_skip_layers.6.weight_v', 'posterior_encoder.wavenet.res_skip_layers.7.weight_g', 'posterior_encoder.wavenet.res_skip_layers.7.weight_v', 'posterior_encoder.wavenet.res_skip_layers.8.weight_g', 'posterior_encoder.wavenet.res_skip_layers.8.weight_v', 'posterior_encoder.wavenet.res_skip_layers.9.weight_g', 'posterior_encoder.wavenet.res_skip_layers.9.weight_v']\n", + "- This IS expected if you are initializing VitsModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing VitsModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of VitsModel were not initialized from the model checkpoint at facebook/mms-tts-ara and are newly initialized: ['discriminator.discriminators.0.convs.0.bias', 'discriminator.discriminators.0.convs.0.weight', 'discriminator.discriminators.0.convs.1.bias', 'discriminator.discriminators.0.convs.1.weight', 'discriminator.discriminators.0.convs.2.bias', 'discriminator.discriminators.0.convs.2.weight', 'discriminator.discriminators.0.convs.3.bias', 'discriminator.discriminators.0.convs.3.weight', 'discriminator.discriminators.0.convs.4.bias', 'discriminator.discriminators.0.convs.4.weight', 'discriminator.discriminators.0.convs.5.bias', 'discriminator.discriminators.0.convs.5.weight', 'discriminator.discriminators.0.final_conv.bias', 'discriminator.discriminators.0.final_conv.weight', 'discriminator.discriminators.1.convs.0.bias', 'discriminator.discriminators.1.convs.0.weight', 'discriminator.discriminators.1.convs.1.bias', 'discriminator.discriminators.1.convs.1.weight', 
'discriminator.discriminators.1.convs.2.bias', 'discriminator.discriminators.1.convs.2.weight', 'discriminator.discriminators.1.convs.3.bias', 'discriminator.discriminators.1.convs.3.weight', 'discriminator.discriminators.1.convs.4.bias', 'discriminator.discriminators.1.convs.4.weight', 'discriminator.discriminators.1.final_conv.bias', 'discriminator.discriminators.1.final_conv.weight', 'discriminator.discriminators.2.convs.0.bias', 'discriminator.discriminators.2.convs.0.weight', 'discriminator.discriminators.2.convs.1.bias', 'discriminator.discriminators.2.convs.1.weight', 'discriminator.discriminators.2.convs.2.bias', 'discriminator.discriminators.2.convs.2.weight', 'discriminator.discriminators.2.convs.3.bias', 'discriminator.discriminators.2.convs.3.weight', 'discriminator.discriminators.2.convs.4.bias', 'discriminator.discriminators.2.convs.4.weight', 'discriminator.discriminators.2.final_conv.bias', 'discriminator.discriminators.2.final_conv.weight', 'discriminator.discriminators.3.convs.0.bias', 'discriminator.discriminators.3.convs.0.weight', 'discriminator.discriminators.3.convs.1.bias', 'discriminator.discriminators.3.convs.1.weight', 'discriminator.discriminators.3.convs.2.bias', 'discriminator.discriminators.3.convs.2.weight', 'discriminator.discriminators.3.convs.3.bias', 'discriminator.discriminators.3.convs.3.weight', 'discriminator.discriminators.3.convs.4.bias', 'discriminator.discriminators.3.convs.4.weight', 'discriminator.discriminators.3.final_conv.bias', 'discriminator.discriminators.3.final_conv.weight', 'discriminator.discriminators.4.convs.0.bias', 'discriminator.discriminators.4.convs.0.weight', 'discriminator.discriminators.4.convs.1.bias', 'discriminator.discriminators.4.convs.1.weight', 'discriminator.discriminators.4.convs.2.bias', 'discriminator.discriminators.4.convs.2.weight', 'discriminator.discriminators.4.convs.3.bias', 'discriminator.discriminators.4.convs.3.weight', 'discriminator.discriminators.4.convs.4.bias', 
'discriminator.discriminators.4.convs.4.weight', 'discriminator.discriminators.4.final_conv.bias', 'discriminator.discriminators.4.final_conv.weight', 'discriminator.discriminators.5.convs.0.bias', 'discriminator.discriminators.5.convs.0.weight', 'discriminator.discriminators.5.convs.1.bias', 'discriminator.discriminators.5.convs.1.weight', 'discriminator.discriminators.5.convs.2.bias', 'discriminator.discriminators.5.convs.2.weight', 'discriminator.discriminators.5.convs.3.bias', 'discriminator.discriminators.5.convs.3.weight', 'discriminator.discriminators.5.convs.4.bias', 'discriminator.discriminators.5.convs.4.weight', 'discriminator.discriminators.5.final_conv.bias', 'discriminator.discriminators.5.final_conv.weight', 'flow.flows.0.wavenet.in_layers.0.parametrizations.weight.original0', 'flow.flows.0.wavenet.in_layers.0.parametrizations.weight.original1', 'flow.flows.0.wavenet.in_layers.1.parametrizations.weight.original0', 'flow.flows.0.wavenet.in_layers.1.parametrizations.weight.original1', 'flow.flows.0.wavenet.in_layers.2.parametrizations.weight.original0', 'flow.flows.0.wavenet.in_layers.2.parametrizations.weight.original1', 'flow.flows.0.wavenet.in_layers.3.parametrizations.weight.original0', 'flow.flows.0.wavenet.in_layers.3.parametrizations.weight.original1', 'flow.flows.0.wavenet.res_skip_layers.0.parametrizations.weight.original0', 'flow.flows.0.wavenet.res_skip_layers.0.parametrizations.weight.original1', 'flow.flows.0.wavenet.res_skip_layers.1.parametrizations.weight.original0', 'flow.flows.0.wavenet.res_skip_layers.1.parametrizations.weight.original1', 'flow.flows.0.wavenet.res_skip_layers.2.parametrizations.weight.original0', 'flow.flows.0.wavenet.res_skip_layers.2.parametrizations.weight.original1', 'flow.flows.0.wavenet.res_skip_layers.3.parametrizations.weight.original0', 'flow.flows.0.wavenet.res_skip_layers.3.parametrizations.weight.original1', 'flow.flows.1.wavenet.in_layers.0.parametrizations.weight.original0', 
'flow.flows.1.wavenet.in_layers.0.parametrizations.weight.original1', 'flow.flows.1.wavenet.in_layers.1.parametrizations.weight.original0', 'flow.flows.1.wavenet.in_layers.1.parametrizations.weight.original1', 'flow.flows.1.wavenet.in_layers.2.parametrizations.weight.original0', 'flow.flows.1.wavenet.in_layers.2.parametrizations.weight.original1', 'flow.flows.1.wavenet.in_layers.3.parametrizations.weight.original0', 'flow.flows.1.wavenet.in_layers.3.parametrizations.weight.original1', 'flow.flows.1.wavenet.res_skip_layers.0.parametrizations.weight.original0', 'flow.flows.1.wavenet.res_skip_layers.0.parametrizations.weight.original1', 'flow.flows.1.wavenet.res_skip_layers.1.parametrizations.weight.original0', 'flow.flows.1.wavenet.res_skip_layers.1.parametrizations.weight.original1', 'flow.flows.1.wavenet.res_skip_layers.2.parametrizations.weight.original0', 'flow.flows.1.wavenet.res_skip_layers.2.parametrizations.weight.original1', 'flow.flows.1.wavenet.res_skip_layers.3.parametrizations.weight.original0', 'flow.flows.1.wavenet.res_skip_layers.3.parametrizations.weight.original1', 'flow.flows.2.wavenet.in_layers.0.parametrizations.weight.original0', 'flow.flows.2.wavenet.in_layers.0.parametrizations.weight.original1', 'flow.flows.2.wavenet.in_layers.1.parametrizations.weight.original0', 'flow.flows.2.wavenet.in_layers.1.parametrizations.weight.original1', 'flow.flows.2.wavenet.in_layers.2.parametrizations.weight.original0', 'flow.flows.2.wavenet.in_layers.2.parametrizations.weight.original1', 'flow.flows.2.wavenet.in_layers.3.parametrizations.weight.original0', 'flow.flows.2.wavenet.in_layers.3.parametrizations.weight.original1', 'flow.flows.2.wavenet.res_skip_layers.0.parametrizations.weight.original0', 'flow.flows.2.wavenet.res_skip_layers.0.parametrizations.weight.original1', 'flow.flows.2.wavenet.res_skip_layers.1.parametrizations.weight.original0', 'flow.flows.2.wavenet.res_skip_layers.1.parametrizations.weight.original1', 
'flow.flows.2.wavenet.res_skip_layers.2.parametrizations.weight.original0', 'flow.flows.2.wavenet.res_skip_layers.2.parametrizations.weight.original1', 'flow.flows.2.wavenet.res_skip_layers.3.parametrizations.weight.original0', 'flow.flows.2.wavenet.res_skip_layers.3.parametrizations.weight.original1', 'flow.flows.3.wavenet.in_layers.0.parametrizations.weight.original0', 'flow.flows.3.wavenet.in_layers.0.parametrizations.weight.original1', 'flow.flows.3.wavenet.in_layers.1.parametrizations.weight.original0', 'flow.flows.3.wavenet.in_layers.1.parametrizations.weight.original1', 'flow.flows.3.wavenet.in_layers.2.parametrizations.weight.original0', 'flow.flows.3.wavenet.in_layers.2.parametrizations.weight.original1', 'flow.flows.3.wavenet.in_layers.3.parametrizations.weight.original0', 'flow.flows.3.wavenet.in_layers.3.parametrizations.weight.original1', 'flow.flows.3.wavenet.res_skip_layers.0.parametrizations.weight.original0', 'flow.flows.3.wavenet.res_skip_layers.0.parametrizations.weight.original1', 'flow.flows.3.wavenet.res_skip_layers.1.parametrizations.weight.original0', 'flow.flows.3.wavenet.res_skip_layers.1.parametrizations.weight.original1', 'flow.flows.3.wavenet.res_skip_layers.2.parametrizations.weight.original0', 'flow.flows.3.wavenet.res_skip_layers.2.parametrizations.weight.original1', 'flow.flows.3.wavenet.res_skip_layers.3.parametrizations.weight.original0', 'flow.flows.3.wavenet.res_skip_layers.3.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.0.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.0.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.1.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.1.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.10.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.10.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.11.parametrizations.weight.original0', 
'posterior_encoder.wavenet.in_layers.11.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.12.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.12.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.13.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.13.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.14.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.14.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.15.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.15.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.2.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.2.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.3.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.3.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.4.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.4.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.5.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.5.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.6.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.6.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.7.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.7.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.8.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.8.parametrizations.weight.original1', 'posterior_encoder.wavenet.in_layers.9.parametrizations.weight.original0', 'posterior_encoder.wavenet.in_layers.9.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.0.parametrizations.weight.original0', 
'posterior_encoder.wavenet.res_skip_layers.0.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.1.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.1.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.10.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.10.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.11.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.11.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.12.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.12.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.13.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.13.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.14.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.14.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.15.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.15.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.2.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.2.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.3.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.3.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.4.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.4.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.5.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.5.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.6.parametrizations.weight.original0', 
'posterior_encoder.wavenet.res_skip_layers.6.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.7.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.7.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.8.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.8.parametrizations.weight.original1', 'posterior_encoder.wavenet.res_skip_layers.9.parametrizations.weight.original0', 'posterior_encoder.wavenet.res_skip_layers.9.parametrizations.weight.original1']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + } + ], + "source": [ + "model = VitsModel.from_pretrained(\"facebook/mms-tts-ara\",cache_dir=\"./\")\n", + "tokenizer = AutoTokenizer.from_pretrained(\"facebook/mms-tts-ara\",cache_dir=\"./\")\n", + "feature_extractor = VitsFeatureExtractor()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 713, + "referenced_widgets": [ + "5fe8c49187324ff5a9e7a1902a56e6c1", + "c75e944aeb794d5fb3b17d6db308d5c9", + "5c2193563191495ba8b767e3fe53ad34", + "f3e903c659be4a5cb5b5c861d4bc2cb6", + "eaf8644ba06746008ebc0bbf1f6d1f58", + "ed2b79e52630424782ba93485693ea8e", + "312aff17ac5745a78ac0a0e5114d2e66", + "8354096236c84017bba7441e2442fa42", + "a93fd7b8c44a4b41b175db4d7dd3d321", + "ad11453c933147cd88e46285c9c44d1e", + "51f7dae0eed843ffbe3681681adfe2b4", + "0c95cda53fc64f11ab33b3b7eba32752", + "c3d16dd56b4e42f1bc90ec39b05dd30e", + "f5072addecb0409fb88432072cb1b389", + "d62c0f26002241a3b46e5315870fa6f1", + "85cdb8ca0dba4279b6716e53950d1143", + "347caf53164840b3a5cfa7d82f49a01a", + "22056322f3984123a377201d9d429146", + "c847db691b394603a73ae87915d3ed04", + "a31dec7811dd4d45aeedfc5840228943", + "a84c1ab2d30e448b9aeb8c27efe04d7f", + "3d4326b689a647c295d9a798224f37dd", + "819790829e504c78a3645180cb802589", + 
"270c2a900df74d9d8b687a8ad2b90053", + "339c8eed6a5c41c9aebcdb823cfcbb3e", + "6e2304b4162f4757be1e663612c6b263", + "bb4ffb1d46d84c24b26c5e58369359b6", + "20860987ba3341aa999727de64e97099", + "f83498582e404ffeb934d7480342cf5a", + "9aea6400a4d04c748fa5a2a7ad90e155", + "2196612c515e49ad9ef9576c277dc39a", + "eaaf2cb486ce4e7e90049f6ff1c201a6", + "b1b627ee58eb4b9297d9ca93301a3576", + "beaba909476d4d1299b862d396b7b24f", + "90452370798e435f8ec1568e3faca4b2", + "9cad3b9f73f4418e8e257f708e4d2f78", + "9afb1f8f10014dda847a587d935ee552", + "9f437c4ec8bb436abdb364e9dba256e5", + "366fe10d4c1e4a5c9dba52a9d5955ab2", + "9c6b341f1c82422aa30c9e9a4a737bd0", + "790fa683f6d54cf6a7fab8a838e5b996", + "f71d71156f3948bd8217d36f00f65e32", + "017db64686184080b1a5cb9c9b3236d0", + "f693e52fd9ea4c1986957104f7415a0c", + "8d5dcab702944737879e96f849401339", + "f0b172a83f434f2c81734e1c8216677d", + "6b14177921df4db6a1d725fe8b6db251", + "b883163a1ee4439a97c210cf759ca01f", + "a4a3a38618ff47ddb8826edee43de913", + "81f20981fbed444997db3b3872be22a3", + "67799d1b16f24379a92bffdfeb19914c", + "169c9039501145828fae8c3d0d6979de", + "75a93cefcdde4078ab270864899c6b8f", + "902d41e40b3f420db0871365c8bd9665", + "d9bb28ace6de471c9f9307c64e8c1f38", + "a7c3e7f16ea84282a87c261b672b67df", + "8b5ce15ee86d4db6bb9f495423511205", + "40bcba1944594b3898458a33c2b116ad", + "0d9c4b5f9aea438da6ca5bda7b8c8fde", + "07dcb7da40e24821bc02a7ab08c23e74", + "e9ba94addb0e4da585a0a1c1a049dacc", + "d15b00851c0c44a09d54d524256655a4", + "531fc44a16e343b2adf5cbf562aa6ada", + "63cb8174b6a648488a4a4c9b7f2fcb63", + "ff8e8ae333814136b615e33b734f650d", + "4057e65d28674c119c96c9292804d07c", + "db06e8aed1474220b32ae3248d39748b", + "654e88d4747540ce8ed0170609c520b5", + "403b1089c57a4fea91b8fa2c4957a91a", + "c7a19333c43047458b6b754af7203f1d", + "0c38053a5e40419bbfbf9777d1947f62", + "3b745e441db3401c95f231078fca4cf4", + "4302a7d26f174e20aa5391a97a895860", + "2442a8cc799c4b5ca593aeb47bbc47b1", + "ed5037f18ee8489abb802b4f2d36b540", + 
"91d8558d8e0042baac37e3e83199ddf1", + "b7b817a1400840ec80819d232df37e5c", + "b91ceb88d032476aa4de3dca3efc676d", + "5962e7de7fc846e180edc8eba0a083cb", + "3b8671570fcd4e83978a4ff3435550b8", + "0b8c274470a04cfb8e18cc6980d43419", + "1484a6cea27f4084b4df138165c909be", + "2c7acd7bd0aa4e4ca3f879ea5c47e0d1", + "de14de5e58304e658ffca6225c724998", + "91aeefae393647f09face5eea070d0c2", + "e5dce5ae71054c1ead47f42b178d4d71", + "41b907d0ab7f4d58a356d72594f17baa", + "eac84897079a452dbb0e22b3081b87a9", + "c194eb49e9d84dca99b054845f1ceaa4", + "defddf038323411aa81b5fa7b64a201e", + "acbf0c6175ad4483b8baa67060c52c14", + "70b07c878bea4b26a71135dda9e47749", + "e5976c493938462eacf1e939d442f03d", + "26b2eb080c7c4e958edca4959f0c8951", + "4198ee7869ec4ef3b4c56748d4e9752b", + "a219e27d8fe44af1b15092bfad83f43f", + "65f8f4edc481444f9bcadbed1647a9e5", + "a4acbb2bbaea404e8e91a945e32dca5d", + "498f0edc46eb4e3790e2e7f7f2eaed40", + "bda5fdc8c3f74530b98ae28769b49ab6", + "f50b92560b8c4b86b505d18bd72a4aeb", + "e8cb34f0f8054a1b97146d134bdf38ae", + "6062b8e2d4e6466eb8995a899918cbdc", + "fb600097eb4a4bb7a68b6e8832860b94", + "929a10e1233a4dd793f1205e5e53409a", + "4386c14a16ce40cda419c9030345581d", + "039b5b6c083848ffa087cbe0c463da5a", + "f71575651feb4e9ebecf9624cb922d91", + "7ef42115ee38460c82a198a4d90ac269", + "76f899e8b9a043caa3bc8d164279952c", + "e03cbc92104d4a5f9a584225c111574c", + "5aa238fa9b5e4430a4238406456caa77", + "8e48fd70bdba4fc5b515d0bbf994a770", + "3ffa7d6309dd4bd994ba710b72d68299", + "9cd405360ec840dea97d00d4b46bc367", + "2459318b633e4b12ab400341bb3b43cb", + "20f71610e3284e0cba496430e75c5eee", + "5c9ef3ba1ac146f7aec7905b0978ecae", + "aee5d7b9d2994c419bd614810fa6e6b2", + "d40b97c2c7414be58e15401464352a05", + "66bea563a202481e85cf916b60711fa3", + "69661dc87b7348d9a69dd1efe2363214", + "0edcbaccd4074f1f9b0965d907d4e35e", + "db6a12d0be6f48dfae06d360af20a4cd", + "24f4431025a64f979059affd5eea0a50", + "b32f78bbae0249239682a88174051195", + "ed3df636482043029fe38ebcfb8f1f2f", + 
"a11a6872aba44566a039af46fac49615", + "838395a6a04346afbfd930ca9bb86082", + "3e8f5fadfd3c43d2809aa89975f7348b", + "57c711c754dc43879942f645c4fe68d5", + "a51bc4a7234e454993d09e2d126defa7", + "5cc078eff4734f5f810999fc4abbaec3", + "fefdf0d8046c41788ecaad39a573f9e3", + "cd833887722b40b4b0178c816d578fc7", + "1abe8d9bc1144fd69e7a6b1e12a267f4", + "2139e7a11a564465b6d3dab42d7979c3", + "dac8c067b12a483cb2cacf5984ab49e6", + "39d8a1b664ff4a03b015107cf55f53bf", + "dd482220d6b448c0b9da1fdce0269cf7", + "d617604ab51645e5b2140e608f15056e", + "850e12143fa3428492f1c642137ed68c", + "ce6d4e3919d248c8b4eef6090d9e68a6" + ] + }, + "id": "e5OwBRCzwU1X", + "outputId": "d7e962da-0a89-4796-dddc-31fb306054d9" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Parameter 'function'= of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.\n", + "WARNING:datasets.fingerprint:Parameter 'function'= of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. 
Subsequent hashing failures won't be showed.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5fe8c49187324ff5a9e7a1902a56e6c1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "preprocess: 0%| | 0/10 [00:00