Ailyth committed on
Commit
a73e40d
1 Parent(s): 9a85c9a

push_231002013754

Browse files
Files changed (1) hide show
  1. Bert_VITS2_Guide.ipynb +0 -323
Bert_VITS2_Guide.ipynb DELETED
@@ -1,323 +0,0 @@
1
- {
2
- "nbformat": 4,
3
- "nbformat_minor": 0,
4
- "metadata": {
5
- "colab": {
6
- "provenance": [],
7
- "authorship_tag": "ABX9TyP+PQNqeOew0Ap+hzVH7i8r",
8
- "include_colab_link": true
9
- },
10
- "kernelspec": {
11
- "name": "python3",
12
- "display_name": "Python 3"
13
- },
14
- "language_info": {
15
- "name": "python"
16
- }
17
- },
18
- "cells": [
19
- {
20
- "cell_type": "markdown",
21
- "metadata": {
22
- "id": "view-in-github",
23
- "colab_type": "text"
24
- },
25
- "source": [
26
- "<a href=\"https://colab.research.google.com/github/KevinWang676/Bert-VITS2-quick-start/blob/main/Bert_VITS2_Guide.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
27
- ]
28
- },
29
- {
30
- "cell_type": "markdown",
31
- "source": [
32
- "### 0. 如果使用AutoDL,请运行下载packages的加速代码:"
33
- ],
34
- "metadata": {
35
- "id": "CGg4SV4ObQaT"
36
- }
37
- },
38
- {
39
- "cell_type": "code",
40
- "execution_count": null,
41
- "metadata": {
42
- "id": "MgfAJzoHbK2-"
43
- },
44
- "outputs": [],
45
- "source": [
46
- "!source /etc/network_turbo\n",
47
- "!python -m pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt"
48
- ]
49
- },
50
- {
51
- "cell_type": "markdown",
52
- "source": [
53
- "### 1. 数据集重采样和标注"
54
- ],
55
- "metadata": {
56
- "id": "sloMn00-bgxY"
57
- }
58
- },
59
- {
60
- "cell_type": "code",
61
- "source": [
62
- "import subprocess\n",
63
- "import random\n",
64
- "import os\n",
65
- "from pathlib import Path\n",
66
- "import librosa\n",
67
- "from scipy.io import wavfile\n",
68
- "import numpy as np\n",
69
- "import torch\n",
70
- "import csv\n",
71
- "import whisper\n",
72
- "\n",
73
- "a=\"linghua\" # 请在这里修改说话人的名字,目前只支持中文语音\n",
74
- "\n",
75
- "def split_long_audio(model, filepaths, save_dir=\"data_dir\", out_sr=44100):\n",
76
- " if isinstance(filepaths, str):\n",
77
- " filepaths = [filepaths]\n",
78
- "\n",
79
- " for file_idx, filepath in enumerate(filepaths):\n",
80
- "\n",
81
- " save_path = Path(save_dir)\n",
82
- " save_path.mkdir(exist_ok=True, parents=True)\n",
83
- "\n",
84
- " print(f\"Transcribing file {file_idx}: '{filepath}' to segments...\")\n",
85
- " result = model.transcribe(filepath, word_timestamps=True, task=\"transcribe\", beam_size=5, best_of=5)\n",
86
- " segments = result['segments']\n",
87
- "\n",
88
- " wav, sr = librosa.load(filepath, sr=None, offset=0, duration=None, mono=True)\n",
89
- " wav, _ = librosa.effects.trim(wav, top_db=20)\n",
90
- " peak = np.abs(wav).max()\n",
91
- " if peak > 1.0:\n",
92
- " wav = 0.98 * wav / peak\n",
93
- " wav2 = librosa.resample(wav, orig_sr=sr, target_sr=out_sr)\n",
94
- " wav2 /= max(wav2.max(), -wav2.min())\n",
95
- "\n",
96
- " for i, seg in enumerate(segments):\n",
97
- " start_time = seg['start']\n",
98
- " end_time = seg['end']\n",
99
- " wav_seg = wav2[int(start_time * out_sr):int(end_time * out_sr)]\n",
100
- " wav_seg_name = f\"{a}_{i}.wav\" # 在上方可修改名字\n",
101
- " out_fpath = save_path / wav_seg_name\n",
102
- " wavfile.write(out_fpath, rate=out_sr, data=(wav_seg * np.iinfo(np.int16).max).astype(np.int16))"
103
- ],
104
- "metadata": {
105
- "id": "LtLBGhGCbYYh"
106
- },
107
- "execution_count": null,
108
- "outputs": []
109
- },
110
- {
111
- "cell_type": "code",
112
- "source": [
113
- "whisper_size = \"large\"\n",
114
- "whisper_model = whisper.load_model(whisper_size)"
115
- ],
116
- "metadata": {
117
- "id": "--wS7X95b--m"
118
- },
119
- "execution_count": null,
120
- "outputs": []
121
- },
122
- {
123
- "cell_type": "markdown",
124
- "source": [
125
- "### 请将下方的**linghua.wav**修改成自己的.wav文件名,路径./custom_character_voice/**linghua**/也可以改为自己的角色名\n"
126
- ],
127
- "metadata": {
128
- "id": "0wAE5HRXcCQ_"
129
- }
130
- },
131
- {
132
- "cell_type": "code",
133
- "source": [
134
- "split_long_audio(whisper_model, \"./linghua.wav\", \"./custom_character_voice/linghua/\")"
135
- ],
136
- "metadata": {
137
- "id": "3f7ljJhCcEbd"
138
- },
139
- "execution_count": null,
140
- "outputs": []
141
- },
142
- {
143
- "cell_type": "code",
144
- "source": [
145
- "!python short_audio_transcribe.py --languages \"C\" --whisper_size large"
146
- ],
147
- "metadata": {
148
- "id": "rBJDPe3ccVrP"
149
- },
150
- "execution_count": null,
151
- "outputs": []
152
- },
153
- {
154
- "cell_type": "markdown",
155
- "source": [
156
- "#### 处理完成后,可以打开\"./filelists/short_character_anno.list\"文件进行微调"
157
- ],
158
- "metadata": {
159
- "id": "4pesbcMjcikn"
160
- }
161
- },
162
- {
163
- "cell_type": "markdown",
164
- "source": [
165
- "### 2. 文本处理"
166
- ],
167
- "metadata": {
168
- "id": "9pxo4KL-ceGI"
169
- }
170
- },
171
- {
172
- "cell_type": "code",
173
- "source": [
174
- "!python preprocess_text.py"
175
- ],
176
- "metadata": {
177
- "id": "_xfO2r_0cgCT"
178
- },
179
- "execution_count": null,
180
- "outputs": []
181
- },
182
- {
183
- "cell_type": "markdown",
184
- "source": [
185
- "### 3. 运行bert_gen.py"
186
- ],
187
- "metadata": {
188
- "id": "DoDs7lL6cu01"
189
- }
190
- },
191
- {
192
- "cell_type": "code",
193
- "source": [
194
- "!python bert_gen.py"
195
- ],
196
- "metadata": {
197
- "id": "jyiT28B3cxWX"
198
- },
199
- "execution_count": null,
200
- "outputs": []
201
- },
202
- {
203
- "cell_type": "markdown",
204
- "source": [
205
- "### 4. 训练"
206
- ],
207
- "metadata": {
208
- "id": "dHQPDFdbc04g"
209
- }
210
- },
211
- {
212
- "cell_type": "markdown",
213
- "source": [
214
- "#### 可以在\"./configs/config.json\"更改训练参数,包括epoch,学习率等"
215
- ],
216
- "metadata": {
217
- "id": "gHNws-IUc6Sd"
218
- }
219
- },
220
- {
221
- "cell_type": "code",
222
- "source": [
223
- "cd monotonic_align"
224
- ],
225
- "metadata": {
226
- "id": "S56s0emH8BqN"
227
- },
228
- "execution_count": null,
229
- "outputs": []
230
- },
231
- {
232
- "cell_type": "code",
233
- "source": [
234
- "!python setup.py build_ext --inplace"
235
- ],
236
- "metadata": {
237
- "id": "6rLsyel-8KKc"
238
- },
239
- "execution_count": null,
240
- "outputs": []
241
- },
242
- {
243
- "cell_type": "code",
244
- "source": [
245
- "cd .."
246
- ],
247
- "metadata": {
248
- "id": "mUgA6ho2-XAN"
249
- },
250
- "execution_count": null,
251
- "outputs": []
252
- },
253
- {
254
- "cell_type": "markdown",
255
- "source": [
256
- "#### 若为首次训练,请运行:"
257
- ],
258
- "metadata": {
259
- "id": "8vJ6VF__dCYW"
260
- }
261
- },
262
- {
263
- "cell_type": "code",
264
- "source": [
265
- "!python train_ms.py -c ./configs/config.json"
266
- ],
267
- "metadata": {
268
- "id": "iwHCWVijc5h6"
269
- },
270
- "execution_count": null,
271
- "outputs": []
272
- },
273
- {
274
- "cell_type": "markdown",
275
- "source": [
276
- "#### 若为继续训练,请运行:"
277
- ],
278
- "metadata": {
279
- "id": "skAGULw2dKXW"
280
- }
281
- },
282
- {
283
- "cell_type": "code",
284
- "source": [
285
- "!python train_ms.py -c ./configs/config.json --cont"
286
- ],
287
- "metadata": {
288
- "id": "Ru09Gmavc2t4"
289
- },
290
- "execution_count": null,
291
- "outputs": []
292
- },
293
- {
294
- "cell_type": "markdown",
295
- "source": [
296
- "### 5. 推理"
297
- ],
298
- "metadata": {
299
- "id": "IinmucfadVLU"
300
- }
301
- },
302
- {
303
- "cell_type": "markdown",
304
- "source": [
305
- "#### 请将下方的**G_latest.pth**修改为最新的模型文件,如**G_3400.pth**"
306
- ],
307
- "metadata": {
308
- "id": "psBRLH_TdZDb"
309
- }
310
- },
311
- {
312
- "cell_type": "code",
313
- "source": [
314
- "!python inference_webui.py --model_dir ./logs/OUTPUT_MODEL/G_latest.pth"
315
- ],
316
- "metadata": {
317
- "id": "lOWVtUgMdUZa"
318
- },
319
- "execution_count": null,
320
- "outputs": []
321
- }
322
- ]
323
- }