{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"id": "7c7beac1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:root:Loaded checkpoint 'logs/48k/G_168000.pth' (iteration 52)\n"
]
}
],
"source": [
"import io\n",
"import logging\n",
"import time\n",
"from pathlib import Path\n",
"\n",
"import librosa\n",
"import numpy as np\n",
"import soundfile\n",
"import IPython.display as ipd\n",
"from inference import infer_tool\n",
"from inference import slicer\n",
"from inference.infer_tool import Svc\n",
"\n",
"logging.getLogger('numba').setLevel(logging.WARNING)\n",
"chunks_dict = infer_tool.read_temp(\"inference/chunks_temp.json\")\n",
"\n",
"model_path = \"logs/48k/G_168000.pth\"\n",
"config_path = \"configs/config.json\"\n",
"svc_model = Svc(model_path, config_path)\n",
"infer_tool.mkdir([\"raw\", \"results\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b8a69a25",
"metadata": {},
"outputs": [],
"source": [
"# 支持多个wav文件,放在raw文件夹下\n",
"clean_names = [\"7_1\"]\n",
"trans = [2] # 音高调整,支持正负(半音)\n",
"spk_list = ['钟离'] # 每次同时合成多语者音色\n",
"slice_db = -40 # 默认-40,嘈杂的音频可以-30,干声保留呼吸可以-50\n",
"wav_format = 'flac' # 音频输出格式\n",
"\n",
"infer_tool.fill_a_to_b(trans, clean_names)\n",
"for clean_name, tran in zip(clean_names, trans):\n",
" raw_audio_path = f\"raw/{clean_name}\"\n",
" if \".\" not in raw_audio_path:\n",
" raw_audio_path += \".wav\"\n",
" infer_tool.format_wav(raw_audio_path)\n",
" wav_path = Path(raw_audio_path).with_suffix('.wav')\n",
" audio, sr = librosa.load(wav_path, mono=True, sr=None)\n",
" wav_hash = infer_tool.get_md5(audio)\n",
" if wav_hash in chunks_dict.keys():\n",
" print(\"load chunks from temp\")\n",
" chunks = chunks_dict[wav_hash][\"chunks\"]\n",
" else:\n",
" chunks = slicer.cut(wav_path, db_thresh=slice_db)\n",
" print(chunks)\n",
" chunks_dict[wav_hash] = {\"chunks\": chunks, \"time\": int(time.time())}\n",
" infer_tool.write_temp(\"inference/chunks_temp.json\", chunks_dict)\n",
" audio_data, audio_sr = slicer.chunks2audio(wav_path, chunks)\n",
"\n",
" for spk in spk_list:\n",
" audio = []\n",
" for (slice_tag, data) in audio_data:\n",
" print(f'#=====segment start, {round(len(data) / audio_sr, 3)}s======')\n",
" length = int(np.ceil(len(data) / audio_sr * svc_model.target_sample))\n",
" raw_path = io.BytesIO()\n",
" soundfile.write(raw_path, data, audio_sr, format=\"wav\")\n",
" raw_path.seek(0)\n",
" if slice_tag:\n",
" print('jump empty segment')\n",
" _audio = np.zeros(length)\n",
" else:\n",
" out_audio, out_sr = svc_model.infer(spk, tran, raw_path)\n",
" _audio = out_audio.cpu().numpy()\n",
" audio.extend(list(_audio))\n",
"\n",
" res_path = f'./results/{clean_name}_{tran}key_{spk}.{wav_format}'\n",
" soundfile.write(res_path, audio, svc_model.target_sample, format=wav_format)\n",
"ipd.display(ipd.Audio(audio, rate=audio_sr, normalize=False))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}