antoniomae
committed on
Commit
•
bbb0b42
1
Parent(s):
513c7c5
f291528ef840e0812fc4459f0c0be17a01952a1e
Browse files- README.md +11 -1
- app.py +34 -0
- gitattributes +35 -0
- hubconf.py +46 -0
- requirements.dev.txt +5 -0
- requirements.ja.txt +5 -0
- requirements.notebooks.txt +0 -0
- requirements.txt +57 -0
- setup.py +141 -0
README.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1 |
---
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Voice Clone
|
3 |
+
emoji: 🏃
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: blue
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 4.5.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: mit
|
11 |
---
|
12 |
+
|
13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import subprocess
import sys

# Run the setup.py install command.
# sys.executable is used instead of a bare 'python' so the package is installed
# for the interpreter that is actually running this app; a 'python' found on
# PATH may be a different interpreter, or may not exist at all.
try:
    subprocess.run([sys.executable, 'setup.py', 'install', '--user'], check=True)
    print("Installation successful.")
except subprocess.CalledProcessError as e:
    # A failed install is reported but not fatal here; the TTS import below
    # will fail if the package is not otherwise available.
    print(f"Installation failed with error: {e}")
|
9 |
+
|
10 |
+
import gradio as gr
|
11 |
+
import torch
|
12 |
+
from TTS.api import TTS
|
13 |
+
|
14 |
+
# Get device
|
15 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
16 |
+
|
17 |
+
# Init TTS
|
18 |
+
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
19 |
+
|
20 |
+
def voice_clone(text: str, speaker_wav: str, language: str):
    """Synthesize ``text`` in the voice of the reference recording.

    Args:
        text: Text to speak.
        speaker_wav: Path of the reference audio file whose voice is cloned.
        language: Language code passed through to the TTS model.

    Returns:
        Path of the generated audio file (a fresh temporary file per call).

    Raises:
        ValueError: If the text is blank, or if no reference audio or
            language was provided.
    """
    # Reject incomplete submissions up front with a readable message instead
    # of passing empty/None values to the model.
    if not text or not text.strip():
        raise ValueError("Please enter the text to synthesize.")
    if not speaker_wav:
        raise ValueError("Please upload a reference audio file.")
    if not language:
        raise ValueError("Please select a language.")

    import tempfile

    print("Speaker wav:", speaker_wav)

    # Every call gets its own output file: a single shared "output.mp3" would
    # be overwritten when two requests run at the same time.
    # NOTE(review): the ".wav" suffix assumes tts_to_file writes WAV data -
    # confirm against the TTS API.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    # Run TTS
    tts.tts_to_file(text=text, speaker_wav=speaker_wav, language=language, file_path=output_path)
    return output_path
|
25 |
+
|
26 |
+
iface = gr.Interface(fn=voice_clone,
|
27 |
+
inputs=[gr.Textbox(lines=2, placeholder="Enter the text...", label="Text"),
|
28 |
+
gr.Audio(type="filepath", label="Upload audio file"),
|
29 |
+
gr.Radio(['ru', 'en', 'zh-cn', 'ja', 'de', 'fr', 'it', 'pt', 'pl', 'tr', 'ko', 'nl', 'cs', 'ar', 'es', 'hu'], label="language"),
|
30 |
+
],
|
31 |
+
outputs=gr.Audio(type="filepath", label="Generated audio file"),
|
32 |
+
title="Voice Cloning")
|
33 |
+
|
34 |
+
iface.launch()
|
gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
hubconf.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dependencies = [
|
2 |
+
'torch', 'gdown', 'pysbd', 'gruut', 'anyascii', 'pypinyin', 'coqpit', 'mecab-python3', 'unidic-lite'
|
3 |
+
]
|
4 |
+
import torch
|
5 |
+
|
6 |
+
from TTS.utils.manage import ModelManager
|
7 |
+
from TTS.utils.synthesizer import Synthesizer
|
8 |
+
|
9 |
+
|
10 |
+
def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA',
        vocoder_name=None,
        use_cuda=False):
    """TTS entry point for PyTorch Hub that provides a Synthesizer object to synthesize speech from a given text.

    Example:
        >>> synthesizer = torch.hub.load('coqui-ai/TTS', 'tts', source='github')
        >>> wavs = synthesizer.tts("This is a test! This is also a test!!")
        wavs - is a list of values of the synthesized speech.

    Args:
        model_name (str, optional): One of the model names from .model.json. Defaults to 'tts_models/en/ljspeech/tacotron2-DCA'.
        vocoder_name (str, optional): One of the model names from .model.json. Defaults to None, in which case
            the 'default_vocoder' entry of the selected model is used, if it has one.
        use_cuda (bool, optional): Forwarded to the Synthesizer. Defaults to False.

    Returns:
        TTS.utils.synthesizer.Synthesizer: Synthesizer object wrapping both vocoder and tts models.
    """
    manager = ModelManager()

    model_path, config_path, model_item = manager.download_model(model_name)

    if vocoder_name is None:
        # Fall back to the vocoder declared by the model, if any.
        try:
            vocoder_name = model_item['default_vocoder']
        except KeyError:
            vocoder_name = None

    # Only download a vocoder when there is one to download; a model without a
    # default vocoder must not trigger download_model(None).
    # NOTE(review): assumes Synthesizer accepts None for the vocoder arguments - confirm.
    vocoder_path, vocoder_config_path = None, None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # create synthesizer
    synt = Synthesizer(tts_checkpoint=model_path,
                       tts_config_path=config_path,
                       vocoder_checkpoint=vocoder_path,
                       vocoder_config=vocoder_config_path,
                       use_cuda=use_cuda)
    return synt
|
42 |
+
|
43 |
+
|
44 |
+
if __name__ == '__main__':
|
45 |
+
synthesizer = torch.hub.load('coqui-ai/TTS:dev', 'tts', source='github')
|
46 |
+
synthesizer.tts("This is a test!")
|
requirements.dev.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
black
|
2 |
+
coverage
|
3 |
+
isort
|
4 |
+
nose2
|
5 |
+
pylint==2.10.2
|
requirements.ja.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# These cause some compatibility issues on some systems and are not strictly necessary
|
2 |
+
# japanese g2p deps
|
3 |
+
mecab-python3==1.0.6
|
4 |
+
unidic-lite==1.0.8
|
5 |
+
cutlet
|
requirements.notebooks.txt
ADDED
File without changes
|
requirements.txt
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# core deps
|
2 |
+
numpy==1.22.0;python_version<="3.10"
|
3 |
+
numpy>=1.24.3;python_version>"3.10"
|
4 |
+
cython>=0.29.30
|
5 |
+
scipy>=1.11.2
|
6 |
+
torch>=2.1
|
7 |
+
torchaudio
|
8 |
+
soundfile>=0.12.0
|
9 |
+
librosa>=0.10.0
|
10 |
+
scikit-learn>=1.3.0
|
11 |
+
numba==0.55.1;python_version<"3.9"
|
12 |
+
numba>=0.57.0;python_version>="3.9"
|
13 |
+
inflect>=5.6.0
|
14 |
+
tqdm>=4.64.1
|
15 |
+
anyascii>=0.3.0
|
16 |
+
pyyaml>=6.0
|
17 |
+
fsspec>=2023.6.0 # <= 2023.9.1 makes aux tests fail
|
18 |
+
aiohttp>=3.8.1
|
19 |
+
packaging>=23.1
|
20 |
+
# deps for examples
|
21 |
+
flask>=2.0.1
|
22 |
+
# deps for inference
|
23 |
+
pysbd>=0.3.4
|
24 |
+
# deps for notebooks
|
25 |
+
umap-learn>=0.5.1
|
26 |
+
pandas>=1.4,<2.0
|
27 |
+
# deps for training
|
28 |
+
matplotlib>=3.7.0
|
29 |
+
# coqui stack
|
30 |
+
trainer>=0.0.32
|
31 |
+
# config management
|
32 |
+
coqpit>=0.0.16
|
33 |
+
# chinese g2p deps
|
34 |
+
jieba
|
35 |
+
pypinyin
|
36 |
+
# korean
|
37 |
+
hangul_romanize
|
38 |
+
# gruut+supported langs
|
39 |
+
gruut[de,es,fr]==2.2.3
|
40 |
+
# deps for korean
|
41 |
+
jamo
|
42 |
+
nltk
|
43 |
+
g2pkk>=0.1.1
|
44 |
+
# deps for bangla
|
45 |
+
bangla
|
46 |
+
bnnumerizer
|
47 |
+
bnunicodenormalizer
|
48 |
+
#deps for tortoise
|
49 |
+
einops>=0.6.0
|
50 |
+
transformers>=4.33.0
|
51 |
+
#deps for bark
|
52 |
+
encodec>=0.1.1
|
53 |
+
# deps for XTTS
|
54 |
+
unidecode>=1.3.2
|
55 |
+
num2words
|
56 |
+
spacy[ja]>=3
|
57 |
+
gradio
|
setup.py
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# ,*++++++*, ,*++++++*,
|
3 |
+
# *++. .+++ *++. .++*
|
4 |
+
# *+* ,++++* *+* *+* ,++++, *+*
|
5 |
+
# ,+, .++++++++++* ,++,,,,*+, ,++++++++++. *+,
|
6 |
+
# *+. .++++++++++++..++ *+.,++++++++++++. .+*
|
7 |
+
# .+* ++++++++++++.*+, .+*.++++++++++++ *+,
|
8 |
+
# .++ *++++++++* ++, .++.*++++++++* ++,
|
9 |
+
# ,+++*. . .*++, ,++*. .*+++*
|
10 |
+
# *+, .,*++**. .**++**. ,+*
|
11 |
+
# .+* *+,
|
12 |
+
# *+. Coqui .+*
|
13 |
+
# *+* +++ TTS +++ *+*
|
14 |
+
# .+++*. . . *+++.
|
15 |
+
# ,+* *+++*... ...*+++* *+,
|
16 |
+
# .++. .""""+++++++****+++++++"""". ++.
|
17 |
+
# ,++. .++,
|
18 |
+
# .++* *++.
|
19 |
+
# *+++, ,+++*
|
20 |
+
# .,*++++::::::++++*,.
|
21 |
+
# ``````
|
22 |
+
|
23 |
+
import os
|
24 |
+
import subprocess
|
25 |
+
import sys
|
26 |
+
from packaging.version import Version
|
27 |
+
|
28 |
+
import numpy
|
29 |
+
import setuptools.command.build_py
|
30 |
+
import setuptools.command.develop
|
31 |
+
from Cython.Build import cythonize
|
32 |
+
from setuptools import Extension, find_packages, setup
|
33 |
+
|
34 |
+
python_version = sys.version.split()[0]
|
35 |
+
if Version(python_version) < Version("3.9") or Version(python_version) >= Version("3.12"):
|
36 |
+
raise RuntimeError("TTS requires python >= 3.9 and < 3.12 " "but your Python version is {}".format(sys.version))
|
37 |
+
|
38 |
+
|
39 |
+
cwd = os.path.dirname(os.path.abspath(__file__))
|
40 |
+
with open(os.path.join(cwd, "TTS", "VERSION")) as fin:
|
41 |
+
version = fin.read().strip()
|
42 |
+
|
43 |
+
|
44 |
+
class build_py(setuptools.command.build_py.build_py):  # pylint: disable=too-many-ancestors
    """Project hook for the ``build_py`` command; currently defers entirely to setuptools."""

    def run(self):
        # No project-specific steps: hand straight off to the stock implementation.
        super().run()
|
47 |
+
|
48 |
+
|
49 |
+
class develop(setuptools.command.develop.develop):
    """Project hook for the ``develop`` command; currently defers entirely to setuptools."""

    def run(self):
        # No project-specific steps: hand straight off to the stock implementation.
        super().run()
|
52 |
+
|
53 |
+
|
54 |
+
# The documentation for this feature is in server/README.md
|
55 |
+
package_data = ["TTS/server/templates/*"]
|
56 |
+
|
57 |
+
|
58 |
+
def pip_install(package_name):
    """Install ``package_name`` with pip, using the interpreter running this script.

    Args:
        package_name: Requirement string handed to ``pip install``.

    Returns:
        int: Exit status of the pip process (0 on success), so callers can
        detect a failed install instead of it being silently ignored.
    """
    return subprocess.call([sys.executable, "-m", "pip", "install", package_name])
|
60 |
+
|
61 |
+
|
62 |
+
# Load the requirement lists. Every file is read through a context manager so
# no file handle is left open.
with open(os.path.join(cwd, "requirements.txt"), "r") as f:
    requirements = f.readlines()
with open(os.path.join(cwd, "requirements.notebooks.txt"), "r") as f:
    requirements_notebooks = f.readlines()
with open(os.path.join(cwd, "requirements.dev.txt"), "r") as f:
    requirements_dev = f.readlines()
with open(os.path.join(cwd, "requirements.ja.txt"), "r") as f:
    requirements_ja = f.readlines()
requirements_all = requirements_dev + requirements_notebooks + requirements_ja

# README.md is resolved relative to this file, like the inputs above, so the
# build does not depend on the directory setup.py is invoked from.
with open(os.path.join(cwd, "README.md"), "r", encoding="utf-8") as readme_file:
    README = readme_file.read()
|
73 |
+
|
74 |
+
exts = [
|
75 |
+
Extension(
|
76 |
+
name="TTS.tts.utils.monotonic_align.core",
|
77 |
+
sources=["TTS/tts/utils/monotonic_align/core.pyx"],
|
78 |
+
)
|
79 |
+
]
|
80 |
+
setup(
|
81 |
+
name="TTS",
|
82 |
+
version=version,
|
83 |
+
url="https://github.com/coqui-ai/TTS",
|
84 |
+
author="Eren Gölge",
|
85 |
+
author_email="[email protected]",
|
86 |
+
description="Deep learning for Text to Speech by Coqui.",
|
87 |
+
long_description=README,
|
88 |
+
long_description_content_type="text/markdown",
|
89 |
+
license="MPL-2.0",
|
90 |
+
# cython
|
91 |
+
include_dirs=numpy.get_include(),
|
92 |
+
ext_modules=cythonize(exts, language_level=3),
|
93 |
+
# ext_modules=find_cython_extensions(),
|
94 |
+
# package
|
95 |
+
include_package_data=True,
|
96 |
+
packages=find_packages(include=["TTS"], exclude=["*.tests", "*tests.*", "tests.*", "*tests", "tests"]),
|
97 |
+
package_data={
|
98 |
+
"TTS": [
|
99 |
+
"VERSION",
|
100 |
+
]
|
101 |
+
},
|
102 |
+
project_urls={
|
103 |
+
"Documentation": "https://github.com/coqui-ai/TTS/wiki",
|
104 |
+
"Tracker": "https://github.com/coqui-ai/TTS/issues",
|
105 |
+
"Repository": "https://github.com/coqui-ai/TTS",
|
106 |
+
"Discussions": "https://github.com/coqui-ai/TTS/discussions",
|
107 |
+
},
|
108 |
+
cmdclass={
|
109 |
+
"build_py": build_py,
|
110 |
+
"develop": develop,
|
111 |
+
# 'build_ext': build_ext
|
112 |
+
},
|
113 |
+
install_requires=requirements,
|
114 |
+
extras_require={
|
115 |
+
"all": requirements_all,
|
116 |
+
"dev": requirements_dev,
|
117 |
+
"notebooks": requirements_notebooks,
|
118 |
+
"ja": requirements_ja,
|
119 |
+
},
|
120 |
+
python_requires=">=3.9.0, <3.12",
|
121 |
+
entry_points={"console_scripts": ["tts=TTS.bin.synthesize:main", "tts-server = TTS.server.server:main"]},
|
122 |
+
classifiers=[
|
123 |
+
"Programming Language :: Python",
|
124 |
+
"Programming Language :: Python :: 3",
|
125 |
+
"Programming Language :: Python :: 3.9",
|
126 |
+
"Programming Language :: Python :: 3.10",
|
127 |
+
"Programming Language :: Python :: 3.11",
|
128 |
+
"Development Status :: 3 - Alpha",
|
129 |
+
"Intended Audience :: Science/Research",
|
130 |
+
"Intended Audience :: Developers",
|
131 |
+
"Operating System :: POSIX :: Linux",
|
132 |
+
"License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
|
133 |
+
"Topic :: Software Development",
|
134 |
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
135 |
+
"Topic :: Multimedia :: Sound/Audio :: Speech",
|
136 |
+
"Topic :: Multimedia :: Sound/Audio",
|
137 |
+
"Topic :: Multimedia",
|
138 |
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
139 |
+
],
|
140 |
+
zip_safe=False,
|
141 |
+
)
|