text_to_speech
#20
by
sarathkumarhubshell
- opened
- README.md +11 -23
- config.json +1 -1
README.md
CHANGED
@@ -16,13 +16,13 @@ language:
|
|
16 |
thumbnail: >-
|
17 |
https://user-images.githubusercontent.com/5068315/230698495-cbb1ced9-c911-4c9a-941d-a1a4a1286ac6.png
|
18 |
library: bark
|
19 |
-
license:
|
20 |
tags:
|
21 |
- bark
|
22 |
- audio
|
23 |
- text-to-speech
|
24 |
pipeline_tag: text-to-speech
|
25 |
-
inference:
|
26 |
---
|
27 |
|
28 |
# Bark
|
@@ -69,35 +69,23 @@ Try out Bark yourself!
|
|
69 |
|
70 |
## 🤗 Transformers Usage
|
71 |
|
|
|
72 |
You can run Bark locally with the 🤗 Transformers library from version 4.31.0 onwards.
|
73 |
|
74 |
-
1. First install the 🤗 [Transformers library](https://github.com/huggingface/transformers)
|
75 |
|
76 |
```
|
77 |
-
pip install
|
78 |
-
pip install --upgrade transformers scipy
|
79 |
-
```
|
80 |
-
|
81 |
-
2. Run inference via the `Text-to-Speech` (TTS) pipeline. You can infer the bark model via the TTS pipeline in just a few lines of code!
|
82 |
-
|
83 |
-
```python
|
84 |
-
from transformers import pipeline
|
85 |
-
import scipy
|
86 |
-
|
87 |
-
synthesiser = pipeline("text-to-speech", "suno/bark")
|
88 |
-
|
89 |
-
speech = synthesiser("Hello, my dog is cooler than you!", forward_params={"do_sample": True})
|
90 |
-
|
91 |
-
scipy.io.wavfile.write("bark_out.wav", rate=speech["sampling_rate"], data=speech["audio"])
|
92 |
```
|
93 |
|
94 |
-
|
95 |
|
96 |
```python
|
97 |
from transformers import AutoProcessor, AutoModel
|
98 |
|
99 |
-
|
100 |
-
|
|
|
101 |
|
102 |
inputs = processor(
|
103 |
text=["Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe."],
|
@@ -107,7 +95,7 @@ inputs = processor(
|
|
107 |
speech_values = model.generate(**inputs, do_sample=True)
|
108 |
```
|
109 |
|
110 |
-
|
111 |
|
112 |
```python
|
113 |
from IPython.display import Audio
|
@@ -133,7 +121,7 @@ You can also run Bark locally through the original [Bark library](https://githu
|
|
133 |
|
134 |
1. First install the [`bark` library](https://github.com/suno-ai/bark)
|
135 |
|
136 |
-
|
137 |
|
138 |
```python
|
139 |
from bark import SAMPLE_RATE, generate_audio, preload_models
|
|
|
16 |
thumbnail: >-
|
17 |
https://user-images.githubusercontent.com/5068315/230698495-cbb1ced9-c911-4c9a-941d-a1a4a1286ac6.png
|
18 |
library: bark
|
19 |
+
license: cc-by-nc-4.0
|
20 |
tags:
|
21 |
- bark
|
22 |
- audio
|
23 |
- text-to-speech
|
24 |
pipeline_tag: text-to-speech
|
25 |
+
inference: false
|
26 |
---
|
27 |
|
28 |
# Bark
|
|
|
69 |
|
70 |
## 🤗 Transformers Usage
|
71 |
|
72 |
+
|
73 |
You can run Bark locally with the 🤗 Transformers library from version 4.31.0 onwards.
|
74 |
|
75 |
+
1. First install the 🤗 [Transformers library](https://github.com/huggingface/transformers) from main:
|
76 |
|
77 |
```
|
78 |
+
pip install git+https://github.com/huggingface/transformers.git
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
```
|
80 |
|
81 |
+
2. Run the following Python code to generate speech samples:
|
82 |
|
83 |
```python
|
84 |
from transformers import AutoProcessor, AutoModel
|
85 |
|
86 |
+
|
87 |
+
processor = AutoProcessor.from_pretrained("suno/bark-small")
|
88 |
+
model = AutoModel.from_pretrained("suno/bark-small")
|
89 |
|
90 |
inputs = processor(
|
91 |
text=["Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe."],
|
|
|
95 |
speech_values = model.generate(**inputs, do_sample=True)
|
96 |
```
|
97 |
|
98 |
+
3. Listen to the speech samples either in an ipynb notebook:
|
99 |
|
100 |
```python
|
101 |
from IPython.display import Audio
|
|
|
121 |
|
122 |
1. First install the [`bark` library](https://github.com/suno-ai/bark)
|
123 |
|
124 |
+
3. Run the following Python code:
|
125 |
|
126 |
```python
|
127 |
from bark import SAMPLE_RATE, generate_audio, preload_models
|
config.json
CHANGED
@@ -80,7 +80,7 @@
|
|
80 |
"use_cache": true
|
81 |
},
|
82 |
"codec_config": {
|
83 |
-
"_name_or_path": "
|
84 |
"add_cross_attention": false,
|
85 |
"architectures": [
|
86 |
"EncodecModel"
|
|
|
80 |
"use_cache": true
|
81 |
},
|
82 |
"codec_config": {
|
83 |
+
"_name_or_path": "ArthurZ/encodec_24khz",
|
84 |
"add_cross_attention": false,
|
85 |
"architectures": [
|
86 |
"EncodecModel"
|