GlitchGhost committed on
Commit
ffd1d7c
·
verified ·
1 Parent(s): 31aa844

Upload 5 files

Browse files
Files changed (5) hide show
  1. README.md +6 -4
  2. app.py +67 -0
  3. gitattributes +36 -0
  4. gitignore +1 -0
  5. requirements.txt +3 -0
README.md CHANGED
@@ -1,12 +1,14 @@
1
  ---
2
  title: Voice Clone
3
- emoji: 🌍
4
- colorFrom: gray
5
- colorTo: red
6
  sdk: streamlit
7
- sdk_version: 1.41.1
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Voice Clone
3
+ emoji: 📉
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: streamlit
7
+ sdk_version: 1.39.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ short_description: Voice cloning model
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import os
3
+ import streamlit as st
4
+ from TTS.api import TTS
5
+ from tempfile import NamedTemporaryFile
6
+
7
+ os.environ["COQUI_TOS_AGREED"] = "1"
8
+
9
@st.cache_resource(show_spinner=False)
def _load_tts(model_name, device):
    """Load *model_name* onto *device* once and reuse it across Streamlit reruns."""
    # Streamlit re-executes the script on every interaction; without caching,
    # the model would be rebuilt for every synthesis request.
    # NOTE(review): the cached instance is shared by all sessions -- confirm
    # concurrent inference on one TTS object is acceptable for this app.
    return TTS(model_name).to(device)


def _spool_to_wav(audio_file):
    """Write a file-like *audio_file* to a temporary .wav and return its path.

    Returns None when *audio_file* is not file-like (for example, when it is
    already a filesystem path), so the caller can pass it through unchanged.
    """
    if hasattr(audio_file, 'getvalue'):
        data = audio_file.getvalue()
    elif hasattr(audio_file, 'read'):
        data = audio_file.read()
    else:
        return None

    with NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
        tmp_file.write(data)
    return tmp_file.name


def generate_audio(audio_file, text_input):
    """Synthesize *text_input* in the voice of *audio_file* and return a .wav path.

    Args:
        audio_file: Reference voice, either a path to a .wav file or a
            file-like object (such as a Streamlit upload) holding .wav bytes.
        text_input: English text to speak.

    Returns:
        Path of a newly created temporary .wav file containing the synthesized
        speech. The file is not deleted here; the caller owns it.

    Raises:
        Exception: Whatever the TTS library raises on failure. The partially
            written output file is removed before the error propagates.
    """
    model = "tts_models/multilingual/multi-dataset/xtts_v2"
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    tts = _load_tts(model, device)

    # The TTS API documents speaker_wav as a path, so uploads are written to
    # disk first instead of being handed over as in-memory objects.
    spooled_path = _spool_to_wav(audio_file)
    speaker_wav = audio_file if spooled_path is None else spooled_path

    # Reserve an output name and close the handle before synthesis so the TTS
    # library can reopen the path itself (an open handle blocks that on Windows).
    with NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
        output_path = tmp_file.name

    try:
        tts.tts_to_file(text=text_input, speaker_wav=speaker_wav, language='en', file_path=output_path)
    except Exception:
        # Do not leave an empty or partial output file behind on failure.
        if os.path.exists(output_path):
            os.remove(output_path)
        raise
    finally:
        # The spooled reference copy is private to this call.
        if spooled_path is not None and os.path.exists(spooled_path):
            os.remove(spooled_path)

    return output_path
20
+
21
def main():
    """Render the Voice Clone page and run synthesis when requested.

    The page shows a title and subtitle, lets the user upload a reference
    .wav or pick one of the bundled samples, takes the text to speak, and on
    "Synthesize" calls ``generate_audio`` and plays the result.
    """
    import logging  # local import keeps this block self-contained

    # Title
    title = '<h1 align="center" style="font-size: 2rem;">Voice Clone</h1>'
    st.markdown(title, unsafe_allow_html=True)

    # Subtitle
    subtitle = '<h2 align="center" style="font-size: 1.2rem; margin-bottom: 2rem;">Make your favorite characters say anything!</h2>'
    st.markdown(subtitle, unsafe_allow_html=True)

    # The empty first entry means "no sample selected".
    sample_files = {
        '': '',
        'Stewie Griffin': 'sample_inputs/stewie.wav',
        'Donald Trump': 'sample_inputs/trump.wav',
        'Joe Rogan': 'sample_inputs/rogan.wav',
    }

    # Upload audio file
    uploaded_file = st.file_uploader('Add an audio (.wav) file of the voice you want to clone...', type=['wav'])

    # An upload takes precedence; the sample picker is only shown without one.
    if uploaded_file is not None:
        speaker_file = uploaded_file
    else:
        selected_sample = st.selectbox('Or choose a sample:', list(sample_files))
        speaker_file = sample_files[selected_sample]

    if speaker_file:
        st.header('Reference Audio')
        st.audio(speaker_file, format='audio/wav')

    # Input text
    text_input = st.text_area('What do you want your character to say? Try to keep the prompt around 2 sentences.')

    if not st.button('Synthesize'):
        return

    # Validate up front so a missing voice is not reported as a synthesis error.
    if not speaker_file:
        st.error('Please upload a .wav file or choose a sample voice!')
        return
    if not text_input or not text_input.strip():
        st.error('Please provide a text input!')
        return

    try:
        with st.spinner('Synthesizing...'):
            output_path = generate_audio(speaker_file, text_input)
    except Exception:
        # A bare ``except`` would also trap BaseException (KeyboardInterrupt,
        # SystemExit); keep the traceback in the logs for diagnosis.
        logging.getLogger(__name__).exception('Voice synthesis failed')
        st.error('There was an issue synthesizing the text. Please check the input and try again. Try to keep the input around 2 sentences, and less than 200 characters.')
        return

    st.header('Synthesized Audio')
    st.audio(output_path, format='audio/wav')


if __name__ == '__main__':
    main()
67
+
gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.wav filter=lfs diff=lfs merge=lfs -text
gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .voice_env
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ coqui_tts==0.24.2
2
+ streamlit==1.39.0
3
+ torch==2.5.1