Spaces:
Runtime error
Runtime error
lhzstar
committed on
Commit
·
436ce71
1
Parent(s):
d2b6583
new commits
Browse files
- .gitignore +3 -1
- app.py +82 -102
- celebbot.py +6 -6
- data.json +0 -0
- embeds/Adele.npy +0 -0
- embeds/Barack_Obama.npy +0 -0
- requirements.txt +2 -1
- rtvc/synthesizer/utils/cleaners.py +1 -0
- run_tts.py +6 -7
- utils.py +57 -2
.gitignore
CHANGED
@@ -20,4 +20,6 @@ launch.json
|
|
20 |
*.m4a
|
21 |
*.csv
|
22 |
input_audios/
|
23 |
-
syn_results/
|
|
|
|
|
|
20 |
*.m4a
|
21 |
*.csv
|
22 |
input_audios/
|
23 |
+
syn_results/
|
24 |
+
falcon-7b-instruct/
|
25 |
+
flan-t5-large/
|
app.py
CHANGED
@@ -1,54 +1,19 @@
|
|
1 |
from celebbot import CelebBot
|
2 |
import streamlit as st
|
3 |
-
import
|
4 |
-
import spacy
|
5 |
-
import json
|
6 |
-
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
|
7 |
from utils import *
|
8 |
|
9 |
|
10 |
-
@st.cache_resource
|
11 |
-
def get_seq2seq_model(model_id):
|
12 |
-
return AutoModelForSeq2SeqLM.from_pretrained(model_id)
|
13 |
-
|
14 |
-
@st.cache_resource
|
15 |
-
def get_auto_model(model_id):
|
16 |
-
return AutoModel.from_pretrained(model_id)
|
17 |
-
|
18 |
-
@st.cache_resource
|
19 |
-
def get_tokenizer(model_id):
|
20 |
-
return AutoTokenizer.from_pretrained(model_id)
|
21 |
-
|
22 |
-
@st.cache_data
|
23 |
-
def get_celeb_data(fpath):
|
24 |
-
with open(fpath) as json_file:
|
25 |
-
return json.load(json_file)
|
26 |
-
|
27 |
-
@st.cache_resource
|
28 |
-
def preprocess_text(name, gender, text, model_id):
|
29 |
-
lname = name.split(" ")[-1]
|
30 |
-
lname_regex = re.compile(rf'\b({lname})\b')
|
31 |
-
name_regex = re.compile(rf'\b({name})\b')
|
32 |
-
lnames = lname+"’s" if not lname.endswith("s") else lname+"’"
|
33 |
-
lnames_regex = re.compile(rf'\b({lnames})\b')
|
34 |
-
names = name+"’s" if not name.endswith("s") else name+"’"
|
35 |
-
names_regex = re.compile(rf'\b({names})\b')
|
36 |
-
if gender == "M":
|
37 |
-
text = re.sub(he_regex, "I", text)
|
38 |
-
text = re.sub(his_regex, "my", text)
|
39 |
-
elif gender == "F":
|
40 |
-
text = re.sub(she_regex, "I", text)
|
41 |
-
text = re.sub(her_regex, "my", text)
|
42 |
-
text = re.sub(names_regex, "my", text)
|
43 |
-
text = re.sub(lnames_regex, "my", text)
|
44 |
-
text = re.sub(name_regex, "I", text)
|
45 |
-
text = re.sub(lname_regex, "I", text)
|
46 |
-
spacy_model = spacy.load(model_id)
|
47 |
-
texts = [i.text.strip() for i in spacy_model(text).sents]
|
48 |
-
return spacy_model, texts
|
49 |
-
|
50 |
def main():
|
|
|
51 |
hide_footer()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
if "messages" not in st.session_state:
|
53 |
st.session_state["messages"] = []
|
54 |
if "QA_model_path" not in st.session_state:
|
@@ -57,65 +22,80 @@ def main():
|
|
57 |
st.session_state["sentTr_model_path"] = "sentence-transformers/all-mpnet-base-v2"
|
58 |
if "start_chat" not in st.session_state:
|
59 |
st.session_state["start_chat"] = False
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
st.
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
# Add assistant response to chat history
|
102 |
-
response = celeb_bot.question_answer()
|
103 |
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
|
121 |
if __name__ == "__main__":
|
|
|
1 |
from celebbot import CelebBot
|
2 |
import streamlit as st
|
3 |
+
from streamlit_mic_recorder import speech_to_text
|
|
|
|
|
|
|
4 |
from utils import *
|
5 |
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
def main():
|
8 |
+
|
9 |
hide_footer()
|
10 |
+
model_list = ["flan-t5-large", "flan-t5-xl", "Falcon-7b-instruct"]
|
11 |
+
celeb_data = get_celeb_data(f'data.json')
|
12 |
+
|
13 |
+
st.sidebar.header("CelebChat")
|
14 |
+
expander = st.sidebar.expander('About the app')
|
15 |
+
with expander:
|
16 |
+
st.markdown("This app is a demo of celebrity chatting!")
|
17 |
if "messages" not in st.session_state:
|
18 |
st.session_state["messages"] = []
|
19 |
if "QA_model_path" not in st.session_state:
|
|
|
22 |
st.session_state["sentTr_model_path"] = "sentence-transformers/all-mpnet-base-v2"
|
23 |
if "start_chat" not in st.session_state:
|
24 |
st.session_state["start_chat"] = False
|
25 |
+
if "prompt" not in st.session_state:
|
26 |
+
st.session_state["prompt"] = None
|
27 |
+
|
28 |
+
def start_chat(name, model_id):
|
29 |
+
print(name, model_id)
|
30 |
+
if name != '' and model_id != '':
|
31 |
+
st.session_state["start_chat"] = True
|
32 |
+
else:
|
33 |
+
st.session_state["start_chat"] = False
|
34 |
+
|
35 |
+
with st.sidebar.form("my_form"):
|
36 |
+
print("enter form")
|
37 |
+
st.session_state["celeb_name"] = st.selectbox('Choose a celebrity', options=list(celeb_data.keys()))
|
38 |
+
model_id=st.selectbox("Choose Your Flan-T5 model",options=model_list)
|
39 |
+
st.session_state["QA_model_path"] = f"google/{model_id}" if "flan-t5" in model_id else model_id
|
40 |
+
|
41 |
+
st.form_submit_button(label="Start Chatting", on_click=start_chat, args=(st.session_state["celeb_name"], st.session_state["QA_model_path"]))
|
42 |
+
|
43 |
+
if st.session_state["start_chat"]:
|
44 |
+
celeb_gender = celeb_data[st.session_state["celeb_name"]]["gender"]
|
45 |
+
knowledge = celeb_data[st.session_state["celeb_name"]]["knowledge"]
|
46 |
+
st.session_state["celeb_bot"] = CelebBot(st.session_state["celeb_name"],
|
47 |
+
get_tokenizer(st.session_state["QA_model_path"]),
|
48 |
+
get_seq2seq_model(st.session_state["QA_model_path"]) if "flan-t5" in st.session_state["QA_model_path"] else get_causal_model(st.session_state["QA_model_path"]),
|
49 |
+
get_tokenizer(st.session_state["sentTr_model_path"]),
|
50 |
+
get_auto_model(st.session_state["sentTr_model_path"]),
|
51 |
+
*preprocess_text(st.session_state["celeb_name"], celeb_gender, knowledge, "en_core_web_sm")
|
52 |
+
)
|
53 |
+
|
54 |
+
dialogue_container = st.container()
|
55 |
+
with dialogue_container:
|
56 |
+
for message in st.session_state["messages"]:
|
57 |
+
with st.chat_message(message["role"]):
|
58 |
+
st.markdown(message["content"])
|
59 |
+
|
60 |
+
|
61 |
+
if "_last_audio_id" not in st.session_state:
|
62 |
+
st.session_state["_last_audio_id"] = 0
|
63 |
+
with st.sidebar:
|
64 |
+
prompt_from_audio =speech_to_text(start_prompt="Start Recording",stop_prompt="Stop Recording",language='en',use_container_width=True, just_once=True,key='STT')
|
65 |
+
prompt_from_text = st.text_input('Or write something')
|
|
|
|
|
66 |
|
67 |
+
if prompt_from_audio != None:
|
68 |
+
st.session_state["prompt"] = prompt_from_audio
|
69 |
+
elif prompt_from_text != None:
|
70 |
+
st.session_state["prompt"] = prompt_from_text
|
71 |
+
print(st.session_state["prompt"])
|
72 |
+
if st.session_state["prompt"] != None and st.session_state["prompt"] != '':
|
73 |
+
st.session_state["celeb_bot"].text = st.session_state["prompt"]
|
74 |
+
# Display user message in chat message container
|
75 |
+
with dialogue_container:
|
76 |
+
st.chat_message("user").markdown(st.session_state["prompt"])
|
77 |
+
# Add user message to chat history
|
78 |
+
st.session_state["messages"].append({"role": "user", "content": st.session_state["prompt"]})
|
79 |
+
|
80 |
+
# Add assistant response to chat history
|
81 |
+
response = st.session_state["celeb_bot"].question_answer()
|
82 |
+
|
83 |
+
# disable autoplay to play in HTML
|
84 |
+
b64 = st.session_state["celeb_bot"].text_to_speech(autoplay=False)
|
85 |
+
md = f"""
|
86 |
+
<p>{response}</p>
|
87 |
+
<audio controls autoplay style="display:none;">
|
88 |
+
<source src="data:audio/wav;base64,{b64}" type="audio/wav">
|
89 |
+
Your browser does not support the audio element.
|
90 |
+
</audio>
|
91 |
+
"""
|
92 |
+
with dialogue_container:
|
93 |
+
st.chat_message("assistant").markdown(
|
94 |
+
md,
|
95 |
+
unsafe_allow_html=True,
|
96 |
+
)
|
97 |
+
# Display assistant response in chat message container
|
98 |
+
st.session_state["messages"].append({"role": "assistant", "content": response})
|
99 |
|
100 |
|
101 |
if __name__ == "__main__":
|
celebbot.py
CHANGED
@@ -102,13 +102,13 @@ class CelebBot():
|
|
102 |
self.text = f"Hello I am {self.name} the AI, what can I do for you?"
|
103 |
## have a conversation
|
104 |
else:
|
105 |
-
|
106 |
-
|
107 |
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
query = f"{instruction1} [knowledge] {knowledge} [question] {self.text}
|
112 |
input_ids = self.QA_tokenizer(f"{query}", return_tensors="pt").input_ids
|
113 |
outputs = self.QA_model.generate(input_ids, max_length=1024)
|
114 |
self.text = self.QA_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
102 |
self.text = f"Hello I am {self.name} the AI, what can I do for you?"
|
103 |
## have a conversation
|
104 |
else:
|
105 |
+
if re.search(re.compile(rf'\b(you|your|{self.name})\b', flags=re.IGNORECASE), self.text) != None:
|
106 |
+
instruction1 = f'[Instruction] You are a celebrity named {self.name}. You need to answer the question based on knowledge and commonsense.'
|
107 |
|
108 |
+
knowledge = self.retrieve_knowledge_assertions()
|
109 |
+
else:
|
110 |
+
instruction1 = f'[Instruction] You need to answer the question based on commonsense.'
|
111 |
+
query = f"{instruction1} [knowledge] {knowledge} [question] {self.text}"
|
112 |
input_ids = self.QA_tokenizer(f"{query}", return_tensors="pt").input_ids
|
113 |
outputs = self.QA_model.generate(input_ids, max_length=1024)
|
114 |
self.text = self.QA_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
data.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
embeds/Adele.npy
ADDED
Binary file (1.15 kB). View file
|
|
embeds/Barack_Obama.npy
ADDED
Binary file (1.15 kB). View file
|
|
requirements.txt
CHANGED
@@ -23,9 +23,10 @@ torchaudio==0.11.0
|
|
23 |
tensorflow-cpu==2.9.0
|
24 |
denoiser==0.1.5
|
25 |
SpeechRecognition==3.10.0
|
26 |
-
transformers==4.
|
27 |
streamlit==1.27.2
|
28 |
sentence-transformers==2.2.2
|
29 |
evaluate==0.4.1
|
30 |
https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
|
31 |
protobuf==3.20
|
|
|
|
23 |
tensorflow-cpu==2.9.0
|
24 |
denoiser==0.1.5
|
25 |
SpeechRecognition==3.10.0
|
26 |
+
transformers==4.27.1
|
27 |
streamlit==1.27.2
|
28 |
sentence-transformers==2.2.2
|
29 |
evaluate==0.4.1
|
30 |
https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
|
31 |
protobuf==3.20
|
32 |
+
streamlit_mic_recorder==0.0.2
|
rtvc/synthesizer/utils/cleaners.py
CHANGED
@@ -223,6 +223,7 @@ def english_cleaners_predict(text):
|
|
223 |
text = expand_numbers(text)
|
224 |
# text = split_conj(text)
|
225 |
text = collapse_whitespace(text)
|
|
|
226 |
return text
|
227 |
|
228 |
def english_cleaners(text):
|
|
|
223 |
text = expand_numbers(text)
|
224 |
# text = split_conj(text)
|
225 |
text = collapse_whitespace(text)
|
226 |
+
text = text.replace(',', '.')
|
227 |
return text
|
228 |
|
229 |
def english_cleaners(text):
|
run_tts.py
CHANGED
@@ -27,7 +27,7 @@ from rtvc.utils.argutils import print_args
|
|
27 |
from rtvc.utils.default_models import ensure_default_models
|
28 |
from rtvc.vocoder import inference as vocoder
|
29 |
from rtvc.vocoder.display import save_attention_multiple, save_spectrogram, save_stop_tokens
|
30 |
-
from rtvc.synthesizer.utils.cleaners import
|
31 |
from rtvc.speed_changer.fixSpeed import *
|
32 |
|
33 |
|
@@ -41,12 +41,12 @@ def tts(text, embed_name, nlp, autoplay=True):
|
|
41 |
|
42 |
ensure_default_models(run_id, models_dir)
|
43 |
synthesizer = Synthesizer_infer(list(models_dir.glob(f"{run_id}/synthesizer.pt"))[0])
|
44 |
-
|
45 |
## Generating the spectrogram
|
46 |
|
47 |
# The synthesizer works in batch, so you need to put your data in a list or numpy array
|
48 |
def split_text(text):
|
49 |
-
text =
|
50 |
texts = [i.text.strip() for i in nlp(text).sents] # split paragraph to sentences
|
51 |
return texts
|
52 |
|
@@ -81,8 +81,7 @@ def tts(text, embed_name, nlp, autoplay=True):
|
|
81 |
|
82 |
# Synthesizing the waveform is fairly straightforward. Remember that the longer the
|
83 |
# spectrogram, the more time-efficient the vocoder.
|
84 |
-
wav =
|
85 |
-
|
86 |
wav = vocoder.waveform_denoising(wav)
|
87 |
|
88 |
# Add breaks
|
@@ -118,7 +117,7 @@ def tts(text, embed_name, nlp, autoplay=True):
|
|
118 |
|
119 |
|
120 |
if __name__ == "__main__":
|
121 |
-
text = "
|
122 |
-
embed_name = "
|
123 |
nlp = spacy.load('en_core_web_sm')
|
124 |
tts(text, embed_name, nlp)
|
|
|
27 |
from rtvc.utils.default_models import ensure_default_models
|
28 |
from rtvc.vocoder import inference as vocoder
|
29 |
from rtvc.vocoder.display import save_attention_multiple, save_spectrogram, save_stop_tokens
|
30 |
+
from rtvc.synthesizer.utils.cleaners import english_cleaners_predict
|
31 |
from rtvc.speed_changer.fixSpeed import *
|
32 |
|
33 |
|
|
|
41 |
|
42 |
ensure_default_models(run_id, models_dir)
|
43 |
synthesizer = Synthesizer_infer(list(models_dir.glob(f"{run_id}/synthesizer.pt"))[0])
|
44 |
+
# vocoder.load_model(list(models_dir.glob(f"{run_id}/vocoder.pt"))[0])
|
45 |
## Generating the spectrogram
|
46 |
|
47 |
# The synthesizer works in batch, so you need to put your data in a list or numpy array
|
48 |
def split_text(text):
|
49 |
+
text = english_cleaners_predict(text)
|
50 |
texts = [i.text.strip() for i in nlp(text).sents] # split paragraph to sentences
|
51 |
return texts
|
52 |
|
|
|
81 |
|
82 |
# Synthesizing the waveform is fairly straightforward. Remember that the longer the
|
83 |
# spectrogram, the more time-efficient the vocoder.
|
84 |
+
wav = synthesizer.griffin_lim(spec)
|
|
|
85 |
wav = vocoder.waveform_denoising(wav)
|
86 |
|
87 |
# Add breaks
|
|
|
117 |
|
118 |
|
119 |
if __name__ == "__main__":
|
120 |
+
text = "Adkins was raised by a young single mother in various working-class neighbourhoods of London. As a child, she enjoyed singing contemporary pop music and learned to play the guitar and the clarinet. However, it was not until her early teens, when she discovered rhythm-and-blues singer Etta James and other mid-20th-century performers, that she began to consider a musical career. While she honed her talents at a government-funded secondary school for the performing arts, a friend began posting songs Adkins had written and recorded onto the social networking Web site Myspace. Her music eventually caught the attention of record labels, and in 2006, several months after graduating, she signed a contract with XL Recordings."
|
121 |
+
embed_name = "Adele"
|
122 |
nlp = spacy.load('en_core_web_sm')
|
123 |
tts(text, embed_name, nlp)
|
utils.py
CHANGED
@@ -1,7 +1,10 @@
|
|
1 |
import re
|
|
|
|
|
|
|
2 |
import streamlit as st
|
|
|
3 |
|
4 |
-
you_regex = re.compile(r'\b(you|your)\b', flags=re.IGNORECASE)
|
5 |
he_regex = re.compile(r'\b(he|him|himself)\b', flags=re.IGNORECASE)
|
6 |
his_regex = re.compile(r'\b(his)\b', flags=re.IGNORECASE)
|
7 |
she_regex = re.compile(r'\b(she|herself)\b', flags=re.IGNORECASE)
|
@@ -14,4 +17,56 @@ def hide_footer():
|
|
14 |
footer {visibility: hidden;}
|
15 |
</style>
|
16 |
"""
|
17 |
-
st.markdown(hide_st_style, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import re
|
2 |
+
import spacy
|
3 |
+
import json
|
4 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModel
|
5 |
import streamlit as st
|
6 |
+
import whisper
|
7 |
|
|
|
8 |
he_regex = re.compile(r'\b(he|him|himself)\b', flags=re.IGNORECASE)
|
9 |
his_regex = re.compile(r'\b(his)\b', flags=re.IGNORECASE)
|
10 |
she_regex = re.compile(r'\b(she|herself)\b', flags=re.IGNORECASE)
|
|
|
17 |
footer {visibility: hidden;}
|
18 |
</style>
|
19 |
"""
|
20 |
+
st.markdown(hide_st_style, unsafe_allow_html=True)
|
21 |
+
|
22 |
+
@st.cache_resource
|
23 |
+
def get_whisper_model(model_url:str='tiny'):
|
24 |
+
print("--------------------------------------------")
|
25 |
+
print("Attempting to load Whisper ...")
|
26 |
+
model = whisper.load_model(model_url, device='cpu')
|
27 |
+
print("Succesfully loaded Whisper")
|
28 |
+
return model
|
29 |
+
|
30 |
+
@st.cache_resource
|
31 |
+
def get_seq2seq_model(model_id):
|
32 |
+
return AutoModelForSeq2SeqLM.from_pretrained(model_id)
|
33 |
+
|
34 |
+
@st.cache_resource
|
35 |
+
def get_causal_model(model_id):
|
36 |
+
return AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
|
37 |
+
|
38 |
+
@st.cache_resource
|
39 |
+
def get_auto_model(model_id):
|
40 |
+
return AutoModel.from_pretrained(model_id)
|
41 |
+
|
42 |
+
@st.cache_resource
|
43 |
+
def get_tokenizer(model_id):
|
44 |
+
return AutoTokenizer.from_pretrained(model_id)
|
45 |
+
|
46 |
+
@st.cache_data
|
47 |
+
def get_celeb_data(fpath):
|
48 |
+
with open(fpath) as json_file:
|
49 |
+
return json.load(json_file)
|
50 |
+
|
51 |
+
@st.cache_resource
|
52 |
+
def preprocess_text(name, gender, text, model_id):
|
53 |
+
lname = name.split(" ")[-1]
|
54 |
+
lname_regex = re.compile(rf'\b({lname})\b')
|
55 |
+
name_regex = re.compile(rf'\b({name})\b')
|
56 |
+
lnames = lname+"’s" if not lname.endswith("s") else lname+"’"
|
57 |
+
lnames_regex = re.compile(rf'\b({lnames})\b')
|
58 |
+
names = name+"’s" if not name.endswith("s") else name+"’"
|
59 |
+
names_regex = re.compile(rf'\b({names})\b')
|
60 |
+
if gender == "M":
|
61 |
+
text = re.sub(he_regex, "I", text)
|
62 |
+
text = re.sub(his_regex, "my", text)
|
63 |
+
elif gender == "F":
|
64 |
+
text = re.sub(she_regex, "I", text)
|
65 |
+
text = re.sub(her_regex, "my", text)
|
66 |
+
text = re.sub(names_regex, "my", text)
|
67 |
+
text = re.sub(lnames_regex, "my", text)
|
68 |
+
text = re.sub(name_regex, "I", text)
|
69 |
+
text = re.sub(lname_regex, "I", text)
|
70 |
+
spacy_model = spacy.load(model_id)
|
71 |
+
texts = [i.text.strip() for i in spacy_model(text).sents]
|
72 |
+
return spacy_model, texts
|