Spaces:

liuhaozhe6788
/

CelebChat

Runtime error

App Files Community

lhzstar committed on Oct 29, 2023

Commit

436ce71

1 Parent(s): d2b6583

new commits

Browse files

Files changed (10) hide show

.gitignore +3 -1
app.py +82 -102
celebbot.py +6 -6
data.json +0 -0
embeds/Adele.npy +0 -0
embeds/Barack_Obama.npy +0 -0
requirements.txt +2 -1
rtvc/synthesizer/utils/cleaners.py +1 -0
run_tts.py +6 -7
utils.py +57 -2

.gitignore CHANGED Viewed

@@ -20,4 +20,6 @@ launch.json
 *.m4a
 *.csv
 input_audios/
-syn_results/

 *.m4a
 *.csv
 input_audios/
+syn_results/
+falcon-7b-instruct/
+flan-t5-large/

app.py CHANGED Viewed

@@ -1,54 +1,19 @@
 from celebbot import CelebBot
 import streamlit as st
-import re
-import spacy
-import json
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
 from utils import *
-@st.cache_resource
-def get_seq2seq_model(model_id):
-    return AutoModelForSeq2SeqLM.from_pretrained(model_id)
-@st.cache_resource
-def get_auto_model(model_id):
-    return AutoModel.from_pretrained(model_id)
-@st.cache_resource
-def get_tokenizer(model_id):
-    return AutoTokenizer.from_pretrained(model_id)
-@st.cache_data
-def get_celeb_data(fpath):
-    with open(fpath) as json_file:
-        return json.load(json_file)
-@st.cache_resource
-def preprocess_text(name, gender, text, model_id):
-    lname = name.split(" ")[-1]
-    lname_regex = re.compile(rf'\b({lname})\b')
-    name_regex = re.compile(rf'\b({name})\b')
-    lnames = lname+"’s" if not lname.endswith("s") else lname+"’"
-    lnames_regex = re.compile(rf'\b({lnames})\b')
-    names = name+"’s" if not name.endswith("s") else name+"’"
-    names_regex = re.compile(rf'\b({names})\b')
-    if gender == "M":
-        text = re.sub(he_regex, "I", text)
-        text = re.sub(his_regex, "my", text)
-    elif gender == "F":
-        text = re.sub(she_regex, "I", text)
-        text = re.sub(her_regex, "my", text)
-    text = re.sub(names_regex, "my", text)
-    text = re.sub(lnames_regex, "my", text)
-    text = re.sub(name_regex, "I", text)
-    text = re.sub(lname_regex, "I", text)
-    spacy_model = spacy.load(model_id)
-    texts = [i.text.strip() for i in spacy_model(text).sents]
-    return spacy_model, texts
 def main():
     hide_footer()
     if "messages" not in st.session_state:
         st.session_state["messages"] = []
     if "QA_model_path" not in st.session_state:
@@ -57,65 +22,80 @@ def main():
         st.session_state["sentTr_model_path"] = "sentence-transformers/all-mpnet-base-v2"
     if "start_chat" not in st.session_state:
         st.session_state["start_chat"] = False
-    model_list = ["base", "large", "xl", "xxl"]
-    for message in st.session_state["messages"]:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-    celeb_data = get_celeb_data(f'data.json')
-    # Create a Form Component on the Sidebar for accepting input data and parameters
-    celeb_name = st.sidebar.selectbox('Choose a celebrity', options=list(celeb_data.keys()))
-    celeb_gender = celeb_data[celeb_name]["gender"]
-    knowledge = celeb_data[celeb_name]["knowledge"]
-    model_choice = st.sidebar.selectbox("Choose Your Flan-T5 model",options=model_list)
-    st.session_state["QA_model_path"] = f"google/flan-t5-{model_choice}"
-    #     submitted = st.form_submit_button(label="Start Chatting")
-    # if submitted:
-    #     st.session_state["start_chat"] = True
-    # if st.session_state["start_chat"]:
-    celeb_bot = CelebBot(celeb_name,
-                         get_tokenizer(st.session_state["QA_model_path"]),
-                         get_seq2seq_model(st.session_state["QA_model_path"]),
-                         get_tokenizer(st.session_state["sentTr_model_path"]),
-                         get_auto_model(st.session_state["sentTr_model_path"]),
-                         *preprocess_text(celeb_name, celeb_gender, knowledge, "en_core_web_sm")
-                         )
-    prompt = st.chat_input("Say something")
-    print(prompt)
-    if prompt:
-        celeb_bot.text = prompt
-        # Display user message in chat message container
-        st.chat_message("user").markdown(prompt)
-        # Add user message to chat history
-        st.session_state["messages"].append({"role": "user", "content": prompt})
-        # Add assistant response to chat history
-        response = celeb_bot.question_answer()
-        # disable autoplay to play in HTML
-        b64 = celeb_bot.text_to_speech(autoplay=False)
-        md = f"""
-        <p>{response}</p>
-        <audio controls autoplay style="display:none;">
-        <source src="data:audio/wav;base64,{b64}" type="audio/wav">
-        Your browser does not support the audio element.
-        </audio>
-        """
-        st.chat_message("assistant").markdown(
-            md,
-            unsafe_allow_html=True,
-        )
-        # Display assistant response in chat message container
-        st.session_state["messages"].append({"role": "assistant", "content": response})
 if __name__ == "__main__":

 from celebbot import CelebBot
 import streamlit as st
+from streamlit_mic_recorder import speech_to_text
 from utils import *
 def main():
     hide_footer()
+    model_list = ["flan-t5-large", "flan-t5-xl", "Falcon-7b-instruct"]
+    celeb_data = get_celeb_data(f'data.json')
+    st.sidebar.header("CelebChat")
+    expander = st.sidebar.expander('About the app')
+    with expander:
+        st.markdown("This app is a demo of celebrity chatting!")
     if "messages" not in st.session_state:
         st.session_state["messages"] = []
     if "QA_model_path" not in st.session_state:
         st.session_state["sentTr_model_path"] = "sentence-transformers/all-mpnet-base-v2"
     if "start_chat" not in st.session_state:
         st.session_state["start_chat"] = False
+    if "prompt" not in st.session_state:
+        st.session_state["prompt"] = None
+    def start_chat(name, model_id):
+        print(name, model_id)
+        if name != '' and model_id != '':
+            st.session_state["start_chat"] = True
+        else:
+            st.session_state["start_chat"] = False
+    with st.sidebar.form("my_form"):
+        print("enter form")
+        st.session_state["celeb_name"] = st.selectbox('Choose a celebrity', options=list(celeb_data.keys()))
+        model_id=st.selectbox("Choose Your Flan-T5 model",options=model_list)
+        st.session_state["QA_model_path"] = f"google/{model_id}" if "flan-t5" in model_id else model_id
+        st.form_submit_button(label="Start Chatting", on_click=start_chat, args=(st.session_state["celeb_name"], st.session_state["QA_model_path"]))
+    if st.session_state["start_chat"]:
+        celeb_gender = celeb_data[st.session_state["celeb_name"]]["gender"]
+        knowledge = celeb_data[st.session_state["celeb_name"]]["knowledge"]
+        st.session_state["celeb_bot"] = CelebBot(st.session_state["celeb_name"],
+                        get_tokenizer(st.session_state["QA_model_path"]),
+                        get_seq2seq_model(st.session_state["QA_model_path"]) if "flan-t5" in st.session_state["QA_model_path"] else get_causal_model(st.session_state["QA_model_path"]),
+                        get_tokenizer(st.session_state["sentTr_model_path"]),
+                        get_auto_model(st.session_state["sentTr_model_path"]),
+                        *preprocess_text(st.session_state["celeb_name"], celeb_gender, knowledge, "en_core_web_sm")
+                        )
+        dialogue_container = st.container()
+        with dialogue_container:
+            for message in st.session_state["messages"]:
+                with st.chat_message(message["role"]):
+                    st.markdown(message["content"])
+        if "_last_audio_id" not in st.session_state:
+            st.session_state["_last_audio_id"] = 0
+        with st.sidebar:
+            prompt_from_audio =speech_to_text(start_prompt="Start Recording",stop_prompt="Stop Recording",language='en',use_container_width=True, just_once=True,key='STT')
+            prompt_from_text = st.text_input('Or write something')
+        if prompt_from_audio != None:
+            st.session_state["prompt"] = prompt_from_audio
+        elif prompt_from_text != None:
+            st.session_state["prompt"] = prompt_from_text
+        print(st.session_state["prompt"])
+        if st.session_state["prompt"] != None and st.session_state["prompt"] != '':
+            st.session_state["celeb_bot"].text = st.session_state["prompt"]
+            # Display user message in chat message container
+            with dialogue_container:
+                st.chat_message("user").markdown(st.session_state["prompt"])
+            # Add user message to chat history
+            st.session_state["messages"].append({"role": "user", "content": st.session_state["prompt"]})
+            # Add assistant response to chat history
+            response = st.session_state["celeb_bot"].question_answer()
+            # disable autoplay to play in HTML
+            b64 = st.session_state["celeb_bot"].text_to_speech(autoplay=False)
+            md = f"""
+            <p>{response}</p>
+            <audio controls autoplay style="display:none;">
+            <source src="data:audio/wav;base64,{b64}" type="audio/wav">
+            Your browser does not support the audio element.
+            </audio>
+            """
+            with dialogue_container:
+                st.chat_message("assistant").markdown(
+                    md,
+                    unsafe_allow_html=True,
+                )
+            # Display assistant response in chat message container
+            st.session_state["messages"].append({"role": "assistant", "content": response})
 if __name__ == "__main__":

celebbot.py CHANGED Viewed

@@ -102,13 +102,13 @@ class CelebBot():
                 self.text = f"Hello I am {self.name} the AI, what can I do for you?"
             ## have a conversation
             else:
-                # if re.search(you_regex, self.text) != None:
-                instruction1 = f'[Instruction] You are a celebrity named {self.name}. You need to answer the question based on knowledge and commonsense.'
-                knowledge = self.retrieve_knowledge_assertions()
-                # else:
-                #     instruction1 = f'[Instruction] You need to answer the question based on commonsense.'
-                query = f"{instruction1} [knowledge] {knowledge} [question] {self.text} {self.name}!"
                 input_ids = self.QA_tokenizer(f"{query}", return_tensors="pt").input_ids
                 outputs = self.QA_model.generate(input_ids, max_length=1024)
                 self.text = self.QA_tokenizer.decode(outputs[0], skip_special_tokens=True)

                 self.text = f"Hello I am {self.name} the AI, what can I do for you?"
             ## have a conversation
             else:
+                if re.search(re.compile(rf'\b(you|your|{self.name})\b', flags=re.IGNORECASE), self.text) != None:
+                    instruction1 = f'[Instruction] You are a celebrity named {self.name}. You need to answer the question based on knowledge and commonsense.'
+                    knowledge = self.retrieve_knowledge_assertions()
+                else:
+                    instruction1 = f'[Instruction] You need to answer the question based on commonsense.'
+                query = f"{instruction1} [knowledge] {knowledge} [question] {self.text}"
                 input_ids = self.QA_tokenizer(f"{query}", return_tensors="pt").input_ids
                 outputs = self.QA_model.generate(input_ids, max_length=1024)
                 self.text = self.QA_tokenizer.decode(outputs[0], skip_special_tokens=True)

data.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

embeds/Adele.npy ADDED Viewed

Binary file (1.15 kB). View file

embeds/Barack_Obama.npy ADDED Viewed

Binary file (1.15 kB). View file

requirements.txt CHANGED Viewed

@@ -23,9 +23,10 @@ torchaudio==0.11.0
 tensorflow-cpu==2.9.0
 denoiser==0.1.5
 SpeechRecognition==3.10.0
-transformers==4.25.1
 streamlit==1.27.2
 sentence-transformers==2.2.2
 evaluate==0.4.1
 https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
 protobuf==3.20

 tensorflow-cpu==2.9.0
 denoiser==0.1.5
 SpeechRecognition==3.10.0
+transformers==4.27.1
 streamlit==1.27.2
 sentence-transformers==2.2.2
 evaluate==0.4.1
 https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
 protobuf==3.20
+streamlit_mic_recorder==0.0.2

rtvc/synthesizer/utils/cleaners.py CHANGED Viewed

@@ -223,6 +223,7 @@ def english_cleaners_predict(text):
     text = expand_numbers(text)
     # text = split_conj(text)
     text = collapse_whitespace(text)
     return text
 def english_cleaners(text):

     text = expand_numbers(text)
     # text = split_conj(text)
     text = collapse_whitespace(text)
+    text = text.replace(',', '.')
     return text
 def english_cleaners(text):

run_tts.py CHANGED Viewed

@@ -27,7 +27,7 @@ from rtvc.utils.argutils import print_args
 from rtvc.utils.default_models import ensure_default_models
 from rtvc.vocoder import inference as vocoder
 from rtvc.vocoder.display import save_attention_multiple, save_spectrogram, save_stop_tokens
-from rtvc.synthesizer.utils.cleaners import english_cleaners
 from rtvc.speed_changer.fixSpeed import *
@@ -41,12 +41,12 @@ def tts(text, embed_name, nlp, autoplay=True):
     ensure_default_models(run_id, models_dir)
     synthesizer = Synthesizer_infer(list(models_dir.glob(f"{run_id}/synthesizer.pt"))[0])
     ## Generating the spectrogram
     # The synthesizer works in batch, so you need to put your data in a list or numpy array
     def split_text(text):
-        text = english_cleaners(text)
         texts = [i.text.strip() for i in nlp(text).sents]  # split paragraph to sentences
         return texts
@@ -81,8 +81,7 @@ def tts(text, embed_name, nlp, autoplay=True):
     # Synthesizing the waveform is fairly straightforward. Remember that the longer the
     # spectrogram, the more time-efficient the vocoder.
-    wav = Synthesizer_infer.griffin_lim(spec)
     wav = vocoder.waveform_denoising(wav)
     # Add breaks
@@ -118,7 +117,7 @@ def tts(text, embed_name, nlp, autoplay=True):
 if __name__ == "__main__":
-    text = "Continuing without audio playback. Suppress this message"
-    embed_name = "Cate_Blanchett"
     nlp = spacy.load('en_core_web_sm')
     tts(text, embed_name, nlp)

 from rtvc.utils.default_models import ensure_default_models
 from rtvc.vocoder import inference as vocoder
 from rtvc.vocoder.display import save_attention_multiple, save_spectrogram, save_stop_tokens
+from rtvc.synthesizer.utils.cleaners import english_cleaners_predict
 from rtvc.speed_changer.fixSpeed import *
     ensure_default_models(run_id, models_dir)
     synthesizer = Synthesizer_infer(list(models_dir.glob(f"{run_id}/synthesizer.pt"))[0])
+    # vocoder.load_model(list(models_dir.glob(f"{run_id}/vocoder.pt"))[0])
     ## Generating the spectrogram
     # The synthesizer works in batch, so you need to put your data in a list or numpy array
     def split_text(text):
+        text = english_cleaners_predict(text)
         texts = [i.text.strip() for i in nlp(text).sents]  # split paragraph to sentences
         return texts
     # Synthesizing the waveform is fairly straightforward. Remember that the longer the
     # spectrogram, the more time-efficient the vocoder.
+    wav = synthesizer.griffin_lim(spec)
     wav = vocoder.waveform_denoising(wav)
     # Add breaks
 if __name__ == "__main__":
+    text = "Adkins was raised by a young single mother in various working-class neighbourhoods of London. As a child, she enjoyed singing contemporary pop music and learned to play the guitar and the clarinet. However, it was not until her early teens, when she discovered rhythm-and-blues singer Etta James and other mid-20th-century performers, that she began to consider a musical career. While she honed her talents at a government-funded secondary school for the performing arts, a friend began posting songs Adkins had written and recorded onto the social networking Web site Myspace. Her music eventually caught the attention of record labels, and in 2006, several months after graduating, she signed a contract with XL Recordings."
+    embed_name = "Adele"
     nlp = spacy.load('en_core_web_sm')
     tts(text, embed_name, nlp)

utils.py CHANGED Viewed

@@ -1,7 +1,10 @@
 import re
 import streamlit as st
-you_regex = re.compile(r'\b(you|your)\b', flags=re.IGNORECASE)
 he_regex = re.compile(r'\b(he|him|himself)\b', flags=re.IGNORECASE)
 his_regex = re.compile(r'\b(his)\b', flags=re.IGNORECASE)
 she_regex = re.compile(r'\b(she|herself)\b', flags=re.IGNORECASE)
@@ -14,4 +17,56 @@ def hide_footer():
             footer {visibility: hidden;}
             </style>
             """
-    st.markdown(hide_st_style, unsafe_allow_html=True)

 import re
+import spacy
+import json
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoModel
 import streamlit as st
+import whisper
 he_regex = re.compile(r'\b(he|him|himself)\b', flags=re.IGNORECASE)
 his_regex = re.compile(r'\b(his)\b', flags=re.IGNORECASE)
 she_regex = re.compile(r'\b(she|herself)\b', flags=re.IGNORECASE)
             footer {visibility: hidden;}
             </style>
             """
+    st.markdown(hide_st_style, unsafe_allow_html=True)
+@st.cache_resource
+def get_whisper_model(model_url:str='tiny'):
+        print("--------------------------------------------")
+        print("Attempting to load Whisper ...")
+        model = whisper.load_model(model_url, device='cpu')
+        print("Succesfully loaded Whisper")
+        return model
+@st.cache_resource
+def get_seq2seq_model(model_id):
+    return AutoModelForSeq2SeqLM.from_pretrained(model_id)
+@st.cache_resource
+def get_causal_model(model_id):
+    return AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
+@st.cache_resource
+def get_auto_model(model_id):
+    return AutoModel.from_pretrained(model_id)
+@st.cache_resource
+def get_tokenizer(model_id):
+    return AutoTokenizer.from_pretrained(model_id)
+@st.cache_data
+def get_celeb_data(fpath):
+    with open(fpath) as json_file:
+        return json.load(json_file)
+@st.cache_resource
+def preprocess_text(name, gender, text, model_id):
+    lname = name.split(" ")[-1]
+    lname_regex = re.compile(rf'\b({lname})\b')
+    name_regex = re.compile(rf'\b({name})\b')
+    lnames = lname+"’s" if not lname.endswith("s") else lname+"’"
+    lnames_regex = re.compile(rf'\b({lnames})\b')
+    names = name+"’s" if not name.endswith("s") else name+"’"
+    names_regex = re.compile(rf'\b({names})\b')
+    if gender == "M":
+        text = re.sub(he_regex, "I", text)
+        text = re.sub(his_regex, "my", text)
+    elif gender == "F":
+        text = re.sub(she_regex, "I", text)
+        text = re.sub(her_regex, "my", text)
+    text = re.sub(names_regex, "my", text)
+    text = re.sub(lnames_regex, "my", text)
+    text = re.sub(name_regex, "I", text)
+    text = re.sub(lname_regex, "I", text)
+    spacy_model = spacy.load(model_id)
+    texts = [i.text.strip() for i in spacy_model(text).sents]
+    return spacy_model, texts