import time

import gradio as gr
import librosa
import numpy as np
from transformers import pipeline
########################Llama model###############################
# from transformers import AutoModelForCausalLM, AutoTokenizer
# model_name_or_path = "TheBloke/llama2_7b_chat_uncensored-GPTQ"
# # To use a different branch, change revision
# # For example: revision="main"
# model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
#                                              device_map="auto",
#                                              trust_remote_code=True,
#                                              revision="main",
#                                              # quantization_config=QuantizationConfig(disable_exllama=True)
#                                              )
# tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
# Llama_pipe = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     max_new_tokens=40,
#     do_sample=True,
#     temperature=0.7,
#     top_p=0.95,
#     top_k=40,
#     repetition_penalty=1.1
# )
# history = """User: Hello, Rally?
# Rally: I'm happy to see you again. What do you want to talk about today?
# User: Let's talk about food.
# Rally: Sure.
# User: I'm hungry right now. Do you know any Vietnamese food?"""
# prompt_template = f"""<|im_start|>system
# Write one sentence to continue the conversation<|im_end|>
# {history}
# Rally:"""
# print(Llama_pipe(prompt_template)[0]['generated_text'])
# def RallyRespone(chat_history, message):
#     chat_history += "User: " + message + "\n"
#     t_chat = Llama_pipe(prompt_template)[0]['generated_text']
#     res = t_chat[t_chat.rfind("Rally: "):]
#     return res
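# NOTE: `respond` (defined in the Gradio UI below) calls RallyRespone, but its real definition
# above is commented out together with the Llama pipeline. The stub below is an assumed
# placeholder, added only so the app runs without the GPTQ model; replace it with the
# commented definition once Llama_pipe is enabled.
def RallyRespone(chat_history, message):
    return "Rally: (Llama pipeline disabled) I heard you say: " + str(message)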
########################ASR model###############################
from transformers import WhisperProcessor, WhisperForConditionalGeneration
# load model and processor
processor = WhisperProcessor.from_pretrained("openai/whisper-base")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
model.config.forced_decoder_ids = None
sample_rate = 16000
def ASR_model(audio, sr=16000):
    # extract log-Mel input features from the raw waveform
    input_features = processor(audio, sampling_rate=sr, return_tensors="pt").input_features
    # generate token ids
    predicted_ids = model.generate(input_features)
    # decode token ids to text
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription
########################Gradio UI###############################
# Chatbot demo: record or upload audio, transcribe it with Whisper, and send the transcript to the chatbot.
def add_file(files):
    # show the uploaded file's path in the File component
    return files.name

def print_like_dislike(x: gr.LikeData):
    print(x.index, x.value, x.liked)

def upfile(files):
    # load the uploaded audio at 16 kHz and transcribe it with Whisper
    x = librosa.load(files, sr=16000)
    print(x[0])
    text = ASR_model(x[0])
    return [text[0], text[0]]
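# NOTE: `transcribe` below uses a `transcriber` that is never defined in this file.
# A minimal sketch, assuming the otherwise unused `pipeline` import was meant to build a
# Whisper ASR pipeline; the model choice ("openai/whisper-base") is an assumption.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base")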
def transcribe(audio):
    # microphone input arrives as (sample_rate, int16 array); normalize to float32 in [-1, 1]
    sr, y = audio
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    return text, text
# def recommand(text):
#     ret = "answer for"
#     return ret + text

def add_text(history, text):
    history = history + [(text, None)]
    return history, gr.Textbox(value="", interactive=False)
# def bot(history):
#     response = "**That's cool!**"
#     history[-1][1] = ""
#     for character in response:
#         history[-1][1] += character
#         time.sleep(0.05)
#         yield history
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        bubble_full_width=False,
    )
    file_output = gr.File()

    def respond(message, chat_history):
        # generate a reply and append the (user, bot) pair to the chat history
        bot_message = RallyRespone(chat_history, message)
        chat_history.append((message, bot_message))
        time.sleep(2)
        print(chat_history[-1])
        return chat_history[-1][-1], chat_history

    with gr.Row():
        with gr.Column():
            audio_speech = gr.Audio(sources=["microphone"])
            submit = gr.Button("Submit")
            send = gr.Button("Send")
            btn = gr.UploadButton("📁", file_types=["audio"])
        with gr.Column():
            opt1 = gr.Button("1: ")
            opt2 = gr.Button("2: ")
            # submit.click(translate, inputs=audio_speech, outputs=[opt1, opt2])
            # output is [opt1 value, opt2 value]

    file_msg = btn.upload(add_file, btn, file_output)
    submit.click(upfile, inputs=file_output, outputs=[opt1, opt2])
    send.click(transcribe, inputs=audio_speech, outputs=[opt1, opt2])
    opt1.click(respond, [opt1, chatbot], [opt1, chatbot])
    opt2.click(respond, [opt2, chatbot], [opt2, chatbot])
    # opt2.click(recommand, inputs=opt2)
    # click event: the bot generates from history = optx.value
    chatbot.like(print_like_dislike, None, None)

if __name__ == "__main__":
    demo.queue()
    demo.launch(debug=True)