"""Gradio chat demo for a PEFT-finetuned PolyLM 1.7B model."""

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE_MODEL_ID = "DAMO-NLP-MT/polylm-1.7b"
ADAPTER_ID = "fadliaulawi/polylm-1.7b-finetuned"

# Load the base model once at start-up, then wrap it with the finetuned adapter.
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID)
model = PeftModel.from_pretrained(model, ADAPTER_ID)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, use_fast=False)


def user(message, history):
    """Append the submitted message to the chat as a pending turn.

    Args:
        message: Text the user typed into the textbox.
        history: Current chat history, a list of ``[user, bot]`` pairs.

    Returns:
        A tuple ``("", new_history)``: the empty string clears the textbox and
        ``new_history`` ends with ``[message, None]`` (no reply yet).
    """
    return "", history + [[message, None]]


def bot(history, temperature, max_length, top_p, top_k):
    """Generate the model's reply for the last pending turn in ``history``.

    Only the most recent user message is sent to the model; earlier turns are
    not part of the prompt.

    Args:
        history: Chat history whose last entry is ``[user_message, None]``.
        temperature: Sampling temperature. A value of 0 (or below) selects
            greedy decoding, because sampling needs a strictly positive
            temperature.
        max_length: Maximum total sequence length (prompt + reply) in tokens.
        top_p: Nucleus-sampling probability mass.
        top_k: Number of highest-probability tokens kept when sampling.

    Returns:
        ``history`` with its last entry replaced by ``[user_message, reply]``.
    """
    user_message = history[-1][0]
    input_ids = tokenizer.encode(
        user_message + tokenizer.eos_token, return_tensors="pt"
    )

    temperature = float(temperature)
    if temperature > 0:
        decoding_kwargs = {
            "do_sample": True,
            "temperature": temperature,
            "top_p": float(top_p),
            "top_k": int(top_k),
        }
    else:
        # The slider allows 0, which is not a valid sampling temperature.
        decoding_kwargs = {"do_sample": False}

    output_ids = model.generate(
        input_ids=input_ids,
        pad_token_id=tokenizer.eos_token_id,
        max_length=int(max_length),
        **decoding_kwargs,
    )

    # The output starts with the prompt tokens; keep only what was generated.
    # Slicing by prompt length avoids depending on the literal EOS string and
    # cannot fail when the model stops without emitting an EOS token.
    reply_ids = output_ids[0, input_ids.shape[-1]:]
    reply = tokenizer.decode(reply_ids, skip_special_tokens=True)

    history[-1] = [user_message, reply]
    return history


with gr.Blocks() as demo:
    temperature = gr.Slider(0, 5, value=0.8, step=0.1, label='Temperature')
    max_length = gr.Slider(0, 8192, value=256, step=1, label='Max Length')
    top_p = gr.Slider(0, 1, value=0.8, step=0.1, label='Top P')
    top_k = gr.Slider(0, 50, value=50, step=1, label='Top K')

    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    submit = gr.Button("Submit")
    clear = gr.Button("Clear")
    examples = gr.Examples(
        examples=["Dokter aku sakit flu dan pilek. \nApa yang terjadi denganku?"],
        inputs=[msg],
    )

    # First show the user's message immediately (unqueued), then run the
    # slower generation step and fill in the reply.
    submit.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, temperature, max_length, top_p, top_k], chatbot
    )
    # Returning None resets the chatbot component.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue().launch()