#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Gradio.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1NhPAjcqhdmjOyMrg7j8IHqzGlJARGIjs

Streaming chat UI for a fine-tuned causal language model.

The script loads the model and tokenizer named by ``new_model``, defines a
stopping criterion and a streaming ``predict`` generator, and serves them
through a ``gr.ChatInterface``.
"""

from threading import Thread

import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
    TextIteratorStreamer,
)

# Hub identifier shared by the model and its tokenizer.
new_model = "Mervyn999/mistral-7b-distilabel-mini-DPO"

# Weights are requested in 4-bit with float16 as the torch dtype.
model = AutoModelForCausalLM.from_pretrained(
    new_model,
    torch_dtype=torch.float16,
    load_in_4bit=True,
)
tokenizer = AutoTokenizer.from_pretrained(new_model)


class StopOnTokens(StoppingCriteria):
    """Stop generation once the most recent token is one of ``stop_ids``."""

    # NOTE(review): the meaning of ids 29 and 0 depends on the tokenizer
    # vocabulary, which this file does not show -- confirm they are the
    # intended stop tokens for this model.
    stop_ids = (29, 0)

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the last token of the first sequence is a stop id.

        Only ``input_ids[0]`` is inspected; ``scores`` and ``kwargs`` are
        accepted for interface compatibility and ignored.
        """
        last_token = input_ids[0][-1]
        for stop_id in self.stop_ids:
            if last_token == stop_id:
                return True
        return False


def predict(message, history):
    """Stream the model's reply to ``message`` given the prior ``history``.

    Args:
        message: The newest user message.
        history: List of ``[user_text, assistant_text]`` pairs for the earlier
            turns of the conversation.

    Yields:
        The reply accumulated so far, growing as each new piece of text is
        produced by the streamer.
    """
    # Append the new turn with an empty response slot for the model to fill.
    history_transformer_format = history + [[message, ""]]
    stop = StopOnTokens()

    # Wrap the prompt using the right chat template.
    messages = "".join(
        f"### Instruction:\n{item[0]}\n\n### Response:\n{item[1]}"
        for item in history_transformer_format
    )

    # Send the inputs to whichever device the model was loaded on instead of
    # assuming a device named "cuda".
    model_inputs = tokenizer([messages], return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=10.0,
        skip_prompt=True,
        skip_special_tokens=True,
    )
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        top_p=0.95,
        top_k=1000,
        temperature=1.0,
        num_beams=1,
        stopping_criteria=StoppingCriteriaList([stop]),
    )

    # Generation runs in a background thread so this generator can consume the
    # streamer and yield partial text while tokens are still being produced.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    partial_message = ""
    for new_token in streamer:
        # A chunk that is exactly "<" is dropped from the displayed reply
        # (presumably the start of a markup-like stop marker -- TODO confirm).
        if new_token != '<':
            partial_message += new_token
            yield partial_message


# NOTE(review): this name is not referenced by the interface below, which sets
# its own title string.
title = "Jaewon's finetuned LLM"

gr.close_all()

gr.ChatInterface(
    predict,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Send a message", container=False, scale=7),
    title="Chat with Mistral-7B DPO tuned",
    theme="soft",
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
).queue().launch()