from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from transformers.utils import logging
import gradio as gr
#import spaces

# Logger instance for the transformers library
logger = logging.get_logger("transformers")

# Load the model and tokenizer.
# Alternatives tried: "openai-community/gpt2", "TheBloke/Llama-2-7B-Chat-GGML", "TheBloke/zephyr-7B-beta-GPTQ"
model_name = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=False,
    revision="main",
)
#tokenizer.pad_token_id = tokenizer.eos_token_id
# device_map="auto" already places the model on the GPU, so no explicit .to("cuda") is needed
#model.to("cuda")

# Text-generation pipeline; pass the already-loaded model (not model_name)
# so the checkpoint is not loaded a second time.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1,
)

# Generate text using the pipeline
#@spaces.GPU(duration=60)
def generate_text(input_text):
    # Earlier, lower-level variant kept for reference:
    #input_ids = tokenizer.encode(input_text, return_tensors="pt")#.to("cuda")
    #attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
    #output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
    #return tokenizer.decode(output[0])
    return pipe(input_text)[0]["generated_text"]

interface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    title="TeLLMyStory",
    description="Enter your story idea and the model will generate the story based on it.",
)
interface.launch()

# Example of disabling the Exllama backend (if applicable in your configuration)
#config = {"disable_exllama": True}
#model.config.update(config)

# def generate_text(prompt):
#     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512, padding="max_length")
#     summary_ids = model.generate(inputs["input_ids"], max_new_tokens=512, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
#     return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# # For training the model after the data is collected
# #model.save_pretrained("model")
# #tokenizer.save_pretrained("model")

# # App functions for the Blocks-based UI
# history = []
#
# def show_output_text(message):
#     history.append((message, ""))
#     story = generate_text(message)
#     history[-1] = (message, story)
#     return story
#
# def clear_textbox():
#     return None, None
#
# # Create the input interface with Gradio
# with gr.Blocks() as demo:
#     gr.Markdown("TeLLMyStory chatbot")
#     with gr.Row():
#         input_text = gr.Textbox(label="Enter your story idea here", placeholder="Once upon a time...")
#         clear_button = gr.Button("Clear", variant="secondary")
#         submit_button = gr.Button("Submit", variant="primary")
#     with gr.Row():
#         gr.Markdown("And see the story take shape here")
#         output_text = gr.Textbox(label="History")
#     submit_button.click(fn=show_output_text, inputs=input_text, outputs=output_text)
#     clear_button.click(fn=clear_textbox, outputs=[input_text, output_text])
# # Launch the interface
# demo.launch()
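
# The "disable_exllama" note above only sketches a plain dict; for a GPTQ checkpoint this
# is usually handled at load time instead. The block below is a minimal sketch, assuming the
# transformers GPTQ integration (GPTQConfig) is available; it is not part of the original app,
# and the exact flag name is version-dependent (older releases use disable_exllama=True,
# newer ones use use_exllama=False).
#
# from transformers import GPTQConfig
#
# # Disable the ExLlama kernels when loading the quantized model, e.g. on hardware
# # or setups where ExLlama is unsupported.
# quantization_config = GPTQConfig(bits=4, disable_exllama=True)
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     revision="main",
#     quantization_config=quantization_config,
# )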