from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from transformers.utils import logging
import gradio as gr
#import spaces
# Define the logger instance for the transformers library
logger = logging.get_logger("transformers")
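# Optional (sketch): raise the transformers log level while debugging model loading, e.g.
# logging.set_verbosity_info()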
# Load the model and tokenizer
model_name = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ" #"openai-community/gpt2" or "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ" or "TheBloke/Llama-2-7B-Chat-GGML" or "TheBloke/zephyr-7B-beta-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_name,use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_name,device_map="auto",trust_remote_code=False,revision="main")
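# Note: loading a GPTQ checkpoint like this one through transformers requires the
# optimum and auto-gptq packages to be installed (pip install optimum auto-gptq).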
#tokenizer.pad_token_id = tokenizer.eos_token_id
# Transfer the model to the GPU (not needed here: device_map="auto" already handles placement)
#model.to("cuda")
# pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
# max_new_tokens=512,
# do_sample=True,
# temperature=0.7,
# top_p=0.95,
# top_k=40,
# repetition_penalty=1.1)
# Generate text using the model and tokenizer
#@spaces.GPU(duration=60)
def generate_text(input_text):
    # Tokenize the prompt (input_ids + attention_mask) and move it to the model's device
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
    return tokenizer.decode(output[0], skip_special_tokens=True)
    #return pipe(input_text)[0]["generated_text"]
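    # A possible refinement (sketch): strip the prompt tokens so only the newly
    # generated continuation is returned, e.g.
    # new_tokens = output[0][inputs["input_ids"].shape[-1]:]
    # return tokenizer.decode(new_tokens, skip_special_tokens=True)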
interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",
                         title="TeLLMyStory",
                         description="Enter your story idea and the model will generate the story based on it.")
interface.launch()
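# If generation is slow on this hardware, Gradio's request queue can be enabled
# instead (sketch): interface.queue().launch()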
# Example of disabling Exllama backend (if applicable in your configuration)
#config = {"disable_exllama": True}
#model.config.update(config)
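# A sketch of the same idea via transformers' GPTQConfig (assumes a recent
# transformers version with GPTQ support):
# from transformers import GPTQConfig
# quantization_config = GPTQConfig(bits=4, disable_exllama=True)
# model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto",
#                                              quantization_config=quantization_config)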
# def generate_text(prompt):
#     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512, padding="max_length")
#     summary_ids = model.generate(inputs["input_ids"], max_new_tokens=512, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
#     return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# #for training the model after the data is collected
# #model.save_pretrained("model")
# #tokenizer.save_pretrained("model")

# #for the app functions
# history = []  # conversation history as (message, story) pairs
# def show_output_text(message):
#     history.append((message, ""))
#     story = generate_text(message)
#     history[-1] = (message, story)
#     return story

# def clear_textbox():
#     return None, None
# # Create the input interface with Gradio
# with gr.Blocks() as demo:
#     gr.Markdown("TeLLMyStory chatbot")
#     with gr.Row():
#         input_text = gr.Textbox(label="Enter your story idea here", placeholder="Once upon a time...")
#         clear_button = gr.Button("Clear", variant="secondary")
#         submit_button = gr.Button("Submit", variant="primary")
#     with gr.Row():
#         gr.Markdown("And see the story take shape here")
#         output_text = gr.Textbox(label="History")
#     submit_button.click(fn=show_output_text, inputs=input_text, outputs=output_text)
#     clear_button.click(fn=clear_textbox, outputs=[input_text, output_text])
# # Launch the interface
# demo.launch()