from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from transformers.utils import logging
import gradio as gr
#import spaces  # only needed when running on Hugging Face ZeroGPU Spaces

# Define the logger instance for the transformers library
logger = logging.get_logger("transformers")
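# The logger is not used below; it is available for diagnostics, e.g.
# logging.set_verbosity_info() would surface transformers' info-level
# messages (a hypothetical usage, not enabled in this app).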
# Load the model and tokenizer
model_name = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"  # alternatives: "openai-community/gpt2", "TheBloke/Llama-2-7B-Chat-GGML", "TheBloke/zephyr-7B-beta-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=False, revision="main")
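# Note (assumption about the runtime environment): GPTQ checkpoints such as
# this one require a CUDA GPU plus the optimum and auto-gptq packages; for a
# CPU-only smoke test, an unquantized model like "openai-community/gpt2"
# could be substituted above.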
#tokenizer.pad_token_id = tokenizer.eos_token_id
# Moving the model to the GPU manually is unnecessary here:
# device_map="auto" already places it on the available device(s).
#model.to("cuda")
# pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
#                 max_new_tokens=512,
#                 do_sample=True,
#                 temperature=0.7,
#                 top_p=0.95,
#                 top_k=40,
#                 repetition_penalty=1.1)
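# Note: the commented-out pipeline above bundles tokenization, generation,
# and decoding into one call; the manual generate_text() below keeps those
# steps explicit (one plausible reason the pipeline path was left disabled).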
# Generate text using the model and tokenizer
#@spaces.GPU(duration=60)
def generate_text(input_text):
    # Tokenize the prompt and move it to the same device as the model
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
    #attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
    output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
    # Decode the generated ids, dropping special tokens such as <s> and </s>
    return tokenizer.decode(output[0], skip_special_tokens=True)
    #return pipe(input_text)[0]["generated_text"]
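# Example (hypothetical prompt):
#   generate_text("Once upon a time, a dragon guarded a library")
# returns the prompt followed by up to 512 sampled continuation tokens.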
interface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    title="TeLLMyStory",
    description="Enter your story idea and the model will generate a story based on it.",
)
interface.launch()
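# launch() serves the app on a local URL by default; passing share=True would
# also create a temporary public Gradio link (not enabled here).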
# Example of disabling the ExLlama backend (if applicable in your configuration).
# Note that updating model.config after loading, as sketched below, may not take
# effect; the backend is normally chosen at load time (see the sketch that follows).
#config = {"disable_exllama": True}
#model.config.update(config)
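# Load-time sketch (assumes transformers >= 4.32 with optimum and auto-gptq
# installed; GPTQConfig and its disable_exllama flag come from transformers):
# from transformers import GPTQConfig
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     quantization_config=GPTQConfig(bits=4, disable_exllama=True),
# )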
# def generate_text(prompt):
#     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512, padding="max_length")
#     summary_ids = model.generate(inputs["input_ids"], max_new_tokens=512, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
#     return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
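# Note: this commented-out variant decodes with beam search (num_beams=4,
# early_stopping=True) for more deterministic output than the sampling
# settings used in the active generate_text above.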
# # For training the model after the data is collected:
# # model.save_pretrained("model")
# # tokenizer.save_pretrained("model")
# # App helper functions
# history = []  # conversation history; must be defined before show_output_text runs
# def show_output_text(message):
#     history.append((message, ""))
#     story = generate_text(message)
#     history[-1] = (message, story)
#     return story

# def clear_textbox():
#     return None, None

# # Create the input interface with Gradio
# with gr.Blocks() as demo:
#     gr.Markdown("TeLLMyStory chatbot")
#     with gr.Row():
#         input_text = gr.Textbox(label="Enter your story idea here", placeholder="Once upon a time...")
#         clear_button = gr.Button("Clear", variant="secondary")
#         submit_button = gr.Button("Submit", variant="primary")
#     with gr.Row():
#         gr.Markdown("And see the story take shape here")
#         output_text = gr.Textbox(label="History")
#     submit_button.click(fn=show_output_text, inputs=input_text, outputs=output_text)
#     clear_button.click(fn=clear_textbox, outputs=[input_text, output_text])
# # Launch the interface
# demo.launch()
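# Only one UI should be active at a time: the plain gr.Interface above is the
# live one, and this Blocks version is kept as a commented-out alternative.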