from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from transformers.utils import logging
import gradio as gr
#import spaces  # only needed when running on Hugging Face ZeroGPU Spaces

# Define the logger instance for the transformers library
logger = logging.get_logger("transformers")
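# The logger is not used below; it is available for diagnostics, e.g.
# logging.set_verbosity_info() would surface transformers' info-level
# messages (a hypothetical usage, not enabled in this app).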
# Load the model and tokenizer
model_name = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"  # alternatives: "openai-community/gpt2", "TheBloke/Llama-2-7B-Chat-GGML", "TheBloke/zephyr-7B-beta-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=False, revision="main")
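# Note (assumption about the runtime environment): GPTQ checkpoints such as
# this one require a CUDA GPU plus the optimum and auto-gptq packages; for a
# CPU-only smoke test, an unquantized model like "openai-community/gpt2"
# could be substituted above.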
#tokenizer.pad_token_id = tokenizer.eos_token_id
# Moving the model to the GPU manually is unnecessary here:
# device_map="auto" already places it on the available device(s).
#model.to("cuda")
# pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
#                 max_new_tokens=512,
#                 do_sample=True,
#                 temperature=0.7,
#                 top_p=0.95,
#                 top_k=40,
#                 repetition_penalty=1.1)
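# Note: the commented-out pipeline above bundles tokenization, generation,
# and decoding into one call; the manual generate_text() below keeps those
# steps explicit (one plausible reason the pipeline path was left disabled).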
# Generate text using the model and tokenizer
#@spaces.GPU(duration=60)
def generate_text(input_text):
    # Tokenize the prompt and move it to the same device as the model
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
    #attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
    output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
    # Decode the generated ids, dropping special tokens such as <s> and </s>
    return tokenizer.decode(output[0], skip_special_tokens=True)
    #return pipe(input_text)[0]["generated_text"]
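# Example (hypothetical prompt):
#   generate_text("Once upon a time, a dragon guarded a library")
# returns the prompt followed by up to 512 sampled continuation tokens.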
interface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    title="TeLLMyStory",
    description="Enter your story idea and the model will generate a story based on it.",
)
interface.launch()
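# launch() serves the app on a local URL by default; passing share=True would
# also create a temporary public Gradio link (not enabled here).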
# Example of disabling the ExLlama backend (if applicable in your configuration).
# Note that updating model.config after loading, as sketched below, may not take
# effect; the backend is normally chosen at load time (see the sketch that follows).
#config = {"disable_exllama": True}
#model.config.update(config)
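# Load-time sketch (assumes transformers >= 4.32 with optimum and auto-gptq
# installed; GPTQConfig and its disable_exllama flag come from transformers):
# from transformers import GPTQConfig
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     quantization_config=GPTQConfig(bits=4, disable_exllama=True),
# )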
# def generate_text(prompt):
#     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512, padding="max_length")
#     summary_ids = model.generate(inputs["input_ids"], max_new_tokens=512, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
#     return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
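# Note: this commented-out variant decodes with beam search (num_beams=4,
# early_stopping=True) for more deterministic output than the sampling
# settings used in the active generate_text above.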
# # For training the model after the data is collected:
# # model.save_pretrained("model")
# # tokenizer.save_pretrained("model")
# # App helper functions
# history = []  # conversation history; must be defined before show_output_text runs
# def show_output_text(message):
#     history.append((message, ""))
#     story = generate_text(message)
#     history[-1] = (message, story)
#     return story

# def clear_textbox():
#     return None, None

# # Create the input interface with Gradio
# with gr.Blocks() as demo:
#     gr.Markdown("TeLLMyStory chatbot")
#     with gr.Row():
#         input_text = gr.Textbox(label="Enter your story idea here", placeholder="Once upon a time...")
#         clear_button = gr.Button("Clear", variant="secondary")
#         submit_button = gr.Button("Submit", variant="primary")
#     with gr.Row():
#         gr.Markdown("And see the story take shape here")
#         output_text = gr.Textbox(label="History")
#     submit_button.click(fn=show_output_text, inputs=input_text, outputs=output_text)
#     clear_button.click(fn=clear_textbox, outputs=[input_text, output_text])
# # Launch the interface
# demo.launch()
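# Only one UI should be active at a time: the plain gr.Interface above is the
# live one, and this Blocks version is kept as a commented-out alternative.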