from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from transformers.utils import logging
import gradio as gr
#import spaces
# Define the logger instance for the transformers library
logger = logging.get_logger("transformers")
# Load the model and tokenizer
model_name = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ" #"openai-community/gpt2" or "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ" or "TheBloke/Llama-2-7B-Chat-GGML" or "TheBloke/zephyr-7B-beta-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=False, revision="main")
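# Note: loading a GPTQ-quantized checkpoint like this one via transformers generally assumes the optimum and auto-gptq packages are installed alongside transformers.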
#tokenizer.pad_token_id = tokenizer.eos_token_id
#transfer model on GPU
#model.to("cuda")
# pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
#                 max_new_tokens=512,
#                 do_sample=True,
#                 temperature=0.7,
#                 top_p=0.95,
#                 top_k=40,
#                 repetition_penalty=1.1)
# Generate text using the model and tokenizer
#@spaces.GPU(duration=60)
def generate_text(input_text):
    input_ids = tokenizer.encode(input_text, return_tensors="pt")  #.to("cuda")
    #attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
    output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
    #output = model.generate(input_ids, attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
    return tokenizer.decode(output[0])
    #return pipe(input_text)[0]["generated_text"]
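# Note (sketch, not part of the original flow): tokenizer.decode(output[0]) returns the
# prompt followed by the generated continuation, with special tokens kept. To return
# only the newly generated text, one could instead decode the generated slice, e.g.:
# tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)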
interface = gr.Interface(fn=generate_text, inputs="text", outputs="text", title="TeLLMyStory", description="Enter your story idea and the model will generate the story based on it.")
interface.launch()
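# interface.launch() starts the Gradio server; on a Hugging Face Space the app is served automatically from this call.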
# Example of disabling Exllama backend (if applicable in your configuration)
#config = {"disable_exllama": True}
#model.config.update(config)
# def generate_text(prompt):
#     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512, padding="max_length")
#     summary_ids = model.generate(inputs["input_ids"], max_new_tokens=512, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
#     return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
# #for training the model after the data is collected
# #model.save_pretrained("model")
# #tokenizer.save_pretrained("model")
# #for the app functions
# def show_output_text(message):
#     history.append((message, ""))
#     story = generate_text(message)
#     history[-1] = (message, story)
#     return story
# def clear_textbox():
#     return None, None
# # Create an input interface with Gradio
# with gr.Blocks() as demo:
#     gr.Markdown("TeLLMyStory chatbot")
#     with gr.Row():
#         input_text = gr.Textbox(label="Enter your story idea here", placeholder="Once upon a time...")
#         clear_button = gr.Button("Clear", variant="secondary")
#         submit_button = gr.Button("Submit", variant="primary")
#     with gr.Row():
#         gr.Markdown("And see the story take shape here")
#         output_text = gr.Textbox(label="History")
#     submit_button.click(fn=show_output_text, inputs=input_text, outputs=output_text)
#     clear_button.click(fn=clear_textbox, outputs=[input_text, output_text])
# # Launch the interface
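# # (Assumed completion: the launch call for the commented-out Blocks demo above.)
# demo.launch()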