ThomasBlumet committed
Commit 2616382 · 1 Parent(s): 4faf856

change model

Files changed (1): app.py (+19 -12)
app.py CHANGED
@@ -1,4 +1,4 @@
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, GPT2Model, GPT2Tokenizer
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 from transformers.utils import logging
 import gradio as gr
 #import spaces
@@ -7,15 +7,20 @@ import gradio as gr
 logger = logging.get_logger("transformers")
 
 # Load the model and tokenizer
-# model_name = "openai-community/gpt2" #"openai-community/gpt2" or "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ" or "TheBloke/Llama-2-7B-Chat-GGML" or "TheBloke/zephyr-7B-beta-GPTQ"
-# tokenizer = AutoTokenizer.from_pretrained(model_name)#,use_fast=True
-# model = AutoModelForCausalLM.from_pretrained(model_name)#,device_map="auto",trust_remote_code=False,revision="main")
-# tokenizer.pad_token_id = tokenizer.eos_token_id
+model_name = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ" #"openai-community/gpt2" or "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ" or "TheBloke/Llama-2-7B-Chat-GGML" or "TheBloke/zephyr-7B-beta-GPTQ"
+tokenizer = AutoTokenizer.from_pretrained(model_name,use_fast=True)
+model = AutoModelForCausalLM.from_pretrained(model_name,device_map="auto",trust_remote_code=False,revision="main")
+#tokenizer.pad_token_id = tokenizer.eos_token_id
 
 #transfer model on GPU
 #model.to("cuda")
-tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
-model = GPT2Model.from_pretrained('gpt2')
+pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
+                max_new_tokens=512,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.95,
+                top_k=40,
+                repetition_penalty=1.1)
 
 # Generate text using the model and tokenizer
 #@spaces.GPU(duration=60)
@@ -25,9 +30,11 @@ def generate_text(input_text):
     #output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)# attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
     #output = model.generate(input_ids) #, attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
     #return tokenizer.decode(output[0])
-    encoded_input = tokenizer(input_text, return_tensors='pt')
-    output = model(**encoded_input)
-    return output
+    return pipe(input_text)[0]["generated_text"]
+
+interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",title="TeLLMyStory",description="Enter your story idea and the model will generate the story based on it.")
+interface.launch()
+
 
 # Example of disabling Exllama backend (if applicable in your configuration)
 #config = {"disable_exllama": True}
@@ -54,7 +61,7 @@ def generate_text(input_text):
 # return None,None
 
 # # Create an input interface with Gradio
-interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",title="TeLLMyStory",description="Enter your story idea and the model will generate the story based on it.")
+
 # with gr.Blocks() as demo:
 # gr.Markdown("TeLLMyStory chatbot")
 # with gr.Row():
@@ -69,5 +76,5 @@ interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",title="
 # submit_button.click(fn=show_output_text, inputs=input_text,outputs=output_text)
 # clear_button.click(fn=clear_textbox,outputs=[input_text,output_text])
 # # Launch the interface
-interface.launch()
+
 
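
In summary, the commit drops the bare GPT2Model, whose forward pass returns hidden states rather than token ids (so generate_text could never produce readable text), and switches to TheBloke's GPTQ-quantized Mistral-7B-Instruct checkpoint driven through a text-generation pipeline. Below is a minimal consolidated sketch of app.py as it stands after this commit. It assumes the GPTQ runtime dependencies (optimum and auto-gptq) are installed; the one deliberate deviation from the committed code is passing the already-loaded model object to pipeline() instead of the model_name string, so the checkpoint is not resolved and loaded a second time.

# Consolidated app.py after this commit (sketch, not part of the diff).
# Assumes optimum and auto-gptq are installed for the GPTQ checkpoint.
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import gradio as gr

model_name = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto",
                                             trust_remote_code=False, revision="main")

# Deviation from the commit: reuse the loaded `model` object rather than
# passing model_name, which would make transformers load the weights again.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer,
                max_new_tokens=512, do_sample=True, temperature=0.7,
                top_p=0.95, top_k=40, repetition_penalty=1.1)

def generate_text(input_text):
    # The pipeline returns a list of dicts; "generated_text" includes the
    # prompt. Pass return_full_text=False to pipe() to strip it if desired.
    return pipe(input_text)[0]["generated_text"]

interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",
                         title="TeLLMyStory",
                         description="Enter your story idea and the model will generate the story based on it.")
interface.launch()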
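
The surviving comment about disabling the Exllama backend also becomes relevant with this model: transformers enables the ExLlama kernels by default for 4-bit GPTQ checkpoints, and they are only supported when the weights sit entirely on GPU. A sketch of the supported way to turn them off, assuming a transformers version whose GPTQConfig still accepts disable_exllama (newer releases renamed it to use_exllama):

# Hypothetical sketch: turning off the ExLlama GPTQ kernels, e.g. for
# (partial) CPU execution. The keyword is version-dependent: older
# transformers use disable_exllama=True, newer ones use_exllama=False.
from transformers import AutoModelForCausalLM, GPTQConfig

quantization_config = GPTQConfig(bits=4, disable_exllama=True)
model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ",
    device_map="auto",
    quantization_config=quantization_config,
)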