ThomasBlumet committed
Commit 2616382 · 1 Parent(s): 4faf856

change model

Files changed (1): app.py (+19 -12)
app.py CHANGED
@@ -1,4 +1,4 @@
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, GPT2Model, GPT2Tokenizer
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 from transformers.utils import logging
 import gradio as gr
 #import spaces
@@ -7,15 +7,20 @@ import gradio as gr
 logger = logging.get_logger("transformers")
 
 # Load the model and tokenizer
-# model_name = "openai-community/gpt2" #"openai-community/gpt2" or "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ" or "TheBloke/Llama-2-7B-Chat-GGML" or "TheBloke/zephyr-7B-beta-GPTQ"
-# tokenizer = AutoTokenizer.from_pretrained(model_name)#,use_fast=True
-# model = AutoModelForCausalLM.from_pretrained(model_name)#,device_map="auto",trust_remote_code=False,revision="main")
-# tokenizer.pad_token_id = tokenizer.eos_token_id
+model_name = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ" #"openai-community/gpt2" or "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ" or "TheBloke/Llama-2-7B-Chat-GGML" or "TheBloke/zephyr-7B-beta-GPTQ"
+tokenizer = AutoTokenizer.from_pretrained(model_name,use_fast=True)
+model = AutoModelForCausalLM.from_pretrained(model_name,device_map="auto",trust_remote_code=False,revision="main")
+#tokenizer.pad_token_id = tokenizer.eos_token_id
 
 #transfer model on GPU
 #model.to("cuda")
-tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
-model = GPT2Model.from_pretrained('gpt2')
+pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
+                max_new_tokens=512,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.95,
+                top_k=40,
+                repetition_penalty=1.1)
 
 # Generate text using the model and tokenizer
 #@spaces.GPU(duration=60)
@@ -25,9 +30,11 @@ def generate_text(input_text):
     #output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)# attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
     #output = model.generate(input_ids) #, attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
     #return tokenizer.decode(output[0])
-    encoded_input = tokenizer(input_text, return_tensors='pt')
-    output = model(**encoded_input)
-    return output
+    return pipe(input_text)[0]["generated_text"]
+
+interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",title="TeLLMyStory",description="Enter your story idea and the model will generate the story based on it.")
+interface.launch()
+
 
 # Example of disabling Exllama backend (if applicable in your configuration)
 #config = {"disable_exllama": True}
@@ -54,7 +61,7 @@ def generate_text(input_text):
 # return None,None
 
 # # Create an input interface with Gradio
-interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",title="TeLLMyStory",description="Enter your story idea and the model will generate the story based on it.")
+
 # with gr.Blocks() as demo:
 # gr.Markdown("TeLLMyStory chatbot")
 # with gr.Row():
@@ -69,5 +76,5 @@ interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",title="
 # submit_button.click(fn=show_output_text, inputs=input_text,outputs=output_text)
 # clear_button.click(fn=clear_textbox,outputs=[input_text,output_text])
 # # Launch the interface
-interface.launch()
+
 
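
In summary, the commit drops the bare GPT2Model, whose forward pass returns hidden states rather than token ids (so generate_text could never produce readable text), and switches to TheBloke's GPTQ-quantized Mistral-7B-Instruct checkpoint driven through a text-generation pipeline. Below is a minimal consolidated sketch of app.py as it stands after this commit. It assumes the GPTQ runtime dependencies (optimum and auto-gptq) are installed; the one deliberate deviation from the committed code is passing the already-loaded model object to pipeline() instead of the model_name string, so the checkpoint is not resolved and loaded a second time.

# Consolidated app.py after this commit (sketch, not part of the diff).
# Assumes optimum and auto-gptq are installed for the GPTQ checkpoint.
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import gradio as gr

model_name = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto",
                                             trust_remote_code=False, revision="main")

# Deviation from the commit: reuse the loaded `model` object rather than
# passing model_name, which would make transformers load the weights again.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer,
                max_new_tokens=512, do_sample=True, temperature=0.7,
                top_p=0.95, top_k=40, repetition_penalty=1.1)

def generate_text(input_text):
    # The pipeline returns a list of dicts; "generated_text" includes the
    # prompt. Pass return_full_text=False to pipe() to strip it if desired.
    return pipe(input_text)[0]["generated_text"]

interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",
                         title="TeLLMyStory",
                         description="Enter your story idea and the model will generate the story based on it.")
interface.launch()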
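
The surviving comment about disabling the Exllama backend also becomes relevant with this model: transformers enables the ExLlama kernels by default for 4-bit GPTQ checkpoints, and they are only supported when the weights sit entirely on GPU. A sketch of the supported way to turn them off, assuming a transformers version whose GPTQConfig still accepts disable_exllama (newer releases renamed it to use_exllama):

# Hypothetical sketch: turning off the ExLlama GPTQ kernels, e.g. for
# (partial) CPU execution. The keyword is version-dependent: older
# transformers use disable_exllama=True, newer ones use_exllama=False.
from transformers import AutoModelForCausalLM, GPTQConfig

quantization_config = GPTQConfig(bits=4, disable_exllama=True)
model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ",
    device_map="auto",
    quantization_config=quantization_config,
)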