from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from transformers.utils import logging
import gradio as gr
#import spaces

# Logger instance for the transformers library
logger = logging.get_logger("transformers")

# Load the model and tokenizer.
# Alternatives tried: "openai-community/gpt2", "TheBloke/Llama-2-7B-Chat-GGML", "TheBloke/zephyr-7B-beta-GPTQ"
model_name = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=False,
    revision="main",
)
#tokenizer.pad_token_id = tokenizer.eos_token_id
# device_map="auto" already places the model on the GPU, so no explicit .to("cuda") is needed
#model.to("cuda")

# Text-generation pipeline; pass the already-loaded model (not model_name)
# so the checkpoint is not loaded a second time.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1,
)

# Generate text using the pipeline
#@spaces.GPU(duration=60)
def generate_text(input_text):
    # Earlier, lower-level variant kept for reference:
    #input_ids = tokenizer.encode(input_text, return_tensors="pt")#.to("cuda")
    #attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
    #output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
    #return tokenizer.decode(output[0])
    return pipe(input_text)[0]["generated_text"]

interface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    title="TeLLMyStory",
    description="Enter your story idea and the model will generate the story based on it.",
)
interface.launch()

# Example of disabling the Exllama backend (if applicable in your configuration)
#config = {"disable_exllama": True}
#model.config.update(config)

# def generate_text(prompt):
#     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512, padding="max_length")
#     summary_ids = model.generate(inputs["input_ids"], max_new_tokens=512, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
#     return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# # For training the model after the data is collected
# #model.save_pretrained("model")
# #tokenizer.save_pretrained("model")

# # App functions for the Blocks-based UI
# history = []
#
# def show_output_text(message):
#     history.append((message, ""))
#     story = generate_text(message)
#     history[-1] = (message, story)
#     return story
#
# def clear_textbox():
#     return None, None
#
# # Create the input interface with Gradio
# with gr.Blocks() as demo:
#     gr.Markdown("TeLLMyStory chatbot")
#     with gr.Row():
#         input_text = gr.Textbox(label="Enter your story idea here", placeholder="Once upon a time...")
#         clear_button = gr.Button("Clear", variant="secondary")
#         submit_button = gr.Button("Submit", variant="primary")
#     with gr.Row():
#         gr.Markdown("And see the story take shape here")
#         output_text = gr.Textbox(label="History")
#     submit_button.click(fn=show_output_text, inputs=input_text, outputs=output_text)
#     clear_button.click(fn=clear_textbox, outputs=[input_text, output_text])
# # Launch the interface
# demo.launch()
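
# The "disable_exllama" note above only sketches a plain dict; for a GPTQ checkpoint this
# is usually handled at load time instead. The block below is a minimal sketch, assuming the
# transformers GPTQ integration (GPTQConfig) is available; it is not part of the original app,
# and the exact flag name is version-dependent (older releases use disable_exllama=True,
# newer ones use use_exllama=False).
#
# from transformers import GPTQConfig
#
# # Disable the ExLlama kernels when loading the quantized model, e.g. on hardware
# # or setups where ExLlama is unsupported.
# quantization_config = GPTQConfig(bits=4, disable_exllama=True)
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     revision="main",
#     quantization_config=quantization_config,
# )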