import gradio as gr
# def greet(name):
#     return "Hello " + name + "!!"
# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
# iface.launch()
# import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
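
# Pick the GPU when one is available and load the fine-tuned model and tokenizer from the Hub.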
device = "cuda:0" if torch.cuda.is_available() else "cpu"
repo_id = "j2moreno/test-model"
model = AutoModelForCausalLM.from_pretrained(repo_id).to(device)
tokenizer = AutoTokenizer.from_pretrained(repo_id)
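
# Chat UI configuration: a fixed seed for reproducible generations, plus the title,
# description, styling, and example prompts used by the ChatInterface below.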
SEED = 42
default_text = "Ask me about Leonardo Moreno"
title = "Who is Leonardo Moreno"
description = """
This Space is... [placeholder]
"""
css = """.toast-wrap { display: none !important } """
examples = [
    ["Who is Leonardo Moreno?"],
    ["Describe Leonardo Moreno's professional background."],
    ["What projects has Leonardo Moreno worked on?"],
    ["What are Leonardo Moreno's core technical skills?"],
    ["How has Leonardo Moreno integrated AI in his work?"],
]
# def vote(data: gr.LikeData):
#     if data.liked:
#         print("You upvoted this response: " + data.value)
#     else:
#         print("You downvoted this response: " + data.value)
# @spaces.GPU
def generate_response(message, history, system_prompt=""):
    """Generate one reply for the ChatInterface; `history` and `system_prompt` are currently unused."""
    set_seed(SEED)
    # Tokenize the user message and move the tensors to the same device as the model.
    tokenized_prompt = tokenizer(message, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
    output_sequences = model.generate(**tokenized_prompt, max_length=1024, num_return_sequences=1)
    # Drop the prompt tokens so only the newly generated text is shown in the chat.
    generated_tokens = output_sequences[0][tokenized_prompt["input_ids"].shape[-1]:]
    decoded_output = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    yield decoded_output
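
# A minimal sketch of true token-by-token streaming (not enabled here), assuming
# transformers' TextIteratorStreamer; generate_response_streaming is a hypothetical
# name and would replace generate_response above if adopted:
#
# from threading import Thread
# from transformers import TextIteratorStreamer
#
# def generate_response_streaming(message, history, system_prompt=""):
#     set_seed(SEED)
#     inputs = tokenizer(message, return_tensors="pt", truncation=True, max_length=128).to(device)
#     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
#     thread = Thread(target=model.generate, kwargs=dict(**inputs, max_length=1024, streamer=streamer))
#     thread.start()
#     partial = ""
#     for new_text in streamer:
#         partial += new_text
#         yield partial

# Wire the generator into the chat UI.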
chatbot_stream = gr.Chatbot()
chat_interface_stream = gr.ChatInterface(
    generate_response,
    title=title,
    description=description,
    textbox=gr.Textbox(),
    chatbot=chatbot_stream,
    css=css,
    examples=examples,
    # cache_examples=True,
    # additional_inputs=additional_inputs,
)
# Gradio Demo
with gr.Blocks() as demo:
    # streaming chatbot
    # chatbot_stream.like(vote, None, None)
    chat_interface_stream.render()

if __name__ == "__main__":
    demo.queue().launch(share=True)