import gradio as gr
# import spaces  # only needed for the @spaces.GPU decorator on ZeroGPU hardware
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
device = "cuda:0" if torch.cuda.is_available() else "cpu"

repo_id = "j2moreno/test-model"
model = AutoModelForCausalLM.from_pretrained(repo_id).to(device)
tokenizer = AutoTokenizer.from_pretrained(repo_id)
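# Many causal-LM checkpoints ship without a pad token, which would break the
# padding=True call in generate_response below; falling back to EOS is a
# common convention. Whether j2moreno/test-model needs this is an assumption.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token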
SEED = 42

default_text = "Ask me about Leonardo Moreno"
title = "Who is Leonardo Moreno"
description = """
Ask questions about Leonardo Moreno. Replies are generated by the
`j2moreno/test-model` checkpoint loaded above.
"""
css = """.toast-wrap { display: none !important } """
examples = [
    ["Who is Leonardo Moreno?"],
    ["Describe Leonardo Moreno's professional background."],
    ["What projects has Leonardo Moreno worked on?"],
    ["What are Leonardo Moreno's core technical skills?"],
    ["How has Leonardo Moreno integrated AI in his work?"],
]
# Optional like/dislike handler; re-enable together with the
# chatbot_stream.like(...) call inside the Blocks context below.
# def vote(data: gr.LikeData):
#     if data.liked:
#         print("You upvoted this response: " + data.value)
#     else:
#         print("You downvoted this response: " + data.value)
# @spaces.GPU  # uncomment (with `import spaces`) when running on ZeroGPU hardware
def generate_response(message, history, system_prompt=""):
    set_seed(SEED)
    # Tokenize the user message and move the tensors to the same device as the model.
    inputs = tokenizer(
        message, return_tensors="pt", padding=True, truncation=True, max_length=128
    ).to(device)
    print(inputs)  # debug: inspect the tokenized prompt
    output_sequences = model.generate(**inputs, max_length=1024, num_return_sequences=1)
    # Decode only the newly generated tokens so the reply does not echo the prompt.
    new_tokens = output_sequences[0][inputs["input_ids"].shape[-1]:]
    decoded_output = tokenizer.decode(new_tokens, skip_special_tokens=True)
    print(decoded_output)  # debug: inspect the decoded reply
    # gr.ChatInterface accepts generator functions; this yields the whole reply
    # in one chunk rather than streaming token by token.
    yield decoded_output
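# Quick smoke test outside the Gradio UI (run in a REPL; assumes the
# checkpoint above loads successfully):
#     for chunk in generate_response("Who is Leonardo Moreno?", []):
#         print(chunk)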
chatbot_stream = gr.Chatbot()

chat_interface_stream = gr.ChatInterface(
    generate_response,
    title=title,
    description=description,
    textbox=gr.Textbox(),
    chatbot=chatbot_stream,
    css=css,
    examples=examples,
    # cache_examples=True,
    # additional_inputs=additional_inputs,
)
# Gradio demo
with gr.Blocks() as demo:
    # streaming chatbot
    # chatbot_stream.like(vote, None, None)
    chat_interface_stream.render()

if __name__ == "__main__":
    demo.queue().launch(share=True)