import gradio as gr
# import spaces  # only needed for the @spaces.GPU decorator on ZeroGPU hardware
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
device = "cuda:0" if torch.cuda.is_available() else "cpu"

repo_id = "j2moreno/test-model"
model = AutoModelForCausalLM.from_pretrained(repo_id).to(device)
tokenizer = AutoTokenizer.from_pretrained(repo_id)
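# Many causal-LM checkpoints ship without a pad token, which would break the
# padding=True call in generate_response below; falling back to EOS is a
# common convention. Whether j2moreno/test-model needs this is an assumption.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token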
SEED = 42

default_text = "Ask me about Leonardo Moreno"
title = "Who is Leonardo Moreno"
description = """
Ask questions about Leonardo Moreno. Replies are generated by the
`j2moreno/test-model` checkpoint loaded above.
"""
css = """.toast-wrap { display: none !important } """
examples = [
    ["Who is Leonardo Moreno?"],
    ["Describe Leonardo Moreno's professional background."],
    ["What projects has Leonardo Moreno worked on?"],
    ["What are Leonardo Moreno's core technical skills?"],
    ["How has Leonardo Moreno integrated AI in his work?"],
]
# Optional like/dislike handler; re-enable together with the
# chatbot_stream.like(...) call inside the Blocks context below.
# def vote(data: gr.LikeData):
#     if data.liked:
#         print("You upvoted this response: " + data.value)
#     else:
#         print("You downvoted this response: " + data.value)
# @spaces.GPU  # uncomment (with `import spaces`) when running on ZeroGPU hardware
def generate_response(message, history, system_prompt=""):
    set_seed(SEED)
    # Tokenize the user message and move the tensors to the same device as the model.
    inputs = tokenizer(
        message, return_tensors="pt", padding=True, truncation=True, max_length=128
    ).to(device)
    print(inputs)  # debug: inspect the tokenized prompt
    output_sequences = model.generate(**inputs, max_length=1024, num_return_sequences=1)
    # Decode only the newly generated tokens so the reply does not echo the prompt.
    new_tokens = output_sequences[0][inputs["input_ids"].shape[-1]:]
    decoded_output = tokenizer.decode(new_tokens, skip_special_tokens=True)
    print(decoded_output)  # debug: inspect the decoded reply
    # gr.ChatInterface accepts generator functions; this yields the whole reply
    # in one chunk rather than streaming token by token.
    yield decoded_output
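# Quick smoke test outside the Gradio UI (run in a REPL; assumes the
# checkpoint above loads successfully):
#     for chunk in generate_response("Who is Leonardo Moreno?", []):
#         print(chunk)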
chatbot_stream = gr.Chatbot()

chat_interface_stream = gr.ChatInterface(
    generate_response,
    title=title,
    description=description,
    textbox=gr.Textbox(),
    chatbot=chatbot_stream,
    css=css,
    examples=examples,
    # cache_examples=True,
    # additional_inputs=additional_inputs,
)
# Gradio demo
with gr.Blocks() as demo:
    # streaming chatbot
    # chatbot_stream.like(vote, None, None)
    chat_interface_stream.render()

if __name__ == "__main__":
    demo.queue().launch(share=True)