# llama_tokenizer / app.py
# (Hugging Face Spaces file — metadata: teragron, "Update app.py", a2cc32e, 1.14 kB)
import gradio as gr
from transformers import LlamaTokenizer
import io
import json
# Load the LLaMA tokenizer from the local "llama_tokenizer" directory
# (bundled with this Space) so token counts match the LLaMA vocabulary.
tokenizer = LlamaTokenizer.from_pretrained("llama_tokenizer")
def tokenize(prompt, file):
    """Count LLaMA tokens in the prompt plus the text of an optional upload.

    Args:
        prompt: Text entered by the user.
        file: Optional Gradio file object (or None). If the file parses as a
            JSON object, its "text" field is appended to the prompt; otherwise
            the raw file content is appended as plain text.

    Returns:
        int: number of tokens (special tokens excluded).
    """
    if file is not None:
        content = file.read()
        # Gradio file objects are opened in binary mode, so decode bytes
        # before doing any string work ("replace" keeps us crash-free on
        # odd encodings).
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="replace")
        try:
            data = json.loads(content)
        except ValueError:
            # Not JSON: count the whole file as plain text instead of crashing.
            extra = content
        else:
            # Only a JSON *object* can carry a "text" field; a top-level
            # list/number/string has nothing to extract.
            extra = data.get("text", "") if isinstance(data, dict) else ""
        full_text = prompt + "\n" + extra
    else:
        full_text = prompt
    tokens = tokenizer.encode(full_text, add_special_tokens=False)
    return len(tokens)
# Build and launch the Gradio UI: a prompt box plus optional file upload on
# one row, and a read-only token-count box with a Run button in a column.
with gr.Blocks() as demo:
    gr.Markdown(
        """
    # Token Counter for LLAMA
    """)
    with gr.Row():
        prompt_box = gr.Textbox(placeholder="Enter prompt")
        upload = gr.File(label="Upload File", type="file")
        with gr.Column():
            count_box = gr.Textbox(label="Number of tokens")
            run_btn = gr.Button("Run")
    # Wire the button to the tokenizer: two inputs in, one count out.
    run_btn.click(fn=tokenize, inputs=[prompt_box, upload], outputs=count_box)
demo.launch()