# llama_tokenizer / app.py
# (Hugging Face Spaces file — metadata: teragron, "Update app.py", a2cc32e, 1.14 kB)
import gradio as gr
from transformers import LlamaTokenizer
import io
import json
# Load the LLaMA tokenizer from the local "llama_tokenizer" directory
# (bundled with this Space) so token counts match the LLaMA vocabulary.
tokenizer = LlamaTokenizer.from_pretrained("llama_tokenizer")
def tokenize(prompt, file):
    """Count LLaMA tokens in the prompt plus the text of an optional upload.

    Args:
        prompt: Text entered by the user.
        file: Optional Gradio file object (or None). If the file parses as a
            JSON object, its "text" field is appended to the prompt; otherwise
            the raw file content is appended as plain text.

    Returns:
        int: number of tokens (special tokens excluded).
    """
    if file is not None:
        content = file.read()
        # Gradio file objects are opened in binary mode, so decode bytes
        # before doing any string work ("replace" keeps us crash-free on
        # odd encodings).
        if isinstance(content, bytes):
            content = content.decode("utf-8", errors="replace")
        try:
            data = json.loads(content)
        except ValueError:
            # Not JSON: count the whole file as plain text instead of crashing.
            extra = content
        else:
            # Only a JSON *object* can carry a "text" field; a top-level
            # list/number/string has nothing to extract.
            extra = data.get("text", "") if isinstance(data, dict) else ""
        full_text = prompt + "\n" + extra
    else:
        full_text = prompt
    tokens = tokenizer.encode(full_text, add_special_tokens=False)
    return len(tokens)
# Build and launch the Gradio UI: a prompt box plus optional file upload on
# one row, and a read-only token-count box with a Run button in a column.
with gr.Blocks() as demo:
    gr.Markdown(
        """
    # Token Counter for LLAMA
    """)
    with gr.Row():
        prompt_box = gr.Textbox(placeholder="Enter prompt")
        upload = gr.File(label="Upload File", type="file")
        with gr.Column():
            count_box = gr.Textbox(label="Number of tokens")
            run_btn = gr.Button("Run")
    # Wire the button to the tokenizer: two inputs in, one count out.
    run_btn.click(fn=tokenize, inputs=[prompt_box, upload], outputs=count_box)
demo.launch()