File size: 1,141 Bytes
25e2a11
 
a2cc32e
04a6c29
25e2a11
 
 
 
a2cc32e
 
 
 
 
 
 
 
 
 
 
 
25e2a11
22e320d
25e2a11
 
22e320d
 
 
 
25e2a11
a2cc32e
 
22e320d
 
 
a2cc32e
25e2a11
04a6c29
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import gradio as gr
from transformers import LlamaTokenizer
import io  # NOTE(review): unused in this file — candidate for removal
import json

# Load the tokenizer from the specific folder.
# Assumes a "llama_tokenizer" directory with tokenizer files exists relative
# to the working directory — TODO confirm the path at deploy time.
tokenizer = LlamaTokenizer.from_pretrained("llama_tokenizer")

def tokenize(prompt, file):
    """Count LLaMA tokens in the prompt, optionally appended with file text.

    Args:
        prompt: Free text from the user (``None``/empty is treated as "").
        file: Optional Gradio upload. Depending on the ``gr.File`` ``type``
            setting Gradio passes either a tempfile-like object exposing a
            ``.name`` path or a plain filepath string — both are handled.
            The upload is expected to be JSON; its ``"text"`` field (if any)
            is appended to the prompt.

    Returns:
        int: number of tokens, special tokens excluded.
    """
    full_text = prompt or ""
    if file is not None:
        # Gradio upload objects arrive as *closed* temp files, so calling
        # file.read() directly raises "I/O operation on closed file".
        # Re-open by path; fall back to treating `file` itself as a path
        # string (the behavior of gr.File(type="filepath")).
        path = getattr(file, "name", file)
        with open(path, "r", encoding="utf-8") as fh:
            content = fh.read()
        data = json.loads(content)
        # Tolerate a non-dict JSON top level instead of crashing on .get().
        extra = data.get("text", "") if isinstance(data, dict) else ""
        full_text = full_text + "\n" + extra

    tokens = tokenizer.encode(full_text, add_special_tokens=False)
    return len(tokens)

# Assemble the Gradio UI: prompt textbox and file upload in a row, with the
# token-count output and Run button stacked in a column alongside them.
with gr.Blocks() as app:
    gr.Markdown(
    """
    # Token Counter for LLAMA
    """)
    with gr.Row():
        prompt_box = gr.Textbox(placeholder="Enter prompt")
        upload = gr.File(label="Upload File", type="file")
        with gr.Column():
            count_box = gr.Textbox(label="Number of tokens")
            run_btn = gr.Button("Run")
    # Clicking Run feeds the prompt and the upload into tokenize() and
    # writes the returned count into the output textbox.
    run_btn.click(fn=tokenize, inputs=[prompt_box, upload], outputs=count_box)

app.launch()