File size: 1,063 Bytes
25e2a11
 
6e1ffb9
 
 
25e2a11
db50e26
 
 
9a4d588
2739751
25e2a11
 
 
9940f62
 
6e1ffb9
 
a2cc32e
9940f62
a2cc32e
 
25e2a11
22e320d
25e2a11
 
22e320d
 
 
 
25e2a11
9940f62
6e1ffb9
22e320d
 
9940f62
 
25e2a11
891d7a5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import io
import json
import os

import gradio as gr
from transformers import LlamaTokenizer


os.system("pip uninstall -y gradio")
os.system("pip install gradio==4.9.0")

print("grd version:", gr.__version__)

# Load the tokenizer from the specific folder
tokenizer = LlamaTokenizer.from_pretrained("llama_tokenizer")

def tokenize(input_text, file=None):
    if file:
        with open(file, encoding="utf-8") as f:
            full_text = "".join(f.readlines())
    else:
        full_text = input_text

    tokens = tokenizer.encode(full_text, add_special_tokens=False)
    num_tokens = len(tokens)
    return num_tokens

with gr.Blocks() as demo:
    gr.Markdown(
    """
    # Token Counter for LLAMA
    """)
    with gr.Row():
        text_input = gr.Textbox(placeholder="Enter prompt")
        file_input = gr.File(label="Upload File", type="filepath")
        with gr.Column():
            out = gr.Textbox(label="Number of tokens")
            run_btn = gr.Button("Run")
    run_btn.click(fn=tokenize, inputs=[text_input, file_input], outputs=out)

demo.launch()