teragron committed on
Commit
04a6c29
·
1 Parent(s): 22e320d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -6
app.py CHANGED
@@ -1,25 +1,34 @@
1
  import gradio as gr
2
  from transformers import LlamaTokenizer
 
3
 
4
  # Load the tokenizer from the specific folder
5
  tokenizer = LlamaTokenizer.from_pretrained("llama_tokenizer")
6
 
7
-
8
- def tokenize(prompt):
9
- tokens = tokenizer.encode(prompt, add_special_tokens=False)
10
  num_tokens = len(tokens)
11
  return num_tokens
12
 
 
 
 
 
 
 
 
 
 
13
  with gr.Blocks() as demo:
14
  gr.Markdown(
15
  """
16
  # Token Counter for LLAMA
17
  """)
18
  with gr.Row():
19
- inp = gr.Textbox(placeholder="Enter prompt")
20
  with gr.Column():
21
  out = gr.Textbox(label="Number of tokens")
22
  btn = gr.Button("Run")
23
- btn.click(fn=tokenize, inputs=inp, outputs=out)
24
 
25
- demo.launch()
 
1
import json
import os

import gradio as gr
from transformers import LlamaTokenizer
 
# Load the tokenizer from the local "llama_tokenizer" folder.
# NOTE(review): assumes the folder ships with the Space/app checkout — confirm.
tokenizer = LlamaTokenizer.from_pretrained("llama_tokenizer")
7
 
8
def tokenize(input_text):
    """Return the number of tokens the LLaMA tokenizer produces for *input_text*.

    Special tokens (BOS/EOS) are excluded so the count reflects only the
    prompt content itself.
    """
    return len(tokenizer.encode(input_text, add_special_tokens=False))
12
 
13
def count_tokens_from_file(file_path):
    """Count LLaMA tokens for the given input.

    If *file_path* names an existing file, its contents are read (UTF-8)
    and tokenized; otherwise the string itself is treated as the prompt,
    matching the UI hint "Enter prompt or file path".

    Returns:
        int: the token count, or
        str: the error message if the file exists but cannot be read.
    """
    try:
        if os.path.isfile(file_path):
            # Input is a path on disk: tokenize the file's contents.
            with open(file_path, 'r', encoding='utf-8') as file:
                input_text = file.read()
        else:
            # Not an existing file: treat the raw input as the prompt,
            # so typing a plain prompt still yields a token count.
            input_text = file_path
        return tokenize(input_text)
    except Exception as e:
        # Best-effort: surface the error text in the output box
        # instead of crashing the Gradio app.
        return str(e)
22
  with gr.Blocks() as demo:
23
  gr.Markdown(
24
  """
25
  # Token Counter for LLAMA
26
  """)
27
  with gr.Row():
28
+ inp = gr.Textbox(placeholder="Enter prompt or file path")
29
  with gr.Column():
30
  out = gr.Textbox(label="Number of tokens")
31
  btn = gr.Button("Run")
32
+ btn.click(fn=count_tokens_from_file, inputs=inp, outputs=out)
33
 
34
+ demo.launch()