teragron committed on
Commit
a2cc32e
·
1 Parent(s): 04a6c29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -13
app.py CHANGED
@@ -1,34 +1,37 @@
1
  import gradio as gr
2
  from transformers import LlamaTokenizer
 
3
  import json
4
 
5
  # Load the tokenizer from the specific folder
6
  tokenizer = LlamaTokenizer.from_pretrained("llama_tokenizer")
7
 
8
def tokenize(input_text):
    """Return the number of tokens the module-level tokenizer yields for ``input_text``.

    The text is encoded with ``add_special_tokens=False``, so only tokens
    produced from the text itself are counted.
    """
    return len(tokenizer.encode(input_text, add_special_tokens=False))
12
 
13
def count_tokens_from_file(file_path):
    """Count the tokens in the text file located at ``file_path``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the default encoding of the machine running the app.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The token count returned by ``tokenize``. If reading the file or
        tokenizing raises, the exception message is returned as a string
        instead (best-effort behaviour: the caller displays whatever comes
        back).
    """
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            input_text = file.read()
        # The handle is already closed here; tokenizing needs only the text.
        return tokenize(input_text)
    except Exception as e:
        # Deliberate catch-all: report the problem as text instead of raising.
        return str(e)
21
-
# Page layout: a heading, a row holding the input box and the result column,
# and a button that triggers the count.
with gr.Blocks() as demo:
    gr.Markdown(
    """
    # Token Counter for LLAMA
    """)
    with gr.Row():
        inp = gr.Textbox(placeholder="Enter prompt or file path")
        with gr.Column():
            out = gr.Textbox(label="Number of tokens")
    btn = gr.Button("Run")
    # A click sends the textbox value to count_tokens_from_file and writes
    # whatever it returns into `out`.
    btn.click(fn=count_tokens_from_file, inputs=inp, outputs=out)

# Start serving the interface.
demo.launch()
 
1
  import gradio as gr
2
  from transformers import LlamaTokenizer
3
+ import io
4
  import json
5
 
6
  # Load the tokenizer from the specific folder
7
  tokenizer = LlamaTokenizer.from_pretrained("llama_tokenizer")
8
 
9
def tokenize(prompt, file):
    """Count the tokens in ``prompt``, optionally extended by an uploaded file.

    When ``file`` is given, its text is appended to the prompt after a
    newline. A JSON object contributes the value of its ``"text"`` key
    (nothing when the key is absent); any other content is appended verbatim.

    Args:
        prompt: Text typed by the user; ``None`` is treated as empty.
        file: The uploaded file, or ``None`` when nothing was uploaded. May be
            a path string, raw bytes, or a file-like object.

    Returns:
        The number of tokens produced by the module-level tokenizer, encoded
        without special tokens.
    """
    full_text = prompt or ""
    if file is not None:
        full_text = full_text + "\n" + _extract_text(_read_upload(file))
    return len(tokenizer.encode(full_text, add_special_tokens=False))


def _read_upload(file):
    """Return the decoded text content of an uploaded file.

    NOTE(review): which kind of object the upload component passes (path,
    bytes, or temp-file wrapper) presumably depends on the Gradio version and
    the ``type`` argument of ``gr.File`` -- confirm against the installed
    release. All three shapes are accepted here.
    """
    if isinstance(file, bytes):
        # "utf-8-sig" also accepts plain UTF-8 and drops a leading BOM, which
        # json.loads would otherwise reject.
        return file.decode("utf-8-sig")

    # Prefer re-opening by path: a wrapper object may expose the real file
    # location through ``.name`` without its own handle holding the data.
    path = file if isinstance(file, str) else getattr(file, "name", None)
    if isinstance(path, str):
        try:
            with open(path, encoding="utf-8-sig") as handle:
                return handle.read()
        except OSError:
            # ``.name`` was not a usable path; fall back to the handle itself
            # when there is one.
            if not hasattr(file, "read"):
                raise

    content = file.read()
    if isinstance(content, bytes):
        content = content.decode("utf-8-sig")
    return content


def _extract_text(raw):
    """Return the text to tokenize from the raw content of an uploaded file."""
    try:
        data = json.loads(raw)
    except ValueError:
        # Not JSON (json.JSONDecodeError is a ValueError): use the text as is.
        return raw
    if isinstance(data, dict):
        value = data.get("text", "")
        if value is None:
            return ""
        return value if isinstance(value, str) else str(value)
    # Valid JSON that is not an object (list, number, ...) has no "text" key.
    return raw
23
 
 
 
 
 
 
 
 
 
 
# Page layout: a heading, a row holding the prompt box, the file upload and
# the result column, and a button that triggers the count.
with gr.Blocks() as demo:
    gr.Markdown(
    """
    # Token Counter for LLAMA
    """)
    with gr.Row():
        inp = gr.Textbox(placeholder="Enter prompt")
        # NOTE(review): whether type="file" is accepted presumably depends on
        # the installed Gradio version -- confirm before upgrading.
        file_input = gr.File(label="Upload File", type="file")
        with gr.Column():
            out = gr.Textbox(label="Number of tokens")
    btn = gr.Button("Run")
    # A click passes (prompt, uploaded file) to tokenize and writes its
    # return value into `out`.
    btn.click(fn=tokenize, inputs=[inp, file_input], outputs=out)

# Start serving the interface.
demo.launch()