teragron committed on
Commit
9940f62
·
1 Parent(s): a2cc32e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -13
app.py CHANGED
@@ -6,16 +6,12 @@ import json
6
  # Load the tokenizer from the specific folder
7
  tokenizer = LlamaTokenizer.from_pretrained("llama_tokenizer")
8
 
9
- def tokenize(prompt, file):
10
- if file is not None:
11
- # Read the contents of the uploaded file
12
- content = file.read()
13
- # Assuming the uploaded file is in JSON format, you can modify this as needed.
14
- data = json.loads(content)
15
- # Combine the prompt with the contents of the file
16
- full_text = prompt + "\n" + data.get("text", "")
17
  else:
18
- full_text = prompt
19
 
20
  tokens = tokenizer.encode(full_text, add_special_tokens=False)
21
  num_tokens = len(tokens)
@@ -27,11 +23,11 @@ with gr.Blocks() as demo:
27
  # Token Counter for LLAMA
28
  """)
29
  with gr.Row():
30
- inp = gr.Textbox(placeholder="Enter prompt")
31
- file_input = gr.File(label="Upload File", type="file")
32
  with gr.Column():
33
  out = gr.Textbox(label="Number of tokens")
34
- btn = gr.Button("Run")
35
- btn.click(fn=tokenize, inputs=[inp, file_input], outputs=out)
36
 
37
  demo.launch()
 
6
  # Load the tokenizer from the specific folder
7
  tokenizer = LlamaTokenizer.from_pretrained("llama_tokenizer")
8
 
9
+ def tokenize(input_text, file=None):
10
+ if file:
11
+ with open(file, encoding="utf-8") as f:
12
+ full_text = "".join(f.readlines())
 
 
 
 
13
  else:
14
+ full_text = input_text
15
 
16
  tokens = tokenizer.encode(full_text, add_special_tokens=False)
17
  num_tokens = len(tokens)
 
23
  # Token Counter for LLAMA
24
  """)
25
  with gr.Row():
26
+ text_input = gr.Textbox(placeholder="Enter prompt")
27
+ file_input = gr.File(label="Upload File", type="filepath")
28
  with gr.Column():
29
  out = gr.Textbox(label="Number of tokens")
30
+ run_btn = gr.Button("Run")
31
+ run_btn.click(fn=tokenize, inputs=[text_input, file_input], outputs=out)
32
 
33
  demo.launch()