Spaces:

teragron
/

llama_tokenizer

Runtime error

teragron committed on Oct 8, 2023

Commit

a2cc32e

1 Parent(s): 04a6c29

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,34 +1,37 @@
 import gradio as gr
 from transformers import LlamaTokenizer
 import json
 # Load the tokenizer from the specific folder
 tokenizer = LlamaTokenizer.from_pretrained("llama_tokenizer")
def tokenize(input_text):
    """Return the number of tokens ``input_text`` encodes to.

    The text is encoded with the module-level ``tokenizer`` with
    ``add_special_tokens=False``, so only tokens produced from the text
    itself are counted.
    """
    return len(tokenizer.encode(input_text, add_special_tokens=False))
def count_tokens_from_file(file_path):
    """Count the tokens in the text file located at ``file_path``.

    Returns the token count on success. If anything goes wrong (the path
    cannot be opened, the contents cannot be decoded, tokenization fails),
    the exception's message is returned as a string instead of being
    raised, so the caller can show it in place of the count.
    """
    try:
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default encoding, which varies between systems.
        with open(file_path, 'r', encoding='utf-8') as file:
            input_text = file.read()
        # Tokenize after the file has been closed; only the text is needed.
        return tokenize(input_text)
    except Exception as e:
        # Deliberately broad: this function is wired directly to a UI
        # callback, and the message is displayed rather than propagated.
        return str(e)
 with gr.Blocks() as demo:
     gr.Markdown(
     """
     # Token Counter for LLAMA
     """)
     with gr.Row():
-        inp = gr.Textbox(placeholder="Enter prompt or file path")
         with gr.Column():
             out = gr.Textbox(label="Number of tokens")
             btn = gr.Button("Run")
-    btn.click(fn=count_tokens_from_file, inputs=inp, outputs=out)
 demo.launch()

 import gradio as gr
 from transformers import LlamaTokenizer
+import io
 import json
 # Load the tokenizer from the specific folder
 tokenizer = LlamaTokenizer.from_pretrained("llama_tokenizer")
def _read_upload(file):
    """Return the uploaded file's contents as text, whatever form it arrives in."""
    if isinstance(file, (bytes, bytearray)):
        return bytes(file).decode("utf-8")
    # A plain string is taken to be a path; otherwise use the object's
    # ``name`` attribute when it is a path string.
    # NOTE(review): with gr.File(type="file") Gradio 3.x appears to pass a
    # temp-file wrapper whose ``name`` is the uploaded copy's path while the
    # handle itself is not positioned on that data, so reading by path is the
    # reliable route -- confirm against the Gradio version this Space pins.
    path = file if isinstance(file, str) else getattr(file, "name", None)
    if isinstance(path, str):
        with open(path, encoding="utf-8") as handle:
            return handle.read()
    # Last resort: a file-like object without a usable path.
    content = file.read()
    return content.decode("utf-8") if isinstance(content, bytes) else content


def _text_from_upload(raw):
    """Pick the text to count out of the raw upload contents."""
    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        # Not JSON at all: count the file's text as-is.
        return raw
    if not isinstance(data, dict):
        # Valid JSON but not an object, so there is no "text" field to read.
        return raw
    value = data.get("text", "")
    return value if isinstance(value, str) else str(value)


def tokenize(prompt, file):
    """Count the tokens in ``prompt``, optionally extended by an uploaded file.

    Args:
        prompt: Text typed by the user. ``None`` is treated as empty.
        file: The uploaded file as delivered by the UI, or ``None`` when
            nothing was uploaded. A path string, raw bytes, or an object
            exposing ``name``/``read`` are all accepted.

    Returns:
        The number of tokens produced by the module-level ``tokenizer``
        (special tokens excluded) for the prompt alone, or for the prompt,
        a newline, and the file's text when a file is given.

    When the upload is a JSON object, only its ``"text"`` field is used
    (missing field counts as empty). Any other content is counted verbatim.
    """
    full_text = prompt or ""
    if file is not None:
        # Combine the prompt with the contents of the file
        full_text = full_text + "\n" + _text_from_upload(_read_upload(file))
    tokens = tokenizer.encode(full_text, add_special_tokens=False)
    return len(tokens)
 with gr.Blocks() as demo:
     gr.Markdown(
     """
     # Token Counter for LLAMA
     """)
     with gr.Row():
+        inp = gr.Textbox(placeholder="Enter prompt")
+        file_input = gr.File(label="Upload File", type="file")
         with gr.Column():
             out = gr.Textbox(label="Number of tokens")
             btn = gr.Button("Run")
+    btn.click(fn=tokenize, inputs=[inp, file_input], outputs=out)
 demo.launch()