teragron committed on
Commit
9940f62
·
1 Parent(s): a2cc32e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -13
app.py CHANGED
@@ -6,16 +6,12 @@ import json
6
  # Load the tokenizer from the specific folder
7
  tokenizer = LlamaTokenizer.from_pretrained("llama_tokenizer")
8
 
9
- def tokenize(prompt, file):
10
- if file is not None:
11
- # Read the contents of the uploaded file
12
- content = file.read()
13
- # Assuming the uploaded file is in JSON format, you can modify this as needed.
14
- data = json.loads(content)
15
- # Combine the prompt with the contents of the file
16
- full_text = prompt + "\n" + data.get("text", "")
17
  else:
18
- full_text = prompt
19
 
20
  tokens = tokenizer.encode(full_text, add_special_tokens=False)
21
  num_tokens = len(tokens)
@@ -27,11 +23,11 @@ with gr.Blocks() as demo:
27
  # Token Counter for LLAMA
28
  """)
29
  with gr.Row():
30
- inp = gr.Textbox(placeholder="Enter prompt")
31
- file_input = gr.File(label="Upload File", type="file")
32
  with gr.Column():
33
  out = gr.Textbox(label="Number of tokens")
34
- btn = gr.Button("Run")
35
- btn.click(fn=tokenize, inputs=[inp, file_input], outputs=out)
36
 
37
  demo.launch()
 
6
  # Load the tokenizer from the specific folder
7
  tokenizer = LlamaTokenizer.from_pretrained("llama_tokenizer")
8
 
9
+ def tokenize(input_text, file=None):
10
+ if file:
11
+ with open(file, encoding="utf-8") as f:
12
+ full_text = "".join(f.readlines())
 
 
 
 
13
  else:
14
+ full_text = input_text
15
 
16
  tokens = tokenizer.encode(full_text, add_special_tokens=False)
17
  num_tokens = len(tokens)
 
23
  # Token Counter for LLAMA
24
  """)
25
  with gr.Row():
26
+ text_input = gr.Textbox(placeholder="Enter prompt")
27
+ file_input = gr.File(label="Upload File", type="filepath")
28
  with gr.Column():
29
  out = gr.Textbox(label="Number of tokens")
30
+ run_btn = gr.Button("Run")
31
+ run_btn.click(fn=tokenize, inputs=[text_input, file_input], outputs=out)
32
 
33
  demo.launch()