Spaces:

PartAI
/

Dorna-Llama3-8B-Instruct

Running on L4

tabedini committed on Jul 20

Commit

a4ba217

•

1 Parent(s): 7b824e3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,6 +10,10 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import time
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
@@ -54,6 +58,9 @@ langfuse = Langfuse(
 )
 def execution_time_calculator(start_time, log=True):
     delta = time.time() - start_time
     if log:
@@ -123,6 +130,7 @@ def generate(
         temperature=temperature,
         num_beams=1,
         repetition_penalty=repetition_penalty,
     )
     start_time = time.time()
@@ -227,3 +235,4 @@ with gr.Blocks(css=custom_css, fill_height=False) as demo:
 if __name__ == "__main__":
     demo.queue(max_size=20).launch()

 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import time
+from utils import load_list_from_json
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 )
+REJECTED_VOCAB = load_list_from_json("rejected_vocab_extended.json")
 def execution_time_calculator(start_time, log=True):
     delta = time.time() - start_time
     if log:
         temperature=temperature,
         num_beams=1,
         repetition_penalty=repetition_penalty,
+        bad_words_ids=REJECTED_VOCAB,
     )
     start_time = time.time()
 if __name__ == "__main__":
     demo.queue(max_size=20).launch()