Spaces:
Running
on
L4
Running
on
L4
Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,10 @@ import torch
|
|
10 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
11 |
import time
|
12 |
|
|
|
|
|
|
|
|
|
13 |
MAX_MAX_NEW_TOKENS = 2048
|
14 |
DEFAULT_MAX_NEW_TOKENS = 1024
|
15 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
@@ -54,6 +58,9 @@ langfuse = Langfuse(
|
|
54 |
)
|
55 |
|
56 |
|
|
|
|
|
|
|
57 |
def execution_time_calculator(start_time, log=True):
|
58 |
delta = time.time() - start_time
|
59 |
if log:
|
@@ -123,6 +130,7 @@ def generate(
|
|
123 |
temperature=temperature,
|
124 |
num_beams=1,
|
125 |
repetition_penalty=repetition_penalty,
|
|
|
126 |
)
|
127 |
|
128 |
start_time = time.time()
|
@@ -227,3 +235,4 @@ with gr.Blocks(css=custom_css, fill_height=False) as demo:
|
|
227 |
if __name__ == "__main__":
|
228 |
demo.queue(max_size=20).launch()
|
229 |
|
|
|
|
10 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
11 |
import time
|
12 |
|
13 |
+
from utils import load_list_from_json
|
14 |
+
|
15 |
+
|
16 |
+
|
17 |
MAX_MAX_NEW_TOKENS = 2048
|
18 |
DEFAULT_MAX_NEW_TOKENS = 1024
|
19 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
|
|
58 |
)
|
59 |
|
60 |
|
61 |
+
REJECTED_VOCAB = load_list_from_json("rejected_vocab_extended.json")
|
62 |
+
|
63 |
+
|
64 |
def execution_time_calculator(start_time, log=True):
|
65 |
delta = time.time() - start_time
|
66 |
if log:
|
|
|
130 |
temperature=temperature,
|
131 |
num_beams=1,
|
132 |
repetition_penalty=repetition_penalty,
|
133 |
+
bad_words_ids=REJECTED_VOCAB,
|
134 |
)
|
135 |
|
136 |
start_time = time.time()
|
|
|
235 |
if __name__ == "__main__":
|
236 |
demo.queue(max_size=20).launch()
|
237 |
|
238 |
+
|