Elijahbodden committed
Update app.py
app.py CHANGED
@@ -63,12 +63,14 @@ presets = {
 def respond(
     message,
     history: list[tuple[str, str]],
-    max_tokens,
     temperature,
     mirostat_tau,
     mirostat_eta,
     frequency_penalty,
     presence_penalty,
+    lp_start,
+    lp_decay,
+    max_tokens,
     preset
 ):
 
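Note: gr.ChatInterface passes the values of additional_inputs to the handler positionally, after message and history, so the parameter order above has to mirror the widget order defined further down in the file, which is why max_tokens moves below the two new lp_* arguments. A minimal sketch of that contract (the handler name and trimmed slider list are illustrative, not the Space's actual code):

import gradio as gr

def handler(message, history, temperature, lp_start, lp_decay, max_tokens):
    ...  # extra args arrive in the same order as additional_inputs

demo = gr.ChatInterface(
    handler,
    additional_inputs=[
        gr.Slider(0.1, 4.0, value=0.7, label="Temperature"),
        gr.Slider(0, 512, value=10, label="Length penalty start"),
        gr.Slider(0.5, 1.5, value=1.02, label="Length penalty decay factor"),
        gr.Slider(1, 1024, value=256, label="Max new tokens"),
    ],
)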
@@ -83,8 +85,8 @@ def respond(
 
     response = ""
 
-    convo = tokenizer.apply_chat_template(messages, tokenize=False)
-    print(convo)
+    convo = tokenizer.apply_chat_template(messages, tokenize=True)
+    # print(convo)
     for message in model.create_completion(
         convo,
         temperature=0.75,
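Note: with tokenize=False, apply_chat_template returns the formatted prompt as one string, which is why printing it was useful; with tokenize=True it returns the prompt as token ids, which the length penalty added below needs in order to measure the prompt length. A hedged illustration of the difference (the exact return type depends on tokenizer settings):

msgs = [{"role": "user", "content": "hi"}]
as_text = tokenizer.apply_chat_template(msgs, tokenize=False)  # str
as_ids = tokenizer.apply_chat_template(msgs, tokenize=True)    # list[int] by default,
# or a tensor when return_tensors="pt" is passed, which the convo.size()[1]
# call in the next hunk implies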
@@ -96,6 +98,7 @@ def respond(
         max_tokens=128,
         frequency_penalty=frequency_penalty,
         presence_penalty=presence_penalty,
+        logits_processor=[ExponentialDecayLengthPenalty((lp_start, lp_decay), tokenizer.eos_token, convo.size()[1])]
     ):
         token = message["choices"][0]["text"]
 
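Note: transformers ships an ExponentialDecayLengthPenalty logits processor taking the same ((start, decay_factor), eos, prompt_length) arguments; whether this Space imports it or defines its own is not visible in the diff. A minimal sketch of the idea, written as the (input_ids, scores) callable that llama-cpp-python's logits_processor parameter accepts (numpy arrays assumed; not the Space's actual implementation):

class ExponentialDecayLengthPenaltySketch:
    """Raise the EOS logit exponentially once generation passes a start index."""
    def __init__(self, penalty, eos_token_id, prompt_length):
        self.start, self.decay = penalty   # e.g. (lp_start, lp_decay)
        self.eos_token_id = eos_token_id   # integer token id, not the eos string
        self.prompt_length = prompt_length

    def __call__(self, input_ids, scores):
        generated = len(input_ids) - self.prompt_length
        if generated > self.start:
            eos = scores[self.eos_token_id]
            # Add a positive, exponentially growing boost so the EOS logit
            # rises even when it starts out negative.
            scores[self.eos_token_id] = eos + abs(eos) * (self.decay ** (generated - self.start) - 1.0)
        return scores

One thing to watch: scores are indexed by token id, so a processor like this needs tokenizer.eos_token_id (an int) rather than the tokenizer.eos_token string the diff passes.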
@@ -112,7 +115,6 @@ demo = gr.ChatInterface(
     description="The model may take a while if it hasn't run recently or a lot of people are using it",
     title="EliGPT v1.idon'tfuckingknow",
     additional_inputs=[
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens", info="How many words can the model generate?"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature", info="How chaotic should the model be?"),
         gr.Slider(
             minimum=0.0,
@@ -146,6 +148,23 @@ demo = gr.ChatInterface(
             label="Presence penalty",
             info='"Use lots of diverse words"'
         ),
+        gr.Slider(
+            minimum=0,
+            maximum=512,
+            value=10,
+            step=1,
+            label="Length penalty start",
+            info='When should the model start being more likely to shut up?'
+        ),
+        gr.Slider(
+            minimum=0.5,
+            maximum=1.5,
+            value=1.02,
+            step=0.01,
+            label="Length penalty decay factor",
+            info='How fast should the stop likelihood increase?'
+        ),
+        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens", info="How many words can the model generate?"),
         gr.Radio(presets.keys(), label="Preset", info="Gaslight the model into acting a certain way", value="none")
     ],
 )
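For intuition, with the new sliders' default values (start 10, decay 1.02), the EOS boost grows as decay ** (tokens_generated - start), so the push to stop is gentle at first and compounds later. A quick back-of-envelope check:

for n in (10, 50, 100, 200):
    print(n, 1.02 ** max(0, n - 10))
# roughly: 10 -> 1.0, 50 -> 2.2, 100 -> 5.9, 200 -> 43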