Elijahbodden committed
Update app.py
app.py CHANGED
@@ -63,12 +63,14 @@ presets = {
 def respond(
     message,
     history: list[tuple[str, str]],
-    max_tokens,
     temperature,
     mirostat_tau,
     mirostat_eta,
     frequency_penalty,
     presence_penalty,
+    lp_start,
+    lp_decay,
+    max_tokens,
     preset
 ):
 
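Note: gr.ChatInterface passes the values of additional_inputs to the handler positionally, after message and history, so the parameter order above has to mirror the widget order defined further down in the file, which is why max_tokens moves below the two new lp_* arguments. A minimal sketch of that contract (the handler name and trimmed slider list are illustrative, not the Space's actual code):

import gradio as gr

def handler(message, history, temperature, lp_start, lp_decay, max_tokens):
    ...  # extra args arrive in the same order as additional_inputs

demo = gr.ChatInterface(
    handler,
    additional_inputs=[
        gr.Slider(0.1, 4.0, value=0.7, label="Temperature"),
        gr.Slider(0, 512, value=10, label="Length penalty start"),
        gr.Slider(0.5, 1.5, value=1.02, label="Length penalty decay factor"),
        gr.Slider(1, 1024, value=256, label="Max new tokens"),
    ],
)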
@@ -83,8 +85,8 @@ def respond(
 
     response = ""
 
-    convo = tokenizer.apply_chat_template(messages, tokenize=False)
-    print(convo)
+    convo = tokenizer.apply_chat_template(messages, tokenize=True)
+    # print(convo)
     for message in model.create_completion(
         convo,
         temperature=0.75,
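Note: with tokenize=False, apply_chat_template returns the formatted prompt as one string, which is why printing it was useful; with tokenize=True it returns the prompt as token ids, which the length penalty added below needs in order to measure the prompt length. A hedged illustration of the difference (the exact return type depends on tokenizer settings):

msgs = [{"role": "user", "content": "hi"}]
as_text = tokenizer.apply_chat_template(msgs, tokenize=False)  # str
as_ids = tokenizer.apply_chat_template(msgs, tokenize=True)    # list[int] by default,
# or a tensor when return_tensors="pt" is passed, which the convo.size()[1]
# call in the next hunk implies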
@@ -96,6 +98,7 @@ def respond(
         max_tokens=128,
         frequency_penalty=frequency_penalty,
         presence_penalty=presence_penalty,
+        logits_processor=[ExponentialDecayLengthPenalty((lp_start, lp_decay), tokenizer.eos_token, convo.size()[1])]
     ):
         token = message["choices"][0]["text"]
 
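Note: transformers ships an ExponentialDecayLengthPenalty logits processor taking the same ((start, decay_factor), eos, prompt_length) arguments; whether this Space imports it or defines its own is not visible in the diff. A minimal sketch of the idea, written as the (input_ids, scores) callable that llama-cpp-python's logits_processor parameter accepts (numpy arrays assumed; not the Space's actual implementation):

class ExponentialDecayLengthPenaltySketch:
    """Raise the EOS logit exponentially once generation passes a start index."""
    def __init__(self, penalty, eos_token_id, prompt_length):
        self.start, self.decay = penalty   # e.g. (lp_start, lp_decay)
        self.eos_token_id = eos_token_id   # integer token id, not the eos string
        self.prompt_length = prompt_length

    def __call__(self, input_ids, scores):
        generated = len(input_ids) - self.prompt_length
        if generated > self.start:
            eos = scores[self.eos_token_id]
            # Add a positive, exponentially growing boost so the EOS logit
            # rises even when it starts out negative.
            scores[self.eos_token_id] = eos + abs(eos) * (self.decay ** (generated - self.start) - 1.0)
        return scores

One thing to watch: scores are indexed by token id, so a processor like this needs tokenizer.eos_token_id (an int) rather than the tokenizer.eos_token string the diff passes.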
@@ -112,7 +115,6 @@ demo = gr.ChatInterface(
     description="The model may take a while if it hasn't run recently or a lot of people are using it",
     title="EliGPT v1.idon'tfuckingknow",
     additional_inputs=[
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens", info="How many words can the model generate?"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature", info="How chaotic should the model be?"),
         gr.Slider(
             minimum=0.0,
@@ -146,6 +148,23 @@ demo = gr.ChatInterface(
             label="Presence penalty",
             info='"Use lots of diverse words"'
         ),
+        gr.Slider(
+            minimum=0,
+            maximum=512,
+            value=10,
+            step=1,
+            label="Length penalty start",
+            info='When should the model start being more likely to shut up?'
+        ),
+        gr.Slider(
+            minimum=0.5,
+            maximum=1.5,
+            value=1.02,
+            step=0.01,
+            label="Length penalty decay factor",
+            info='How fast should the stop likelihood increase?'
+        ),
+        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens", info="How many words can the model generate?"),
         gr.Radio(presets.keys(), label="Preset", info="Gaslight the model into acting a certain way", value="none")
     ],
 )
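For intuition, with the new sliders' default values (start 10, decay 1.02), the EOS boost grows as decay ** (tokens_generated - start), so the push to stop is gentle at first and compounds later. A quick back-of-envelope check:

for n in (10, 50, 100, 200):
    print(n, 1.02 ** max(0, n - 10))
# roughly: 10 -> 1.0, 50 -> 2.2, 100 -> 5.9, 200 -> 43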