Spaces:
Running
Running
Elijahbodden
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -77,8 +77,7 @@ def respond(
|
|
77 |
temperature,
|
78 |
lp_start,
|
79 |
lp_decay,
|
80 |
-
|
81 |
-
mirostat_tau,
|
82 |
frequency_penalty,
|
83 |
presence_penalty,
|
84 |
max_tokens
|
@@ -101,10 +100,10 @@ def respond(
|
|
101 |
temperature=temperature,
|
102 |
stream=True,
|
103 |
stop=["<|im_end|>"],
|
104 |
-
|
105 |
-
mirostat_tau=mirostat_tau,
|
106 |
-
mirostat_eta=mirostat_eta,
|
107 |
max_tokens=max_tokens,
|
|
|
|
|
108 |
frequency_penalty=frequency_penalty,
|
109 |
presence_penalty=presence_penalty,
|
110 |
logits_processor=lambda ids, logits: custom_lp_logits_processor(ids, logits, lp_start, lp_decay, len(convo))
|
@@ -132,8 +131,7 @@ demo = gr.ChatInterface(
|
|
132 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.8, step=0.1, label="Temperature", info="How chaotic should the model be?"),
|
133 |
gr.Slider(minimum=0, maximum=512, value=32, step=1, label="Length penalty start", info='When should the model start being more likely to shut up?'),
|
134 |
gr.Slider(minimum=0.5, maximum=1.5, value=1.02, step=0.01, label="Length penalty decay factor", info='How fast should that stop likelihood increase?'),
|
135 |
-
gr.Slider(minimum=0.0, maximum=
|
136 |
-
gr.Slider(minimum=0.0, maximum=10.0, value=3.0, step=0.5, label="Mirostat tau", info="Lower number keeps hallucinations to a minimum"),
|
137 |
gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Frequency penalty", info='"Don\'repeat yourself"'),
|
138 |
gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Presence penalty", info='"Use lots of diverse words"'),
|
139 |
gr.Slider(minimum=1, maximum=1024, value=1024, step=1, label="Max new tokens", info="How many words can the model generate at most?"),
|
|
|
77 |
temperature,
|
78 |
lp_start,
|
79 |
lp_decay,
|
80 |
+
min_p,
|
|
|
81 |
frequency_penalty,
|
82 |
presence_penalty,
|
83 |
max_tokens
|
|
|
100 |
temperature=temperature,
|
101 |
stream=True,
|
102 |
stop=["<|im_end|>"],
|
103 |
+
min_p=min_p,
|
|
|
|
|
104 |
max_tokens=max_tokens,
|
105 |
+
# Disable top-p pruning
|
106 |
+
top_k=100000000,
|
107 |
frequency_penalty=frequency_penalty,
|
108 |
presence_penalty=presence_penalty,
|
109 |
logits_processor=lambda ids, logits: custom_lp_logits_processor(ids, logits, lp_start, lp_decay, len(convo))
|
|
|
131 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.8, step=0.1, label="Temperature", info="How chaotic should the model be?"),
|
132 |
gr.Slider(minimum=0, maximum=512, value=32, step=1, label="Length penalty start", info='When should the model start being more likely to shut up?'),
|
133 |
gr.Slider(minimum=0.5, maximum=1.5, value=1.02, step=0.01, label="Length penalty decay factor", info='How fast should that stop likelihood increase?'),
|
134 |
+
gr.Slider(minimum=0.0, maximum=10.0, value=3.0, step=0.5, label="Min_p", info="Lower values make it more random (ratio between lowest-probability and highest-probability tokens)"),
|
|
|
135 |
gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Frequency penalty", info='"Don\'repeat yourself"'),
|
136 |
gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Presence penalty", info='"Use lots of diverse words"'),
|
137 |
gr.Slider(minimum=1, maximum=1024, value=1024, step=1, label="Max new tokens", info="How many words can the model generate at most?"),
|