Elijahbodden committed · verified
Commit 747e4a2 · Parent(s): f0c3a0c

Update app.py

Files changed (1): app.py (+5 -3)
app.py CHANGED
@@ -76,7 +76,8 @@ def respond(
     response = ""
 
     print(tokenizer.apply_chat_template(messages, tokenize=False))
-    logger.info(tokenizer.apply_chat_template(messages, tokenize=False))
+    with open('/data/log.txt', 'a') as f:
+        print(tokenizer.apply_chat_template(messages, tokenize=False), file=f)
 
     convo = tokenizer.apply_chat_template(messages, tokenize=True)
     for message in model.create_completion(
@@ -97,7 +98,8 @@ def respond(
         response += token
         yield response
     print(response)
-    logger.info("RESP: " + response)
+    with open('/data/log.txt', 'a') as f:
+        print(response, file=f)
 
 ci = gr.ChatInterface(
     respond,
@@ -107,7 +109,7 @@ ci = gr.ChatInterface(
         # ("The model will become slow" is bc this uncaches the prompt and prompt processing is a big part of the generation time)
         gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Min_p", info="Lower values give it more \"personality\""),
         gr.Slider(minimum=0.1, maximum=4.0, value=1.5, step=0.1, label="Temperature", info="How chaotic should the model be?"),
-        gr.Slider(minimum=0, maximum=512, value=64, step=1, label="Length penalty start", info='When should the model start being more likely to shut up?'),
+        gr.Slider(minimum=0, maximum=512, value=10, step=1, label="Length penalty start", info='When should the model start being more likely to shut up?'),
         gr.Slider(minimum=0.5, maximum=1.5, value=1.015, step=0.001, label="Length penalty decay factor", info='How fast should that stop likelihood increase?'),
         gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Frequency penalty", info='"Don\'repeat yourself"'),
         gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Presence penalty", info='"Use lots of diverse words"'),
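The substantive change is the logging: both `logger.info` calls are replaced with direct appends to `/data/log.txt`. On Hugging Face Spaces, `/data` is the mount point for persistent storage, so anything written there survives restarts, while the old logger output only went to the container's ephemeral logs. A tidier way to get the same effect (a sketch of an alternative, not what this commit does) is to keep the stdlib logger but attach a file handler pointed at the persistent volume:

    import logging

    # Sketch: write the existing logger's output to the persistent volume
    # instead of reopening /data/log.txt on every call. Assumes the Space
    # has persistent storage enabled, so /data exists and is writable.
    logger = logging.getLogger("app")
    logger.setLevel(logging.INFO)
    logger.addHandler(logging.FileHandler("/data/log.txt"))  # appends by default

    logger.info("RESP: %s", "example response")  # lands in /data/log.txt

Reopening the file on every message, as the commit does, works too; the handler just avoids the repeated open/close and keeps the call sites unchanged.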
 
76
  response = ""
77
 
78
  print(tokenizer.apply_chat_template(messages, tokenize=False))
79
+ with open('/data/log.txt', 'a') as f:
80
+ print(tokenizer.apply_chat_template(messages, tokenize=False), file=f)
81
 
82
  convo = tokenizer.apply_chat_template(messages, tokenize=True)
83
  for message in model.create_completion(
 
98
  response += token
99
  yield response
100
  print(response)
101
+ with open('/data/log.txt', 'a') as f:
102
+ print(response, file=f)
103
 
104
  ci = gr.ChatInterface(
105
  respond,
 
109
  # ("The model will become slow" is bc this uncaches the prompt and prompt processing is a big part of the generation time)
110
  gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Min_p", info="Lower values give it more \"personality\""),
111
  gr.Slider(minimum=0.1, maximum=4.0, value=1.5, step=0.1, label="Temperature", info="How chaotic should the model be?"),
112
+ gr.Slider(minimum=0, maximum=512, value=10, step=1, label="Length penalty start", info='When should the model start being more likely to shut up?'),
113
  gr.Slider(minimum=0.5, maximum=1.5, value=1.015, step=0.001, label="Length penalty decay factor", info='How fast should that stop likelihood increase?'),
114
  gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Frequency penalty", info='"Don\'repeat yourself"'),
115
  gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Presence penalty", info='"Use lots of diverse words"'),
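The other change lowers the default "Length penalty start" from 64 to 10 tokens, so the nudge toward ending a response kicks in much earlier. The mechanism itself isn't visible in this diff, but going by the two sliders' descriptions, a plausible reading (purely illustrative; the function and its name are hypothetical, not from app.py) is a geometric boost to the stop token once generation passes the threshold:

    # Hypothetical illustration of how the two length-penalty sliders could
    # interact; the app's real logic lives elsewhere in app.py and may differ.
    def eos_weight(tokens_generated: int, penalty_start: int, decay_factor: float) -> float:
        """Multiplier on the stop token's likelihood."""
        if tokens_generated <= penalty_start:
            return 1.0  # below the threshold: no extra pressure to stop
        # Past the threshold, stop pressure compounds once per token.
        return decay_factor ** (tokens_generated - penalty_start)

    # With the default decay factor of 1.015, at 100 generated tokens:
    print(eos_weight(100, 64, 1.015))  # old default start 64 -> ~1.71
    print(eos_weight(100, 10, 1.015))  # new default start 10 -> ~3.82

Under that reading, the new default makes the stop pressure at any given length noticeably stronger, so replies should trend shorter.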