# TODO: add disclaimers and logging
# Q: why is this model so slow? A: because i'm not made of money

import gradio as gr
import os

# Build llama.cpp and llama-cpp-python with OpenBLAS for faster CPU inference
os.system("apt install -y libopenblas-dev")
os.system("make clean && LLAMA_OPENBLAS=1 make")
os.system('CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python transformers')

from llama_cpp import Llama
from transformers import AutoTokenizer
model_id = "Elijahbodden/eliGPTv1.1"

# MODEL
model = Llama.from_pretrained(
    repo_id=model_id,
    filename="eliGPTv1.1-unsloth.Q5_K_M.gguf",
    verbose=True,
    n_threads=2,
    n_threads_batch=2,
    n_ctx=8192,
)
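# Q5_K_M is llama.cpp's 5-bit "medium" k-quant, a reasonable quality/size
# tradeoff for CPU inference; n_ctx=8192 caps the combined prompt +
# generation window, and two threads keep load modest on shared hardware.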


# TOKENIZER AND TEMPLATE
tokenizer = AutoTokenizer.from_pretrained(model_id)

sys_prompt = """SUMMARY - ELIJAH:
Age: 16
Interests: space flight, cybernetics, consciousness, philosophy, psychonautism, biotech, AI
Likes: thinking and learning, building stuff, interesting conversations, red hot chili peppers and techno, humanism
Traits: incredibly intelligent, funny, interesting, caffeine fiend, very ambitious, militant atheist, self-taught/homeschooled, casual
Aspirations: creating transhumanist utopia, understanding the universe more, becoming smarter and better\n"""


your_name = "elijah"
# ChatML-style template. The f-string expression below is evaluated by Python
# while the string is being built, so the template begins with a literal
# "{% if True %}" whenever sys_prompt is defined.
custom_template = (
    f"{{% if {'sys_prompt' in locals()} %}}"
    "{{ '<|im_start|>system\n" + sys_prompt + "<|im_end|>\n' }}"
    "{% endif %}"
    "{% for message in messages %}"
        "{% if message['role'] == 'user' %}"
            "{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n'}}"
        "{% elif message['role'] == 'assistant' %}"
            "{{'<|im_start|>" + your_name + "\n' + message['content'] + eos_token + '<|im_end|>\n' }}"
        "{% else %}"
            "{{ '<|im_start|>system\n' + message['content'] + '<|im_end|>\n' }}"
        "{% endif %}"
    "{% endfor %}"
    "{{ '<|im_start|>" + your_name + "\n' }}"
)

tokenizer.chat_template = custom_template
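# Rendered, a one-message conversation looks roughly like:
#   <|im_start|>system
#   SUMMARY - ELIJAH: ...<|im_end|>
#   <|im_start|>user
#   hey<|im_end|>
#   <|im_start|>elijah
# i.e. the ChatML-style format the model was presumably fine-tuned on.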

presets = {
    "emojis": [{"role": "user", "content": "Wait I like you so much more when you use emojis more, keep doing it πŸ˜‚\n"}, {"role": "assistant", "content": "Ummm, ok, looks like i'll be using more emojis πŸ’€\n"}],
    "none": [],
}
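# Only "none" is used below; set preset = "emojis" inside respond() to seed
# the chat with the example exchange above.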


def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens,
    temperature,
    mirostat_tau,
    mirostat_eta,
):
    preset = "none"
    
    messages = presets[preset].copy()
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    convo = tokenizer.apply_chat_template(messages, tokenize=False)
    print(convo)  # debug: dump the rendered prompt to the logs
    # Stream tokens as they're generated. Mirostat v1 is a feedback sampler
    # that targets a constant perplexity: tau is the target "surprise" and
    # eta is how quickly the sampler adapts toward it.
    for chunk in model.create_completion(
        convo,
        temperature=temperature,
        stream=True,
        stop=["<|im_end|>"],
        mirostat_mode=1,
        mirostat_tau=mirostat_tau,
        mirostat_eta=mirostat_eta,
        max_tokens=max_tokens,
    ):
        token = chunk["choices"][0]["text"]

        response += token
        yield response
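
# Because respond() is a generator, gr.ChatInterface streams the reply,
# replacing the displayed message with each partial `response` yielded.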

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs_accordion="The juicy stuff (settings)",
    css=".bubble-gap {gap: 6px !important}",
    description="The model may take a while if it hasn't run recently or a lot of people are using it",
    title="EliGPT v1.idon'tfuckingknow",
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.0,
            maximum=10.0,
            value=3.0,
            step=0.5,
            label="Mirostat tau",
        ),
        gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.1,
            step=0.01,
            label="Mirostat eta",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()