Elijahbodden committed
Commit f8f9857 · verified · 1 Parent(s): 3e9b037

Update app.py

Files changed (1)
  1. app.py +74 -27
app.py CHANGED
@@ -1,48 +1,88 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
  import os
- os.system("pip install transformers torch")
- from transformers import AutoTokenizer, TextIteratorStreamer, GenerationConfig, AutoModelForCausalLM
- import torch

- model_id = "Elijahbodden/eliGPTv1.2"

  tokenizer = AutoTokenizer.from_pretrained(model_id)
- sober_model = AutoModelForCausalLM.from_pretrained(model_id)

- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


  def respond(
      message,
      history: list[tuple[str, str]],
-     system_message,
      max_tokens,
      temperature,
-     top_p,
  ):
-     messages = [{"role": "system", "content": system_message}]
-
      for val in history:
          if val[0]:
-             messages.append({"role": "user", "content": val[0]})
          if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})

-     messages.append({"role": "user", "content": message})

      response = ""

-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
          stream=True,
-         temperature=temperature,
-         top_p=top_p,
      ):
-         token = message.choices[0].delta.content

          response += token
          yield response
@@ -57,11 +97,18 @@ demo = gr.ChatInterface(
          gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
          gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
          gr.Slider(
-             minimum=0.1,
              maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
          ),
      ],
  )
 
+ # ADD DISCLAIMERS
+ # AND LOGGING
+
+
  import gradio as gr
  import os
+ os.system("apt install libopenblas-dev")
+ os.system("make clean && LLAMA_OPENBLAS=1 make")
+ os.system('CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python transformers')
+
+ from llama_cpp import Llama
+ from transformers import AutoTokenizer
+ model_id = "Elijahbodden/eliGPTv1.1"
 
+ # MODEL
+ model = Llama.from_pretrained(
+     repo_id=model_id,
+     filename="eliGPTv1.1-unsloth.Q5_K_M.gguf",
+     verbose=True,
+     n_threads=2,
+     n_threads_batch=2,
+     n_ctx=8192,
+ )
+
+
+ # TOKENIZER AND TEMPLATE
  tokenizer = AutoTokenizer.from_pretrained(model_id)

+ sys_prompt = """SUMMARY - ELIJAH:
+ Age: 16
+ Interests: space flight, cybernetics, consciousness, philosophy, psychonautism, biotech, AI
+ Likes: thinking and learning, building stuff, interesting conversations, red hot chili peppers and techno, humanism
+ Traits: incredibly intelligent, funny, interesting, caffeine fiend, very ambitious, militant atheist, self-taught/homeschooled, casual
+ Aspirations: creating transhumanist utopia, understanding the universe more, becoming smarter and better\n"""
+
+ your_name = "elijah"
+ custom_template = \
+     f"{{% if {'sys_prompt' in locals()} %}}"\
+     "{{ '<|im_start|>system\n" + sys_prompt + "<|im_end|>\n' }}"\
+     "{% endif %}"\
+     "{% for message in messages %}"\
+     "{% if message['role'] == 'user' %}"\
+     "{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n'}}"\
+     "{% elif message['role'] == 'assistant' %}"\
+     "{{'<|im_start|>" + your_name + "\n' + message['content'] + eos_token + '<|im_end|>\n' }}"\
+     "{% else %}"\
+     "{{ '<|im_start|>system\n' + message['content'] + '<|im_end|>\n' }}"\
+     "{% endif %}"\
+     "{% endfor %}"\
+     "{{ '<|im_start|>" + your_name + "\n' }}"
+
+ tokenizer.chat_template = custom_template

  def respond(
      message,
      history: list[tuple[str, str]],
      max_tokens,
      temperature,
+     mirostat_tau,
+     mirostat_eta,
  ):
+     messages = []
      for val in history:
          if val[0]:
+             messages.append({"role": "user", "content": val[0]})
          if val[1]:
+             messages.append({"role": "assistant", "content": val[1]})

+     messages.append({"role": "user", "content": message})

      response = ""

+     convo = tokenizer.apply_chat_template(messages, tokenize=False)
+
+     for message in model.create_completion(
+         convo,
+         temperature=0.75,
          stream=True,
+         stop=["<|im_end|>"],
+         mirostat_mode=1,
+         mirostat_tau=mirostat_tau,
+         mirostat_eta=mirostat_eta,
+         max_tokens=128
      ):
+         token = message["choices"][0]["text"]

          response += token
          yield response
 
          gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
          gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
          gr.Slider(
+             minimum=0.0,
+             maximum=10.0,
+             value=3.0,
+             step=0.5,
+             label="Mirostat tau",
+         ),
+         gr.Slider(
+             minimum=0.0,
              maximum=1.0,
+             value=0.1,
+             step=0.01,
+             label="Mirostat eta",
          ),
      ],
  )
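
For reference, a minimal standalone sketch (not part of the commit) of how the two new sliders reach llama-cpp-python's sampler: mirostat_mode=1 switches create_completion from top-p style sampling to Mirostat, where mirostat_tau sets the target "surprise" of the output and mirostat_eta is the controller's learning rate. The repo id and GGUF filename are copied from app.py above; the hard-coded ChatML-style prompt is only a stand-in for what tokenizer.apply_chat_template() would produce.

# Illustrative only; assumes the same repo/filename as app.py and a placeholder prompt.
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="Elijahbodden/eliGPTv1.1",
    filename="eliGPTv1.1-unsloth.Q5_K_M.gguf",
    n_ctx=8192,
    verbose=False,
)

prompt = "<|im_start|>user\nhey, what are you up to?<|im_end|>\n<|im_start|>elijah\n"

for chunk in llm.create_completion(
    prompt,
    stream=True,
    stop=["<|im_end|>"],
    mirostat_mode=1,    # 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0
    mirostat_tau=3.0,   # slider default: target entropy of the generated text
    mirostat_eta=0.1,   # slider default: how quickly the sampler adapts toward tau
    max_tokens=128,
):
    # streamed chunks are dicts shaped like completion responses
    print(chunk["choices"][0]["text"], end="", flush=True)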