AFischer1985 committed on
Commit
f41f088
1 Parent(s): 66886a6

Added history

Files changed (1)
  1. run.py +80 -16
run.py CHANGED
@@ -2,7 +2,7 @@
 # Title: Gradio Interface to LLM-chatbot (for recommending AI) with RAG-functionality and ChromaDB on HF-Hub
 # Author: Andreas Fischer
 # Date: December 30th, 2023
-# Last update: February 27th, 2024
+# Last update: May 27th, 2024
 ##############################################################################################################
 
 
@@ -57,14 +57,53 @@ print(collection.count())
 
 # Model
 #-------
+onPrem=False
+myModel="mistralai/Mixtral-8x7B-Instruct-v0.1"
+if(onPrem==False):
+    modelPath=myModel
+    from huggingface_hub import InferenceClient
+    import gradio as gr
+    client = InferenceClient(
+        model=modelPath,
+        #token="hf_..."
+    )
+else:
+    import os
+    import requests
+    import subprocess
+    #modelPath="/home/af/gguf/models/c4ai-command-r-v01-Q4_0.gguf"
+    #modelPath="/home/af/gguf/models/Discolm_german_7b_v1.Q4_0.gguf"
+    modelPath="/home/af/gguf/models/Mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
+    if(os.path.exists(modelPath)==False):
+        #url="https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-GGUF/resolve/main/discolm_german_7b_v1.Q4_0.gguf?download=true"
+        url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
+        response = requests.get(url)
+        with open("./Mixtral-8x7b-instruct.gguf", mode="wb") as file:
+            file.write(response.content)
+        print("Model downloaded")
+        modelPath="./Mixtral-8x7b-instruct.gguf"
+    print(modelPath)
+    n="20"
+    if("Mixtral-8x7b-instruct" in modelPath): n="0" # mixtral seems to cause problems here...
+    command = ["python3", "-m", "llama_cpp.server", "--model", modelPath, "--host", "0.0.0.0", "--port", "2600", "--n_threads", "8", "--n_gpu_layers", n]
+    subprocess.Popen(command)
+    print("Server ready!")
 
-from huggingface_hub import InferenceClient
-import gradio as gr
-modelPath="mistralai/Mixtral-8x7B-Instruct-v0.1"
-client = InferenceClient(
-    modelPath
-    #"mistralai/Mistral-7B-Instruct-v0.1"
-)
+
+# Check template
+#----------------
+if(False):
+    from transformers import AutoTokenizer
+    #mod="mistralai/Mixtral-8x22B-Instruct-v0.1"
+    #mod="mistralai/Mixtral-8x7b-instruct-v0.1"
+    mod="VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct"
+    tok=AutoTokenizer.from_pretrained(mod) #,token="hf_...")
+    cha=[{"role":"system","content":"A"},{"role":"user","content":"B"},{"role":"assistant","content":"C"}]
+    res=tok.apply_chat_template(cha)
+    print(tok.decode(res))
+    cha=[{"role":"user","content":"U1"},{"role":"assistant","content":"A1"},{"role":"user","content":"U2"},{"role":"assistant","content":"A2"}]
+    res=tok.apply_chat_template(cha)
+    print(tok.decode(res))
 
 
 # Gradio-GUI
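Note on the on-prem branch above: llama_cpp.server exposes an OpenAI-compatible REST API, so once the subprocess prints "Server ready!" the model on port 2600 can be smoke-tested with a plain HTTP request. The following minimal sketch is not part of the commit, and the prompt string is a hypothetical Mixtral-formatted example:

import requests

payload = {
    "prompt": "<s> [INST] Nenne drei KI-Tools für Textgenerierung. [/INST]",  # Mixtral-style prompt
    "max_tokens": 100,
    "temperature": 0.7,
}
# llama-cpp-python's server serves OpenAI-style routes such as /v1/completions
r = requests.post("http://localhost:2600/v1/completions", json=payload, timeout=120)
print(r.json()["choices"][0]["text"])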
@@ -72,19 +111,30 @@ client = InferenceClient(
 
 import gradio as gr
 import json
+import re
 
-def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None, historylimit=4, removeHTML=False):
+def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None, historylimit=4, removeHTML=True):
     startOfString=""
     if zeichenlimit is None: zeichenlimit=1000000000 # :-)
     template0=" [INST]{system}\n [/INST] </s>"
     template1=" [INST] {message} [/INST]"
     template2=" {response}</s>"
+    if("command-r" in modelPath): # https://huggingface.co/CohereForAI/c4ai-command-r-v01
+        ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
+        template0="<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|> {system}<|END_OF_TURN_TOKEN|>"
+        template1="<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{message}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
+        template2="{response}<|END_OF_TURN_TOKEN|>"
     if("Gemma-" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
         template0="<start_of_turn>user{system}</end_of_turn>"
         template1="<start_of_turn>user{message}</end_of_turn><start_of_turn>model"
-        template2="{response}</end_of_turn>"
-    if("Mixtral-8x7b-instruct" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
+        template2="{response}</end_of_turn>"
+    if("Mixtral-8x22B-Instruct" in modelPath): # AutoTokenizer: <s>[INST] U1[/INST] A1</s>[INST] U2[/INST] A2</s>
         startOfString="<s>"
+        template0="[INST]{system}\n [/INST] </s>"
+        template1="[INST] {message}[/INST]"
+        template2=" {response}</s>"
+    if("Mixtral-8x7b-instruct" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
+        startOfString="<s>" # AutoTokenizer: <s> [INST] U1 [/INST]A1</s> [INST] U2 [/INST]A2</s>
         template0=" [INST]{system}\n [/INST] </s>"
         template1=" [INST] {message} [/INST]"
         template2=" {response}</s>"
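The template0/template1/template2 triple above drives prompt assembly. extend_prompt's full body lies outside this diff's context, but assuming it formats the system prompt with template0 and then alternates template1/template2 over the history before appending the current message, the Mixtral branch composes like this (illustrative sketch, not code from run.py):

startOfString = "<s>"
template0 = " [INST]{system}\n [/INST] </s>"
template1 = " [INST] {message} [/INST]"
template2 = " {response}</s>"

system = "Du bist ein hilfreiches Assistenzsystem."
history = [("U1", "A1")]  # earlier (user, assistant) turns
prompt = template0.format(system=system)
for user_msg, bot_msg in history:
    prompt += template1.format(message=user_msg) + template2.format(response=bot_msg)
prompt += template1.format(message="U2")  # current message
print(startOfString + prompt)
# -> <s> [INST]Du bist ...\n [/INST] </s> [INST] U1 [/INST] A1</s> [INST] U2 [/INST]

This approximates the AutoTokenizer reference output quoted in the Mixtral comments above.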
@@ -100,7 +150,11 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
     if(("Discolm_german_7b" in modelPath) or ("SauerkrautLM-7b-HerO" in modelPath)): # https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
         template0="<|im_start|>system\n{system}<|im_end|>\n"
         template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
-        template2="{response}<|im_end|>\n"
+        template2="{response}<|im_end|>\n"
+    if("Llama-3-SauerkrautLM-8b-Instruct" in modelPath): # https://huggingface.co/VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct
+        template0="<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system}<|eot_id|>"
+        template1="<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
+        template2="{response}<|eot_id|>\n"
     if("WizardLM-13B-V1.2" in modelPath): # https://huggingface.co/WizardLM/WizardLM-13B-V1.2
         template0="{system} " #<s>
         template1="USER: {message} ASSISTANT: "
@@ -128,8 +182,9 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
     return startOfString+prompt
 
 
+
 def response(
-    prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
+    message, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
 ):
     temperature = float(temperature)
     if temperature < 1e-2: temperature = 1e-2
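Renaming the first parameter of response() from prompt to message matches the contract of gr.ChatInterface, which calls its chat function as fn(message, history). A minimal self-contained sketch of that contract (the commit's own GUI wiring is outside this diff):

import gradio as gr

def response(message, history):
    # stand-in for run.py's streaming response(); history holds the earlier turns
    return "Echo: " + message

demo = gr.ChatInterface(response)
#demo.launch()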
@@ -144,7 +199,7 @@ def response(
     )
     addon=""
     results=collection.query(
-        query_texts=[prompt],
+        query_texts=[message],
         n_results=2,
         #where={"source": "google-docs"}
         #where_document={"$contains":"search_string"}
@@ -157,9 +212,18 @@ def response(
     print(combination)
     if(len(results)>1):
         addon=" Bitte berücksichtige bei deiner Antwort ggf. folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n"+"\n".join(results)
-    system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem, das zu jedem Anliegen möglichst geeignete KI-Tools empfiehlt."+addon+"\n\nUser-Anliegen:"
+    system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem, das zu jedem Anliegen möglichst geeignete KI-Tools empfiehlt." #+addon #+"\n\nUser-Anliegen:"
     #body={"prompt":system+"### Instruktion:\n"+message+"\n\n### Antwort:","max_tokens":500, "echo":"False","stream":"True"} #e.g. SauerkrautLM
-    formatted_prompt = extend_prompt(system+"\n"+prompt, None) #history)
+    #formatted_prompt = extend_prompt(system+"\n"+prompt, None) #history)
+    formatted_prompt=extend_prompt(
+        message,         # current message of the user
+        history,         # complete history
+        system,          # system prompt
+        addon,           # RAG-component added to the system prompt
+        None,            # fictive first words of the AI (neither displayed nor stored)
+        historylimit=4,  # number of past messages to consider for response to current message
+        removeHTML=True  # remove HTML-components from history (to prevent bugs with Markdown)
+    )
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
     output = ""
     for response in stream:
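For reference on the RAG lookup in the last hunk: chromadb's collection.query returns a dict of per-query lists, so the code that later joins results with "\n".join() must first reduce it to a flat list of documents (that reduction happens in lines not shown in this diff). A self-contained sketch with a hypothetical collection:

import chromadb

client = chromadb.Client()  # in-memory instance; run.py uses its own collection
collection = client.get_or_create_collection("demo")
collection.add(documents=["Tool A erzeugt Texte.", "Tool B erzeugt Bilder."], ids=["1", "2"])

results = collection.query(query_texts=["Welches Tool erzeugt Texte?"], n_results=2)
# result shape: {"ids": [[...]], "documents": [[...]], "distances": [[...]], ...}
docs = results["documents"][0]  # documents for the first (and only) query text
print("\n".join(docs))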
 