Sean-Case committed on
Commit
114048b
·
1 Parent(s): 9795699

gpu_layers should now update correctly. Added code for creating distribution.

Browse files
Files changed (5) hide show
  1. .gitignore +3 -1
  2. app.py +23 -9
  3. bootstrapper.py +63 -0
  4. chatfuncs/chatfuncs.py +26 -30
  5. requirements.txt +0 -1
.gitignore CHANGED
@@ -1,3 +1,5 @@
1
  *.pyc
2
  *.ipynb
3
- *.pdf
 
 
 
1
  *.pyc
2
  *.ipynb
3
+ *.pdf
4
+ */build
5
+ */dist
app.py CHANGED
@@ -2,6 +2,7 @@
2
 
3
  # +
4
  import os
 
5
 
6
  # Need to overwrite version of gradio present in Huggingface spaces as it doesn't have like buttons/avatars (Oct 2023)
7
  #os.system("pip uninstall -y gradio")
@@ -69,18 +70,31 @@ import chatfuncs.chatfuncs as chatf
69
  chatf.embeddings = load_embeddings(embeddings_name)
70
  chatf.vectorstore = get_faiss_store(faiss_vstore_folder="faiss_embedding",embeddings=globals()["embeddings"])
71
 
 
 
72
 
 
 
 
 
 
 
 
73
 
74
- def load_model(model_type, gpu_layers, CtransInitConfig_gpu=chatf.CtransInitConfig_gpu, CtransInitConfig_cpu=chatf.CtransInitConfig_cpu, torch_device=chatf.torch_device):
75
- print("Loading model")
76
  if model_type == "Orca Mini":
77
- CtransInitConfig_gpu.gpu_layers = gpu_layers
78
- CtransInitConfig_cpu.gpu_layers = gpu_layers
 
 
 
 
 
 
79
 
80
  try:
81
- model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(CtransInitConfig_gpu()))
82
  except:
83
- model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(CtransInitConfig_cpu()))
84
 
85
  tokenizer = []
86
 
@@ -119,10 +133,10 @@ def load_model(model_type, gpu_layers, CtransInitConfig_gpu=chatf.CtransInitConf
119
  # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
120
  model_type = "Orca Mini"
121
 
122
- load_model(model_type, chatf.gpu_layers, chatf.CtransInitConfig_gpu, chatf.CtransInitConfig_cpu, chatf.torch_device)
123
 
124
  model_type = "Flan Alpaca"
125
- load_model(model_type, 0, chatf.CtransInitConfig_gpu, chatf.CtransInitConfig_cpu, chatf.torch_device)
126
 
127
  def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
128
 
@@ -207,7 +221,7 @@ with block:
207
 
208
  with gr.Tab("Advanced features"):
209
  model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca", choices = ["Flan Alpaca", "Orca Mini"])
210
- gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (please don't change if you don't know what you're doing).", value=0, minimum=0, maximum=6, step = 1)
211
 
212
  gr.HTML(
213
  "<center>This app is based on the models Flan Alpaca and Orca Mini. It powered by Gradio, Transformers, Ctransformers, and Langchain.</a></center>"
 
2
 
3
  # +
4
  import os
5
+ import copy
6
 
7
  # Need to overwrite version of gradio present in Huggingface spaces as it doesn't have like buttons/avatars (Oct 2023)
8
  #os.system("pip uninstall -y gradio")
 
70
  chatf.embeddings = load_embeddings(embeddings_name)
71
  chatf.vectorstore = get_faiss_store(faiss_vstore_folder="faiss_embedding",embeddings=globals()["embeddings"])
72
 
73
+ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_device=None):
74
+ print("Loading model")
75
 
76
+ # Default values inside the function
77
+ if gpu_config is None:
78
+ gpu_config = chatf.gpu_config
79
+ if cpu_config is None:
80
+ cpu_config = chatf.cpu_config
81
+ if torch_device is None:
82
+ torch_device = chatf.torch_device
83
 
 
 
84
  if model_type == "Orca Mini":
85
+
86
+ gpu_config.update_gpu(gpu_layers)
87
+ cpu_config.update_gpu(gpu_layers)
88
+
89
+ print("Loading with", cpu_config.gpu_layers, "model layers sent to GPU.")
90
+
91
+ print(vars(gpu_config))
92
+ print(vars(cpu_config))
93
 
94
  try:
95
+ model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
96
  except:
97
+ model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(gpu_config)) #**asdict(CtransRunConfig_gpu())
98
 
99
  tokenizer = []
100
 
 
133
  # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
134
  model_type = "Orca Mini"
135
 
136
+ load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
137
 
138
  model_type = "Flan Alpaca"
139
+ load_model(model_type, 0, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
140
 
141
  def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
142
 
 
221
 
222
  with gr.Tab("Advanced features"):
223
  model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca", choices = ["Flan Alpaca", "Orca Mini"])
224
+ gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (please don't change if you don't know what you're doing).", value=0, minimum=0, maximum=6, step = 1, scale = 0)
225
 
226
  gr.HTML(
227
  "<center>This app is based on the models Flan Alpaca and Orca Mini. It powered by Gradio, Transformers, Ctransformers, and Langchain.</a></center>"
bootstrapper.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import subprocess
4
+ import logging
5
+
6
+ # Set up logging
7
+ logging.basicConfig(filename='bootstrapper.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
8
+
9
+ ENV_DIR = "app_env"
10
+
11
def create_virtual_env():
    """Create a virtual environment at ``ENV_DIR`` unless that path exists.

    When ``ENV_DIR`` already exists, only the initial "checking" log record
    is written and nothing is created.
    """
    logging.info("Checking for virtual environment at {}".format(ENV_DIR))

    if os.path.exists(ENV_DIR):
        return

    logging.info("Virtual environment not found. Creating a new one.")

    # Imported here (as in the original) so virtualenv is only required on
    # the path that actually has to create an environment.
    import virtualenv

    if hasattr(virtualenv, "cli_run"):
        # virtualenv >= 20 dropped create_environment(); cli_run() is the
        # documented programmatic entry point and takes CLI-style arguments.
        virtualenv.cli_run([ENV_DIR])
    else:
        # Legacy API, only present in virtualenv < 20.
        virtualenv.create_environment(ENV_DIR)
20
+
21
def install_dependencies():
    """Install the packages listed in ``requirements.txt``.

    Runs ``pip install -r requirements.txt`` with the pip executable located
    inside ``ENV_DIR``. Any failure is logged and swallowed so the caller can
    continue (best-effort install).
    """
    logging.info("Installing dependencies.")

    # Ensure the requirements.txt file is bundled with your application
    requirements_path = "requirements.txt"

    # Virtual environments keep their executables in "Scripts" on Windows
    # and in "bin" on every other platform.
    scripts_dir = "Scripts" if os.name == "nt" else "bin"
    pip_path = os.path.join(ENV_DIR, scripts_dir, "pip")

    try:
        subprocess.check_call([pip_path, "install", "-r", requirements_path])
    except Exception as e:
        # Covers both a non-zero pip exit status and a missing pip executable.
        logging.error("Error installing dependencies: {}".format(e))
    else:
        logging.info("Dependencies installed successfully.")
35
+
36
def main():
    """Make sure dependencies are importable, then run ``app.py``.

    Probes for ``langchain``; if the import fails, ``install_dependencies()``
    is called. ``app.py`` (resolved relative to the current working
    directory) is then executed as a script. Errors raised while running it
    are logged rather than propagated.
    """
    # Virtual environment creation via create_virtual_env() is currently
    # disabled here.

    try:
        import langchain  # noqa: F401 -- imported only to probe availability
    except ImportError:
        logging.warning("Some dependencies are missing. Attempting to install.")
        install_dependencies()

    # Local import so this function does not rely on a file-level import.
    import runpy

    # Run the main application. runpy.run_path executes the script in its own
    # fresh module namespace. A bare exec() inside this function would run it
    # with separate globals and locals, so functions defined in app.py could
    # not see app.py's own top-level names. run_name="__main__" keeps any
    # `if __name__ == "__main__"` guard in app.py behaving as for a script.
    try:
        runpy.run_path('app.py', run_name='__main__')
        logging.info("Main application executed successfully.")
    except Exception as e:
        logging.error("Error executing main application: {}".format(e))
56
+
57
if __name__ == "__main__":
    # Script entry point: bracket the run with start/finish log records and
    # log (instead of propagating) any exception that escapes main().
    logging.info("Bootstrapper started.")
    try:
        main()
        logging.info("Bootstrapper finished.")
    except Exception as err:
        failure_message = "An error occurred in the bootstrapper: {}".format(err)
        logging.error(failure_message)
chatfuncs/chatfuncs.py CHANGED
@@ -95,38 +95,34 @@ context_length:int = 4096
95
  sample = True
96
 
97
 
98
- @dataclass
99
  class CtransInitConfig_gpu:
100
- temperature: float = temperature
101
- top_k: int = top_k
102
- top_p: float = top_p
103
- repetition_penalty: float = repetition_penalty
104
- last_n_tokens: int = last_n_tokens
105
- max_new_tokens: int = max_new_tokens
106
- seed: int = seed
107
- reset: bool = reset
108
- stream: bool = stream
109
- threads: int = threads
110
- batch_size:int = batch_size
111
- context_length:int = context_length
112
- gpu_layers:int = gpu_layers
113
- #stop: list[str] = field(default_factory=lambda: [stop_string])
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
- class CtransInitConfig_cpu:
116
- temperature: float = temperature
117
- top_k: int = top_k
118
- top_p: float = top_p
119
- repetition_penalty: float = repetition_penalty
120
- last_n_tokens: int = last_n_tokens
121
- max_new_tokens: int = max_new_tokens
122
- seed: int = seed
123
- reset: bool = reset
124
- stream: bool = stream
125
- threads: int = threads
126
- batch_size:int = batch_size
127
- context_length:int = context_length
128
- gpu_layers:int = 0
129
- #stop: list[str] = field(default_factory=lambda: [stop_string])
130
 
131
  @dataclass
132
  class CtransGenGenerationConfig:
 
95
  sample = True
96
 
97
 
 
98
class CtransInitConfig_gpu:
    """Model-loading settings held as plain instance attributes.

    Every constructor argument is stored under an attribute of the same
    name, in signature order, so ``vars(config)`` yields a plain dict of the
    settings (app.py unpacks that dict as keyword arguments when loading the
    model).
    """

    def __init__(self, temperature=0.1, top_k=3, top_p=1, repetition_penalty=1.05, last_n_tokens=64, max_new_tokens=125, seed=42, reset=False, stream=True, threads=None, batch_size=1024, context_length=4096, gpu_layers=None):
        self.temperature = temperature
        self.top_k = top_k
        self.top_p = top_p
        self.repetition_penalty = repetition_penalty
        self.last_n_tokens = last_n_tokens
        self.max_new_tokens = max_new_tokens
        self.seed = seed
        self.reset = reset
        self.stream = stream
        self.threads = threads
        self.batch_size = batch_size
        self.context_length = context_length
        self.gpu_layers = gpu_layers

    def __repr__(self):
        settings = ", ".join(
            "{}={!r}".format(name, value) for name, value in vars(self).items()
        )
        return "{}({})".format(type(self).__name__, settings)

    def update_gpu(self, new_value):
        """Set ``gpu_layers`` to ``new_value`` on this instance."""
        self.gpu_layers = new_value


class CtransInitConfig_cpu(CtransInitConfig_gpu):
    """Same settings as ``CtransInitConfig_gpu`` but starting with ``gpu_layers = 0``.

    Keyword overrides are forwarded to the parent constructor; ``gpu_layers``
    is always reset to 0 afterwards. ``update_gpu`` can still change it later.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.gpu_layers = 0


# Shared module-level instances; app.py reads these as chatf.gpu_config and
# chatf.cpu_config.
gpu_config = CtransInitConfig_gpu()
cpu_config = CtransInitConfig_cpu()
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  @dataclass
128
  class CtransGenGenerationConfig:
requirements.txt CHANGED
@@ -6,7 +6,6 @@ transformers
6
  torch
7
  sentence_transformers
8
  faiss-cpu
9
- bitsandbytes
10
  pypdf
11
  python-docx
12
  ctransformers[cuda]
 
6
  torch
7
  sentence_transformers
8
  faiss-cpu
 
9
  pypdf
10
  python-docx
11
  ctransformers[cuda]