seanpedrickcase committed on
Commit 63067b7 · 1 Parent(s): 0a726d1

Adding some compatibility with Zero GPU spaces

Files changed (3)
  1. app.py +6 -0
  2. requirements.txt +2 -1
  3. tools/chatfuncs.py +2 -1
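For context: on ZeroGPU Spaces the GPU is only attached while a function decorated with spaces.GPU is executing, which is why this commit imports the spaces package and decorates the model-loading and inference entry points. A minimal, self-contained sketch of that pattern (the function name here is illustrative, not from this repository):

import spaces
import torch

# On ZeroGPU hardware, torch.cuda.is_available() may report False at import
# time because the GPU is only attached while a @spaces.GPU-decorated
# function is running.

@spaces.GPU  # requests a GPU for the duration of this call
def device_inside_gpu_call() -> str:
    # Inside the decorated function, CUDA should be visible if a GPU was granted.
    return "cuda" if torch.cuda.is_available() else "cpu"

if __name__ == "__main__":
    print("Device inside decorated call:", device_inside_gpu_call())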
app.py CHANGED
@@ -1,5 +1,6 @@
 import os
 import socket
+import spaces
 from tools.helper_functions import ensure_output_folder_exists, add_folder_to_path, put_columns_in_df, get_connection_params, output_folder, get_or_create_env_var, reveal_feedback_buttons, wipe_logs, model_full_names, view_table, empty_output_vars_extract_topics, empty_output_vars_summarise, RUN_LOCAL_MODEL
 from tools.aws_functions import upload_file_to_s3, RUN_AWS_FUNCTIONS
 from tools.llm_api_call import extract_topics, load_in_data_file, load_in_previous_data_files, sample_reference_table_summaries, summarise_output_topics, batch_size_default
@@ -20,6 +21,7 @@ today_rev = datetime.now().strftime("%Y%m%d")
 ensure_output_folder_exists()
 
 host_name = socket.gethostname()
+print("host_name is:", host_name)
 
 access_logs_data_folder = 'logs/' + today_rev + '/' + host_name + '/'
 feedback_data_folder = 'feedback/' + today_rev + '/' + host_name + '/'
@@ -35,11 +37,15 @@ print("Is a CUDA device available on this computer?", backends.cudnn.enabled)
 if cuda.is_available():
     torch_device = "cuda"
     os.system("nvidia-smi")
+elif "spaces" in host_name:
+    torch_device = "cuda"
 else:
     torch_device = "cpu"
 
 print("Device used is: ", torch_device)
 
+
+@spaces.GPU
 def load_model(local_model_type:str, gpu_layers:int, max_context_length:int, gpu_config:llama_cpp_init_config_gpu=chatf.gpu_config, cpu_config:llama_cpp_init_config_cpu=chatf.cpu_config, torch_device:str=chatf.torch_device):
     '''
     Load in a model from Hugging Face hub via the transformers package, or using llama_cpp_python by downloading a GGUF file from Huggingface Hub.
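Taken together, the device-selection change amounts to the following standalone logic; it assumes, as the commit does, that ZeroGPU Space hostnames contain the string "spaces":

import os
import socket
from torch import cuda

host_name = socket.gethostname()

# On ZeroGPU Spaces, cuda.is_available() can be False at startup because the
# GPU is only attached inside @spaces.GPU-decorated calls, so the hostname
# check acts as a fallback that still selects "cuda".
if cuda.is_available():
    torch_device = "cuda"
    os.system("nvidia-smi")
elif "spaces" in host_name:
    torch_device = "cuda"
else:
    torch_device = "cpu"

print("Device used is:", torch_device)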
requirements.txt CHANGED
@@ -1,5 +1,6 @@
 pandas==2.2.3
 gradio==5.8.0
+spaces==0.31.0
 boto3==1.35.71
 pyarrow==18.1.0
 openpyxl==3.1.3
@@ -10,7 +11,7 @@ google-generativeai==0.8.3
 html5lib==1.1
 beautifulsoup4==4.12.3
 rapidfuzz==3.10.1
-torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121
+torch==2.4.1 --extra-index-url https://download.pytorch.org/whl/cu121
 llama-cpp-python==0.2.90 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
 transformers==4.47.0
 numpy==1.26.4
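The pin moves torch from 2.5.1 back to 2.4.1 and switches --index-url to --extra-index-url, which keeps PyPI as the primary index (so packages such as spaces, which are not mirrored on the PyTorch index, still resolve) while still pulling the cu121 wheel; the 2.4.1 pin is presumably for compatibility with the ZeroGPU runtime. A quick sanity check after installation (the expected values are assumptions based on the pins above):

import torch

print(torch.__version__)          # expected to end in "+cu121" with the pinned wheel
print(torch.version.cuda)         # expected: "12.1"
print(torch.cuda.is_available())  # may be False on ZeroGPU outside @spaces.GPU calls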
tools/chatfuncs.py CHANGED
@@ -5,6 +5,7 @@ from typing import TypeVar
 import torch.cuda
 from transformers import pipeline
 import time
+import spaces
 
 torch.cuda.empty_cache()
 
@@ -132,7 +133,7 @@ def llama_cpp_streaming(history, full_prompt, temperature=temperature):
     print(f'Tokens per secound: {NUM_TOKENS/time_generate}')
     print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
 
-
+@spaces.GPU
 def call_llama_cpp_model(formatted_string, gen_config):
     """
     Calls your generation model with parameters from the CtransGenGenerationConfig object.
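Note that spaces.GPU can also be called with arguments: the spaces package accepts a duration keyword to request a longer GPU window than the default, which can matter for slow llama.cpp generations. A hedged sketch (the function below is illustrative, not part of this repository):

import spaces

@spaces.GPU(duration=120)  # assumption: request a longer window for long-running generations
def long_generation(formatted_string: str) -> str:
    # Placeholder body; in the app this would wrap call_llama_cpp_model.
    return formatted_string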