Spaces: Running on Zero

seanpedrickcase committed · Commit 63067b7 · Parent(s): 0a726d1

Adding some compatibility with Zero GPU spaces

Files changed:
- app.py +6 -0
- requirements.txt +2 -1
- tools/chatfuncs.py +2 -1
app.py CHANGED

@@ -1,5 +1,6 @@
 import os
 import socket
+import spaces
 from tools.helper_functions import ensure_output_folder_exists, add_folder_to_path, put_columns_in_df, get_connection_params, output_folder, get_or_create_env_var, reveal_feedback_buttons, wipe_logs, model_full_names, view_table, empty_output_vars_extract_topics, empty_output_vars_summarise, RUN_LOCAL_MODEL
 from tools.aws_functions import upload_file_to_s3, RUN_AWS_FUNCTIONS
 from tools.llm_api_call import extract_topics, load_in_data_file, load_in_previous_data_files, sample_reference_table_summaries, summarise_output_topics, batch_size_default

@@ -20,6 +21,7 @@ today_rev = datetime.now().strftime("%Y%m%d")
 ensure_output_folder_exists()

 host_name = socket.gethostname()
+print("host_name is:", host_name)

 access_logs_data_folder = 'logs/' + today_rev + '/' + host_name + '/'
 feedback_data_folder = 'feedback/' + today_rev + '/' + host_name + '/'

@@ -35,11 +37,15 @@ print("Is a CUDA device available on this computer?", backends.cudnn.enabled)
 if cuda.is_available():
     torch_device = "cuda"
     os.system("nvidia-smi")
+elif "spaces" in host_name:
+    torch_device = "cuda"
 else:
     torch_device = "cpu"

 print("Device used is: ", torch_device)

+
+@spaces.GPU
 def load_model(local_model_type:str, gpu_layers:int, max_context_length:int, gpu_config:llama_cpp_init_config_gpu=chatf.gpu_config, cpu_config:llama_cpp_init_config_cpu=chatf.cpu_config, torch_device:str=chatf.torch_device):
     '''
     Load in a model from Hugging Face hub via the transformers package, or using llama_cpp_python by downloading a GGUF file from Huggingface Hub.
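A note on the @spaces.GPU decorator introduced above: on ZeroGPU Spaces a GPU is attached only while a function decorated with spaces.GPU is executing, which is why the model-loading entry point gains the decorator in this commit. Below is a minimal sketch of that pattern, assuming the Hugging Face spaces package is installed; the function name and body are illustrative only and are not taken from this repo.

import spaces  # Hugging Face `spaces` package, provides the ZeroGPU decorator
import torch

@spaces.GPU  # GPU is allocated only for the duration of this call on ZeroGPU hardware
def run_inference(prompt: str) -> str:
    # Inside the decorated call, CUDA should be available on a ZeroGPU Space;
    # the CPU fallback keeps this sketch runnable elsewhere.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"ran on {device}: {prompt}"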
requirements.txt CHANGED

@@ -1,5 +1,6 @@
 pandas==2.2.3
 gradio==5.8.0
+spaces==0.31.0
 boto3==1.35.71
 pyarrow==18.1.0
 openpyxl==3.1.3

@@ -10,7 +11,7 @@ google-generativeai==0.8.3
 html5lib==1.1
 beautifulsoup4==4.12.3
 rapidfuzz==3.10.1
-torch==2.
+torch==2.4.1 --extra-index-url https://download.pytorch.org/whl/cu121
 llama-cpp-python==0.2.90 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
 transformers==4.47.0
 numpy==1.26.4
tools/chatfuncs.py CHANGED

@@ -5,6 +5,7 @@ from typing import TypeVar
 import torch.cuda
 from transformers import pipeline
 import time
+import spaces

 torch.cuda.empty_cache()

@@ -132,7 +133,7 @@ def llama_cpp_streaming(history, full_prompt, temperature=temperature):
     print(f'Tokens per secound: {NUM_TOKENS/time_generate}')
     print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')

-
+@spaces.GPU
 def call_llama_cpp_model(formatted_string, gen_config):
     """
     Calls your generation model with parameters from the CtransGenGenerationConfig object.
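Follow-up on the call_llama_cpp_model change: the same decorator also accepts a duration argument for calls that may outlive the default ZeroGPU allocation window. A hedged sketch follows; the duration value and function name are examples and are not part of this commit.

import spaces

@spaces.GPU(duration=120)  # request up to roughly 120 seconds of GPU time per call
def call_llama_cpp_model_long(formatted_string, gen_config):
    # Hypothetical longer-running variant; the real call_llama_cpp_model in
    # tools/chatfuncs.py uses the bare @spaces.GPU form shown in the diff above.
    ...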