lukestanley committed on
Commit
88e6118
1 Parent(s): 358cd20

Add n_gpu_layers parameter to Llama initialization

Browse files
Files changed (1) hide show
  1. utils.py +1 -1
utils.py CHANGED
@@ -35,7 +35,7 @@ else:
35
 
36
  if in_memory_llm is None and USE_HTTP_SERVER is False:
37
  print("Loading model into memory. If you didn't want this, set the USE_HTTP_SERVER environment variable to 'true'.")
38
- in_memory_llm = Llama(model_path=LLM_MODEL_PATH, n_ctx=4096)
39
 
40
  def llm_streaming(
41
  prompt: str, pydantic_model_class, return_pydantic_object=False
 
35
 
36
  if in_memory_llm is None and USE_HTTP_SERVER is False:
37
  print("Loading model into memory. If you didn't want this, set the USE_HTTP_SERVER environment variable to 'true'.")
38
+ in_memory_llm = Llama(model_path=LLM_MODEL_PATH, n_ctx=4096, n_gpu_layers=20)
39
 
40
  def llm_streaming(
41
  prompt: str, pydantic_model_class, return_pydantic_object=False