lukestanley committed
Commit 88e6118 • 1 Parent(s): 358cd20
Add n_gpu_layers parameter to Llama initialization

utils.py CHANGED
@@ -35,7 +35,7 @@ else:
 
 if in_memory_llm is None and USE_HTTP_SERVER is False:
     print("Loading model into memory. If you didn't want this, set the USE_HTTP_SERVER environment variable to 'true'.")
-    in_memory_llm = Llama(model_path=LLM_MODEL_PATH, n_ctx=4096)
+    in_memory_llm = Llama(model_path=LLM_MODEL_PATH, n_ctx=4096, n_gpu_layers=20)
 
 def llm_streaming(
     prompt: str, pydantic_model_class, return_pydantic_object=False
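
For context, the added argument asks llama-cpp-python to offload 20 of the model's layers to the GPU (n_gpu_layers=0 keeps inference on the CPU; -1 offloads every layer). A minimal sketch of making that value configurable rather than hardcoded, in the same style as the existing USE_HTTP_SERVER check; the N_GPU_LAYERS environment variable is an assumption for illustration, not part of this commit:

# Sketch only: N_GPU_LAYERS is a hypothetical environment variable,
# not something this commit introduces. LLM_MODEL_PATH is the same
# constant already defined earlier in utils.py.
import os

from llama_cpp import Llama

# Default to the commit's value of 20; 0 means CPU-only, -1 offloads all layers.
n_gpu_layers = int(os.environ.get("N_GPU_LAYERS", "20"))

in_memory_llm = Llama(
    model_path=LLM_MODEL_PATH,
    n_ctx=4096,
    n_gpu_layers=n_gpu_layers,
)

With this shape, a Space with no GPU could set N_GPU_LAYERS=0 while a GPU-backed deployment sets it to -1, without editing utils.py.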