lukestanley committed · e327a9e · 1 Parent(s): e01e28e

Update default GPU layer, temperature values
utils.py CHANGED

@@ -19,12 +19,12 @@ from llama_cpp import Llama, LlamaGrammar, json_schema_to_gbnf
 URL = "http://localhost:5834/v1/chat/completions"
 in_memory_llm = None
 
-N_GPU_LAYERS = env.get("N_GPU_LAYERS",
+N_GPU_LAYERS = env.get("N_GPU_LAYERS", -1) # Default to -1, which means use all layers if available
 CONTEXT_SIZE = int(env.get("CONTEXT_SIZE", 4096))
 LLM_MODEL_PATH = env.get("LLM_MODEL_PATH", None)
 USE_HTTP_SERVER = env.get("USE_HTTP_SERVER", "false").lower() == "true"
 MAX_TOKENS = int(env.get("MAX_TOKENS", 1000))
-TEMPERATURE = float(env.get("TEMPERATURE", 0.
+TEMPERATURE = float(env.get("TEMPERATURE", 0.3))
 
 if LLM_MODEL_PATH and len(LLM_MODEL_PATH) > 0:
     print(f"Using local model from {LLM_MODEL_PATH}")
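
For context, a minimal sketch of how these env-driven defaults would plausibly feed into llama-cpp-python. Only the config names, the llama_cpp import, and the model-path check appear in the diff; the load_llm helper and the completion call are illustrative assumptions, not code from this repo. (Note the diff leaves N_GPU_LAYERS uncast, so it arrives as a string whenever the env var is actually set; the sketch casts it to int.)

from os import environ as env

from llama_cpp import Llama

# Same defaults as the commit above; int() added so an env override still works.
N_GPU_LAYERS = int(env.get("N_GPU_LAYERS", -1))
CONTEXT_SIZE = int(env.get("CONTEXT_SIZE", 4096))
LLM_MODEL_PATH = env.get("LLM_MODEL_PATH", None)
MAX_TOKENS = int(env.get("MAX_TOKENS", 1000))
TEMPERATURE = float(env.get("TEMPERATURE", 0.3))

def load_llm(model_path: str) -> Llama:
    # n_gpu_layers=-1 asks llama.cpp to offload every layer to the GPU;
    # 0 would keep inference entirely on the CPU.
    return Llama(
        model_path=model_path,
        n_gpu_layers=N_GPU_LAYERS,
        n_ctx=CONTEXT_SIZE,
    )

if LLM_MODEL_PATH and len(LLM_MODEL_PATH) > 0:
    llm = load_llm(LLM_MODEL_PATH)
    result = llm.create_chat_completion(
        messages=[{"role": "user", "content": "Hello"}],
        temperature=TEMPERATURE,  # 0.3 keeps output mostly deterministic
        max_tokens=MAX_TOKENS,
    )
    print(result["choices"][0]["message"]["content"])

With this layout the new defaults remain overridable per run, e.g. by exporting TEMPERATURE=0.7 or N_GPU_LAYERS=0 before launching the app.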