Commit: "Leetmonkey In Action via Inference" — changed file: app.py
@@ -34,7 +34,7 @@ model_path = download_model(MODEL_NAME)

(before)
34      llm = Llama(
35          model_path=model_path,
36          n_ctx=1024,
37  -       n_threads=            [removed line; value truncated in page capture]
38          n_gpu_layers=-1,  # Use all available GPU layers
39          verbose=False,
40          n_batch=512,
@@ -48,11 +48,11 @@ train_dataset = dataset["train"]

(before)
48
49      # Generation parameters
50      generation_kwargs = {
51  -       "max_tokens":         [removed line; value truncated in page capture]
52          "stop": ["```", "### Instruction:", "### Response:"],
53          "echo": False,
54  -       "temperature": 0.     [removed line; value truncated in page capture]
55  -       "top_k":              [removed line; value truncated in page capture]
56          "top_p": 0.9,
57          "repeat_penalty": 1.1
58      }
|
(after)
34      llm = Llama(
35          model_path=model_path,
36          n_ctx=1024,
37  +       n_threads=8,
38          n_gpu_layers=-1,  # Use all available GPU layers
39          verbose=False,
40          n_batch=512,
|
|
(after)
48
49      # Generation parameters
50      generation_kwargs = {
51  +       "max_tokens": 512,
52          "stop": ["```", "### Instruction:", "### Response:"],
53          "echo": False,
54  +       "temperature": 0.05,
55  +       "top_k": 10,
56          "top_p": 0.9,
57          "repeat_penalty": 1.1
58      }
|