sugiv committed on
Commit
70f3c4a
1 Parent(s): 14e48d1

Reverting to max one-minute setting

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -35,9 +35,9 @@ llm = Llama(
35
  model_path=model_path,
36
  n_ctx=1024,
37
  n_threads=8,
38
- n_gpu_layers=1, # Use all available GPU layers
39
  verbose=False,
40
- n_batch=1024,
41
  mlock=True
42
  )
43
  logger.info("8-bit model loaded successfully")
@@ -48,11 +48,11 @@ train_dataset = dataset["train"]
48
 
49
  # Generation parameters
50
  generation_kwargs = {
51
- "max_tokens": 256,
52
  "stop": ["```", "### Instruction:", "### Response:"],
53
  "echo": False,
54
- "temperature": 0.01,
55
- "top_k": 5,
56
  "top_p": 0.9,
57
  "repeat_penalty": 1.1
58
  }
 
35
  model_path=model_path,
36
  n_ctx=1024,
37
  n_threads=8,
38
+ n_gpu_layers=-1, # Use all available GPU layers
39
  verbose=False,
40
+ n_batch=512,
41
  mlock=True
42
  )
43
  logger.info("8-bit model loaded successfully")
 
48
 
49
  # Generation parameters
50
  generation_kwargs = {
51
+ "max_tokens": 512,
52
  "stop": ["```", "### Instruction:", "### Response:"],
53
  "echo": False,
54
+ "temperature": 0.05,
55
+ "top_k": 10,
56
  "top_p": 0.9,
57
  "repeat_penalty": 1.1
58
  }