sugiv committed on
Commit
5080cee
·
1 Parent(s): 8846773

Leetmonkey In Action via Inference

Browse files
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -33,10 +33,12 @@ def download_model(model_name):
33
  model_path = download_model(MODEL_NAME)
34
  llm = Llama(
35
  model_path=model_path,
36
- n_ctx=2048,
37
  n_threads=4,
38
  n_gpu_layers=-1, # Use all available GPU layers
39
- verbose=False
 
 
40
  )
41
  logger.info("8-bit model loaded successfully")
42
 
@@ -46,12 +48,12 @@ train_dataset = dataset["train"]
46
 
47
  # Generation parameters
48
  generation_kwargs = {
49
- "max_tokens": 2048,
50
  "stop": ["```", "### Instruction:", "### Response:"],
51
  "echo": False,
52
- "temperature": 0.2,
53
- "top_k": 50,
54
- "top_p": 0.95,
55
  "repeat_penalty": 1.1
56
  }
57
 
 
33
  model_path = download_model(MODEL_NAME)
34
  llm = Llama(
35
  model_path=model_path,
36
+ n_ctx=1024,
37
  n_threads=4,
38
  n_gpu_layers=-1, # Use all available GPU layers
39
+ verbose=False,
40
+ n_batch=512,
41
+ mlock=True
42
  )
43
  logger.info("8-bit model loaded successfully")
44
 
 
48
 
49
  # Generation parameters
50
  generation_kwargs = {
51
+ "max_tokens": 1024,
52
  "stop": ["```", "### Instruction:", "### Response:"],
53
  "echo": False,
54
+ "temperature": 0.1,
55
+ "top_k": 20,
56
+ "top_p": 0.9,
57
  "repeat_penalty": 1.1
58
  }
59