sugiv committed on
Commit
d6ab42e
1 Parent(s): ae69077

Leetmonkey In Action via Inference

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -33,11 +33,11 @@ def download_model(model_name):
33
  model_path = download_model(MODEL_NAME)
34
  llm = Llama(
35
  model_path=model_path,
36
- n_ctx=1024,
37
  n_threads=8,
38
- n_gpu_layers=-1, # Use all available GPU layers
39
  verbose=False,
40
- n_batch=512,
41
  mlock=True
42
  )
43
  logger.info("8-bit model loaded successfully")
@@ -48,11 +48,11 @@ train_dataset = dataset["train"]
48
 
49
  # Generation parameters
50
  generation_kwargs = {
51
- "max_tokens": 512,
52
  "stop": ["```", "### Instruction:", "### Response:"],
53
  "echo": False,
54
- "temperature": 0.05,
55
- "top_k": 10,
56
  "top_p": 0.9,
57
  "repeat_penalty": 1.1
58
  }
 
33
  model_path = download_model(MODEL_NAME)
34
  llm = Llama(
35
  model_path=model_path,
36
+ n_ctx=512,
37
  n_threads=8,
38
+ n_gpu_layers=1, # Offload only one layer to the GPU (-1 would offload all layers)
39
  verbose=False,
40
+ n_batch=1024,
41
  mlock=True
42
  )
43
  logger.info("8-bit model loaded successfully")
 
48
 
49
  # Generation parameters
50
  generation_kwargs = {
51
+ "max_tokens": 256,
52
  "stop": ["```", "### Instruction:", "### Response:"],
53
  "echo": False,
54
+ "temperature": 0.01,
55
+ "top_k": 5,
56
  "top_p": 0.9,
57
  "repeat_penalty": 1.1
58
  }