DanielHafezi committed on
Commit
9f300f4
·
verified ·
1 Parent(s): 0b6a9e8

Upload deploy.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. deploy.json +13 -4
deploy.json CHANGED
@@ -1,14 +1,23 @@
1
  {
2
  "parameters": {
3
- "max_input_length": 1024,
4
  "max_total_tokens": 2048,
 
 
 
 
 
5
  "max_batch_size": 1,
6
- "max_concurrent_requests": 1
7
  },
8
  "hardware": {
 
9
  "accelerator": "gpu",
10
- "use_flash_attention": false
 
 
11
  },
12
  "framework_type": "pytorch",
13
- "trust_remote_code": true
 
 
14
  }
 
1
  {
2
  "parameters": {
 
3
  "max_total_tokens": 2048,
4
+ "max_input_length": 1024,
5
+ "max_batch_total_tokens": 8192,
6
+ "max_concurrent_requests": 1,
7
+ "max_best_of": 1,
8
+ "max_stop_sequences": 4,
9
  "max_batch_size": 1,
10
+ "waiting_served_ratio": 1.2
11
  },
12
  "hardware": {
13
+ "task_type": "text-generation",
14
  "accelerator": "gpu",
15
+ "num_gpus": 1,
16
+ "gpu_memory_gb": 24,
17
+ "distributed_setup": false
18
  },
19
  "framework_type": "pytorch",
20
+ "torch_compile": false,
21
+ "trust_remote_code": true,
22
+ "disable_custom_kernels": true
23
  }