{ "parameters": { "max_total_tokens": 2048, "max_input_length": 1024, "max_batch_total_tokens": 8192, "max_concurrent_requests": 1, "max_best_of": 1, "max_stop_sequences": 4, "max_batch_size": 1, "waiting_served_ratio": 1.2 }, "hardware": { "task_type": "text-generation", "accelerator": "gpu", "num_gpus": 1, "gpu_memory_gb": 24, "distributed_setup": false }, "framework_type": "pytorch", "torch_compile": false, "trust_remote_code": true, "disable_custom_kernels": true }